Update for YAZ 3's new OID system
[idzebra-moved-to-github.git] / index / rpnsearch.c
1 /* $Id: rpnsearch.c,v 1.11 2007-04-16 08:44:32 adam Exp $
2    Copyright (C) 1995-2007
3    Index Data ApS
4
5 This file is part of the Zebra server.
6
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
20
21 */
22
23 #include <stdio.h>
24 #include <assert.h>
25 #ifdef WIN32
26 #include <io.h>
27 #endif
28 #if HAVE_UNISTD_H
29 #include <unistd.h>
30 #endif
31 #include <ctype.h>
32
33 #include <yaz/diagbib1.h>
34 #include "index.h"
35 #include <zebra_xpath.h>
36 #include <attrfind.h>
37 #include <charmap.h>
38 #include <rset.h>
39
/* Log level for the "rpn" log module.  It is resolved lazily: add_isam_p()
   calls yaz_log_module_level("rpn") the first time it runs and then sets
   log_level_set so the lookup is not repeated. */
static int log_level_set = 0;
static int log_level_rpn = 0;

/* NOTE(review): not referenced in the visible part of this file -
   presumably switches off term-set handling; confirm against its users. */
#define TERMSET_DISABLE 1
44
45 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
46 {
47     struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
48     const char **out = zebra_maps_input(p->zm, p->reg_type, from, len, 0);
49 #if 0
50     if (out && *out)
51     {
52         const char *outp = *out;
53         yaz_log(YLOG_LOG, "---");
54         while (*outp)
55         {
56             yaz_log(YLOG_LOG, "%02X", *outp);
57             outp++;
58         }
59     }
60 #endif
61     return out;
62 }
63
64 void rpn_char_map_prepare(struct zebra_register *reg, int reg_type,
65                           struct rpn_char_map_info *map_info)
66 {
67     map_info->zm = reg->zebra_maps;
68     map_info->reg_type = reg_type;
69     dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
70 }
71
72 #define TERM_COUNT        
73        
74 struct grep_info {        
75 #ifdef TERM_COUNT        
76     int *term_no;        
77 #endif        
78     ISAM_P *isam_p_buf;
79     int isam_p_size;        
80     int isam_p_indx;
81     int trunc_max;
82     ZebraHandle zh;
83     int reg_type;
84     ZebraSet termset;
85 };        
86
87 static int add_isam_p(const char *name, const char *info,
88                       struct grep_info *p)
89 {
90     if (!log_level_set)
91     {
92         log_level_rpn = yaz_log_module_level("rpn");
93         log_level_set = 1;
94     }
95     /* we may have to stop this madness.. NOTE: -1 so that if
96        truncmax == trunxlimit we do *not* generate result sets */
97     if (p->isam_p_indx >= p->trunc_max - 1)
98         return 1;
99
100     if (p->isam_p_indx == p->isam_p_size)
101     {
102         ISAM_P *new_isam_p_buf;
103 #ifdef TERM_COUNT        
104         int *new_term_no;        
105 #endif
106         p->isam_p_size = 2*p->isam_p_size + 100;
107         new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
108                                             p->isam_p_size);
109         if (p->isam_p_buf)
110         {
111             memcpy(new_isam_p_buf, p->isam_p_buf,
112                     p->isam_p_indx * sizeof(*p->isam_p_buf));
113             xfree(p->isam_p_buf);
114         }
115         p->isam_p_buf = new_isam_p_buf;
116
117 #ifdef TERM_COUNT
118         new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
119         if (p->term_no)
120         {
121             memcpy(new_term_no, p->isam_p_buf,
122                     p->isam_p_indx * sizeof(*p->term_no));
123             xfree(p->term_no);
124         }
125         p->term_no = new_term_no;
126 #endif
127     }
128     assert(*info == sizeof(*p->isam_p_buf));
129     memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
130
131     if (p->termset)
132     {
133         const char *db;
134         char term_tmp[IT_MAX_WORD];
135         int ord = 0;
136         const char *index_name;
137         int len = key_SU_decode (&ord, (const unsigned char *) name);
138         
139         zebra_term_untrans  (p->zh, p->reg_type, term_tmp, name+len);
140         yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
141         zebraExplain_lookup_ord(p->zh->reg->zei,
142                                 ord, 0 /* index_type */, &db, &index_name);
143         yaz_log(log_level_rpn, "grep:  db=%s index=%s", db, index_name);
144         
145         resultSetAddTerm(p->zh, p->termset, name[len], db,
146                          index_name, term_tmp);
147     }
148     (p->isam_p_indx)++;
149     return 0;
150 }
151
/* grep_handle: callback adapter - casts the opaque client pointer back to
   struct grep_info and hands the hit to add_isam_p(). */
static int grep_handle(char *name, const char *info, void *p)
{
    struct grep_info *grep_info = (struct grep_info *) p;

    return add_isam_p(name, info, grep_info);
}
156
157 static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src,
158                     const char *ct1, const char *ct2, int first)
159 {
160     const char *s1, *s0 = *src;
161     const char **map;
162
163     /* skip white space */
164     while (*s0)
165     {
166         if (ct1 && strchr(ct1, *s0))
167             break;
168         if (ct2 && strchr(ct2, *s0))
169             break;
170         s1 = s0;
171         map = zebra_maps_input(zebra_maps, reg_type, &s1, strlen(s1), first);
172         if (**map != *CHR_SPACE)
173             break;
174         s0 = s1;
175     }
176     *src = s0;
177     return *s0;
178 }
179
180
/* esc_str: render in_buf as a human-readable dump in out_buf.
 * Every input byte becomes "HH:c  " (two hex digits, a colon, the
 * character itself or '?' when it is outside 32..126, two blanks).
 * The dump stops early and ".." is appended once it has grown to within
 * 20 bytes of out_size.
 *  out_size must be greater than 20 (asserted).
 */
static void esc_str(char *out_buf, size_t out_size,
                    const char *in_buf, int in_size)
{
    size_t used = 0;    /* current length of the text in out_buf */
    int idx = 0;

    assert(out_buf);
    assert(in_buf);
    assert(out_size > 20);
    out_buf[0] = '\0';
    while (idx < in_size)
    {
        int byte = in_buf[idx++] & 0xff;
        int shown = (byte >= 32 && byte <= 126) ? byte : '?';

        sprintf(out_buf + used, "%02X:%c  ", byte, shown);
        used += strlen(out_buf + used);
        if (used > out_size-20)
        {
            strcpy(out_buf + used, "..");
            break;
        }
    }
}
206
207 #define REGEX_CHARS " []()|.*+?!"
208
209 /* term_100: handle term, where trunc = none(no operators at all) */
210 static int term_100(ZebraMaps zebra_maps, int reg_type,
211                     const char **src, char *dst, int space_split,
212                     char *dst_term)
213 {
214     const char *s0;
215     const char **map;
216     int i = 0;
217     int j = 0;
218
219     const char *space_start = 0;
220     const char *space_end = 0;
221
222     if (!term_pre(zebra_maps, reg_type, src, NULL, NULL, !space_split))
223         return 0;
224     s0 = *src;
225     while (*s0)
226     {
227         const char *s1 = s0;
228         int q_map_match = 0;
229         map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
230                                 &q_map_match);
231         if (space_split)
232         {
233             if (**map == *CHR_SPACE)
234                 break;
235         }
236         else  /* complete subfield only. */
237         {
238             if (**map == *CHR_SPACE)
239             {   /* save space mapping for later  .. */
240                 space_start = s1;
241                 space_end = s0;
242                 continue;
243             }
244             else if (space_start)
245             {   /* reload last space */
246                 while (space_start < space_end)
247                 {
248                     if (strchr(REGEX_CHARS, *space_start))
249                         dst[i++] = '\\';
250                     dst_term[j++] = *space_start;
251                     dst[i++] = *space_start++;
252                 }
253                 /* and reset */
254                 space_start = space_end = 0;
255             }
256         }
257         /* add non-space char */
258         memcpy(dst_term+j, s1, s0 - s1);
259         j += (s0 - s1);
260         if (!q_map_match)
261         {
262             while (s1 < s0)
263             {
264                 if (strchr(REGEX_CHARS, *s1))
265                     dst[i++] = '\\';
266                 dst[i++] = *s1++;
267             }
268         }
269         else
270         {
271             char tmpbuf[80];
272             esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
273             
274             strcpy(dst + i, map[0]);
275             i += strlen(map[0]);
276         }
277     }
278     dst[i] = '\0';
279     dst_term[j] = '\0';
280     *src = s0;
281     return i;
282 }
283
284 /* term_101: handle term, where trunc = Process # */
285 static int term_101(ZebraMaps zebra_maps, int reg_type,
286                     const char **src, char *dst, int space_split,
287                     char *dst_term)
288 {
289     const char *s0;
290     const char **map;
291     int i = 0;
292     int j = 0;
293
294     if (!term_pre(zebra_maps, reg_type, src, "#", "#", !space_split))
295         return 0;
296     s0 = *src;
297     while (*s0)
298     {
299         if (*s0 == '#')
300         {
301             dst[i++] = '.';
302             dst[i++] = '*';
303             dst_term[j++] = *s0++;
304         }
305         else
306         {
307             const char *s1 = s0;
308             int q_map_match = 0;
309             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
310                                     &q_map_match);
311             if (space_split && **map == *CHR_SPACE)
312                 break;
313
314             /* add non-space char */
315             memcpy(dst_term+j, s1, s0 - s1);
316             j += (s0 - s1);
317             if (!q_map_match)
318             {
319                 while (s1 < s0)
320                 {
321                     if (strchr(REGEX_CHARS, *s1))
322                         dst[i++] = '\\';
323                     dst[i++] = *s1++;
324                 }
325             }
326             else
327             {
328                 char tmpbuf[80];
329                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
330                 
331                 strcpy(dst + i, map[0]);
332                 i += strlen(map[0]);
333             }
334         }
335     }
336     dst[i] = '\0';
337     dst_term[j++] = '\0';
338     *src = s0;
339     return i;
340 }
341
342 /* term_103: handle term, where trunc = re-2 (regular expressions) */
343 static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src,
344                     char *dst, int *errors, int space_split,
345                     char *dst_term)
346 {
347     int i = 0;
348     int j = 0;
349     const char *s0;
350     const char **map;
351
352     if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split))
353         return 0;
354     s0 = *src;
355     if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
356         isdigit(((const unsigned char *)s0)[1]))
357     {
358         *errors = s0[1] - '0';
359         s0 += 3;
360         if (*errors > 3)
361             *errors = 3;
362     }
363     while (*s0)
364     {
365         if (strchr("^\\()[].*+?|-", *s0))
366         {
367             dst_term[j++] = *s0;
368             dst[i++] = *s0++;
369         }
370         else
371         {
372             const char *s1 = s0;
373             int q_map_match = 0;
374             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
375                                     &q_map_match);
376             if (space_split && **map == *CHR_SPACE)
377                 break;
378
379             /* add non-space char */
380             memcpy(dst_term+j, s1, s0 - s1);
381             j += (s0 - s1);
382             if (!q_map_match)
383             {
384                 while (s1 < s0)
385                 {
386                     if (strchr(REGEX_CHARS, *s1))
387                         dst[i++] = '\\';
388                     dst[i++] = *s1++;
389                 }
390             }
391             else
392             {
393                 char tmpbuf[80];
394                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
395                 
396                 strcpy(dst + i, map[0]);
397                 i += strlen(map[0]);
398             }
399         }
400     }
401     dst[i] = '\0';
402     dst_term[j] = '\0';
403     *src = s0;
404     
405     return i;
406 }
407
408 /* term_103: handle term, where trunc = re-1 (regular expressions) */
409 static int term_102(ZebraMaps zebra_maps, int reg_type, const char **src,
410                     char *dst, int space_split, char *dst_term)
411 {
412     return term_103(zebra_maps, reg_type, src, dst, NULL, space_split,
413                     dst_term);
414 }
415
416
417 /* term_104: handle term, where trunc = Process # and ! */
418 static int term_104(ZebraMaps zebra_maps, int reg_type,
419                     const char **src, char *dst, int space_split,
420                     char *dst_term)
421 {
422     const char *s0;
423     const char **map;
424     int i = 0;
425     int j = 0;
426
427     if (!term_pre(zebra_maps, reg_type, src, "?*#", "?*#", !space_split))
428         return 0;
429     s0 = *src;
430     while (*s0)
431     {
432         if (*s0 == '?')
433         {
434             dst_term[j++] = *s0++;
435             if (*s0 >= '0' && *s0 <= '9')
436             {
437                 int limit = 0;
438                 while (*s0 >= '0' && *s0 <= '9')
439                 {
440                     limit = limit * 10 + (*s0 - '0');
441                     dst_term[j++] = *s0++;
442                 }
443                 if (limit > 20)
444                     limit = 20;
445                 while (--limit >= 0)
446                 {
447                     dst[i++] = '.';
448                     dst[i++] = '?';
449                 }
450             }
451             else
452             {
453                 dst[i++] = '.';
454                 dst[i++] = '*';
455             }
456         }
457         else if (*s0 == '*')
458         {
459             dst[i++] = '.';
460             dst[i++] = '*';
461             dst_term[j++] = *s0++;
462         }
463         else if (*s0 == '#')
464         {
465             dst[i++] = '.';
466             dst_term[j++] = *s0++;
467         }
468         else
469         {
470             const char *s1 = s0;
471             int q_map_match = 0;
472             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
473                                     &q_map_match);
474             if (space_split && **map == *CHR_SPACE)
475                 break;
476
477             /* add non-space char */
478             memcpy(dst_term+j, s1, s0 - s1);
479             j += (s0 - s1);
480             if (!q_map_match)
481             {
482                 while (s1 < s0)
483                 {
484                     if (strchr(REGEX_CHARS, *s1))
485                         dst[i++] = '\\';
486                     dst[i++] = *s1++;
487                 }
488             }
489             else
490             {
491                 char tmpbuf[80];
492                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
493                 
494                 strcpy(dst + i, map[0]);
495                 i += strlen(map[0]);
496             }
497         }
498     }
499     dst[i] = '\0';
500     dst_term[j++] = '\0';
501     *src = s0;
502     return i;
503 }
504
505 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
506 static int term_105(ZebraMaps zebra_maps, int reg_type,
507                     const char **src, char *dst, int space_split,
508                     char *dst_term, int right_truncate)
509 {
510     const char *s0;
511     const char **map;
512     int i = 0;
513     int j = 0;
514
515     if (!term_pre(zebra_maps, reg_type, src, "*!", "*!", !space_split))
516         return 0;
517     s0 = *src;
518     while (*s0)
519     {
520         if (*s0 == '*')
521         {
522             dst[i++] = '.';
523             dst[i++] = '*';
524             dst_term[j++] = *s0++;
525         }
526         else if (*s0 == '!')
527         {
528             dst[i++] = '.';
529             dst_term[j++] = *s0++;
530         }
531         else
532         {
533             const char *s1 = s0;
534             int q_map_match = 0;
535             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
536                                     &q_map_match);
537             if (space_split && **map == *CHR_SPACE)
538                 break;
539
540             /* add non-space char */
541             memcpy(dst_term+j, s1, s0 - s1);
542             j += (s0 - s1);
543             if (!q_map_match)
544             {
545                 while (s1 < s0)
546                 {
547                     if (strchr(REGEX_CHARS, *s1))
548                         dst[i++] = '\\';
549                     dst[i++] = *s1++;
550                 }
551             }
552             else
553             {
554                 char tmpbuf[80];
555                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
556                 
557                 strcpy(dst + i, map[0]);
558                 i += strlen(map[0]);
559             }
560         }
561     }
562     if (right_truncate)
563     {
564         dst[i++] = '.';
565         dst[i++] = '*';
566     }
567     dst[i] = '\0';
568     
569     dst_term[j++] = '\0';
570     *src = s0;
571     return i;
572 }
573
574
575 /* gen_regular_rel - generate regular expression from relation
576  *  val:     border value (inclusive)
577  *  islt:    1 if <=; 0 if >=.
578  */
579 static void gen_regular_rel(char *dst, int val, int islt)
580 {
581     int dst_p;
582     int w, d, i;
583     int pos = 0;
584     char numstr[20];
585
586     yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
587     if (val >= 0)
588     {
589         if (islt)
590             strcpy(dst, "(-[0-9]+|(");
591         else
592             strcpy(dst, "((");
593     } 
594     else
595     {
596         if (!islt)
597         {
598             strcpy(dst, "([0-9]+|-(");
599             dst_p = strlen(dst);
600             islt = 1;
601         }
602         else
603         {
604             strcpy(dst, "(-(");
605             islt = 0;
606         }
607         val = -val;
608     }
609     dst_p = strlen(dst);
610     sprintf(numstr, "%d", val);
611     for (w = strlen(numstr); --w >= 0; pos++)
612     {
613         d = numstr[w];
614         if (pos > 0)
615         {
616             if (islt)
617             {
618                 if (d == '0')
619                     continue;
620                 d--;
621             } 
622             else
623             {
624                 if (d == '9')
625                     continue;
626                 d++;
627             }
628         }
629         
630         strcpy(dst + dst_p, numstr);
631         dst_p = strlen(dst) - pos - 1;
632
633         if (islt)
634         {
635             if (d != '0')
636             {
637                 dst[dst_p++] = '[';
638                 dst[dst_p++] = '0';
639                 dst[dst_p++] = '-';
640                 dst[dst_p++] = d;
641                 dst[dst_p++] = ']';
642             }
643             else
644                 dst[dst_p++] = d;
645         }
646         else
647         {
648             if (d != '9')
649             { 
650                 dst[dst_p++] = '[';
651                 dst[dst_p++] = d;
652                 dst[dst_p++] = '-';
653                 dst[dst_p++] = '9';
654                 dst[dst_p++] = ']';
655             }
656             else
657                 dst[dst_p++] = d;
658         }
659         for (i = 0; i<pos; i++)
660         {
661             dst[dst_p++] = '[';
662             dst[dst_p++] = '0';
663             dst[dst_p++] = '-';
664             dst[dst_p++] = '9';
665             dst[dst_p++] = ']';
666         }
667         dst[dst_p++] = '|';
668     }
669     dst[dst_p] = '\0';
670     if (islt)
671     {
672         /* match everything less than 10^(pos-1) */
673         strcat(dst, "0*");
674         for (i = 1; i<pos; i++)
675             strcat(dst, "[0-9]?");
676     }
677     else
678     {
679         /* match everything greater than 10^pos */
680         for (i = 0; i <= pos; i++)
681             strcat(dst, "[0-9]");
682         strcat(dst, "[0-9]*");
683     }
684     strcat(dst, "))");
685 }
686
/* string_rel_add_char: copy one character unit from src[*indx] to *term_p.
 * A backslash is copied together with the character that follows it, so
 * an escaped character is never split.  Both the output pointer and the
 * input index are advanced past what was copied.
 */
void string_rel_add_char(char **term_p, const char *src, int *indx)
{
    char *out = *term_p;
    int pos = *indx;

    if (src[pos] == '\\')
        *out++ = src[pos++];
    *out++ = src[pos++];

    *term_p = out;
    *indx = pos;
}
693
694 /*
695  *   >  abc     ([b-].*|a[c-].*|ab[d-].*|abc.+)
696  *              ([^-a].*|a[^-b].*ab[^-c].*|abc.+)
697  *   >= abc     ([b-].*|a[c-].*|ab[c-].*)
698  *              ([^-a].*|a[^-b].*|ab[c-].*)
699  *   <  abc     ([-0].*|a[-a].*|ab[-b].*)
700  *              ([^a-].*|a[^b-].*|ab[^c-].*)
701  *   <= abc     ([-0].*|a[-a].*|ab[-b].*|abc)
702  *              ([^a-].*|a[^b-].*|ab[^c-].*|abc)
703  */
704 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
705                            const char **term_sub, char *term_dict,
706                            const int *attributeSet,
707                            int reg_type, int space_split, char *term_dst,
708                            int *error_code)
709 {
710     AttrType relation;
711     int relation_value;
712     int i;
713     char *term_tmp = term_dict + strlen(term_dict);
714     char term_component[2*IT_MAX_WORD+20];
715
716     attr_init_APT(&relation, zapt, 2);
717     relation_value = attr_find(&relation, NULL);
718
719     *error_code = 0;
720     yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
721     switch (relation_value)
722     {
723     case 1:
724         if (!term_100(zh->reg->zebra_maps, reg_type,
725                       term_sub, term_component,
726                       space_split, term_dst))
727             return 0;
728         yaz_log(log_level_rpn, "Relation <");
729         
730         *term_tmp++ = '(';
731         for (i = 0; term_component[i]; )
732         {
733             int j = 0;
734
735             if (i)
736                 *term_tmp++ = '|';
737             while (j < i)
738                 string_rel_add_char(&term_tmp, term_component, &j);
739
740             *term_tmp++ = '[';
741
742             *term_tmp++ = '^';
743
744             *term_tmp++ = 1;
745             *term_tmp++ = FIRST_IN_FIELD_CHAR;
746
747             string_rel_add_char(&term_tmp, term_component, &i);
748             *term_tmp++ = '-';
749
750             *term_tmp++ = ']';
751             *term_tmp++ = '.';
752             *term_tmp++ = '*';
753
754             if ((term_tmp - term_dict) > IT_MAX_WORD)
755                 break;
756         }
757         *term_tmp++ = ')';
758         *term_tmp = '\0';
759         yaz_log(YLOG_LOG, "term_dict=%s", term_dict);
760         break;
761     case 2:
762         if (!term_100(zh->reg->zebra_maps, reg_type,
763                       term_sub, term_component,
764                       space_split, term_dst))
765             return 0;
766         yaz_log(log_level_rpn, "Relation <=");
767
768         *term_tmp++ = '(';
769         for (i = 0; term_component[i]; )
770         {
771             int j = 0;
772
773             while (j < i)
774                 string_rel_add_char(&term_tmp, term_component, &j);
775             *term_tmp++ = '[';
776
777             *term_tmp++ = '^';
778
779             *term_tmp++ = 1;
780             *term_tmp++ = FIRST_IN_FIELD_CHAR;
781
782             string_rel_add_char(&term_tmp, term_component, &i);
783             *term_tmp++ = '-';
784
785             *term_tmp++ = ']';
786             *term_tmp++ = '.';
787             *term_tmp++ = '*';
788
789             *term_tmp++ = '|';
790
791             if ((term_tmp - term_dict) > IT_MAX_WORD)
792                 break;
793         }
794         for (i = 0; term_component[i]; )
795             string_rel_add_char(&term_tmp, term_component, &i);
796         *term_tmp++ = ')';
797         *term_tmp = '\0';
798         break;
799     case 5:
800         if (!term_100 (zh->reg->zebra_maps, reg_type,
801                        term_sub, term_component, space_split, term_dst))
802             return 0;
803         yaz_log(log_level_rpn, "Relation >");
804
805         *term_tmp++ = '(';
806         for (i = 0; term_component[i];)
807         {
808             int j = 0;
809
810             while (j < i)
811                 string_rel_add_char(&term_tmp, term_component, &j);
812             *term_tmp++ = '[';
813             
814             *term_tmp++ = '^';
815             *term_tmp++ = '-';
816             string_rel_add_char(&term_tmp, term_component, &i);
817
818             *term_tmp++ = ']';
819             *term_tmp++ = '.';
820             *term_tmp++ = '*';
821
822             *term_tmp++ = '|';
823
824             if ((term_tmp - term_dict) > IT_MAX_WORD)
825                 break;
826         }
827         for (i = 0; term_component[i];)
828             string_rel_add_char(&term_tmp, term_component, &i);
829         *term_tmp++ = '.';
830         *term_tmp++ = '+';
831         *term_tmp++ = ')';
832         *term_tmp = '\0';
833         break;
834     case 4:
835         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
836                       term_component, space_split, term_dst))
837             return 0;
838         yaz_log(log_level_rpn, "Relation >=");
839
840         *term_tmp++ = '(';
841         for (i = 0; term_component[i];)
842         {
843             int j = 0;
844
845             if (i)
846                 *term_tmp++ = '|';
847             while (j < i)
848                 string_rel_add_char(&term_tmp, term_component, &j);
849             *term_tmp++ = '[';
850
851             if (term_component[i+1])
852             {
853                 *term_tmp++ = '^';
854                 *term_tmp++ = '-';
855                 string_rel_add_char(&term_tmp, term_component, &i);
856             }
857             else
858             {
859                 string_rel_add_char(&term_tmp, term_component, &i);
860                 *term_tmp++ = '-';
861             }
862             *term_tmp++ = ']';
863             *term_tmp++ = '.';
864             *term_tmp++ = '*';
865
866             if ((term_tmp - term_dict) > IT_MAX_WORD)
867                 break;
868         }
869         *term_tmp++ = ')';
870         *term_tmp = '\0';
871         break;
872     case 3:
873     case 102:
874     case -1:
875         if (!**term_sub)
876             return 1;
877         yaz_log(log_level_rpn, "Relation =");
878         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
879                       term_component, space_split, term_dst))
880             return 0;
881         strcat(term_tmp, "(");
882         strcat(term_tmp, term_component);
883         strcat(term_tmp, ")");
884         break;
885     case 103:
886         yaz_log(log_level_rpn, "Relation always matches");
887         /* skip to end of term (we don't care what it is) */
888         while (**term_sub != '\0')
889             (*term_sub)++;
890         break;
891     default:
892         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
893         return 0;
894     }
895     return 1;
896 }
897
898 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
899                              const char **term_sub, 
900                              const int *attributeSet, NMEM stream,
901                              struct grep_info *grep_info,
902                              int reg_type, int complete_flag,
903                              int num_bases, char **basenames,
904                              char *term_dst,
905                              const char *xpath_use,
906                              struct ord_list **ol);
907
908 static ZEBRA_RES term_limits_APT(ZebraHandle zh,
909                                  Z_AttributesPlusTerm *zapt,
910                                  zint *hits_limit_value,
911                                  const char **term_ref_id_str,
912                                  NMEM nmem)
913 {
914     AttrType term_ref_id_attr;
915     AttrType hits_limit_attr;
916     int term_ref_id_int;
917  
918     attr_init_APT(&hits_limit_attr, zapt, 11);
919     *hits_limit_value  = attr_find(&hits_limit_attr, NULL);
920
921     attr_init_APT(&term_ref_id_attr, zapt, 10);
922     term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
923     if (term_ref_id_int >= 0)
924     {
925         char *res = nmem_malloc(nmem, 20);
926         sprintf(res, "%d", term_ref_id_int);
927         *term_ref_id_str = res;
928     }
929
930     /* no limit given ? */
931     if (*hits_limit_value == -1)
932     {
933         if (*term_ref_id_str)
934         {
935             /* use global if term_ref is present */
936             *hits_limit_value = zh->approx_limit;
937         }
938         else
939         {
940             /* no counting if term_ref is not present */
941             *hits_limit_value = 0;
942         }
943     }
944     else if (*hits_limit_value == 0)
945     {
946         /* 0 is the same as global limit */
947         *hits_limit_value = zh->approx_limit;
948     }
949     yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
950             *term_ref_id_str ? *term_ref_id_str : "none",
951             *hits_limit_value);
952     return ZEBRA_OK;
953 }
954
955 static ZEBRA_RES term_trunc(ZebraHandle zh,
956                             Z_AttributesPlusTerm *zapt,
957                             const char **term_sub, 
958                             const int *attributeSet, NMEM stream,
959                             struct grep_info *grep_info,
960                             int reg_type, int complete_flag,
961                             int num_bases, char **basenames,
962                             char *term_dst,
963                             const char *rank_type, 
964                             const char *xpath_use,
965                             NMEM rset_nmem,
966                             RSET *rset,
967                             struct rset_key_control *kc)
968 {
969     ZEBRA_RES res;
970     struct ord_list *ol;
971     zint hits_limit_value;
972     const char *term_ref_id_str = 0;
973     *rset = 0;
974
975     term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
976     grep_info->isam_p_indx = 0;
977     res = string_term(zh, zapt, term_sub, attributeSet, stream, grep_info,
978                       reg_type, complete_flag, num_bases, basenames,
979                       term_dst, xpath_use, &ol);
980     if (res != ZEBRA_OK)
981         return res;
982     if (!*term_sub)  /* no more terms ? */
983         return res;
984     yaz_log(log_level_rpn, "term: %s", term_dst);
985     *rset = rset_trunc(zh, grep_info->isam_p_buf,
986                        grep_info->isam_p_indx, term_dst,
987                        strlen(term_dst), rank_type, 1 /* preserve pos */,
988                        zapt->term->which, rset_nmem,
989                        kc, kc->scope, ol, reg_type, hits_limit_value,
990                        term_ref_id_str);
991     if (!*rset)
992         return ZEBRA_FAIL;
993     return ZEBRA_OK;
994 }
995
996 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
997                              const char **term_sub, 
998                              const int *attributeSet, NMEM stream,
999                              struct grep_info *grep_info,
1000                              int reg_type, int complete_flag,
1001                              int num_bases, char **basenames,
1002                              char *term_dst,
1003                              const char *xpath_use,
1004                              struct ord_list **ol)
1005 {
1006     char term_dict[2*IT_MAX_WORD+4000];
1007     int j, r, base_no;
1008     AttrType truncation;
1009     int truncation_value;
1010     const char *termp;
1011     struct rpn_char_map_info rcmi;
1012     int space_split = complete_flag ? 0 : 1;
1013
1014     int bases_ok = 0;     /* no of databases with OK attribute */
1015
1016     *ol = ord_list_create(stream);
1017
1018     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1019     attr_init_APT(&truncation, zapt, 5);
1020     truncation_value = attr_find(&truncation, NULL);
1021     yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1022
1023     for (base_no = 0; base_no < num_bases; base_no++)
1024     {
1025         int ord = -1;
1026         int regex_range = 0;
1027         int max_pos, prefix_len = 0;
1028         int relation_error;
1029         char ord_buf[32];
1030         int ord_len, i;
1031
1032         termp = *term_sub; /* start of term for each database */
1033
1034         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1035         {
1036             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1037                            basenames[base_no]);
1038             return ZEBRA_FAIL;
1039         }
1040         
1041         if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use,
1042                               attributeSet, &ord) != ZEBRA_OK)
1043             continue;
1044
1045         bases_ok++;
1046
1047         *ol = ord_list_append(stream, *ol, ord);
1048         ord_len = key_SU_encode (ord, ord_buf);
1049         
1050         term_dict[prefix_len++] = '(';
1051         for (i = 0; i<ord_len; i++)
1052         {
1053             term_dict[prefix_len++] = 1;  /* our internal regexp escape char */
1054             term_dict[prefix_len++] = ord_buf[i];
1055         }
1056         term_dict[prefix_len++] = ')';
1057         term_dict[prefix_len] = '\0';
1058         j = prefix_len;
1059         switch (truncation_value)
1060         {
1061         case -1:         /* not specified */
1062         case 100:        /* do not truncate */
1063             if (!string_relation(zh, zapt, &termp, term_dict,
1064                                  attributeSet,
1065                                  reg_type, space_split, term_dst,
1066                                  &relation_error))
1067             {
1068                 if (relation_error)
1069                 {
1070                     zebra_setError(zh, relation_error, 0);
1071                     return ZEBRA_FAIL;
1072                 }
1073                 *term_sub = 0;
1074                 return ZEBRA_OK;
1075             }
1076             break;
1077         case 1:          /* right truncation */
1078             term_dict[j++] = '(';
1079             if (!term_100(zh->reg->zebra_maps, reg_type,
1080                           &termp, term_dict + j, space_split, term_dst))
1081             {
1082                 *term_sub = 0;
1083                 return ZEBRA_OK;
1084             }
1085             strcat(term_dict, ".*)");
1086             break;
1087         case 2:          /* keft truncation */
1088             term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1089             if (!term_100(zh->reg->zebra_maps, reg_type,
1090                           &termp, term_dict + j, space_split, term_dst))
1091             {
1092                 *term_sub = 0;
1093                 return ZEBRA_OK;
1094             }
1095             strcat(term_dict, ")");
1096             break;
1097         case 3:          /* left&right truncation */
1098             term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1099             if (!term_100(zh->reg->zebra_maps, reg_type,
1100                           &termp, term_dict + j, space_split, term_dst))
1101             {
1102                 *term_sub = 0;
1103                 return ZEBRA_OK;
1104             }
1105             strcat(term_dict, ".*)");
1106             break;
1107         case 101:        /* process # in term */
1108             term_dict[j++] = '(';
1109             if (!term_101(zh->reg->zebra_maps, reg_type,
1110                           &termp, term_dict + j, space_split, term_dst))
1111             {
1112                 *term_sub = 0;
1113                 return ZEBRA_OK;
1114             }
1115             strcat(term_dict, ")");
1116             break;
1117         case 102:        /* Regexp-1 */
1118             term_dict[j++] = '(';
1119             if (!term_102(zh->reg->zebra_maps, reg_type,
1120                           &termp, term_dict + j, space_split, term_dst))
1121             {
1122                 *term_sub = 0;
1123                 return ZEBRA_OK;
1124             }
1125             strcat(term_dict, ")");
1126             break;
1127         case 103:       /* Regexp-2 */
1128             regex_range = 1;
1129             term_dict[j++] = '(';
1130             if (!term_103(zh->reg->zebra_maps, reg_type,
1131                           &termp, term_dict + j, &regex_range,
1132                           space_split, term_dst))
1133             {
1134                 *term_sub = 0;
1135                 return ZEBRA_OK;
1136             }
1137             strcat(term_dict, ")");
1138             break;
1139         case 104:        /* process # and ! in term */
1140             term_dict[j++] = '(';
1141             if (!term_104(zh->reg->zebra_maps, reg_type,
1142                           &termp, term_dict + j, space_split, term_dst))
1143             {
1144                 *term_sub = 0;
1145                 return ZEBRA_OK;
1146             }
1147             strcat(term_dict, ")");
1148             break;
1149         case 105:        /* process * and ! in term */
1150             term_dict[j++] = '(';
1151             if (!term_105(zh->reg->zebra_maps, reg_type,
1152                           &termp, term_dict + j, space_split, term_dst, 1))
1153             {
1154                 *term_sub = 0;
1155                 return ZEBRA_OK;
1156             }
1157             strcat(term_dict, ")");
1158             break;
1159         case 106:        /* process * and ! in term */
1160             term_dict[j++] = '(';
1161             if (!term_105(zh->reg->zebra_maps, reg_type,
1162                           &termp, term_dict + j, space_split, term_dst, 0))
1163             {
1164                 *term_sub = 0;
1165                 return ZEBRA_OK;
1166             }
1167             strcat(term_dict, ")");
1168             break;
1169         default:
1170             zebra_setError_zint(zh,
1171                                 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1172                                 truncation_value);
1173             return ZEBRA_FAIL;
1174         }
1175         if (1)
1176         {
1177             char buf[80];
1178             const char *input = term_dict + prefix_len;
1179             esc_str(buf, sizeof(buf), input, strlen(input));
1180         }
1181         yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_dict+prefix_len);
1182         r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
1183                              grep_info, &max_pos, 
1184                              ord_len /* number of "exact" chars */,
1185                              grep_handle);
1186         if (r == 1)
1187             zebra_set_partial_result(zh);
1188         else if (r)
1189             yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1190     }
1191     if (!bases_ok)
1192         return ZEBRA_FAIL;
1193     *term_sub = termp;
1194     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1195     return ZEBRA_OK;
1196 }
1197
1198
1199
1200 static void grep_info_delete(struct grep_info *grep_info)
1201 {
1202 #ifdef TERM_COUNT
1203     xfree(grep_info->term_no);
1204 #endif
1205     xfree(grep_info->isam_p_buf);
1206 }
1207
1208 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1209                                    Z_AttributesPlusTerm *zapt,
1210                                    struct grep_info *grep_info,
1211                                    int reg_type)
1212 {
1213 #ifdef TERM_COUNT
1214     grep_info->term_no = 0;
1215 #endif
1216     grep_info->trunc_max = atoi(res_get_def(zh->res, "truncmax", "10000"));
1217     grep_info->isam_p_size = 0;
1218     grep_info->isam_p_buf = NULL;
1219     grep_info->zh = zh;
1220     grep_info->reg_type = reg_type;
1221     grep_info->termset = 0;
1222     if (zapt)
1223     {
1224         AttrType truncmax;
1225         int truncmax_value;
1226
1227         attr_init_APT(&truncmax, zapt, 13);
1228         truncmax_value = attr_find(&truncmax, NULL);
1229         if (truncmax_value != -1)
1230             grep_info->trunc_max = truncmax_value;
1231     }
1232     if (zapt)
1233     {
1234         AttrType termset;
1235         int termset_value_numeric;
1236         const char *termset_value_string;
1237
1238         attr_init_APT(&termset, zapt, 8);
1239         termset_value_numeric =
1240             attr_find_ex(&termset, NULL, &termset_value_string);
1241         if (termset_value_numeric != -1)
1242         {
1243 #if TERMSET_DISABLE
1244             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1245             return ZEBRA_FAIL;
1246 #else
1247             char resname[32];
1248             const char *termset_name = 0;
1249             if (termset_value_numeric != -2)
1250             {
1251                 
1252                 sprintf(resname, "%d", termset_value_numeric);
1253                 termset_name = resname;
1254             }
1255             else
1256             termset_name = termset_value_string;
1257             yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1258             grep_info->termset = resultSetAdd(zh, termset_name, 1);
1259             if (!grep_info->termset)
1260             {
1261                 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1262                 return ZEBRA_FAIL;
1263             }
1264 #endif
1265         }
1266     }
1267     return ZEBRA_OK;
1268 }
1269                                
1270 /**
1271   \brief Create result set(s) for list of terms
1272   \param zh Zebra Handle
1273   \param zapt Attributes Plust Term (RPN leaf)
1274   \param termz term as used in query but converted to UTF-8
1275   \param attributeSet default attribute set
1276   \param stream memory for result
1277   \param reg_type register type ('w', 'p',..)
1278   \param complete_flag whether it's phrases or not
1279   \param rank_type term flags for ranking
1280   \param xpath_use use attribute for X-Path (-1 for no X-path)
1281   \param num_bases number of databases
1282   \param basenames array of databases
1283   \param rset_nmem memory for result sets
1284   \param result_sets output result set for each term in list (output)
1285   \param num_result_sets number of output result sets
1286   \param kc rset key control to be used for created result sets
1287 */
1288 static ZEBRA_RES term_list_trunc(ZebraHandle zh,
1289                                  Z_AttributesPlusTerm *zapt,
1290                                  const char *termz,
1291                                  const int *attributeSet,
1292                                  NMEM stream,
1293                                  int reg_type, int complete_flag,
1294                                  const char *rank_type,
1295                                  const char *xpath_use,
1296                                  int num_bases, char **basenames, 
1297                                  NMEM rset_nmem,
1298                                  RSET **result_sets, int *num_result_sets,
1299                                  struct rset_key_control *kc)
1300 {
1301     char term_dst[IT_MAX_WORD+1];
1302     struct grep_info grep_info;
1303     const char *termp = termz;
1304     int alloc_sets = 0;
1305
1306     *num_result_sets = 0;
1307     *term_dst = 0;
1308     if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1309         return ZEBRA_FAIL;
1310     while(1)
1311     { 
1312         ZEBRA_RES res;
1313
1314         if (alloc_sets == *num_result_sets)
1315         {
1316             int add = 10;
1317             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1318                                               sizeof(*rnew));
1319             if (alloc_sets)
1320                 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1321             alloc_sets = alloc_sets + add;
1322             *result_sets = rnew;
1323         }
1324         res = term_trunc(zh, zapt, &termp, attributeSet,
1325                          stream, &grep_info,
1326                          reg_type, complete_flag,
1327                          num_bases, basenames,
1328                          term_dst, rank_type,
1329                          xpath_use, rset_nmem,
1330                          &(*result_sets)[*num_result_sets],
1331                          kc);
1332         if (res != ZEBRA_OK)
1333         {
1334             int i;
1335             for (i = 0; i < *num_result_sets; i++)
1336                 rset_delete((*result_sets)[i]);
1337             grep_info_delete (&grep_info);
1338             return res;
1339         }
1340         if ((*result_sets)[*num_result_sets] == 0)
1341             break;
1342         (*num_result_sets)++;
1343
1344         if (!*termp)
1345             break;
1346     }
1347     grep_info_delete(&grep_info);
1348     return ZEBRA_OK;
1349 }
1350
1351 static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh,
1352                                          Z_AttributesPlusTerm *zapt,
1353                                          const int *attributeSet,
1354                                          int reg_type,
1355                                          int num_bases, char **basenames,
1356                                          NMEM rset_nmem,
1357                                          RSET *rset,
1358                                          struct rset_key_control *kc)
1359 {
1360     RSET *f_set;
1361     int base_no;
1362     int position_value;
1363     int num_sets = 0;
1364     AttrType position;
1365
1366     attr_init_APT(&position, zapt, 3);
1367     position_value = attr_find(&position, NULL);
1368     switch(position_value)
1369     {
1370     case 3:
1371     case -1:
1372         return ZEBRA_OK;
1373     case 1:
1374     case 2:
1375         break;
1376     default:
1377         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1378                             position_value);
1379         return ZEBRA_FAIL;
1380     }
1381
1382     if (!zebra_maps_is_first_in_field(zh->reg->zebra_maps, reg_type))
1383     {
1384         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1385                             position_value);
1386         return ZEBRA_FAIL;
1387     }
1388
1389     if (!zh->reg->isamb && !zh->reg->isamc)
1390     {
1391         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1392                             position_value);
1393         return ZEBRA_FAIL;
1394     }
1395     f_set = xmalloc(sizeof(RSET) * num_bases);
1396     for (base_no = 0; base_no < num_bases; base_no++)
1397     {
1398         int ord = -1;
1399         char ord_buf[32];
1400         char term_dict[100];
1401         int ord_len;
1402         char *val;
1403         ISAM_P isam_p;
1404
1405         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1406         {
1407             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1408                            basenames[base_no]);
1409             return ZEBRA_FAIL;
1410         }
1411         
1412         if (zebra_apt_get_ord(zh, zapt, reg_type, 0,
1413                               attributeSet, &ord) != ZEBRA_OK)
1414             continue;
1415
1416         ord_len = key_SU_encode (ord, ord_buf);
1417         memcpy(term_dict, ord_buf, ord_len);
1418         strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1419         val = dict_lookup(zh->reg->dict, term_dict);
1420         if (!val)
1421             continue;
1422         assert(*val == sizeof(ISAM_P));
1423         memcpy(&isam_p, val+1, sizeof(isam_p));
1424         
1425
1426         if (zh->reg->isamb)
1427             f_set[num_sets++] = rsisamb_create(rset_nmem, kc, kc->scope,
1428                                                zh->reg->isamb, isam_p, 0);
1429         else if (zh->reg->isamc)
1430             f_set[num_sets++] = rsisamc_create(rset_nmem, kc, kc->scope,
1431                                                zh->reg->isamc, isam_p, 0);
1432     }
1433     if (num_sets)
1434     {
1435         *rset = rset_create_or(rset_nmem, kc, kc->scope,
1436                                0 /* termid */, num_sets, f_set);
1437     }
1438     xfree(f_set);
1439     return ZEBRA_OK;
1440 }
1441                                          
1442 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1443                                        Z_AttributesPlusTerm *zapt,
1444                                        const char *termz_org,
1445                                        const int *attributeSet,
1446                                        NMEM stream,
1447                                        int reg_type, int complete_flag,
1448                                        const char *rank_type,
1449                                        const char *xpath_use,
1450                                        int num_bases, char **basenames, 
1451                                        NMEM rset_nmem,
1452                                        RSET *rset,
1453                                        struct rset_key_control *kc)
1454 {
1455     RSET *result_sets = 0;
1456     int num_result_sets = 0;
1457     ZEBRA_RES res =
1458         term_list_trunc(zh, zapt, termz_org, attributeSet,
1459                         stream, reg_type, complete_flag,
1460                         rank_type, xpath_use,
1461                         num_bases, basenames,
1462                         rset_nmem,
1463                         &result_sets, &num_result_sets, kc);
1464
1465     if (res != ZEBRA_OK)
1466         return res;
1467
1468     if (num_result_sets > 0)
1469     {
1470         RSET first_set = 0;
1471         res = rpn_search_APT_position(zh, zapt, attributeSet, 
1472                                       reg_type,
1473                                       num_bases, basenames,
1474                                       rset_nmem, &first_set,
1475                                       kc);
1476         if (res != ZEBRA_OK)
1477             return res;
1478         if (first_set)
1479         {
1480             RSET *nsets = nmem_malloc(stream,
1481                                       sizeof(RSET) * (num_result_sets+1));
1482             nsets[0] = first_set;
1483             memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1484             result_sets = nsets;
1485             num_result_sets++;
1486         }
1487     }
1488     if (num_result_sets == 0)
1489         *rset = rset_create_null(rset_nmem, kc, 0); 
1490     else if (num_result_sets == 1)
1491         *rset = result_sets[0];
1492     else
1493         *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1494                                  num_result_sets, result_sets,
1495                                  1 /* ordered */, 0 /* exclusion */,
1496                                  3 /* relation */, 1 /* distance */);
1497     if (!*rset)
1498         return ZEBRA_FAIL;
1499     return ZEBRA_OK;
1500 }
1501
1502 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1503                                         Z_AttributesPlusTerm *zapt,
1504                                         const char *termz_org,
1505                                         const int *attributeSet,
1506                                         NMEM stream,
1507                                         int reg_type, int complete_flag,
1508                                         const char *rank_type,
1509                                         const char *xpath_use,
1510                                         int num_bases, char **basenames,
1511                                         NMEM rset_nmem,
1512                                         RSET *rset,
1513                                         struct rset_key_control *kc)
1514 {
1515     RSET *result_sets = 0;
1516     int num_result_sets = 0;
1517     int i;
1518     ZEBRA_RES res =
1519         term_list_trunc(zh, zapt, termz_org, attributeSet,
1520                         stream, reg_type, complete_flag,
1521                         rank_type, xpath_use,
1522                         num_bases, basenames,
1523                         rset_nmem,
1524                         &result_sets, &num_result_sets, kc);
1525     if (res != ZEBRA_OK)
1526         return res;
1527
1528     for (i = 0; i<num_result_sets; i++)
1529     {
1530         RSET first_set = 0;
1531         res = rpn_search_APT_position(zh, zapt, attributeSet, 
1532                                       reg_type,
1533                                       num_bases, basenames,
1534                                       rset_nmem, &first_set,
1535                                       kc);
1536         if (res != ZEBRA_OK)
1537         {
1538             for (i = 0; i<num_result_sets; i++)
1539                 rset_delete(result_sets[i]);
1540             return res;
1541         }
1542
1543         if (first_set)
1544         {
1545             RSET tmp_set[2];
1546
1547             tmp_set[0] = first_set;
1548             tmp_set[1] = result_sets[i];
1549             
1550             result_sets[i] = rset_create_prox(
1551                 rset_nmem, kc, kc->scope,
1552                 2, tmp_set,
1553                 1 /* ordered */, 0 /* exclusion */,
1554                 3 /* relation */, 1 /* distance */);
1555         }
1556     }
1557     if (num_result_sets == 0)
1558         *rset = rset_create_null(rset_nmem, kc, 0); 
1559     else if (num_result_sets == 1)
1560         *rset = result_sets[0];
1561     else
1562         *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1563                                num_result_sets, result_sets);
1564     if (!*rset)
1565         return ZEBRA_FAIL;
1566     return ZEBRA_OK;
1567 }
1568
1569 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1570                                          Z_AttributesPlusTerm *zapt,
1571                                          const char *termz_org,
1572                                          const int *attributeSet,
1573                                          NMEM stream,
1574                                          int reg_type, int complete_flag,
1575                                          const char *rank_type, 
1576                                          const char *xpath_use,
1577                                          int num_bases, char **basenames,
1578                                          NMEM rset_nmem,
1579                                          RSET *rset,
1580                                          struct rset_key_control *kc)
1581 {
1582     RSET *result_sets = 0;
1583     int num_result_sets = 0;
1584     int i;
1585     ZEBRA_RES res =
1586         term_list_trunc(zh, zapt, termz_org, attributeSet,
1587                         stream, reg_type, complete_flag,
1588                         rank_type, xpath_use,
1589                         num_bases, basenames,
1590                         rset_nmem,
1591                         &result_sets, &num_result_sets,
1592                         kc);
1593     if (res != ZEBRA_OK)
1594         return res;
1595     for (i = 0; i<num_result_sets; i++)
1596     {
1597         RSET first_set = 0;
1598         res = rpn_search_APT_position(zh, zapt, attributeSet, 
1599                                       reg_type,
1600                                       num_bases, basenames,
1601                                       rset_nmem, &first_set,
1602                                       kc);
1603         if (res != ZEBRA_OK)
1604         {
1605             for (i = 0; i<num_result_sets; i++)
1606                 rset_delete(result_sets[i]);
1607             return res;
1608         }
1609
1610         if (first_set)
1611         {
1612             RSET tmp_set[2];
1613
1614             tmp_set[0] = first_set;
1615             tmp_set[1] = result_sets[i];
1616             
1617             result_sets[i] = rset_create_prox(
1618                 rset_nmem, kc, kc->scope,
1619                 2, tmp_set,
1620                 1 /* ordered */, 0 /* exclusion */,
1621                 3 /* relation */, 1 /* distance */);
1622         }
1623     }
1624
1625
1626     if (num_result_sets == 0)
1627         *rset = rset_create_null(rset_nmem, kc, 0); 
1628     else if (num_result_sets == 1)
1629         *rset = result_sets[0];
1630     else
1631         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1632                                 num_result_sets, result_sets);
1633     if (!*rset)
1634         return ZEBRA_FAIL;
1635     return ZEBRA_OK;
1636 }
1637
1638 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1639                             const char **term_sub,
1640                             char *term_dict,
1641                             const int *attributeSet,
1642                             struct grep_info *grep_info,
1643                             int *max_pos,
1644                             int reg_type,
1645                             char *term_dst,
1646                             int *error_code)
1647 {
1648     AttrType relation;
1649     int relation_value;
1650     int term_value;
1651     int r;
1652     char *term_tmp = term_dict + strlen(term_dict);
1653
1654     *error_code = 0;
1655     attr_init_APT(&relation, zapt, 2);
1656     relation_value = attr_find(&relation, NULL);
1657
1658     yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1659
1660     switch (relation_value)
1661     {
1662     case 1:
1663         yaz_log(log_level_rpn, "Relation <");
1664         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1665                       term_dst))
1666             return 0;
1667         term_value = atoi (term_tmp);
1668         gen_regular_rel(term_tmp, term_value-1, 1);
1669         break;
1670     case 2:
1671         yaz_log(log_level_rpn, "Relation <=");
1672         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1673                       term_dst))
1674             return 0;
1675         term_value = atoi (term_tmp);
1676         gen_regular_rel(term_tmp, term_value, 1);
1677         break;
1678     case 4:
1679         yaz_log(log_level_rpn, "Relation >=");
1680         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1681                       term_dst))
1682             return 0;
1683         term_value = atoi (term_tmp);
1684         gen_regular_rel(term_tmp, term_value, 0);
1685         break;
1686     case 5:
1687         yaz_log(log_level_rpn, "Relation >");
1688         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1689                       term_dst))
1690             return 0;
1691         term_value = atoi (term_tmp);
1692         gen_regular_rel(term_tmp, term_value+1, 0);
1693         break;
1694     case -1:
1695     case 3:
1696         yaz_log(log_level_rpn, "Relation =");
1697         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1698                       term_dst))
1699             return 0;
1700         term_value = atoi (term_tmp);
1701         sprintf(term_tmp, "(0*%d)", term_value);
1702         break;
1703     case 103:
1704         /* term_tmp untouched.. */
1705         while (**term_sub != '\0')
1706             (*term_sub)++;
1707         break;
1708     default:
1709         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1710         return 0;
1711     }
1712     yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_tmp);
1713     r = dict_lookup_grep(zh->reg->dict, term_dict, 0, grep_info, max_pos,
1714                           0, grep_handle);
1715
1716     if (r == 1)
1717         zebra_set_partial_result(zh);
1718     else if (r)
1719         yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1720     yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1721     return 1;
1722 }
1723
1724 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1725                               const char **term_sub, 
1726                               const int *attributeSet, NMEM stream,
1727                               struct grep_info *grep_info,
1728                               int reg_type, int complete_flag,
1729                               int num_bases, char **basenames,
1730                               char *term_dst, 
1731                               const char *xpath_use,
1732                               struct ord_list **ol)
1733 {
1734     char term_dict[2*IT_MAX_WORD+2];
1735     int base_no;
1736     const char *termp;
1737     struct rpn_char_map_info rcmi;
1738
1739     int bases_ok = 0;     /* no of databases with OK attribute */
1740
1741     *ol = ord_list_create(stream);
1742
1743     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1744
1745     for (base_no = 0; base_no < num_bases; base_no++)
1746     {
1747         int max_pos, prefix_len = 0;
1748         int relation_error = 0;
1749         int ord, ord_len, i;
1750         char ord_buf[32];
1751
1752         termp = *term_sub;
1753
1754         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1755         {
1756             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1757                            basenames[base_no]);
1758             return ZEBRA_FAIL;
1759         }
1760
1761         if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use,
1762                               attributeSet, &ord) != ZEBRA_OK)
1763             continue;
1764         bases_ok++;
1765
1766         *ol = ord_list_append(stream, *ol, ord);
1767
1768         ord_len = key_SU_encode (ord, ord_buf);
1769
1770         term_dict[prefix_len++] = '(';
1771         for (i = 0; i < ord_len; i++)
1772         {
1773             term_dict[prefix_len++] = 1;
1774             term_dict[prefix_len++] = ord_buf[i];
1775         }
1776         term_dict[prefix_len++] = ')';
1777         term_dict[prefix_len] = '\0';
1778
1779         if (!numeric_relation(zh, zapt, &termp, term_dict,
1780                               attributeSet, grep_info, &max_pos, reg_type,
1781                               term_dst, &relation_error))
1782         {
1783             if (relation_error)
1784             {
1785                 zebra_setError(zh, relation_error, 0);
1786                 return ZEBRA_FAIL;
1787             }
1788             *term_sub = 0;
1789             return ZEBRA_OK;
1790         }
1791     }
1792     if (!bases_ok)
1793         return ZEBRA_FAIL;
1794     *term_sub = termp;
1795     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1796     return ZEBRA_OK;
1797 }
1798
1799                                  
1800 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1801                                         Z_AttributesPlusTerm *zapt,
1802                                         const char *termz,
1803                                         const int *attributeSet,
1804                                         NMEM stream,
1805                                         int reg_type, int complete_flag,
1806                                         const char *rank_type, 
1807                                         const char *xpath_use,
1808                                         int num_bases, char **basenames,
1809                                         NMEM rset_nmem,
1810                                         RSET *rset,
1811                                         struct rset_key_control *kc)
1812 {
1813     char term_dst[IT_MAX_WORD+1];
1814     const char *termp = termz;
1815     RSET *result_sets = 0;
1816     int num_result_sets = 0;
1817     ZEBRA_RES res;
1818     struct grep_info grep_info;
1819     int alloc_sets = 0;
1820     zint hits_limit_value;
1821     const char *term_ref_id_str = 0;
1822
1823     term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
1824
1825     yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1826     if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1827         return ZEBRA_FAIL;
1828     while (1)
1829     { 
1830         struct ord_list *ol;
1831         if (alloc_sets == num_result_sets)
1832         {
1833             int add = 10;
1834             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1835                                               sizeof(*rnew));
1836             if (alloc_sets)
1837                 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1838             alloc_sets = alloc_sets + add;
1839             result_sets = rnew;
1840         }
1841         yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1842         grep_info.isam_p_indx = 0;
1843         res = numeric_term(zh, zapt, &termp, attributeSet, stream, &grep_info,
1844                            reg_type, complete_flag, num_bases, basenames,
1845                            term_dst, xpath_use, &ol);
1846         if (res == ZEBRA_FAIL || termp == 0)
1847             break;
1848         yaz_log(YLOG_DEBUG, "term: %s", term_dst);
1849         result_sets[num_result_sets] =
1850             rset_trunc(zh, grep_info.isam_p_buf,
1851                        grep_info.isam_p_indx, term_dst,
1852                        strlen(term_dst), rank_type,
1853                        0 /* preserve position */,
1854                        zapt->term->which, rset_nmem, 
1855                        kc, kc->scope, ol, reg_type,
1856                        hits_limit_value,
1857                        term_ref_id_str);
1858         if (!result_sets[num_result_sets])
1859             break;
1860         num_result_sets++;
1861         if (!*termp)
1862             break;
1863     }
1864     grep_info_delete(&grep_info);
1865
1866     if (res != ZEBRA_OK)
1867         return res;
1868     if (num_result_sets == 0)
1869         *rset = rset_create_null(rset_nmem, kc, 0);
1870     else if (num_result_sets == 1)
1871         *rset = result_sets[0];
1872     else
1873         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1874                                 num_result_sets, result_sets);
1875     if (!*rset)
1876         return ZEBRA_FAIL;
1877     return ZEBRA_OK;
1878 }
1879
1880 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1881                                       Z_AttributesPlusTerm *zapt,
1882                                       const char *termz,
1883                                       const int *attributeSet,
1884                                       NMEM stream,
1885                                       const char *rank_type, NMEM rset_nmem,
1886                                       RSET *rset,
1887                                       struct rset_key_control *kc)
1888 {
1889     Record rec;
1890     zint sysno = atozint(termz);
1891     
1892     if (sysno <= 0)
1893         sysno = 0;
1894     rec = rec_get(zh->reg->records, sysno);
1895     if (!rec)
1896         sysno = 0;
1897
1898     rec_free(&rec);
1899
1900     if (sysno <= 0)
1901     {
1902         *rset = rset_create_null(rset_nmem, kc, 0);
1903     }
1904     else
1905     {
1906         RSFD rsfd;
1907         struct it_key key;
1908         *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1909                                  res_get(zh->res, "setTmpDir"), 0);
1910         rsfd = rset_open(*rset, RSETF_WRITE);
1911         
1912         key.mem[0] = sysno;
1913         key.mem[1] = 1;
1914         key.len = 2;
1915         rset_write(rsfd, &key);
1916         rset_close(rsfd);
1917     }
1918     return ZEBRA_OK;
1919 }
1920
1921 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1922                                const int *attributeSet, NMEM stream,
1923                                Z_SortKeySpecList *sort_sequence,
1924                                const char *rank_type,
1925                                NMEM rset_nmem,
1926                                RSET *rset,
1927                                struct rset_key_control *kc)
1928 {
1929     int i;
1930     int sort_relation_value;
1931     AttrType sort_relation_type;
1932     Z_SortKeySpec *sks;
1933     Z_SortKey *sk;
1934     char termz[20];
1935     
1936     attr_init_APT(&sort_relation_type, zapt, 7);
1937     sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
1938
1939     if (!sort_sequence->specs)
1940     {
1941         sort_sequence->num_specs = 10;
1942         sort_sequence->specs = (Z_SortKeySpec **)
1943             nmem_malloc(stream, sort_sequence->num_specs *
1944                          sizeof(*sort_sequence->specs));
1945         for (i = 0; i<sort_sequence->num_specs; i++)
1946             sort_sequence->specs[i] = 0;
1947     }
1948     if (zapt->term->which != Z_Term_general)
1949         i = 0;
1950     else
1951         i = atoi_n ((char *) zapt->term->u.general->buf,
1952                     zapt->term->u.general->len);
1953     if (i >= sort_sequence->num_specs)
1954         i = 0;
1955     sprintf(termz, "%d", i);
1956
1957     sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1958     sks->sortElement = (Z_SortElement *)
1959         nmem_malloc(stream, sizeof(*sks->sortElement));
1960     sks->sortElement->which = Z_SortElement_generic;
1961     sk = sks->sortElement->u.generic = (Z_SortKey *)
1962         nmem_malloc(stream, sizeof(*sk));
1963     sk->which = Z_SortKey_sortAttributes;
1964     sk->u.sortAttributes = (Z_SortAttributes *)
1965         nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
1966
1967     sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet);
1968     sk->u.sortAttributes->list = zapt->attributes;
1969
1970     sks->sortRelation = (int *)
1971         nmem_malloc(stream, sizeof(*sks->sortRelation));
1972     if (sort_relation_value == 1)
1973         *sks->sortRelation = Z_SortKeySpec_ascending;
1974     else if (sort_relation_value == 2)
1975         *sks->sortRelation = Z_SortKeySpec_descending;
1976     else 
1977         *sks->sortRelation = Z_SortKeySpec_ascending;
1978
1979     sks->caseSensitivity = (int *)
1980         nmem_malloc(stream, sizeof(*sks->caseSensitivity));
1981     *sks->caseSensitivity = 0;
1982
1983     sks->which = Z_SortKeySpec_null;
1984     sks->u.null = odr_nullval ();
1985     sort_sequence->specs[i] = sks;
1986     *rset = rset_create_null(rset_nmem, kc, 0);
1987     return ZEBRA_OK;
1988 }
1989
1990
1991 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1992                            const int *attributeSet,
1993                            struct xpath_location_step *xpath, int max,
1994                            NMEM mem)
1995 {
1996     const int *curAttributeSet = attributeSet;
1997     AttrType use;
1998     const char *use_string = 0;
1999     
2000     attr_init_APT(&use, zapt, 1);
2001     attr_find_ex(&use, &curAttributeSet, &use_string);
2002
2003     if (!use_string || *use_string != '/')
2004         return -1;
2005
2006     return zebra_parse_xpath_str(use_string, xpath, max, mem);
2007 }
2008  
2009                
2010
2011 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
2012                         int reg_type, const char *term, 
2013                         const char *xpath_use,
2014                         NMEM rset_nmem,
2015                         struct rset_key_control *kc)
2016 {
2017     RSET rset;
2018     struct grep_info grep_info;
2019     char term_dict[2048];
2020     char ord_buf[32];
2021     int prefix_len = 0;
2022     int ord = zebraExplain_lookup_attr_str(zh->reg->zei, 
2023                                            zinfo_index_category_index,
2024                                            reg_type,
2025                                            xpath_use);
2026     int ord_len, i, r, max_pos;
2027     int term_type = Z_Term_characterString;
2028     const char *flags = "void";
2029
2030     if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, '0') == ZEBRA_FAIL)
2031         return rset_create_null(rset_nmem, kc, 0);
2032     
2033     if (ord < 0)
2034         return rset_create_null(rset_nmem, kc, 0);
2035     if (prefix_len)
2036         term_dict[prefix_len++] = '|';
2037     else
2038         term_dict[prefix_len++] = '(';
2039     
2040     ord_len = key_SU_encode (ord, ord_buf);
2041     for (i = 0; i<ord_len; i++)
2042     {
2043         term_dict[prefix_len++] = 1;
2044         term_dict[prefix_len++] = ord_buf[i];
2045     }
2046     term_dict[prefix_len++] = ')';
2047     strcpy(term_dict+prefix_len, term);
2048     
2049     grep_info.isam_p_indx = 0;
2050     r = dict_lookup_grep(zh->reg->dict, term_dict, 0,
2051                           &grep_info, &max_pos, 0, grep_handle);
2052     yaz_log(YLOG_DEBUG, "%s %d positions", term,
2053              grep_info.isam_p_indx);
2054     rset = rset_trunc(zh, grep_info.isam_p_buf,
2055                       grep_info.isam_p_indx, term, strlen(term),
2056                       flags, 1, term_type,rset_nmem,
2057                       kc, kc->scope, 0, reg_type, 0 /* hits_limit */,
2058                       0 /* term_ref_id_str */);
2059     grep_info_delete(&grep_info);
2060     return rset;
2061 }
2062
2063 static
2064 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2065                            int num_bases, char **basenames,
2066                            NMEM stream, const char *rank_type, RSET rset,
2067                            int xpath_len, struct xpath_location_step *xpath,
2068                            NMEM rset_nmem,
2069                            RSET *rset_out,
2070                            struct rset_key_control *kc)
2071 {
2072     int base_no;
2073     int i;
2074     int always_matches = rset ? 0 : 1;
2075
2076     if (xpath_len < 0)
2077     {
2078         *rset_out = rset;
2079         return ZEBRA_OK;
2080     }
2081
2082     yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2083     for (i = 0; i<xpath_len; i++)
2084     {
2085         yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2086
2087     }
2088
2089     /*
2090       //a    ->    a/.*
2091       //a/b  ->    b/a/.*
2092       /a     ->    a/
2093       /a/b   ->    b/a/
2094
2095       /      ->    none
2096
2097    a[@attr = value]/b[@other = othervalue]
2098
2099  /e/@a val      range(e/,range(@a,freetext(w,1015,val),@a),e/)
2100  /a/b val       range(b/a/,freetext(w,1016,val),b/a/)
2101  /a/b/@c val    range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2102  /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2103  /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2104  /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
2105       
2106     */
2107
2108     dict_grep_cmap (zh->reg->dict, 0, 0);
2109
2110     for (base_no = 0; base_no < num_bases; base_no++)
2111     {
2112         int level = xpath_len;
2113         int first_path = 1;
2114         
2115         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2116         {
2117             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2118                            basenames[base_no]);
2119             *rset_out = rset;
2120             return ZEBRA_FAIL;
2121         }
2122         while (--level >= 0)
2123         {
2124             WRBUF xpath_rev = wrbuf_alloc();
2125             int i;
2126             RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
2127
2128             for (i = level; i >= 1; --i)
2129             {
2130                 const char *cp = xpath[i].part;
2131                 if (*cp)
2132                 {
2133                     for (; *cp; cp++)
2134                     {
2135                         if (*cp == '*')
2136                             wrbuf_puts(xpath_rev, "[^/]*");
2137                         else if (*cp == ' ')
2138                             wrbuf_puts(xpath_rev, "\001 ");
2139                         else
2140                             wrbuf_putc(xpath_rev, *cp);
2141
2142                         /* wrbuf_putc does not null-terminate , but
2143                            wrbuf_puts below ensures it does.. so xpath_rev
2144                            is OK iff length is > 0 */
2145                     }
2146                     wrbuf_puts(xpath_rev, "/");
2147                 }
2148                 else if (i == 1)  /* // case */
2149                     wrbuf_puts(xpath_rev, ".*");
2150             }
2151             if (xpath[level].predicate &&
2152                 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2153                 xpath[level].predicate->u.relation.name[0])
2154             {
2155                 WRBUF wbuf = wrbuf_alloc();
2156                 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2157                 if (xpath[level].predicate->u.relation.value)
2158                 {
2159                     const char *cp = xpath[level].predicate->u.relation.value;
2160                     wrbuf_putc(wbuf, '=');
2161                     
2162                     while (*cp)
2163                     {
2164                         if (strchr(REGEX_CHARS, *cp))
2165                             wrbuf_putc(wbuf, '\\');
2166                         wrbuf_putc(wbuf, *cp);
2167                         cp++;
2168                     }
2169                 }
2170                 rset_attr = xpath_trunc(
2171                     zh, stream, '0', wrbuf_cstr(wbuf), ZEBRA_XPATH_ATTR_NAME, 
2172                     rset_nmem, kc);
2173                 wrbuf_destroy(wbuf);
2174             } 
2175             else 
2176             {
2177                 if (!first_path)
2178                 {
2179                     wrbuf_destroy(xpath_rev);
2180                     continue;
2181                 }
2182             }
2183             yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level, 
2184                     wrbuf_cstr(xpath_rev));
2185             if (wrbuf_len(xpath_rev))
2186             {
2187                 rset_start_tag = xpath_trunc(zh, stream, '0', 
2188                                              wrbuf_cstr(xpath_rev),
2189                                              ZEBRA_XPATH_ELM_BEGIN, 
2190                                              rset_nmem, kc);
2191                 if (always_matches)
2192                     rset = rset_start_tag;
2193                 else
2194                 {
2195                     rset_end_tag = xpath_trunc(zh, stream, '0', 
2196                                                wrbuf_cstr(xpath_rev),
2197                                                ZEBRA_XPATH_ELM_END, 
2198                                                rset_nmem, kc);
2199                     
2200                     rset = rset_create_between(rset_nmem, kc, kc->scope,
2201                                                rset_start_tag, rset,
2202                                                rset_end_tag, rset_attr);
2203                 }
2204             }
2205             wrbuf_destroy(xpath_rev);
2206             first_path = 0;
2207         }
2208     }
2209     *rset_out = rset;
2210     return ZEBRA_OK;
2211 }
2212
2213 #define MAX_XPATH_STEPS 10
2214
2215 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2216                                 const int *attributeSet, NMEM stream,
2217                                 Z_SortKeySpecList *sort_sequence,
2218                                 int num_bases, char **basenames, 
2219                                 NMEM rset_nmem,
2220                                 RSET *rset,
2221                                 struct rset_key_control *kc)
2222 {
2223     ZEBRA_RES res = ZEBRA_OK;
2224     unsigned reg_id;
2225     char *search_type = NULL;
2226     char rank_type[128];
2227     int complete_flag;
2228     int sort_flag;
2229     char termz[IT_MAX_WORD+1];
2230     int xpath_len;
2231     const char *xpath_use = 0;
2232     struct xpath_location_step xpath[MAX_XPATH_STEPS];
2233
2234     if (!log_level_set)
2235     {
2236         log_level_rpn = yaz_log_module_level("rpn");
2237         log_level_set = 1;
2238     }
2239     zebra_maps_attr(zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2240                     rank_type, &complete_flag, &sort_flag);
2241     
2242     yaz_log(YLOG_DEBUG, "reg_id=%c", reg_id);
2243     yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2244     yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2245     yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2246
2247     if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2248         return ZEBRA_FAIL;
2249
2250     if (sort_flag)
2251         return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2252                              rank_type, rset_nmem, rset, kc);
2253     /* consider if an X-Path query is used */
2254     xpath_len = rpn_check_xpath(zh, zapt, attributeSet, 
2255                                 xpath, MAX_XPATH_STEPS, stream);
2256     if (xpath_len >= 0)
2257     {
2258         if (xpath[xpath_len-1].part[0] == '@') 
2259             xpath_use = ZEBRA_XPATH_ATTR_CDATA;  /* last step is attribute  */
2260         else
2261             xpath_use = ZEBRA_XPATH_CDATA;  /* searching for cdata */        
2262
2263         if (1)
2264         {
2265             AttrType relation;
2266             int relation_value;
2267
2268             attr_init_APT(&relation, zapt, 2);
2269             relation_value = attr_find(&relation, NULL);
2270
2271             if (relation_value == 103) /* alwaysmatches */
2272             {
2273                 *rset = 0; /* signal no "term" set */
2274                 return rpn_search_xpath(zh, num_bases, basenames,
2275                                         stream, rank_type, *rset, 
2276                                         xpath_len, xpath, rset_nmem, rset, kc);
2277             }
2278         }
2279     }
2280
2281     /* search using one of the various search type strategies
2282        termz is our UTF-8 search term
2283        attributeSet is top-level default attribute set 
2284        stream is ODR for search
2285        reg_id is the register type
2286        complete_flag is 1 for complete subfield, 0 for incomplete
2287        xpath_use is use-attribute to be used for X-Path search, 0 for none
2288     */
2289     if (!strcmp(search_type, "phrase"))
2290     {
2291         res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2292                                     reg_id, complete_flag, rank_type,
2293                                     xpath_use,
2294                                     num_bases, basenames, rset_nmem,
2295                                     rset, kc);
2296     }
2297     else if (!strcmp(search_type, "and-list"))
2298     {
2299         res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2300                                       reg_id, complete_flag, rank_type,
2301                                       xpath_use,
2302                                       num_bases, basenames, rset_nmem,
2303                                       rset, kc);
2304     }
2305     else if (!strcmp(search_type, "or-list"))
2306     {
2307         res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2308                                      reg_id, complete_flag, rank_type,
2309                                      xpath_use,
2310                                      num_bases, basenames, rset_nmem,
2311                                      rset, kc);
2312     }
2313     else if (!strcmp(search_type, "local"))
2314     {
2315         res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2316                                    rank_type, rset_nmem, rset, kc);
2317     }
2318     else if (!strcmp(search_type, "numeric"))
2319     {
2320         res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2321                                      reg_id, complete_flag, rank_type,
2322                                      xpath_use,
2323                                      num_bases, basenames, rset_nmem,
2324                                      rset, kc);
2325     }
2326     else
2327     {
2328         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2329         res = ZEBRA_FAIL;
2330     }
2331     if (res != ZEBRA_OK)
2332         return res;
2333     if (!*rset)
2334         return ZEBRA_FAIL;
2335     return rpn_search_xpath(zh, num_bases, basenames,
2336                             stream, rank_type, *rset, 
2337                             xpath_len, xpath, rset_nmem, rset, kc);
2338 }
2339
2340 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2341                                       const int *attributeSet, 
2342                                       NMEM stream, NMEM rset_nmem,
2343                                       Z_SortKeySpecList *sort_sequence,
2344                                       int num_bases, char **basenames,
2345                                       RSET **result_sets, int *num_result_sets,
2346                                       Z_Operator *parent_op,
2347                                       struct rset_key_control *kc);
2348
2349 ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs,
2350                                    zint *approx_limit)
2351 {
2352     ZEBRA_RES res = ZEBRA_OK;
2353     if (zs->which == Z_RPNStructure_complex)
2354     {
2355         if (res == ZEBRA_OK)
2356             res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
2357                                            approx_limit);
2358         if (res == ZEBRA_OK)
2359             res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
2360                                            approx_limit);
2361     }
2362     else if (zs->which == Z_RPNStructure_simple)
2363     {
2364         if (zs->u.simple->which == Z_Operand_APT)
2365         {
2366             Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
2367             AttrType global_hits_limit_attr;
2368             int l;
2369             
2370             attr_init_APT(&global_hits_limit_attr, zapt, 12);
2371             
2372             l = attr_find(&global_hits_limit_attr, NULL);
2373             if (l != -1)
2374                 *approx_limit = l;
2375         }
2376     }
2377     return res;
2378 }
2379
2380 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2381                          const int *attributeSet, 
2382                          NMEM stream, NMEM rset_nmem,
2383                          Z_SortKeySpecList *sort_sequence,
2384                          int num_bases, char **basenames,
2385                          RSET *result_set)
2386 {
2387     RSET *result_sets = 0;
2388     int num_result_sets = 0;
2389     ZEBRA_RES res;
2390     struct rset_key_control *kc = zebra_key_control_create(zh);
2391
2392     res = rpn_search_structure(zh, zs, attributeSet,
2393                                stream, rset_nmem,
2394                                sort_sequence, 
2395                                num_bases, basenames,
2396                                &result_sets, &num_result_sets,
2397                                0 /* no parent op */,
2398                                kc);
2399     if (res != ZEBRA_OK)
2400     {
2401         int i;
2402         for (i = 0; i<num_result_sets; i++)
2403             rset_delete(result_sets[i]);
2404         *result_set = 0;
2405     }
2406     else
2407     {
2408         assert(num_result_sets == 1);
2409         assert(result_sets);
2410         assert(*result_sets);
2411         *result_set = *result_sets;
2412     }
2413     (*kc->dec)(kc);
2414     return res;
2415 }
2416
2417 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2418                                const int *attributeSet, 
2419                                NMEM stream, NMEM rset_nmem,
2420                                Z_SortKeySpecList *sort_sequence,
2421                                int num_bases, char **basenames,
2422                                RSET **result_sets, int *num_result_sets,
2423                                Z_Operator *parent_op,
2424                                struct rset_key_control *kc)
2425 {
2426     *num_result_sets = 0;
2427     if (zs->which == Z_RPNStructure_complex)
2428     {
2429         ZEBRA_RES res;
2430         Z_Operator *zop = zs->u.complex->roperator;
2431         RSET *result_sets_l = 0;
2432         int num_result_sets_l = 0;
2433         RSET *result_sets_r = 0;
2434         int num_result_sets_r = 0;
2435
2436         res = rpn_search_structure(zh, zs->u.complex->s1,
2437                                    attributeSet, stream, rset_nmem,
2438                                    sort_sequence,
2439                                    num_bases, basenames,
2440                                    &result_sets_l, &num_result_sets_l,
2441                                    zop, kc);
2442         if (res != ZEBRA_OK)
2443         {
2444             int i;
2445             for (i = 0; i<num_result_sets_l; i++)
2446                 rset_delete(result_sets_l[i]);
2447             return res;
2448         }
2449         res = rpn_search_structure(zh, zs->u.complex->s2,
2450                                    attributeSet, stream, rset_nmem,
2451                                    sort_sequence,
2452                                    num_bases, basenames,
2453                                    &result_sets_r, &num_result_sets_r,
2454                                    zop, kc);
2455         if (res != ZEBRA_OK)
2456         {
2457             int i;
2458             for (i = 0; i<num_result_sets_l; i++)
2459                 rset_delete(result_sets_l[i]);
2460             for (i = 0; i<num_result_sets_r; i++)
2461                 rset_delete(result_sets_r[i]);
2462             return res;
2463         }
2464
2465         /* make a new list of result for all children */
2466         *num_result_sets = num_result_sets_l + num_result_sets_r;
2467         *result_sets = nmem_malloc(stream, *num_result_sets * 
2468                                    sizeof(**result_sets));
2469         memcpy(*result_sets, result_sets_l, 
2470                num_result_sets_l * sizeof(**result_sets));
2471         memcpy(*result_sets + num_result_sets_l, result_sets_r, 
2472                num_result_sets_r * sizeof(**result_sets));
2473
2474         if (!parent_op || parent_op->which != zop->which
2475             || (zop->which != Z_Operator_and &&
2476                 zop->which != Z_Operator_or))
2477         {
2478             /* parent node different from this one (or non-present) */
2479             /* we must combine result sets now */
2480             RSET rset;
2481             switch (zop->which)
2482             {
2483             case Z_Operator_and:
2484                 rset = rset_create_and(rset_nmem, kc,
2485                                        kc->scope,
2486                                        *num_result_sets, *result_sets);
2487                 break;
2488             case Z_Operator_or:
2489                 rset = rset_create_or(rset_nmem, kc,
2490                                       kc->scope, 0, /* termid */
2491                                       *num_result_sets, *result_sets);
2492                 break;
2493             case Z_Operator_and_not:
2494                 rset = rset_create_not(rset_nmem, kc,
2495                                        kc->scope,
2496                                        (*result_sets)[0],
2497                                        (*result_sets)[1]);
2498                 break;
2499             case Z_Operator_prox:
2500                 if (zop->u.prox->which != Z_ProximityOperator_known)
2501                 {
2502                     zebra_setError(zh, 
2503                                    YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2504                                    0);
2505                     return ZEBRA_FAIL;
2506                 }
2507                 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2508                 {
2509                     zebra_setError_zint(zh,
2510                                         YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2511                                         *zop->u.prox->u.known);
2512                     return ZEBRA_FAIL;
2513                 }
2514                 else
2515                 {
2516                     rset = rset_create_prox(rset_nmem, kc,
2517                                             kc->scope,
2518                                             *num_result_sets, *result_sets, 
2519                                             *zop->u.prox->ordered,
2520                                             (!zop->u.prox->exclusion ? 
2521                                              0 : *zop->u.prox->exclusion),
2522                                             *zop->u.prox->relationType,
2523                                             *zop->u.prox->distance );
2524                 }
2525                 break;
2526             default:
2527                 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
2528                 return ZEBRA_FAIL;
2529             }
2530             *num_result_sets = 1;
2531             *result_sets = nmem_malloc(stream, *num_result_sets * 
2532                                        sizeof(**result_sets));
2533             (*result_sets)[0] = rset;
2534         }
2535     }
2536     else if (zs->which == Z_RPNStructure_simple)
2537     {
2538         RSET rset;
2539         ZEBRA_RES res;
2540
2541         if (zs->u.simple->which == Z_Operand_APT)
2542         {
2543             yaz_log(YLOG_DEBUG, "rpn_search_APT");
2544             res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2545                                  attributeSet, stream, sort_sequence,
2546                                  num_bases, basenames, rset_nmem, &rset,
2547                                  kc);
2548             if (res != ZEBRA_OK)
2549                 return res;
2550         }
2551         else if (zs->u.simple->which == Z_Operand_resultSetId)
2552         {
2553             yaz_log(YLOG_DEBUG, "rpn_search_ref");
2554             rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2555             if (!rset)
2556             {
2557                 zebra_setError(zh, 
2558                                YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2559                                zs->u.simple->u.resultSetId);
2560                 return ZEBRA_FAIL;
2561             }
2562             rset_dup(rset);
2563         }
2564         else
2565         {
2566             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2567             return ZEBRA_FAIL;
2568         }
2569         *num_result_sets = 1;
2570         *result_sets = nmem_malloc(stream, *num_result_sets * 
2571                                    sizeof(**result_sets));
2572         (*result_sets)[0] = rset;
2573     }
2574     else
2575     {
2576         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2577         return ZEBRA_FAIL;
2578     }
2579     return ZEBRA_OK;
2580 }
2581
2582
2583
2584 /*
2585  * Local variables:
2586  * c-basic-offset: 4
2587  * indent-tabs-mode: nil
2588  * End:
2589  * vim: shiftwidth=4 tabstop=8 expandtab
2590  */
2591