ICU term lists are handled for search.
[idzebra-moved-to-github.git] / index / rpnsearch.c
1 /* $Id: rpnsearch.c,v 1.27 2007-12-07 14:09:09 adam Exp $
2    Copyright (C) 1995-2007
3    Index Data ApS
4
5 This file is part of the Zebra server.
6
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
20
21 */
22
23 #include <stdio.h>
24 #include <assert.h>
25 #ifdef WIN32
26 #include <io.h>
27 #endif
28 #if HAVE_UNISTD_H
29 #include <unistd.h>
30 #endif
31 #include <ctype.h>
32
33 #include <yaz/diagbib1.h>
34 #include "index.h"
35 #include <zebra_xpath.h>
36 #include <attrfind.h>
37 #include <charmap.h>
38 #include <rset.h>
39
40 static int log_level_set = 0;
41 static int log_level_rpn = 0;
42
43 #define TERMSET_DISABLE 1
44
45 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
46 {
47     struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
48     const char **out = zebra_maps_input(p->zm, from, len, 0);
49 #if 0
50     if (out && *out)
51     {
52         const char *outp = *out;
53         yaz_log(YLOG_LOG, "---");
54         while (*outp)
55         {
56             yaz_log(YLOG_LOG, "%02X", *outp);
57             outp++;
58         }
59     }
60 #endif
61     return out;
62 }
63
64 void rpn_char_map_prepare(struct zebra_register *reg, zebra_map_t zm,
65                           struct rpn_char_map_info *map_info)
66 {
67     map_info->zm = zm;
68     if (zebra_maps_is_icu(zm))
69         dict_grep_cmap(reg->dict, 0, 0);
70     else
71         dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
72 }
73
74 #define TERM_COUNT        
75        
76 struct grep_info {        
77 #ifdef TERM_COUNT        
78     int *term_no;        
79 #endif        
80     ISAM_P *isam_p_buf;
81     int isam_p_size;        
82     int isam_p_indx;
83     int trunc_max;
84     ZebraHandle zh;
85     const char *index_type;
86     ZebraSet termset;
87 };        
88
89 static int add_isam_p(const char *name, const char *info,
90                       struct grep_info *p)
91 {
92     if (!log_level_set)
93     {
94         log_level_rpn = yaz_log_module_level("rpn");
95         log_level_set = 1;
96     }
97     /* we may have to stop this madness.. NOTE: -1 so that if
98        truncmax == trunxlimit we do *not* generate result sets */
99     if (p->isam_p_indx >= p->trunc_max - 1)
100         return 1;
101
102     if (p->isam_p_indx == p->isam_p_size)
103     {
104         ISAM_P *new_isam_p_buf;
105 #ifdef TERM_COUNT        
106         int *new_term_no;        
107 #endif
108         p->isam_p_size = 2*p->isam_p_size + 100;
109         new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
110                                             p->isam_p_size);
111         if (p->isam_p_buf)
112         {
113             memcpy(new_isam_p_buf, p->isam_p_buf,
114                    p->isam_p_indx * sizeof(*p->isam_p_buf));
115             xfree(p->isam_p_buf);
116         }
117         p->isam_p_buf = new_isam_p_buf;
118
119 #ifdef TERM_COUNT
120         new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
121         if (p->term_no)
122         {
123             memcpy(new_term_no, p->isam_p_buf,
124                    p->isam_p_indx * sizeof(*p->term_no));
125             xfree(p->term_no);
126         }
127         p->term_no = new_term_no;
128 #endif
129     }
130     assert(*info == sizeof(*p->isam_p_buf));
131     memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
132
133     if (p->termset)
134     {
135         const char *db;
136         char term_tmp[IT_MAX_WORD];
137         int ord = 0;
138         const char *index_name;
139         int len = key_SU_decode(&ord, (const unsigned char *) name);
140         
141         zebra_term_untrans (p->zh, p->index_type, term_tmp, name+len);
142         yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
143         zebraExplain_lookup_ord(p->zh->reg->zei,
144                                 ord, 0 /* index_type */, &db, &index_name);
145         yaz_log(log_level_rpn, "grep:  db=%s index=%s", db, index_name);
146         
147         resultSetAddTerm(p->zh, p->termset, name[len], db,
148                          index_name, term_tmp);
149     }
150     (p->isam_p_indx)++;
151     return 0;
152 }
153
/* Grep callback adapter: p is the struct grep_info that receives the
 * match; the work is done by add_isam_p(), whose result is returned.
 */
static int grep_handle(char *name, const char *info, void *p)
{
    struct grep_info *gi = (struct grep_info *) p;

    return add_isam_p(name, info, gi);
}
158
159 static int term_pre(zebra_map_t zm, const char **src,
160                     const char *ct1, const char *ct2, int first)
161 {
162     const char *s1, *s0 = *src;
163     const char **map;
164
165     /* skip white space */
166     while (*s0)
167     {
168         if (ct1 && strchr(ct1, *s0))
169             break;
170         if (ct2 && strchr(ct2, *s0))
171             break;
172         s1 = s0;
173         map = zebra_maps_input(zm, &s1, strlen(s1), first);
174         if (**map != *CHR_SPACE)
175             break;
176         s0 = s1;
177     }
178     *src = s0;
179     return *s0;
180 }
181
182
/* esc_str - render bytes as a printable dump ("41:A  0A:?  ...")
 *  out_buf/out_size: destination; out_size must exceed 20
 *  in_buf/in_size:   bytes to dump
 * Every byte becomes "HH:c  " where c is the byte itself when it is
 * printable ASCII (32..126) and '?' otherwise.  When fewer than 20
 * bytes of room remain, ".." is appended and the dump stops.
 */
static void esc_str(char *out_buf, size_t out_size,
                    const char *in_buf, int in_size)
{
    size_t used = 0;     /* characters written so far */
    int idx;

    assert(out_buf);
    assert(in_buf);
    assert(out_size > 20);
    out_buf[0] = '\0';
    for (idx = 0; idx < in_size; idx++)
    {
        int byte = in_buf[idx] & 0xff;
        int shown = (byte >= 32 && byte <= 126) ? byte : '?';

        used += sprintf(out_buf + used, "%02X:%c  ", byte, shown);
        if (used > out_size - 20)
        {
            strcpy(out_buf + used, "..");
            break;
        }
    }
}
208
209 #define REGEX_CHARS " []()|.*+?!"
210
211 static void add_non_space(const char *start, const char *end,
212                           WRBUF term_dict,
213                           char *dst_term, int *dst_ptr,
214                           const char **map, int q_map_match)
215 {
216     size_t sz = end - start;
217     memcpy(dst_term + *dst_ptr, start, sz);
218     (*dst_ptr) += sz;
219     if (!q_map_match)
220     {
221         while (start < end)
222         {
223             if (strchr(REGEX_CHARS, *start))
224                 wrbuf_putc(term_dict, '\\');
225             wrbuf_putc(term_dict, *start);
226             start++;
227         }
228     }
229     else
230     {
231         char tmpbuf[80];
232         esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
233         
234         wrbuf_puts(term_dict, map[0]);
235     }
236 }
237
238
239 static int term_100_icu(zebra_map_t zm,
240                         const char **src, WRBUF term_dict, int space_split,
241                         char *dst_term)
242 {
243     int i;
244     const char *res_buf = 0;
245     size_t res_len = 0;
246     if (!zebra_map_tokenize_next(zm, &res_buf, &res_len))
247     {
248         *src += strlen(*src);
249         return 0;
250     }
251     strcat(dst_term, *src);
252     for (i = 0; i < res_len; i++)
253     {
254         if (strchr(REGEX_CHARS, res_buf[i]))
255             wrbuf_putc(term_dict, '\\');
256         if (res_buf[i] < 32)
257             wrbuf_putc(term_dict, 1);
258         wrbuf_putc(term_dict, res_buf[i]);
259     }
260     return 1;
261 }
262
263 /* term_100: handle term, where trunc = none(no operators at all) */
264 static int term_100(zebra_map_t zm,
265                     const char **src, WRBUF term_dict, int space_split,
266                     char *dst_term)
267 {
268     const char *s0;
269     const char **map;
270     int i = 0;
271     int j = 0;
272
273     const char *space_start = 0;
274     const char *space_end = 0;
275
276     if (zebra_maps_is_icu(zm))
277         return term_100_icu(zm, src, term_dict, space_split, dst_term);
278
279     if (!term_pre(zm, src, NULL, NULL, !space_split))
280         return 0;
281     s0 = *src;
282     while (*s0)
283     {
284         const char *s1 = s0;
285         int q_map_match = 0;
286         map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
287         if (space_split)
288         {
289             if (**map == *CHR_SPACE)
290                 break;
291         }
292         else  /* complete subfield only. */
293         {
294             if (**map == *CHR_SPACE)
295             {   /* save space mapping for later  .. */
296                 space_start = s1;
297                 space_end = s0;
298                 continue;
299             }
300             else if (space_start)
301             {   /* reload last space */
302                 while (space_start < space_end)
303                 {
304                     if (strchr(REGEX_CHARS, *space_start))
305                         wrbuf_putc(term_dict, '\\');
306                     dst_term[j++] = *space_start;
307                     wrbuf_putc(term_dict, *space_start);
308                     space_start++;
309                                
310                 }
311                 /* and reset */
312                 space_start = space_end = 0;
313             }
314         }
315         i++;
316
317         add_non_space(s1, s0, term_dict, dst_term, &j,
318                       map, q_map_match);
319     }
320     dst_term[j] = '\0';
321     *src = s0;
322     return i;
323 }
324
325 /* term_101: handle term, where trunc = Process # */
326 static int term_101(zebra_map_t zm,
327                     const char **src, WRBUF term_dict, int space_split,
328                     char *dst_term)
329 {
330     const char *s0;
331     const char **map;
332     int i = 0;
333     int j = 0;
334
335     if (!term_pre(zm, src, "#", "#", !space_split))
336         return 0;
337     s0 = *src;
338     while (*s0)
339     {
340         if (*s0 == '#')
341         {
342             i++;
343             wrbuf_puts(term_dict, ".*");
344             dst_term[j++] = *s0++;
345         }
346         else
347         {
348             const char *s1 = s0;
349             int q_map_match = 0;
350             map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
351             if (space_split && **map == *CHR_SPACE)
352                 break;
353
354             i++;
355             add_non_space(s1, s0, term_dict, dst_term, &j,
356                           map, q_map_match);
357         }
358     }
359     dst_term[j++] = '\0';
360     *src = s0;
361     return i;
362 }
363
364 /* term_103: handle term, where trunc = re-2 (regular expressions) */
365 static int term_103(zebra_map_t zm, const char **src,
366                     WRBUF term_dict, int *errors, int space_split,
367                     char *dst_term)
368 {
369     int i = 0;
370     int j = 0;
371     const char *s0;
372     const char **map;
373
374     if (!term_pre(zm, src, "^\\()[].*+?|", "(", !space_split))
375         return 0;
376     s0 = *src;
377     if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
378         isdigit(((const unsigned char *)s0)[1]))
379     {
380         *errors = s0[1] - '0';
381         s0 += 3;
382         if (*errors > 3)
383             *errors = 3;
384     }
385     while (*s0)
386     {
387         if (strchr("^\\()[].*+?|-", *s0))
388         {
389             dst_term[j++] = *s0;
390             wrbuf_putc(term_dict, *s0);
391             s0++;
392             i++;
393         }
394         else
395         {
396             const char *s1 = s0;
397             int q_map_match = 0;
398             map = zebra_maps_search(zm, &s0, strlen(s0),  &q_map_match);
399             if (space_split && **map == *CHR_SPACE)
400                 break;
401
402             i++;
403             add_non_space(s1, s0, term_dict, dst_term, &j,
404                           map, q_map_match);
405         }
406     }
407     dst_term[j] = '\0';
408     *src = s0;
409     
410     return i;
411 }
412
413 /* term_103: handle term, where trunc = re-1 (regular expressions) */
414 static int term_102(zebra_map_t zm, const char **src,
415                     WRBUF term_dict, int space_split, char *dst_term)
416 {
417     return term_103(zm, src, term_dict, NULL, space_split, dst_term);
418 }
419
420
421 /* term_104: handle term, process # and ! */
422 static int term_104(zebra_map_t zm, const char **src, 
423                     WRBUF term_dict, int space_split, char *dst_term)
424 {
425     const char *s0;
426     const char **map;
427     int i = 0;
428     int j = 0;
429
430     if (!term_pre(zm, src, "?*#", "?*#", !space_split))
431         return 0;
432     s0 = *src;
433     while (*s0)
434     {
435         if (*s0 == '?')
436         {
437             i++;
438             dst_term[j++] = *s0++;
439             if (*s0 >= '0' && *s0 <= '9')
440             {
441                 int limit = 0;
442                 while (*s0 >= '0' && *s0 <= '9')
443                 {
444                     limit = limit * 10 + (*s0 - '0');
445                     dst_term[j++] = *s0++;
446                 }
447                 if (limit > 20)
448                     limit = 20;
449                 while (--limit >= 0)
450                 {
451                     wrbuf_puts(term_dict, ".?");
452                 }
453             }
454             else
455             {
456                 wrbuf_puts(term_dict, ".*");
457             }
458         }
459         else if (*s0 == '*')
460         {
461             i++;
462             wrbuf_puts(term_dict, ".*");
463             dst_term[j++] = *s0++;
464         }
465         else if (*s0 == '#')
466         {
467             i++;
468             wrbuf_puts(term_dict, ".");
469             dst_term[j++] = *s0++;
470         }
471         else
472         {
473             const char *s1 = s0;
474             int q_map_match = 0;
475             map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
476             if (space_split && **map == *CHR_SPACE)
477                 break;
478
479             i++;
480             add_non_space(s1, s0, term_dict, dst_term, &j,
481                           map, q_map_match);
482         }
483     }
484     dst_term[j++] = '\0';
485     *src = s0;
486     return i;
487 }
488
489 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
490 static int term_105(zebra_map_t zm, const char **src, 
491                     WRBUF term_dict, int space_split,
492                     char *dst_term, int right_truncate)
493 {
494     const char *s0;
495     const char **map;
496     int i = 0;
497     int j = 0;
498
499     if (!term_pre(zm, src, "*!", "*!", !space_split))
500         return 0;
501     s0 = *src;
502     while (*s0)
503     {
504         if (*s0 == '*')
505         {
506             i++;
507             wrbuf_puts(term_dict, ".*");
508             dst_term[j++] = *s0++;
509         }
510         else if (*s0 == '!')
511         {
512             i++;
513             wrbuf_putc(term_dict, '.');
514             dst_term[j++] = *s0++;
515         }
516         else
517         {
518             const char *s1 = s0;
519             int q_map_match = 0;
520             map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
521             if (space_split && **map == *CHR_SPACE)
522                 break;
523
524             i++;
525             add_non_space(s1, s0, term_dict, dst_term, &j,
526                           map, q_map_match);
527         }
528     }
529     if (right_truncate)
530         wrbuf_puts(term_dict, ".*");
531     dst_term[j++] = '\0';
532     *src = s0;
533     return i;
534 }
535
536
537 /* gen_regular_rel - generate regular expression from relation
538  *  val:     border value (inclusive)
539  *  islt:    1 if <=; 0 if >=.
540  */
541 static void gen_regular_rel(WRBUF term_dict, int val, int islt)
542 {
543     char dst_buf[20*5*20]; /* assuming enough for expansion */
544     char *dst = dst_buf;
545     int dst_p;
546     int w, d, i;
547     int pos = 0;
548     char numstr[20];
549
550     yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
551     if (val >= 0)
552     {
553         if (islt)
554             strcpy(dst, "(-[0-9]+|(");
555         else
556             strcpy(dst, "((");
557     } 
558     else
559     {
560         if (!islt)
561         {
562             strcpy(dst, "([0-9]+|-(");
563             islt = 1;
564         }
565         else
566         {
567             strcpy(dst, "(-(");
568             islt = 0;
569         }
570         val = -val;
571     }
572     dst_p = strlen(dst);
573     sprintf(numstr, "%d", val);
574     for (w = strlen(numstr); --w >= 0; pos++)
575     {
576         d = numstr[w];
577         if (pos > 0)
578         {
579             if (islt)
580             {
581                 if (d == '0')
582                     continue;
583                 d--;
584             } 
585             else
586             {
587                 if (d == '9')
588                     continue;
589                 d++;
590             }
591         }
592         
593         strcpy(dst + dst_p, numstr);
594         dst_p = strlen(dst) - pos - 1;
595
596         if (islt)
597         {
598             if (d != '0')
599             {
600                 dst[dst_p++] = '[';
601                 dst[dst_p++] = '0';
602                 dst[dst_p++] = '-';
603                 dst[dst_p++] = d;
604                 dst[dst_p++] = ']';
605             }
606             else
607                 dst[dst_p++] = d;
608         }
609         else
610         {
611             if (d != '9')
612             { 
613                 dst[dst_p++] = '[';
614                 dst[dst_p++] = d;
615                 dst[dst_p++] = '-';
616                 dst[dst_p++] = '9';
617                 dst[dst_p++] = ']';
618             }
619             else
620                 dst[dst_p++] = d;
621         }
622         for (i = 0; i<pos; i++)
623         {
624             dst[dst_p++] = '[';
625             dst[dst_p++] = '0';
626             dst[dst_p++] = '-';
627             dst[dst_p++] = '9';
628             dst[dst_p++] = ']';
629         }
630         dst[dst_p++] = '|';
631     }
632     dst[dst_p] = '\0';
633     if (islt)
634     {
635         /* match everything less than 10^(pos-1) */
636         strcat(dst, "0*");
637         for (i = 1; i<pos; i++)
638             strcat(dst, "[0-9]?");
639     }
640     else
641     {
642         /* match everything greater than 10^pos */
643         for (i = 0; i <= pos; i++)
644             strcat(dst, "[0-9]");
645         strcat(dst, "[0-9]*");
646     }
647     strcat(dst, "))");
648     wrbuf_puts(term_dict, dst);
649 }
650
651 void string_rel_add_char(WRBUF term_p, WRBUF wsrc, int *indx)
652 {
653     const char *src = wrbuf_cstr(wsrc);
654     if (src[*indx] == '\\')
655     {
656         wrbuf_putc(term_p, src[*indx]);
657         (*indx)++;
658     }
659     wrbuf_putc(term_p, src[*indx]);
660     (*indx)++;
661 }
662
663 /*
664  *   >  abc     ([b-].*|a[c-].*|ab[d-].*|abc.+)
665  *              ([^-a].*|a[^-b].*ab[^-c].*|abc.+)
666  *   >= abc     ([b-].*|a[c-].*|ab[c-].*)
667  *              ([^-a].*|a[^-b].*|ab[c-].*)
668  *   <  abc     ([-0].*|a[-a].*|ab[-b].*)
669  *              ([^a-].*|a[^b-].*|ab[^c-].*)
670  *   <= abc     ([-0].*|a[-a].*|ab[-b].*|abc)
671  *              ([^a-].*|a[^b-].*|ab[^c-].*|abc)
672  */
673 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
674                            const char **term_sub, WRBUF term_dict,
675                            const Odr_oid *attributeSet,
676                            zebra_map_t zm, int space_split, char *term_dst,
677                            int *error_code)
678 {
679     AttrType relation;
680     int relation_value;
681     int i;
682     WRBUF term_component = wrbuf_alloc();
683
684     attr_init_APT(&relation, zapt, 2);
685     relation_value = attr_find(&relation, NULL);
686
687     *error_code = 0;
688     yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
689     switch (relation_value)
690     {
691     case 1:
692         if (!term_100(zm, term_sub, term_component, space_split, term_dst))
693         {
694             wrbuf_destroy(term_component);
695             return 0;
696         }
697         yaz_log(log_level_rpn, "Relation <");
698         
699         wrbuf_putc(term_dict, '(');
700         for (i = 0; i < wrbuf_len(term_component); )
701         {
702             int j = 0;
703             
704             if (i)
705                 wrbuf_putc(term_dict, '|');
706             while (j < i)
707                 string_rel_add_char(term_dict, term_component, &j);
708
709             wrbuf_putc(term_dict, '[');
710
711             wrbuf_putc(term_dict, '^');
712             
713             wrbuf_putc(term_dict, 1);
714             wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
715             
716             string_rel_add_char(term_dict, term_component, &i);
717             wrbuf_putc(term_dict, '-');
718             
719             wrbuf_putc(term_dict, ']');
720             wrbuf_putc(term_dict, '.');
721             wrbuf_putc(term_dict, '*');
722         }
723         wrbuf_putc(term_dict, ')');
724         break;
725     case 2:
726         if (!term_100(zm, term_sub, term_component, space_split, term_dst))
727         {
728             wrbuf_destroy(term_component);
729             return 0;
730         }
731         yaz_log(log_level_rpn, "Relation <=");
732
733         wrbuf_putc(term_dict, '(');
734         for (i = 0; i < wrbuf_len(term_component); )
735         {
736             int j = 0;
737
738             while (j < i)
739                 string_rel_add_char(term_dict, term_component, &j);
740             wrbuf_putc(term_dict, '[');
741
742             wrbuf_putc(term_dict, '^');
743
744             wrbuf_putc(term_dict, 1);
745             wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
746
747             string_rel_add_char(term_dict, term_component, &i);
748             wrbuf_putc(term_dict, '-');
749
750             wrbuf_putc(term_dict, ']');
751             wrbuf_putc(term_dict, '.');
752             wrbuf_putc(term_dict, '*');
753
754             wrbuf_putc(term_dict, '|');
755         }
756         for (i = 0; i < wrbuf_len(term_component); )
757             string_rel_add_char(term_dict, term_component, &i);
758         wrbuf_putc(term_dict, ')');
759         break;
760     case 5:
761         if (!term_100(zm, term_sub, term_component, space_split, term_dst))
762         {
763             wrbuf_destroy(term_component);
764             return 0;
765         }
766         yaz_log(log_level_rpn, "Relation >");
767
768         wrbuf_putc(term_dict, '(');
769         for (i = 0; i < wrbuf_len(term_component); )
770         {
771             int j = 0;
772
773             while (j < i)
774                 string_rel_add_char(term_dict, term_component, &j);
775             wrbuf_putc(term_dict, '[');
776             
777             wrbuf_putc(term_dict, '^');
778             wrbuf_putc(term_dict, '-');
779             string_rel_add_char(term_dict, term_component, &i);
780
781             wrbuf_putc(term_dict, ']');
782             wrbuf_putc(term_dict, '.');
783             wrbuf_putc(term_dict, '*');
784
785             wrbuf_putc(term_dict, '|');
786         }
787         for (i = 0; i < wrbuf_len(term_component); )
788             string_rel_add_char(term_dict, term_component, &i);
789         wrbuf_putc(term_dict, '.');
790         wrbuf_putc(term_dict, '+');
791         wrbuf_putc(term_dict, ')');
792         break;
793     case 4:
794         if (!term_100(zm, term_sub, term_component, space_split, term_dst))
795         {
796             wrbuf_destroy(term_component);
797             return 0;
798         }
799         yaz_log(log_level_rpn, "Relation >=");
800
801         wrbuf_putc(term_dict, '(');
802         for (i = 0; i < wrbuf_len(term_component); )
803         {
804             int j = 0;
805
806             if (i)
807                 wrbuf_putc(term_dict, '|');
808             while (j < i)
809                 string_rel_add_char(term_dict, term_component, &j);
810             wrbuf_putc(term_dict, '[');
811
812             if (i < wrbuf_len(term_component)-1)
813             {
814                 wrbuf_putc(term_dict, '^');
815                 wrbuf_putc(term_dict, '-');
816                 string_rel_add_char(term_dict, term_component, &i);
817             }
818             else
819             {
820                 string_rel_add_char(term_dict, term_component, &i);
821                 wrbuf_putc(term_dict, '-');
822             }
823             wrbuf_putc(term_dict, ']');
824             wrbuf_putc(term_dict, '.');
825             wrbuf_putc(term_dict, '*');
826         }
827         wrbuf_putc(term_dict, ')');
828         break;
829     case 3:
830     case 102:
831     case -1:
832         if (!**term_sub)
833             return 1;
834         yaz_log(log_level_rpn, "Relation =");
835         if (!term_100(zm, term_sub, term_component, space_split, term_dst))
836         {
837             wrbuf_destroy(term_component);
838             return 0;
839         }
840         wrbuf_puts(term_dict, "(");
841         wrbuf_puts(term_dict, wrbuf_cstr(term_component));
842         wrbuf_puts(term_dict, ")");
843         break;
844     case 103:
845         yaz_log(log_level_rpn, "Relation always matches");
846         /* skip to end of term (we don't care what it is) */
847         while (**term_sub != '\0')
848             (*term_sub)++;
849         break;
850     default:
851         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
852         wrbuf_destroy(term_component);
853         return 0;
854     }
855     wrbuf_destroy(term_component);
856     return 1;
857 }
858
859 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
860                              const char **term_sub, 
861                              WRBUF term_dict,
862                              const Odr_oid *attributeSet, NMEM stream,
863                              struct grep_info *grep_info,
864                              const char *index_type, int complete_flag,
865                              char *term_dst,
866                              const char *xpath_use,
867                              struct ord_list **ol,
868                              zebra_map_t zm);
869
870 ZEBRA_RES zebra_term_limits_APT(ZebraHandle zh,
871                                 Z_AttributesPlusTerm *zapt,
872                                 zint *hits_limit_value,
873                                 const char **term_ref_id_str,
874                                 NMEM nmem)
875 {
876     AttrType term_ref_id_attr;
877     AttrType hits_limit_attr;
878     int term_ref_id_int;
879  
880     attr_init_APT(&hits_limit_attr, zapt, 11);
881     *hits_limit_value  = attr_find(&hits_limit_attr, NULL);
882
883     attr_init_APT(&term_ref_id_attr, zapt, 10);
884     term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
885     if (term_ref_id_int >= 0)
886     {
887         char *res = nmem_malloc(nmem, 20);
888         sprintf(res, "%d", term_ref_id_int);
889         *term_ref_id_str = res;
890     }
891
892     /* no limit given ? */
893     if (*hits_limit_value == -1)
894     {
895         if (*term_ref_id_str)
896         {
897             /* use global if term_ref is present */
898             *hits_limit_value = zh->approx_limit;
899         }
900         else
901         {
902             /* no counting if term_ref is not present */
903             *hits_limit_value = 0;
904         }
905     }
906     else if (*hits_limit_value == 0)
907     {
908         /* 0 is the same as global limit */
909         *hits_limit_value = zh->approx_limit;
910     }
911     yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
912             *term_ref_id_str ? *term_ref_id_str : "none",
913             *hits_limit_value);
914     return ZEBRA_OK;
915 }
916
917 /** \brief search for term (which may be truncated)
918  */
919 static ZEBRA_RES search_term(ZebraHandle zh,
920                              Z_AttributesPlusTerm *zapt,
921                              const char **term_sub, 
922                              const Odr_oid *attributeSet, NMEM stream,
923                              struct grep_info *grep_info,
924                              const char *index_type, int complete_flag,
925                              char *term_dst,
926                              const char *rank_type, 
927                              const char *xpath_use,
928                              NMEM rset_nmem,
929                              RSET *rset,
930                              struct rset_key_control *kc,
931                              zebra_map_t zm)
932 {
933     ZEBRA_RES res;
934     struct ord_list *ol;
935     zint hits_limit_value;
936     const char *term_ref_id_str = 0;
937     WRBUF term_dict = wrbuf_alloc();
938     *rset = 0;
939     zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
940                           stream);
941     grep_info->isam_p_indx = 0;
942     res = string_term(zh, zapt, term_sub, term_dict,
943                       attributeSet, stream, grep_info,
944                       index_type, complete_flag,
945                       term_dst, xpath_use, &ol, zm);
946     wrbuf_destroy(term_dict);
947     if (res != ZEBRA_OK)
948         return res;
949     if (!*term_sub)  /* no more terms ? */
950         return res;
951     yaz_log(log_level_rpn, "term: %s", term_dst);
952     *rset = rset_trunc(zh, grep_info->isam_p_buf,
953                        grep_info->isam_p_indx, term_dst,
954                        strlen(term_dst), rank_type, 1 /* preserve pos */,
955                        zapt->term->which, rset_nmem,
956                        kc, kc->scope, ol, index_type, hits_limit_value,
957                        term_ref_id_str);
958     if (!*rset)
959         return ZEBRA_FAIL;
960     return ZEBRA_OK;
961 }
962
963 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
964                              const char **term_sub, 
965                              WRBUF term_dict,
966                              const Odr_oid *attributeSet, NMEM stream,
967                              struct grep_info *grep_info,
968                              const char *index_type, int complete_flag,
969                              char *term_dst,
970                              const char *xpath_use,
971                              struct ord_list **ol,
972                              zebra_map_t zm)
973 {
974     int r;
975     AttrType truncation;
976     int truncation_value;
977     const char *termp;
978     struct rpn_char_map_info rcmi;
979
980     int space_split = complete_flag ? 0 : 1;
981     int ord = -1;
982     int regex_range = 0;
983     int max_pos, prefix_len = 0;
984     int relation_error;
985     char ord_buf[32];
986     int ord_len, i;
987
988     *ol = ord_list_create(stream);
989
990     rpn_char_map_prepare(zh->reg, zm, &rcmi);
991     attr_init_APT(&truncation, zapt, 5);
992     truncation_value = attr_find(&truncation, NULL);
993     yaz_log(log_level_rpn, "truncation value %d", truncation_value);
994
995     termp = *term_sub; /* start of term for each database */
996     
997     if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
998                           attributeSet, &ord) != ZEBRA_OK)
999     {
1000         *term_sub = 0;
1001         return ZEBRA_FAIL;
1002     }
1003     
1004     wrbuf_rewind(term_dict); /* new dictionary regexp term */
1005     
1006     *ol = ord_list_append(stream, *ol, ord);
1007     ord_len = key_SU_encode(ord, ord_buf);
1008     
1009     wrbuf_putc(term_dict, '(');
1010     
1011     for (i = 0; i<ord_len; i++)
1012     {
1013         wrbuf_putc(term_dict, 1);  /* our internal regexp escape char */
1014         wrbuf_putc(term_dict, ord_buf[i]);
1015     }
1016     wrbuf_putc(term_dict, ')');
1017     
1018     prefix_len = wrbuf_len(term_dict);
1019     
1020     switch (truncation_value)
1021     {
1022     case -1:         /* not specified */
1023     case 100:        /* do not truncate */
1024         if (!string_relation(zh, zapt, &termp, term_dict,
1025                              attributeSet,
1026                              zm, space_split, term_dst,
1027                              &relation_error))
1028         {
1029             if (relation_error)
1030             {
1031                 zebra_setError(zh, relation_error, 0);
1032                 return ZEBRA_FAIL;
1033             }
1034             *term_sub = 0;
1035             return ZEBRA_OK;
1036         }
1037         break;
1038     case 1:          /* right truncation */
1039         wrbuf_putc(term_dict, '(');
1040         if (!term_100(zm, &termp, term_dict, space_split, term_dst))
1041         {
1042             *term_sub = 0;
1043             return ZEBRA_OK;
1044         }
1045         wrbuf_puts(term_dict, ".*)");
1046         break;
1047     case 2:          /* keft truncation */
1048         wrbuf_puts(term_dict, "(.*");
1049         if (!term_100(zm, &termp, term_dict, space_split, term_dst))
1050         {
1051             *term_sub = 0;
1052             return ZEBRA_OK;
1053         }
1054         wrbuf_putc(term_dict, ')');
1055         break;
1056     case 3:          /* left&right truncation */
1057         wrbuf_puts(term_dict, "(.*");
1058         if (!term_100(zm, &termp, term_dict, space_split, term_dst))
1059         {
1060             *term_sub = 0;
1061             return ZEBRA_OK;
1062         }
1063         wrbuf_puts(term_dict, ".*)");
1064         break;
1065     case 101:        /* process # in term */
1066         wrbuf_putc(term_dict, '(');
1067         if (!term_101(zm, &termp, term_dict, space_split, term_dst))
1068         {
1069             *term_sub = 0;
1070             return ZEBRA_OK;
1071         }
1072         wrbuf_puts(term_dict, ")");
1073         break;
1074     case 102:        /* Regexp-1 */
1075         wrbuf_putc(term_dict, '(');
1076         if (!term_102(zm, &termp, term_dict, space_split, term_dst))
1077         {
1078             *term_sub = 0;
1079             return ZEBRA_OK;
1080         }
1081         wrbuf_putc(term_dict, ')');
1082         break;
1083     case 103:       /* Regexp-2 */
1084         regex_range = 1;
1085         wrbuf_putc(term_dict, '(');
1086         if (!term_103(zm, &termp, term_dict, &regex_range,
1087                       space_split, term_dst))
1088         {
1089             *term_sub = 0;
1090             return ZEBRA_OK;
1091         }
1092         wrbuf_putc(term_dict, ')');
1093         break;
1094     case 104:        /* process # and ! in term */
1095         wrbuf_putc(term_dict, '(');
1096         if (!term_104(zm, &termp, term_dict, space_split, term_dst))
1097         {
1098             *term_sub = 0;
1099             return ZEBRA_OK;
1100         }
1101         wrbuf_putc(term_dict, ')');
1102         break;
1103     case 105:        /* process * and ! in term */
1104         wrbuf_putc(term_dict, '(');
1105         if (!term_105(zm, &termp, term_dict, space_split, term_dst, 1))
1106         {
1107             *term_sub = 0;
1108             return ZEBRA_OK;
1109         }
1110         wrbuf_putc(term_dict, ')');
1111         break;
1112     case 106:        /* process * and ! in term */
1113         wrbuf_putc(term_dict, '(');
1114         if (!term_105(zm, &termp, term_dict, space_split, term_dst, 0))
1115         {
1116             *term_sub = 0;
1117             return ZEBRA_OK;
1118         }
1119         wrbuf_putc(term_dict, ')');
1120         break;
1121     default:
1122         zebra_setError_zint(zh,
1123                             YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1124                             truncation_value);
1125         return ZEBRA_FAIL;
1126     }
1127     if (1)
1128     {
1129         char buf[1000];
1130         const char *input = wrbuf_cstr(term_dict) + prefix_len;
1131         esc_str(buf, sizeof(buf), input, strlen(input));
1132     }
1133     {
1134         WRBUF pr_wr = wrbuf_alloc();
1135
1136         wrbuf_write_escaped(pr_wr, wrbuf_buf(term_dict), wrbuf_len(term_dict));
1137         yaz_log(YLOG_LOG, "dict_lookup_grep: %s", wrbuf_cstr(pr_wr));
1138         wrbuf_destroy(pr_wr);
1139     }
1140     r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), regex_range,
1141                          grep_info, &max_pos, 
1142                          ord_len /* number of "exact" chars */,
1143                          grep_handle);
1144     if (r == 1)
1145         zebra_set_partial_result(zh);
1146     else if (r)
1147         yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1148     *term_sub = termp;
1149     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1150     return ZEBRA_OK;
1151 }
1152
1153
1154
1155 static void grep_info_delete(struct grep_info *grep_info)
1156 {
1157 #ifdef TERM_COUNT
1158     xfree(grep_info->term_no);
1159 #endif
1160     xfree(grep_info->isam_p_buf);
1161 }
1162
1163 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1164                                    Z_AttributesPlusTerm *zapt,
1165                                    struct grep_info *grep_info,
1166                                    const char *index_type)
1167 {
1168 #ifdef TERM_COUNT
1169     grep_info->term_no = 0;
1170 #endif
1171     grep_info->trunc_max = atoi(res_get_def(zh->res, "truncmax", "10000"));
1172     grep_info->isam_p_size = 0;
1173     grep_info->isam_p_buf = NULL;
1174     grep_info->zh = zh;
1175     grep_info->index_type = index_type;
1176     grep_info->termset = 0;
1177     if (zapt)
1178     {
1179         AttrType truncmax;
1180         int truncmax_value;
1181
1182         attr_init_APT(&truncmax, zapt, 13);
1183         truncmax_value = attr_find(&truncmax, NULL);
1184         if (truncmax_value != -1)
1185             grep_info->trunc_max = truncmax_value;
1186     }
1187     if (zapt)
1188     {
1189         AttrType termset;
1190         int termset_value_numeric;
1191         const char *termset_value_string;
1192
1193         attr_init_APT(&termset, zapt, 8);
1194         termset_value_numeric =
1195             attr_find_ex(&termset, NULL, &termset_value_string);
1196         if (termset_value_numeric != -1)
1197         {
1198 #if TERMSET_DISABLE
1199             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1200             return ZEBRA_FAIL;
1201 #else
1202             char resname[32];
1203             const char *termset_name = 0;
1204             if (termset_value_numeric != -2)
1205             {
1206                 
1207                 sprintf(resname, "%d", termset_value_numeric);
1208                 termset_name = resname;
1209             }
1210             else
1211                 termset_name = termset_value_string;
1212             yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1213             grep_info->termset = resultSetAdd(zh, termset_name, 1);
1214             if (!grep_info->termset)
1215             {
1216                 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1217                 return ZEBRA_FAIL;
1218             }
1219 #endif
1220         }
1221     }
1222     return ZEBRA_OK;
1223 }
1224
1225 static ZEBRA_RES search_terms_chrmap(ZebraHandle zh,
1226                                      Z_AttributesPlusTerm *zapt,
1227                                      const char *termz,
1228                                      const Odr_oid *attributeSet,
1229                                      NMEM stream,
1230                                      const char *index_type, int complete_flag,
1231                                      const char *rank_type,
1232                                      const char *xpath_use,
1233                                      NMEM rset_nmem,
1234                                      RSET **result_sets, int *num_result_sets,
1235                                      struct rset_key_control *kc,
1236                                      zebra_map_t zm)
1237 {
1238     char term_dst[IT_MAX_WORD+1];
1239     struct grep_info grep_info;
1240     const char *termp = termz;
1241     int alloc_sets = 0;
1242     
1243     *num_result_sets = 0;
1244     *term_dst = 0;
1245     if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1246         return ZEBRA_FAIL;
1247     while(1)
1248     { 
1249         ZEBRA_RES res;
1250
1251         if (alloc_sets == *num_result_sets)
1252         {
1253             int add = 10;
1254             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1255                                               sizeof(*rnew));
1256             if (alloc_sets)
1257                 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1258             alloc_sets = alloc_sets + add;
1259             *result_sets = rnew;
1260         }
1261         res = search_term(zh, zapt, &termp, attributeSet,
1262                           stream, &grep_info,
1263                           index_type, complete_flag,
1264                           term_dst, rank_type,
1265                           xpath_use, rset_nmem,
1266                           &(*result_sets)[*num_result_sets],
1267                           kc, zm);
1268         if (res != ZEBRA_OK)
1269         {
1270             int i;
1271             for (i = 0; i < *num_result_sets; i++)
1272                 rset_delete((*result_sets)[i]);
1273             grep_info_delete(&grep_info);
1274             return res;
1275         }
1276         if ((*result_sets)[*num_result_sets] == 0)
1277             break;
1278         (*num_result_sets)++;
1279
1280         if (!*termp)
1281             break;
1282     }
1283     grep_info_delete(&grep_info);
1284     return ZEBRA_OK;
1285 }
1286                                
1287 /**
1288    \brief Create result set(s) for list of terms
1289    \param zh Zebra Handle
1290    \param zapt Attributes Plust Term (RPN leaf)
1291    \param termz term as used in query but converted to UTF-8
1292    \param attributeSet default attribute set
1293    \param stream memory for result
1294    \param index_type register type ("w", "p",..)
1295    \param complete_flag whether it's phrases or not
1296    \param rank_type term flags for ranking
1297    \param xpath_use use attribute for X-Path (-1 for no X-path)
1298    \param rset_nmem memory for result sets
1299    \param result_sets output result set for each term in list (output)
1300    \param num_result_sets number of output result sets
1301    \param kc rset key control to be used for created result sets
1302 */
1303 static ZEBRA_RES search_terms_list(ZebraHandle zh,
1304                                    Z_AttributesPlusTerm *zapt,
1305                                    const char *termz,
1306                                    const Odr_oid *attributeSet,
1307                                    NMEM stream,
1308                                    const char *index_type, int complete_flag,
1309                                    const char *rank_type,
1310                                    const char *xpath_use,
1311                                    NMEM rset_nmem,
1312                                    RSET **result_sets, int *num_result_sets,
1313                                    struct rset_key_control *kc)
1314 {
1315     zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1316     if (zebra_maps_is_icu(zm))
1317         zebra_map_tokenize_start(zm, termz, strlen(termz));
1318     return search_terms_chrmap(zh, zapt, termz, attributeSet,
1319                                stream, index_type, complete_flag,
1320                                rank_type, xpath_use,
1321                                rset_nmem, result_sets, num_result_sets,
1322                                kc, zm);
1323 }
1324
1325
1326 /** \brief limit a search by position - returns result set
1327  */
1328 static ZEBRA_RES search_position(ZebraHandle zh,
1329                                  Z_AttributesPlusTerm *zapt,
1330                                  const Odr_oid *attributeSet,
1331                                  const char *index_type,
1332                                  NMEM rset_nmem,
1333                                  RSET *rset,
1334                                  struct rset_key_control *kc)
1335 {
1336     int position_value;
1337     AttrType position;
1338     int ord = -1;
1339     char ord_buf[32];
1340     char term_dict[100];
1341     int ord_len;
1342     char *val;
1343     ISAM_P isam_p;
1344     zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1345     
1346     attr_init_APT(&position, zapt, 3);
1347     position_value = attr_find(&position, NULL);
1348     switch(position_value)
1349     {
1350     case 3:
1351     case -1:
1352         return ZEBRA_OK;
1353     case 1:
1354     case 2:
1355         break;
1356     default:
1357         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1358                             position_value);
1359         return ZEBRA_FAIL;
1360     }
1361
1362
1363     if (!zebra_maps_is_first_in_field(zm))
1364     {
1365         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1366                             position_value);
1367         return ZEBRA_FAIL;
1368     }
1369
1370     if (zebra_apt_get_ord(zh, zapt, index_type, 0,
1371                           attributeSet, &ord) != ZEBRA_OK)
1372     {
1373         return ZEBRA_FAIL;
1374     }
1375     ord_len = key_SU_encode(ord, ord_buf);
1376     memcpy(term_dict, ord_buf, ord_len);
1377     strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1378     val = dict_lookup(zh->reg->dict, term_dict);
1379     if (val)
1380     {
1381         assert(*val == sizeof(ISAM_P));
1382         memcpy(&isam_p, val+1, sizeof(isam_p));
1383
1384         *rset = zebra_create_rset_isam(zh, rset_nmem, kc, kc->scope, 
1385                                        isam_p, 0);
1386     }
1387     return ZEBRA_OK;
1388 }
1389
1390 /** \brief returns result set for phrase search
1391  */
1392 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1393                                        Z_AttributesPlusTerm *zapt,
1394                                        const char *termz_org,
1395                                        const Odr_oid *attributeSet,
1396                                        NMEM stream,
1397                                        const char *index_type,
1398                                        int complete_flag,
1399                                        const char *rank_type,
1400                                        const char *xpath_use,
1401                                        NMEM rset_nmem,
1402                                        RSET *rset,
1403                                        struct rset_key_control *kc)
1404 {
1405     RSET *result_sets = 0;
1406     int num_result_sets = 0;
1407     ZEBRA_RES res =
1408         search_terms_list(zh, zapt, termz_org, attributeSet,
1409                           stream, index_type, complete_flag,
1410                           rank_type, xpath_use,
1411                           rset_nmem,
1412                           &result_sets, &num_result_sets, kc);
1413     
1414     if (res != ZEBRA_OK)
1415         return res;
1416
1417     if (num_result_sets > 0)
1418     {
1419         RSET first_set = 0;
1420         res = search_position(zh, zapt, attributeSet, 
1421                               index_type,
1422                               rset_nmem, &first_set,
1423                               kc);
1424         if (res != ZEBRA_OK)
1425         {
1426             int i;
1427             for (i = 0; i<num_result_sets; i++)
1428                 rset_delete(result_sets[i]);
1429             return res;
1430         }
1431         if (first_set)
1432         {
1433             RSET *nsets = nmem_malloc(stream,
1434                                       sizeof(RSET) * (num_result_sets+1));
1435             nsets[0] = first_set;
1436             memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1437             result_sets = nsets;
1438             num_result_sets++;
1439         }
1440     }
1441     if (num_result_sets == 0)
1442         *rset = rset_create_null(rset_nmem, kc, 0); 
1443     else if (num_result_sets == 1)
1444         *rset = result_sets[0];
1445     else
1446         *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1447                                  num_result_sets, result_sets,
1448                                  1 /* ordered */, 0 /* exclusion */,
1449                                  3 /* relation */, 1 /* distance */);
1450     if (!*rset)
1451         return ZEBRA_FAIL;
1452     return ZEBRA_OK;
1453 }
1454
1455 /** \brief returns result set for or-list search
1456  */
1457 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1458                                         Z_AttributesPlusTerm *zapt,
1459                                         const char *termz_org,
1460                                         const Odr_oid *attributeSet,
1461                                         NMEM stream,
1462                                         const char *index_type, 
1463                                         int complete_flag,
1464                                         const char *rank_type,
1465                                         const char *xpath_use,
1466                                         NMEM rset_nmem,
1467                                         RSET *rset,
1468                                         struct rset_key_control *kc)
1469 {
1470     RSET *result_sets = 0;
1471     int num_result_sets = 0;
1472     int i;
1473     ZEBRA_RES res =
1474         search_terms_list(zh, zapt, termz_org, attributeSet,
1475                           stream, index_type, complete_flag,
1476                           rank_type, xpath_use,
1477                           rset_nmem,
1478                           &result_sets, &num_result_sets, kc);
1479     if (res != ZEBRA_OK)
1480         return res;
1481
1482     for (i = 0; i<num_result_sets; i++)
1483     {
1484         RSET first_set = 0;
1485         res = search_position(zh, zapt, attributeSet, 
1486                               index_type,
1487                               rset_nmem, &first_set,
1488                               kc);
1489         if (res != ZEBRA_OK)
1490         {
1491             for (i = 0; i<num_result_sets; i++)
1492                 rset_delete(result_sets[i]);
1493             return res;
1494         }
1495
1496         if (first_set)
1497         {
1498             RSET tmp_set[2];
1499
1500             tmp_set[0] = first_set;
1501             tmp_set[1] = result_sets[i];
1502             
1503             result_sets[i] = rset_create_prox(
1504                 rset_nmem, kc, kc->scope,
1505                 2, tmp_set,
1506                 1 /* ordered */, 0 /* exclusion */,
1507                 3 /* relation */, 1 /* distance */);
1508         }
1509     }
1510     if (num_result_sets == 0)
1511         *rset = rset_create_null(rset_nmem, kc, 0); 
1512     else if (num_result_sets == 1)
1513         *rset = result_sets[0];
1514     else
1515         *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1516                                num_result_sets, result_sets);
1517     if (!*rset)
1518         return ZEBRA_FAIL;
1519     return ZEBRA_OK;
1520 }
1521
1522 /** \brief returns result set for and-list search
1523  */
1524 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1525                                          Z_AttributesPlusTerm *zapt,
1526                                          const char *termz_org,
1527                                          const Odr_oid *attributeSet,
1528                                          NMEM stream,
1529                                          const char *index_type, 
1530                                          int complete_flag,
1531                                          const char *rank_type, 
1532                                          const char *xpath_use,
1533                                          NMEM rset_nmem,
1534                                          RSET *rset,
1535                                          struct rset_key_control *kc)
1536 {
1537     RSET *result_sets = 0;
1538     int num_result_sets = 0;
1539     int i;
1540     ZEBRA_RES res =
1541         search_terms_list(zh, zapt, termz_org, attributeSet,
1542                           stream, index_type, complete_flag,
1543                           rank_type, xpath_use,
1544                           rset_nmem,
1545                           &result_sets, &num_result_sets,
1546                           kc);
1547     if (res != ZEBRA_OK)
1548         return res;
1549     for (i = 0; i<num_result_sets; i++)
1550     {
1551         RSET first_set = 0;
1552         res = search_position(zh, zapt, attributeSet, 
1553                               index_type,
1554                               rset_nmem, &first_set,
1555                               kc);
1556         if (res != ZEBRA_OK)
1557         {
1558             for (i = 0; i<num_result_sets; i++)
1559                 rset_delete(result_sets[i]);
1560             return res;
1561         }
1562
1563         if (first_set)
1564         {
1565             RSET tmp_set[2];
1566
1567             tmp_set[0] = first_set;
1568             tmp_set[1] = result_sets[i];
1569             
1570             result_sets[i] = rset_create_prox(
1571                 rset_nmem, kc, kc->scope,
1572                 2, tmp_set,
1573                 1 /* ordered */, 0 /* exclusion */,
1574                 3 /* relation */, 1 /* distance */);
1575         }
1576     }
1577
1578
1579     if (num_result_sets == 0)
1580         *rset = rset_create_null(rset_nmem, kc, 0); 
1581     else if (num_result_sets == 1)
1582         *rset = result_sets[0];
1583     else
1584         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1585                                 num_result_sets, result_sets);
1586     if (!*rset)
1587         return ZEBRA_FAIL;
1588     return ZEBRA_OK;
1589 }
1590
1591 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1592                             const char **term_sub,
1593                             WRBUF term_dict,
1594                             const Odr_oid *attributeSet,
1595                             struct grep_info *grep_info,
1596                             int *max_pos,
1597                             zebra_map_t zm,
1598                             char *term_dst,
1599                             int *error_code)
1600 {
1601     AttrType relation;
1602     int relation_value;
1603     int term_value;
1604     int r;
1605     WRBUF term_num = wrbuf_alloc();
1606
1607     *error_code = 0;
1608     attr_init_APT(&relation, zapt, 2);
1609     relation_value = attr_find(&relation, NULL);
1610
1611     yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1612
1613     switch (relation_value)
1614     {
1615     case 1:
1616         yaz_log(log_level_rpn, "Relation <");
1617         if (!term_100(zm, term_sub, term_num, 1, term_dst))
1618         { 
1619             wrbuf_destroy(term_num);
1620             return 0;
1621         }
1622         term_value = atoi(wrbuf_cstr(term_num));
1623         gen_regular_rel(term_dict, term_value-1, 1);
1624         break;
1625     case 2:
1626         yaz_log(log_level_rpn, "Relation <=");
1627         if (!term_100(zm, term_sub, term_num, 1, term_dst))
1628         {
1629             wrbuf_destroy(term_num);
1630             return 0;
1631         }
1632         term_value = atoi(wrbuf_cstr(term_num));
1633         gen_regular_rel(term_dict, term_value, 1);
1634         break;
1635     case 4:
1636         yaz_log(log_level_rpn, "Relation >=");
1637         if (!term_100(zm, term_sub, term_num, 1, term_dst))
1638         {
1639             wrbuf_destroy(term_num);
1640             return 0;
1641         }
1642         term_value = atoi(wrbuf_cstr(term_num));
1643         gen_regular_rel(term_dict, term_value, 0);
1644         break;
1645     case 5:
1646         yaz_log(log_level_rpn, "Relation >");
1647         if (!term_100(zm, term_sub, term_num, 1, term_dst))
1648         {
1649             wrbuf_destroy(term_num);
1650             return 0;
1651         }
1652         term_value = atoi(wrbuf_cstr(term_num));
1653         gen_regular_rel(term_dict, term_value+1, 0);
1654         break;
1655     case -1:
1656     case 3:
1657         yaz_log(log_level_rpn, "Relation =");
1658         if (!term_100(zm, term_sub, term_num, 1, term_dst))
1659         {
1660             wrbuf_destroy(term_num);
1661             return 0; 
1662         }
1663         term_value = atoi(wrbuf_cstr(term_num));
1664         wrbuf_printf(term_dict, "(0*%d)", term_value);
1665         break;
1666     case 103:
1667         /* term_tmp untouched.. */
1668         while (**term_sub != '\0')
1669             (*term_sub)++;
1670         break;
1671     default:
1672         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1673         wrbuf_destroy(term_num); 
1674         return 0;
1675     }
1676     r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 
1677                          0, grep_info, max_pos, 0, grep_handle);
1678
1679     if (r == 1)
1680         zebra_set_partial_result(zh);
1681     else if (r)
1682         yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1683     yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1684     wrbuf_destroy(term_num);
1685     return 1;
1686 }
1687
1688 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1689                               const char **term_sub, 
1690                               WRBUF term_dict,
1691                               const Odr_oid *attributeSet, NMEM stream,
1692                               struct grep_info *grep_info,
1693                               const char *index_type, int complete_flag,
1694                               char *term_dst, 
1695                               const char *xpath_use,
1696                               struct ord_list **ol)
1697 {
1698     const char *termp;
1699     struct rpn_char_map_info rcmi;
1700     int max_pos;
1701     int relation_error = 0;
1702     int ord, ord_len, i;
1703     char ord_buf[32];
1704     zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1705     
1706     *ol = ord_list_create(stream);
1707
1708     rpn_char_map_prepare(zh->reg, zm, &rcmi);
1709
1710     termp = *term_sub;
1711     
1712     if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1713                           attributeSet, &ord) != ZEBRA_OK)
1714     {
1715         return ZEBRA_FAIL;
1716     }
1717     
1718     wrbuf_rewind(term_dict);
1719     
1720     *ol = ord_list_append(stream, *ol, ord);
1721     
1722     ord_len = key_SU_encode(ord, ord_buf);
1723     
1724     wrbuf_putc(term_dict, '(');
1725     for (i = 0; i < ord_len; i++)
1726     {
1727         wrbuf_putc(term_dict, 1);
1728         wrbuf_putc(term_dict, ord_buf[i]);
1729     }
1730     wrbuf_putc(term_dict, ')');
1731     
1732     if (!numeric_relation(zh, zapt, &termp, term_dict,
1733                           attributeSet, grep_info, &max_pos, zm,
1734                           term_dst, &relation_error))
1735     {
1736         if (relation_error)
1737         {
1738             zebra_setError(zh, relation_error, 0);
1739             return ZEBRA_FAIL;
1740         }
1741         *term_sub = 0;
1742         return ZEBRA_OK;
1743     }
1744     *term_sub = termp;
1745     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1746     return ZEBRA_OK;
1747 }
1748
1749                                  
1750 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1751                                         Z_AttributesPlusTerm *zapt,
1752                                         const char *termz,
1753                                         const Odr_oid *attributeSet,
1754                                         NMEM stream,
1755                                         const char *index_type, 
1756                                         int complete_flag,
1757                                         const char *rank_type, 
1758                                         const char *xpath_use,
1759                                         NMEM rset_nmem,
1760                                         RSET *rset,
1761                                         struct rset_key_control *kc)
1762 {
1763     char term_dst[IT_MAX_WORD+1];
1764     const char *termp = termz;
1765     RSET *result_sets = 0;
1766     int num_result_sets = 0;
1767     ZEBRA_RES res;
1768     struct grep_info grep_info;
1769     int alloc_sets = 0;
1770     zint hits_limit_value;
1771     const char *term_ref_id_str = 0;
1772
1773     zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
1774                           stream);
1775
1776     yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1777     if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1778         return ZEBRA_FAIL;
1779     while (1)
1780     { 
1781         struct ord_list *ol;
1782         WRBUF term_dict = wrbuf_alloc();
1783         if (alloc_sets == num_result_sets)
1784         {
1785             int add = 10;
1786             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1787                                               sizeof(*rnew));
1788             if (alloc_sets)
1789                 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1790             alloc_sets = alloc_sets + add;
1791             result_sets = rnew;
1792         }
1793         yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1794         grep_info.isam_p_indx = 0;
1795         res = numeric_term(zh, zapt, &termp, term_dict,
1796                            attributeSet, stream, &grep_info,
1797                            index_type, complete_flag,
1798                            term_dst, xpath_use, &ol);
1799         wrbuf_destroy(term_dict);
1800         if (res == ZEBRA_FAIL || termp == 0)
1801             break;
1802         yaz_log(YLOG_DEBUG, "term: %s", term_dst);
1803         result_sets[num_result_sets] =
1804             rset_trunc(zh, grep_info.isam_p_buf,
1805                        grep_info.isam_p_indx, term_dst,
1806                        strlen(term_dst), rank_type,
1807                        0 /* preserve position */,
1808                        zapt->term->which, rset_nmem, 
1809                        kc, kc->scope, ol, index_type,
1810                        hits_limit_value,
1811                        term_ref_id_str);
1812         if (!result_sets[num_result_sets])
1813             break;
1814         num_result_sets++;
1815         if (!*termp)
1816             break;
1817     }
1818     grep_info_delete(&grep_info);
1819
1820     if (res != ZEBRA_OK)
1821         return res;
1822     if (num_result_sets == 0)
1823         *rset = rset_create_null(rset_nmem, kc, 0);
1824     else if (num_result_sets == 1)
1825         *rset = result_sets[0];
1826     else
1827         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1828                                 num_result_sets, result_sets);
1829     if (!*rset)
1830         return ZEBRA_FAIL;
1831     return ZEBRA_OK;
1832 }
1833
1834 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1835                                       Z_AttributesPlusTerm *zapt,
1836                                       const char *termz,
1837                                       const Odr_oid *attributeSet,
1838                                       NMEM stream,
1839                                       const char *rank_type, NMEM rset_nmem,
1840                                       RSET *rset,
1841                                       struct rset_key_control *kc)
1842 {
1843     Record rec;
1844     zint sysno = atozint(termz);
1845     
1846     if (sysno <= 0)
1847         sysno = 0;
1848     rec = rec_get(zh->reg->records, sysno);
1849     if (!rec)
1850         sysno = 0;
1851
1852     rec_free(&rec);
1853
1854     if (sysno <= 0)
1855     {
1856         *rset = rset_create_null(rset_nmem, kc, 0);
1857     }
1858     else
1859     {
1860         RSFD rsfd;
1861         struct it_key key;
1862         *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1863                                  res_get(zh->res, "setTmpDir"), 0);
1864         rsfd = rset_open(*rset, RSETF_WRITE);
1865         
1866         key.mem[0] = sysno;
1867         key.mem[1] = 1;
1868         key.len = 2;
1869         rset_write(rsfd, &key);
1870         rset_close(rsfd);
1871     }
1872     return ZEBRA_OK;
1873 }
1874
1875 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1876                                const Odr_oid *attributeSet, NMEM stream,
1877                                Z_SortKeySpecList *sort_sequence,
1878                                const char *rank_type,
1879                                NMEM rset_nmem,
1880                                RSET *rset,
1881                                struct rset_key_control *kc)
1882 {
1883     int i;
1884     int sort_relation_value;
1885     AttrType sort_relation_type;
1886     Z_SortKeySpec *sks;
1887     Z_SortKey *sk;
1888     char termz[20];
1889     
1890     attr_init_APT(&sort_relation_type, zapt, 7);
1891     sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
1892
1893     if (!sort_sequence->specs)
1894     {
1895         sort_sequence->num_specs = 10;
1896         sort_sequence->specs = (Z_SortKeySpec **)
1897             nmem_malloc(stream, sort_sequence->num_specs *
1898                         sizeof(*sort_sequence->specs));
1899         for (i = 0; i<sort_sequence->num_specs; i++)
1900             sort_sequence->specs[i] = 0;
1901     }
1902     if (zapt->term->which != Z_Term_general)
1903         i = 0;
1904     else
1905         i = atoi_n((char *) zapt->term->u.general->buf,
1906                    zapt->term->u.general->len);
1907     if (i >= sort_sequence->num_specs)
1908         i = 0;
1909     sprintf(termz, "%d", i);
1910
1911     sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1912     sks->sortElement = (Z_SortElement *)
1913         nmem_malloc(stream, sizeof(*sks->sortElement));
1914     sks->sortElement->which = Z_SortElement_generic;
1915     sk = sks->sortElement->u.generic = (Z_SortKey *)
1916         nmem_malloc(stream, sizeof(*sk));
1917     sk->which = Z_SortKey_sortAttributes;
1918     sk->u.sortAttributes = (Z_SortAttributes *)
1919         nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
1920
1921     sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet);
1922     sk->u.sortAttributes->list = zapt->attributes;
1923
1924     sks->sortRelation = (int *)
1925         nmem_malloc(stream, sizeof(*sks->sortRelation));
1926     if (sort_relation_value == 1)
1927         *sks->sortRelation = Z_SortKeySpec_ascending;
1928     else if (sort_relation_value == 2)
1929         *sks->sortRelation = Z_SortKeySpec_descending;
1930     else 
1931         *sks->sortRelation = Z_SortKeySpec_ascending;
1932
1933     sks->caseSensitivity = (int *)
1934         nmem_malloc(stream, sizeof(*sks->caseSensitivity));
1935     *sks->caseSensitivity = 0;
1936
1937     sks->which = Z_SortKeySpec_null;
1938     sks->u.null = odr_nullval ();
1939     sort_sequence->specs[i] = sks;
1940     *rset = rset_create_null(rset_nmem, kc, 0);
1941     return ZEBRA_OK;
1942 }
1943
1944
1945 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1946                            const Odr_oid *attributeSet,
1947                            struct xpath_location_step *xpath, int max,
1948                            NMEM mem)
1949 {
1950     const Odr_oid *curAttributeSet = attributeSet;
1951     AttrType use;
1952     const char *use_string = 0;
1953     
1954     attr_init_APT(&use, zapt, 1);
1955     attr_find_ex(&use, &curAttributeSet, &use_string);
1956
1957     if (!use_string || *use_string != '/')
1958         return -1;
1959
1960     return zebra_parse_xpath_str(use_string, xpath, max, mem);
1961 }
1962  
1963                
1964
1965 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
1966                         const char *index_type, const char *term, 
1967                         const char *xpath_use,
1968                         NMEM rset_nmem,
1969                         struct rset_key_control *kc)
1970 {
1971     struct grep_info grep_info;
1972     int ord = zebraExplain_lookup_attr_str(zh->reg->zei, 
1973                                            zinfo_index_category_index,
1974                                            index_type, xpath_use);
1975     if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, "0") == ZEBRA_FAIL)
1976         return rset_create_null(rset_nmem, kc, 0);
1977     
1978     if (ord < 0)
1979         return rset_create_null(rset_nmem, kc, 0);
1980     else
1981     {
1982         int i, r, max_pos;
1983         char ord_buf[32];
1984         RSET rset;
1985         WRBUF term_dict = wrbuf_alloc();
1986         int ord_len = key_SU_encode(ord, ord_buf);
1987         int term_type = Z_Term_characterString;
1988         const char *flags = "void";
1989
1990         wrbuf_putc(term_dict, '(');
1991         for (i = 0; i<ord_len; i++)
1992         {
1993             wrbuf_putc(term_dict, 1);
1994             wrbuf_putc(term_dict, ord_buf[i]);
1995         }
1996         wrbuf_putc(term_dict, ')');
1997         wrbuf_puts(term_dict, term);
1998         
1999         grep_info.isam_p_indx = 0;
2000         r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 0,
2001                              &grep_info, &max_pos, 0, grep_handle);
2002         yaz_log(YLOG_DEBUG, "%s %d positions", term,
2003                 grep_info.isam_p_indx);
2004         rset = rset_trunc(zh, grep_info.isam_p_buf,
2005                           grep_info.isam_p_indx, term, strlen(term),
2006                           flags, 1, term_type, rset_nmem,
2007                           kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2008                           0 /* term_ref_id_str */);
2009         grep_info_delete(&grep_info);
2010         wrbuf_destroy(term_dict);
2011         return rset;
2012     }
2013 }
2014
2015 static
2016 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2017                            NMEM stream, const char *rank_type, RSET rset,
2018                            int xpath_len, struct xpath_location_step *xpath,
2019                            NMEM rset_nmem,
2020                            RSET *rset_out,
2021                            struct rset_key_control *kc)
2022 {
2023     int i;
2024     int always_matches = rset ? 0 : 1;
2025
2026     if (xpath_len < 0)
2027     {
2028         *rset_out = rset;
2029         return ZEBRA_OK;
2030     }
2031
2032     yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2033     for (i = 0; i<xpath_len; i++)
2034     {
2035         yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2036
2037     }
2038
2039     /*
2040     //a    ->    a/.*
2041     //a/b  ->    b/a/.*
2042     /a     ->    a/
2043     /a/b   ->    b/a/
2044
2045     /      ->    none
2046
2047     a[@attr = value]/b[@other = othervalue]
2048
2049     /e/@a val      range(e/,range(@a,freetext(w,1015,val),@a),e/)
2050     /a/b val       range(b/a/,freetext(w,1016,val),b/a/)
2051     /a/b/@c val    range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2052     /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2053     /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2054     /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
2055       
2056     */
2057
2058     dict_grep_cmap(zh->reg->dict, 0, 0);
2059     
2060     {
2061         int level = xpath_len;
2062         int first_path = 1;
2063         
2064         while (--level >= 0)
2065         {
2066             WRBUF xpath_rev = wrbuf_alloc();
2067             int i;
2068             RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
2069
2070             for (i = level; i >= 1; --i)
2071             {
2072                 const char *cp = xpath[i].part;
2073                 if (*cp)
2074                 {
2075                     for (; *cp; cp++)
2076                     {
2077                         if (*cp == '*')
2078                             wrbuf_puts(xpath_rev, "[^/]*");
2079                         else if (*cp == ' ')
2080                             wrbuf_puts(xpath_rev, "\001 ");
2081                         else
2082                             wrbuf_putc(xpath_rev, *cp);
2083
2084                         /* wrbuf_putc does not null-terminate , but
2085                            wrbuf_puts below ensures it does.. so xpath_rev
2086                            is OK iff length is > 0 */
2087                     }
2088                     wrbuf_puts(xpath_rev, "/");
2089                 }
2090                 else if (i == 1)  /* // case */
2091                     wrbuf_puts(xpath_rev, ".*");
2092             }
2093             if (xpath[level].predicate &&
2094                 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2095                 xpath[level].predicate->u.relation.name[0])
2096             {
2097                 WRBUF wbuf = wrbuf_alloc();
2098                 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2099                 if (xpath[level].predicate->u.relation.value)
2100                 {
2101                     const char *cp = xpath[level].predicate->u.relation.value;
2102                     wrbuf_putc(wbuf, '=');
2103                     
2104                     while (*cp)
2105                     {
2106                         if (strchr(REGEX_CHARS, *cp))
2107                             wrbuf_putc(wbuf, '\\');
2108                         wrbuf_putc(wbuf, *cp);
2109                         cp++;
2110                     }
2111                 }
2112                 rset_attr = xpath_trunc(
2113                     zh, stream, "0", wrbuf_cstr(wbuf), 
2114                     ZEBRA_XPATH_ATTR_NAME, 
2115                     rset_nmem, kc);
2116                 wrbuf_destroy(wbuf);
2117             } 
2118             else 
2119             {
2120                 if (!first_path)
2121                 {
2122                     wrbuf_destroy(xpath_rev);
2123                     continue;
2124                 }
2125             }
2126             yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level, 
2127                     wrbuf_cstr(xpath_rev));
2128             if (wrbuf_len(xpath_rev))
2129             {
2130                 rset_start_tag = xpath_trunc(zh, stream, "0", 
2131                                              wrbuf_cstr(xpath_rev),
2132                                              ZEBRA_XPATH_ELM_BEGIN, 
2133                                              rset_nmem, kc);
2134                 if (always_matches)
2135                     rset = rset_start_tag;
2136                 else
2137                 {
2138                     rset_end_tag = xpath_trunc(zh, stream, "0", 
2139                                                wrbuf_cstr(xpath_rev),
2140                                                ZEBRA_XPATH_ELM_END, 
2141                                                rset_nmem, kc);
2142                     
2143                     rset = rset_create_between(rset_nmem, kc, kc->scope,
2144                                                rset_start_tag, rset,
2145                                                rset_end_tag, rset_attr);
2146                 }
2147             }
2148             wrbuf_destroy(xpath_rev);
2149             first_path = 0;
2150         }
2151     }
2152     *rset_out = rset;
2153     return ZEBRA_OK;
2154 }
2155
2156 #define MAX_XPATH_STEPS 10
2157
2158 static ZEBRA_RES rpn_search_database(ZebraHandle zh, 
2159                                      Z_AttributesPlusTerm *zapt,
2160                                      const Odr_oid *attributeSet, NMEM stream,
2161                                      Z_SortKeySpecList *sort_sequence,
2162                                      NMEM rset_nmem,
2163                                      RSET *rset,
2164                                      struct rset_key_control *kc);
2165
2166 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2167                                 const Odr_oid *attributeSet, NMEM stream,
2168                                 Z_SortKeySpecList *sort_sequence,
2169                                 int num_bases, const char **basenames, 
2170                                 NMEM rset_nmem,
2171                                 RSET *rset,
2172                                 struct rset_key_control *kc)
2173 {
2174     RSET *rsets = nmem_malloc(stream, num_bases * sizeof(*rsets));
2175     ZEBRA_RES res = ZEBRA_OK;
2176     int i;
2177     for (i = 0; i < num_bases; i++)
2178     {
2179
2180         if (zebraExplain_curDatabase(zh->reg->zei, basenames[i]))
2181         {
2182             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2183                            basenames[i]);
2184             res = ZEBRA_FAIL;
2185             break;
2186         }
2187         res = rpn_search_database(zh, zapt, attributeSet, stream,
2188                                   sort_sequence,
2189                                   rset_nmem, rsets+i, kc);
2190         if (res != ZEBRA_OK)
2191             break;
2192     }
2193     if (res != ZEBRA_OK)
2194     {   /* must clean up the already created sets */
2195         while (--i >= 0)
2196             rset_delete(rsets[i]);
2197         *rset = 0;
2198     }
2199     else 
2200     {
2201         if (num_bases == 1)
2202             *rset = rsets[0];
2203         else if (num_bases == 0)
2204             *rset = rset_create_null(rset_nmem, kc, 0); 
2205         else
2206             *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* TERMID */,
2207                                    num_bases, rsets);
2208     }
2209     return res;
2210 }
2211
2212 static ZEBRA_RES rpn_search_database(ZebraHandle zh, 
2213                                      Z_AttributesPlusTerm *zapt,
2214                                      const Odr_oid *attributeSet, NMEM stream,
2215                                      Z_SortKeySpecList *sort_sequence,
2216                                      NMEM rset_nmem,
2217                                      RSET *rset,
2218                                      struct rset_key_control *kc)
2219 {
2220     ZEBRA_RES res = ZEBRA_OK;
2221     const char *index_type;
2222     char *search_type = NULL;
2223     char rank_type[128];
2224     int complete_flag;
2225     int sort_flag;
2226     char termz[IT_MAX_WORD+1];
2227     int xpath_len;
2228     const char *xpath_use = 0;
2229     struct xpath_location_step xpath[MAX_XPATH_STEPS];
2230
2231     if (!log_level_set)
2232     {
2233         log_level_rpn = yaz_log_module_level("rpn");
2234         log_level_set = 1;
2235     }
2236     zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type,
2237                     rank_type, &complete_flag, &sort_flag);
2238     
2239     yaz_log(YLOG_DEBUG, "index_type=%s", index_type);
2240     yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2241     yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2242     yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2243
2244     if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2245         return ZEBRA_FAIL;
2246
2247     if (sort_flag)
2248         return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2249                              rank_type, rset_nmem, rset, kc);
2250     /* consider if an X-Path query is used */
2251     xpath_len = rpn_check_xpath(zh, zapt, attributeSet, 
2252                                 xpath, MAX_XPATH_STEPS, stream);
2253     if (xpath_len >= 0)
2254     {
2255         if (xpath[xpath_len-1].part[0] == '@') 
2256             xpath_use = ZEBRA_XPATH_ATTR_CDATA;  /* last step is attribute  */
2257         else
2258             xpath_use = ZEBRA_XPATH_CDATA;  /* searching for cdata */        
2259
2260         if (1)
2261         {
2262             AttrType relation;
2263             int relation_value;
2264
2265             attr_init_APT(&relation, zapt, 2);
2266             relation_value = attr_find(&relation, NULL);
2267
2268             if (relation_value == 103) /* alwaysmatches */
2269             {
2270                 *rset = 0; /* signal no "term" set */
2271                 return rpn_search_xpath(zh, stream, rank_type, *rset, 
2272                                         xpath_len, xpath, rset_nmem, rset, kc);
2273             }
2274         }
2275     }
2276
2277     /* search using one of the various search type strategies
2278        termz is our UTF-8 search term
2279        attributeSet is top-level default attribute set 
2280        stream is ODR for search
2281        reg_id is the register type
2282        complete_flag is 1 for complete subfield, 0 for incomplete
2283        xpath_use is use-attribute to be used for X-Path search, 0 for none
2284     */
2285     if (!strcmp(search_type, "phrase"))
2286     {
2287         res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2288                                     index_type, complete_flag, rank_type,
2289                                     xpath_use,
2290                                     rset_nmem,
2291                                     rset, kc);
2292     }
2293     else if (!strcmp(search_type, "and-list"))
2294     {
2295         res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2296                                       index_type, complete_flag, rank_type,
2297                                       xpath_use,
2298                                       rset_nmem,
2299                                       rset, kc);
2300     }
2301     else if (!strcmp(search_type, "or-list"))
2302     {
2303         res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2304                                      index_type, complete_flag, rank_type,
2305                                      xpath_use,
2306                                      rset_nmem,
2307                                      rset, kc);
2308     }
2309     else if (!strcmp(search_type, "local"))
2310     {
2311         res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2312                                    rank_type, rset_nmem, rset, kc);
2313     }
2314     else if (!strcmp(search_type, "numeric"))
2315     {
2316         res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2317                                      index_type, complete_flag, rank_type,
2318                                      xpath_use,
2319                                      rset_nmem,
2320                                      rset, kc);
2321     }
2322     else
2323     {
2324         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2325         res = ZEBRA_FAIL;
2326     }
2327     if (res != ZEBRA_OK)
2328         return res;
2329     if (!*rset)
2330         return ZEBRA_FAIL;
2331     return rpn_search_xpath(zh, stream, rank_type, *rset, 
2332                             xpath_len, xpath, rset_nmem, rset, kc);
2333 }
2334
2335 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2336                                       const Odr_oid *attributeSet, 
2337                                       NMEM stream, NMEM rset_nmem,
2338                                       Z_SortKeySpecList *sort_sequence,
2339                                       int num_bases, const char **basenames,
2340                                       RSET **result_sets, int *num_result_sets,
2341                                       Z_Operator *parent_op,
2342                                       struct rset_key_control *kc);
2343
2344 ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs,
2345                                    zint *approx_limit)
2346 {
2347     ZEBRA_RES res = ZEBRA_OK;
2348     if (zs->which == Z_RPNStructure_complex)
2349     {
2350         if (res == ZEBRA_OK)
2351             res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
2352                                            approx_limit);
2353         if (res == ZEBRA_OK)
2354             res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
2355                                            approx_limit);
2356     }
2357     else if (zs->which == Z_RPNStructure_simple)
2358     {
2359         if (zs->u.simple->which == Z_Operand_APT)
2360         {
2361             Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
2362             AttrType global_hits_limit_attr;
2363             int l;
2364             
2365             attr_init_APT(&global_hits_limit_attr, zapt, 12);
2366             
2367             l = attr_find(&global_hits_limit_attr, NULL);
2368             if (l != -1)
2369                 *approx_limit = l;
2370         }
2371     }
2372     return res;
2373 }
2374
2375 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2376                          const Odr_oid *attributeSet, 
2377                          NMEM stream, NMEM rset_nmem,
2378                          Z_SortKeySpecList *sort_sequence,
2379                          int num_bases, const char **basenames,
2380                          RSET *result_set)
2381 {
2382     RSET *result_sets = 0;
2383     int num_result_sets = 0;
2384     ZEBRA_RES res;
2385     struct rset_key_control *kc = zebra_key_control_create(zh);
2386
2387     res = rpn_search_structure(zh, zs, attributeSet,
2388                                stream, rset_nmem,
2389                                sort_sequence, 
2390                                num_bases, basenames,
2391                                &result_sets, &num_result_sets,
2392                                0 /* no parent op */,
2393                                kc);
2394     if (res != ZEBRA_OK)
2395     {
2396         int i;
2397         for (i = 0; i<num_result_sets; i++)
2398             rset_delete(result_sets[i]);
2399         *result_set = 0;
2400     }
2401     else
2402     {
2403         assert(num_result_sets == 1);
2404         assert(result_sets);
2405         assert(*result_sets);
2406         *result_set = *result_sets;
2407     }
2408     (*kc->dec)(kc);
2409     return res;
2410 }
2411
2412 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2413                                const Odr_oid *attributeSet, 
2414                                NMEM stream, NMEM rset_nmem,
2415                                Z_SortKeySpecList *sort_sequence,
2416                                int num_bases, const char **basenames,
2417                                RSET **result_sets, int *num_result_sets,
2418                                Z_Operator *parent_op,
2419                                struct rset_key_control *kc)
2420 {
2421     *num_result_sets = 0;
2422     if (zs->which == Z_RPNStructure_complex)
2423     {
2424         ZEBRA_RES res;
2425         Z_Operator *zop = zs->u.complex->roperator;
2426         RSET *result_sets_l = 0;
2427         int num_result_sets_l = 0;
2428         RSET *result_sets_r = 0;
2429         int num_result_sets_r = 0;
2430
2431         res = rpn_search_structure(zh, zs->u.complex->s1,
2432                                    attributeSet, stream, rset_nmem,
2433                                    sort_sequence,
2434                                    num_bases, basenames,
2435                                    &result_sets_l, &num_result_sets_l,
2436                                    zop, kc);
2437         if (res != ZEBRA_OK)
2438         {
2439             int i;
2440             for (i = 0; i<num_result_sets_l; i++)
2441                 rset_delete(result_sets_l[i]);
2442             return res;
2443         }
2444         res = rpn_search_structure(zh, zs->u.complex->s2,
2445                                    attributeSet, stream, rset_nmem,
2446                                    sort_sequence,
2447                                    num_bases, basenames,
2448                                    &result_sets_r, &num_result_sets_r,
2449                                    zop, kc);
2450         if (res != ZEBRA_OK)
2451         {
2452             int i;
2453             for (i = 0; i<num_result_sets_l; i++)
2454                 rset_delete(result_sets_l[i]);
2455             for (i = 0; i<num_result_sets_r; i++)
2456                 rset_delete(result_sets_r[i]);
2457             return res;
2458         }
2459
2460         /* make a new list of result for all children */
2461         *num_result_sets = num_result_sets_l + num_result_sets_r;
2462         *result_sets = nmem_malloc(stream, *num_result_sets * 
2463                                    sizeof(**result_sets));
2464         memcpy(*result_sets, result_sets_l, 
2465                num_result_sets_l * sizeof(**result_sets));
2466         memcpy(*result_sets + num_result_sets_l, result_sets_r, 
2467                num_result_sets_r * sizeof(**result_sets));
2468
2469         if (!parent_op || parent_op->which != zop->which
2470             || (zop->which != Z_Operator_and &&
2471                 zop->which != Z_Operator_or))
2472         {
2473             /* parent node different from this one (or non-present) */
2474             /* we must combine result sets now */
2475             RSET rset;
2476             switch (zop->which)
2477             {
2478             case Z_Operator_and:
2479                 rset = rset_create_and(rset_nmem, kc,
2480                                        kc->scope,
2481                                        *num_result_sets, *result_sets);
2482                 break;
2483             case Z_Operator_or:
2484                 rset = rset_create_or(rset_nmem, kc,
2485                                       kc->scope, 0, /* termid */
2486                                       *num_result_sets, *result_sets);
2487                 break;
2488             case Z_Operator_and_not:
2489                 rset = rset_create_not(rset_nmem, kc,
2490                                        kc->scope,
2491                                        (*result_sets)[0],
2492                                        (*result_sets)[1]);
2493                 break;
2494             case Z_Operator_prox:
2495                 if (zop->u.prox->which != Z_ProximityOperator_known)
2496                 {
2497                     zebra_setError(zh, 
2498                                    YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2499                                    0);
2500                     return ZEBRA_FAIL;
2501                 }
2502                 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2503                 {
2504                     zebra_setError_zint(zh,
2505                                         YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2506                                         *zop->u.prox->u.known);
2507                     return ZEBRA_FAIL;
2508                 }
2509                 else
2510                 {
2511                     rset = rset_create_prox(rset_nmem, kc,
2512                                             kc->scope,
2513                                             *num_result_sets, *result_sets, 
2514                                             *zop->u.prox->ordered,
2515                                             (!zop->u.prox->exclusion ? 
2516                                              0 : *zop->u.prox->exclusion),
2517                                             *zop->u.prox->relationType,
2518                                             *zop->u.prox->distance );
2519                 }
2520                 break;
2521             default:
2522                 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
2523                 return ZEBRA_FAIL;
2524             }
2525             *num_result_sets = 1;
2526             *result_sets = nmem_malloc(stream, *num_result_sets * 
2527                                        sizeof(**result_sets));
2528             (*result_sets)[0] = rset;
2529         }
2530     }
2531     else if (zs->which == Z_RPNStructure_simple)
2532     {
2533         RSET rset;
2534         ZEBRA_RES res;
2535
2536         if (zs->u.simple->which == Z_Operand_APT)
2537         {
2538             yaz_log(YLOG_DEBUG, "rpn_search_APT");
2539             res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2540                                  attributeSet, stream, sort_sequence,
2541                                  num_bases, basenames, rset_nmem, &rset,
2542                                  kc);
2543             if (res != ZEBRA_OK)
2544                 return res;
2545         }
2546         else if (zs->u.simple->which == Z_Operand_resultSetId)
2547         {
2548             yaz_log(YLOG_DEBUG, "rpn_search_ref");
2549             rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2550             if (!rset)
2551             {
2552                 zebra_setError(zh, 
2553                                YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2554                                zs->u.simple->u.resultSetId);
2555                 return ZEBRA_FAIL;
2556             }
2557             rset_dup(rset);
2558         }
2559         else
2560         {
2561             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2562             return ZEBRA_FAIL;
2563         }
2564         *num_result_sets = 1;
2565         *result_sets = nmem_malloc(stream, *num_result_sets * 
2566                                    sizeof(**result_sets));
2567         (*result_sets)[0] = rset;
2568     }
2569     else
2570     {
2571         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2572         return ZEBRA_FAIL;
2573     }
2574     return ZEBRA_OK;
2575 }
2576
2577
2578
2579 /*
2580  * Local variables:
2581  * c-basic-offset: 4
2582  * indent-tabs-mode: nil
2583  * End:
2584  * vim: shiftwidth=4 tabstop=8 expandtab
2585  */
2586