Memory leak fix - in case of error in use of position attribute.
[idzebra-moved-to-github.git] / index / rpnsearch.c
1 /* $Id: rpnsearch.c,v 1.26 2007-12-03 14:33:22 adam Exp $
2    Copyright (C) 1995-2007
3    Index Data ApS
4
5 This file is part of the Zebra server.
6
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
20
21 */
22
23 #include <stdio.h>
24 #include <assert.h>
25 #ifdef WIN32
26 #include <io.h>
27 #endif
28 #if HAVE_UNISTD_H
29 #include <unistd.h>
30 #endif
31 #include <ctype.h>
32
33 #include <yaz/diagbib1.h>
34 #include "index.h"
35 #include <zebra_xpath.h>
36 #include <attrfind.h>
37 #include <charmap.h>
38 #include <rset.h>
39
/* Lazily resolved logging state for the "rpn" log module.
 * add_isam_p() looks the level up on first use and records that it did so. */
static int log_level_set = 0;   /* non-zero once log_level_rpn has been resolved */
static int log_level_rpn = 0;   /* value of yaz_log_module_level("rpn") */

/* Not referenced in the part of the file shown here. */
#define TERMSET_DISABLE 1
44
45 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
46 {
47     struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
48     const char **out = zebra_maps_input(p->zm, from, len, 0);
49 #if 0
50     if (out && *out)
51     {
52         const char *outp = *out;
53         yaz_log(YLOG_LOG, "---");
54         while (*outp)
55         {
56             yaz_log(YLOG_LOG, "%02X", *outp);
57             outp++;
58         }
59     }
60 #endif
61     return out;
62 }
63
64 void rpn_char_map_prepare(struct zebra_register *reg, zebra_map_t zm,
65                           struct rpn_char_map_info *map_info)
66 {
67     map_info->zm = zm;
68     if (zebra_maps_is_icu(zm))
69         dict_grep_cmap(reg->dict, 0, 0);
70     else
71         dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
72 }
73
74 #define TERM_COUNT        
75        
76 struct grep_info {        
77 #ifdef TERM_COUNT        
78     int *term_no;        
79 #endif        
80     ISAM_P *isam_p_buf;
81     int isam_p_size;        
82     int isam_p_indx;
83     int trunc_max;
84     ZebraHandle zh;
85     const char *index_type;
86     ZebraSet termset;
87 };        
88
89 static int add_isam_p(const char *name, const char *info,
90                       struct grep_info *p)
91 {
92     if (!log_level_set)
93     {
94         log_level_rpn = yaz_log_module_level("rpn");
95         log_level_set = 1;
96     }
97     /* we may have to stop this madness.. NOTE: -1 so that if
98        truncmax == trunxlimit we do *not* generate result sets */
99     if (p->isam_p_indx >= p->trunc_max - 1)
100         return 1;
101
102     if (p->isam_p_indx == p->isam_p_size)
103     {
104         ISAM_P *new_isam_p_buf;
105 #ifdef TERM_COUNT        
106         int *new_term_no;        
107 #endif
108         p->isam_p_size = 2*p->isam_p_size + 100;
109         new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
110                                             p->isam_p_size);
111         if (p->isam_p_buf)
112         {
113             memcpy(new_isam_p_buf, p->isam_p_buf,
114                     p->isam_p_indx * sizeof(*p->isam_p_buf));
115             xfree(p->isam_p_buf);
116         }
117         p->isam_p_buf = new_isam_p_buf;
118
119 #ifdef TERM_COUNT
120         new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
121         if (p->term_no)
122         {
123             memcpy(new_term_no, p->isam_p_buf,
124                     p->isam_p_indx * sizeof(*p->term_no));
125             xfree(p->term_no);
126         }
127         p->term_no = new_term_no;
128 #endif
129     }
130     assert(*info == sizeof(*p->isam_p_buf));
131     memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
132
133     if (p->termset)
134     {
135         const char *db;
136         char term_tmp[IT_MAX_WORD];
137         int ord = 0;
138         const char *index_name;
139         int len = key_SU_decode(&ord, (const unsigned char *) name);
140         
141         zebra_term_untrans (p->zh, p->index_type, term_tmp, name+len);
142         yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
143         zebraExplain_lookup_ord(p->zh->reg->zei,
144                                 ord, 0 /* index_type */, &db, &index_name);
145         yaz_log(log_level_rpn, "grep:  db=%s index=%s", db, index_name);
146         
147         resultSetAddTerm(p->zh, p->termset, name[len], db,
148                          index_name, term_tmp);
149     }
150     (p->isam_p_indx)++;
151     return 0;
152 }
153
/* Dictionary grep callback: p is the struct grep_info passed as client
 * data; the hit is forwarded to add_isam_p() and its result returned. */
static int grep_handle(char *name, const char *info, void *p)
{
    struct grep_info *gi = (struct grep_info *) p;

    return add_isam_p(name, info, gi);
}
158
159 static int term_pre(zebra_map_t zm, const char **src,
160                     const char *ct1, const char *ct2, int first)
161 {
162     const char *s1, *s0 = *src;
163     const char **map;
164
165     /* skip white space */
166     while (*s0)
167     {
168         if (ct1 && strchr(ct1, *s0))
169             break;
170         if (ct2 && strchr(ct2, *s0))
171             break;
172         s1 = s0;
173         map = zebra_maps_input(zm, &s1, strlen(s1), first);
174         if (**map != *CHR_SPACE)
175             break;
176         s0 = s1;
177     }
178     *src = s0;
179     return *s0;
180 }
181
182
/* esc_str - render a byte buffer as a printable dump.
 *
 * Each input byte becomes "HH:c  " (two hex digits, a colon, the byte
 * itself or '?' when outside 32..126, two blanks).  Output stops, with ".."
 * appended, once more than out_size-20 characters have been produced.
 *
 *  out_buf/out_size:  destination; out_size must exceed 20
 *  in_buf/in_size:    bytes to dump
 */
static void esc_str(char *out_buf, size_t out_size,
                    const char *in_buf, int in_size)
{
    size_t used = 0;    /* length of the text written so far */
    int idx;

    assert(out_buf);
    assert(in_buf);
    assert(out_size > 20);

    out_buf[0] = '\0';
    for (idx = 0; idx < in_size; idx++)
    {
        int byte = in_buf[idx] & 0xff;
        int shown = (byte >= 32 && byte <= 126) ? byte : '?';

        used += (size_t) sprintf(out_buf + used, "%02X:%c  ", byte, shown);
        if (used > out_size-20)
        {
            strcpy(out_buf + used, "..");
            break;
        }
    }
}
208
209 #define REGEX_CHARS " []()|.*+?!"
210
211 static void add_non_space(const char *start, const char *end,
212                           WRBUF term_dict,
213                           char *dst_term, int *dst_ptr,
214                           const char **map, int q_map_match)
215 {
216     size_t sz = end - start;
217     memcpy(dst_term + *dst_ptr, start, sz);
218     (*dst_ptr) += sz;
219     if (!q_map_match)
220     {
221         while (start < end)
222         {
223             if (strchr(REGEX_CHARS, *start))
224                 wrbuf_putc(term_dict, '\\');
225             wrbuf_putc(term_dict, *start);
226             start++;
227         }
228     }
229     else
230     {
231         char tmpbuf[80];
232         esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
233         
234         wrbuf_puts(term_dict, map[0]);
235     }
236 }
237
238
239 static int term_100_icu(zebra_map_t zm,
240                         const char **src, WRBUF term_dict, int space_split,
241                         char *dst_term)
242 {
243     int no = 0;
244     const char *res_buf = 0;
245     size_t res_len = 0;
246     int r = zebra_map_tokenize(zm, *src, strlen(*src),
247                                &res_buf, &res_len);
248
249     yaz_log(YLOG_LOG, "term_100_icu r=%d", r);
250     if (r)
251         strcat(dst_term, *src);
252     *src += strlen(*src);
253     while (r)
254     {
255         int i;
256         no++;
257         for (i = 0; i < res_len; i++)
258         {
259             if (strchr(REGEX_CHARS, res_buf[i]))
260                 wrbuf_putc(term_dict, '\\');
261             if (res_buf[i] < 32)
262                 wrbuf_putc(term_dict, 1);
263             wrbuf_putc(term_dict, res_buf[i]);
264         }
265         r = zebra_map_tokenize(zm, 0, 0, &res_buf, &res_len);
266     }
267     return no;
268 }
269
270 /* term_100: handle term, where trunc = none(no operators at all) */
271 static int term_100(zebra_map_t zm,
272                     const char **src, WRBUF term_dict, int space_split,
273                     char *dst_term)
274 {
275     const char *s0;
276     const char **map;
277     int i = 0;
278     int j = 0;
279
280     const char *space_start = 0;
281     const char *space_end = 0;
282
283     if (zebra_maps_is_icu(zm))
284         return term_100_icu(zm, src, term_dict, space_split, dst_term);
285
286     if (!term_pre(zm, src, NULL, NULL, !space_split))
287         return 0;
288     s0 = *src;
289     while (*s0)
290     {
291         const char *s1 = s0;
292         int q_map_match = 0;
293         map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
294         if (space_split)
295         {
296             if (**map == *CHR_SPACE)
297                 break;
298         }
299         else  /* complete subfield only. */
300         {
301             if (**map == *CHR_SPACE)
302             {   /* save space mapping for later  .. */
303                 space_start = s1;
304                 space_end = s0;
305                 continue;
306             }
307             else if (space_start)
308             {   /* reload last space */
309                 while (space_start < space_end)
310                 {
311                     if (strchr(REGEX_CHARS, *space_start))
312                         wrbuf_putc(term_dict, '\\');
313                     dst_term[j++] = *space_start;
314                     wrbuf_putc(term_dict, *space_start);
315                     space_start++;
316                                
317                 }
318                 /* and reset */
319                 space_start = space_end = 0;
320             }
321         }
322         i++;
323
324         add_non_space(s1, s0, term_dict, dst_term, &j,
325                       map, q_map_match);
326     }
327     dst_term[j] = '\0';
328     *src = s0;
329     return i;
330 }
331
332 /* term_101: handle term, where trunc = Process # */
333 static int term_101(zebra_map_t zm,
334                     const char **src, WRBUF term_dict, int space_split,
335                     char *dst_term)
336 {
337     const char *s0;
338     const char **map;
339     int i = 0;
340     int j = 0;
341
342     if (!term_pre(zm, src, "#", "#", !space_split))
343         return 0;
344     s0 = *src;
345     while (*s0)
346     {
347         if (*s0 == '#')
348         {
349             i++;
350             wrbuf_puts(term_dict, ".*");
351             dst_term[j++] = *s0++;
352         }
353         else
354         {
355             const char *s1 = s0;
356             int q_map_match = 0;
357             map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
358             if (space_split && **map == *CHR_SPACE)
359                 break;
360
361             i++;
362             add_non_space(s1, s0, term_dict, dst_term, &j,
363                           map, q_map_match);
364         }
365     }
366     dst_term[j++] = '\0';
367     *src = s0;
368     return i;
369 }
370
371 /* term_103: handle term, where trunc = re-2 (regular expressions) */
372 static int term_103(zebra_map_t zm, const char **src,
373                     WRBUF term_dict, int *errors, int space_split,
374                     char *dst_term)
375 {
376     int i = 0;
377     int j = 0;
378     const char *s0;
379     const char **map;
380
381     if (!term_pre(zm, src, "^\\()[].*+?|", "(", !space_split))
382         return 0;
383     s0 = *src;
384     if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
385         isdigit(((const unsigned char *)s0)[1]))
386     {
387         *errors = s0[1] - '0';
388         s0 += 3;
389         if (*errors > 3)
390             *errors = 3;
391     }
392     while (*s0)
393     {
394         if (strchr("^\\()[].*+?|-", *s0))
395         {
396             dst_term[j++] = *s0;
397             wrbuf_putc(term_dict, *s0);
398             s0++;
399             i++;
400         }
401         else
402         {
403             const char *s1 = s0;
404             int q_map_match = 0;
405             map = zebra_maps_search(zm, &s0, strlen(s0),  &q_map_match);
406             if (space_split && **map == *CHR_SPACE)
407                 break;
408
409             i++;
410             add_non_space(s1, s0, term_dict, dst_term, &j,
411                           map, q_map_match);
412         }
413     }
414     dst_term[j] = '\0';
415     *src = s0;
416     
417     return i;
418 }
419
420 /* term_103: handle term, where trunc = re-1 (regular expressions) */
421 static int term_102(zebra_map_t zm, const char **src,
422                     WRBUF term_dict, int space_split, char *dst_term)
423 {
424     return term_103(zm, src, term_dict, NULL, space_split, dst_term);
425 }
426
427
428 /* term_104: handle term, process # and ! */
429 static int term_104(zebra_map_t zm, const char **src, 
430                     WRBUF term_dict, int space_split, char *dst_term)
431 {
432     const char *s0;
433     const char **map;
434     int i = 0;
435     int j = 0;
436
437     if (!term_pre(zm, src, "?*#", "?*#", !space_split))
438         return 0;
439     s0 = *src;
440     while (*s0)
441     {
442         if (*s0 == '?')
443         {
444             i++;
445             dst_term[j++] = *s0++;
446             if (*s0 >= '0' && *s0 <= '9')
447             {
448                 int limit = 0;
449                 while (*s0 >= '0' && *s0 <= '9')
450                 {
451                     limit = limit * 10 + (*s0 - '0');
452                     dst_term[j++] = *s0++;
453                 }
454                 if (limit > 20)
455                     limit = 20;
456                 while (--limit >= 0)
457                 {
458                     wrbuf_puts(term_dict, ".?");
459                 }
460             }
461             else
462             {
463                 wrbuf_puts(term_dict, ".*");
464             }
465         }
466         else if (*s0 == '*')
467         {
468             i++;
469             wrbuf_puts(term_dict, ".*");
470             dst_term[j++] = *s0++;
471         }
472         else if (*s0 == '#')
473         {
474             i++;
475             wrbuf_puts(term_dict, ".");
476             dst_term[j++] = *s0++;
477         }
478         else
479         {
480             const char *s1 = s0;
481             int q_map_match = 0;
482             map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
483             if (space_split && **map == *CHR_SPACE)
484                 break;
485
486             i++;
487             add_non_space(s1, s0, term_dict, dst_term, &j,
488                           map, q_map_match);
489         }
490     }
491     dst_term[j++] = '\0';
492     *src = s0;
493     return i;
494 }
495
496 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
497 static int term_105(zebra_map_t zm, const char **src, 
498                     WRBUF term_dict, int space_split,
499                     char *dst_term, int right_truncate)
500 {
501     const char *s0;
502     const char **map;
503     int i = 0;
504     int j = 0;
505
506     if (!term_pre(zm, src, "*!", "*!", !space_split))
507         return 0;
508     s0 = *src;
509     while (*s0)
510     {
511         if (*s0 == '*')
512         {
513             i++;
514             wrbuf_puts(term_dict, ".*");
515             dst_term[j++] = *s0++;
516         }
517         else if (*s0 == '!')
518         {
519             i++;
520             wrbuf_putc(term_dict, '.');
521             dst_term[j++] = *s0++;
522         }
523         else
524         {
525             const char *s1 = s0;
526             int q_map_match = 0;
527             map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
528             if (space_split && **map == *CHR_SPACE)
529                 break;
530
531             i++;
532             add_non_space(s1, s0, term_dict, dst_term, &j,
533                           map, q_map_match);
534         }
535     }
536     if (right_truncate)
537         wrbuf_puts(term_dict, ".*");
538     dst_term[j++] = '\0';
539     *src = s0;
540     return i;
541 }
542
543
544 /* gen_regular_rel - generate regular expression from relation
545  *  val:     border value (inclusive)
546  *  islt:    1 if <=; 0 if >=.
547  */
548 static void gen_regular_rel(WRBUF term_dict, int val, int islt)
549 {
550     char dst_buf[20*5*20]; /* assuming enough for expansion */
551     char *dst = dst_buf;
552     int dst_p;
553     int w, d, i;
554     int pos = 0;
555     char numstr[20];
556
557     yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
558     if (val >= 0)
559     {
560         if (islt)
561             strcpy(dst, "(-[0-9]+|(");
562         else
563             strcpy(dst, "((");
564     } 
565     else
566     {
567         if (!islt)
568         {
569             strcpy(dst, "([0-9]+|-(");
570             islt = 1;
571         }
572         else
573         {
574             strcpy(dst, "(-(");
575             islt = 0;
576         }
577         val = -val;
578     }
579     dst_p = strlen(dst);
580     sprintf(numstr, "%d", val);
581     for (w = strlen(numstr); --w >= 0; pos++)
582     {
583         d = numstr[w];
584         if (pos > 0)
585         {
586             if (islt)
587             {
588                 if (d == '0')
589                     continue;
590                 d--;
591             } 
592             else
593             {
594                 if (d == '9')
595                     continue;
596                 d++;
597             }
598         }
599         
600         strcpy(dst + dst_p, numstr);
601         dst_p = strlen(dst) - pos - 1;
602
603         if (islt)
604         {
605             if (d != '0')
606             {
607                 dst[dst_p++] = '[';
608                 dst[dst_p++] = '0';
609                 dst[dst_p++] = '-';
610                 dst[dst_p++] = d;
611                 dst[dst_p++] = ']';
612             }
613             else
614                 dst[dst_p++] = d;
615         }
616         else
617         {
618             if (d != '9')
619             { 
620                 dst[dst_p++] = '[';
621                 dst[dst_p++] = d;
622                 dst[dst_p++] = '-';
623                 dst[dst_p++] = '9';
624                 dst[dst_p++] = ']';
625             }
626             else
627                 dst[dst_p++] = d;
628         }
629         for (i = 0; i<pos; i++)
630         {
631             dst[dst_p++] = '[';
632             dst[dst_p++] = '0';
633             dst[dst_p++] = '-';
634             dst[dst_p++] = '9';
635             dst[dst_p++] = ']';
636         }
637         dst[dst_p++] = '|';
638     }
639     dst[dst_p] = '\0';
640     if (islt)
641     {
642         /* match everything less than 10^(pos-1) */
643         strcat(dst, "0*");
644         for (i = 1; i<pos; i++)
645             strcat(dst, "[0-9]?");
646     }
647     else
648     {
649         /* match everything greater than 10^pos */
650         for (i = 0; i <= pos; i++)
651             strcat(dst, "[0-9]");
652         strcat(dst, "[0-9]*");
653     }
654     strcat(dst, "))");
655     wrbuf_puts(term_dict, dst);
656 }
657
658 void string_rel_add_char(WRBUF term_p, WRBUF wsrc, int *indx)
659 {
660     const char *src = wrbuf_cstr(wsrc);
661     if (src[*indx] == '\\')
662     {
663         wrbuf_putc(term_p, src[*indx]);
664         (*indx)++;
665     }
666     wrbuf_putc(term_p, src[*indx]);
667     (*indx)++;
668 }
669
670 /*
671  *   >  abc     ([b-].*|a[c-].*|ab[d-].*|abc.+)
672  *              ([^-a].*|a[^-b].*ab[^-c].*|abc.+)
673  *   >= abc     ([b-].*|a[c-].*|ab[c-].*)
674  *              ([^-a].*|a[^-b].*|ab[c-].*)
675  *   <  abc     ([-0].*|a[-a].*|ab[-b].*)
676  *              ([^a-].*|a[^b-].*|ab[^c-].*)
677  *   <= abc     ([-0].*|a[-a].*|ab[-b].*|abc)
678  *              ([^a-].*|a[^b-].*|ab[^c-].*|abc)
679  */
680 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
681                            const char **term_sub, WRBUF term_dict,
682                            const Odr_oid *attributeSet,
683                            zebra_map_t zm, int space_split, char *term_dst,
684                            int *error_code)
685 {
686     AttrType relation;
687     int relation_value;
688     int i;
689     WRBUF term_component = wrbuf_alloc();
690
691     attr_init_APT(&relation, zapt, 2);
692     relation_value = attr_find(&relation, NULL);
693
694     *error_code = 0;
695     yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
696     switch (relation_value)
697     {
698     case 1:
699         if (!term_100(zm, term_sub, term_component, space_split, term_dst))
700         {
701             wrbuf_destroy(term_component);
702             return 0;
703         }
704         yaz_log(log_level_rpn, "Relation <");
705         
706         wrbuf_putc(term_dict, '(');
707         for (i = 0; i < wrbuf_len(term_component); )
708         {
709             int j = 0;
710             
711             if (i)
712                 wrbuf_putc(term_dict, '|');
713             while (j < i)
714                 string_rel_add_char(term_dict, term_component, &j);
715
716             wrbuf_putc(term_dict, '[');
717
718             wrbuf_putc(term_dict, '^');
719             
720             wrbuf_putc(term_dict, 1);
721             wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
722             
723             string_rel_add_char(term_dict, term_component, &i);
724             wrbuf_putc(term_dict, '-');
725             
726             wrbuf_putc(term_dict, ']');
727             wrbuf_putc(term_dict, '.');
728             wrbuf_putc(term_dict, '*');
729         }
730         wrbuf_putc(term_dict, ')');
731         break;
732     case 2:
733         if (!term_100(zm, term_sub, term_component, space_split, term_dst))
734         {
735             wrbuf_destroy(term_component);
736             return 0;
737         }
738         yaz_log(log_level_rpn, "Relation <=");
739
740         wrbuf_putc(term_dict, '(');
741         for (i = 0; i < wrbuf_len(term_component); )
742         {
743             int j = 0;
744
745             while (j < i)
746                 string_rel_add_char(term_dict, term_component, &j);
747             wrbuf_putc(term_dict, '[');
748
749             wrbuf_putc(term_dict, '^');
750
751             wrbuf_putc(term_dict, 1);
752             wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
753
754             string_rel_add_char(term_dict, term_component, &i);
755             wrbuf_putc(term_dict, '-');
756
757             wrbuf_putc(term_dict, ']');
758             wrbuf_putc(term_dict, '.');
759             wrbuf_putc(term_dict, '*');
760
761             wrbuf_putc(term_dict, '|');
762         }
763         for (i = 0; i < wrbuf_len(term_component); )
764             string_rel_add_char(term_dict, term_component, &i);
765         wrbuf_putc(term_dict, ')');
766         break;
767     case 5:
768         if (!term_100(zm, term_sub, term_component, space_split, term_dst))
769         {
770             wrbuf_destroy(term_component);
771             return 0;
772         }
773         yaz_log(log_level_rpn, "Relation >");
774
775         wrbuf_putc(term_dict, '(');
776         for (i = 0; i < wrbuf_len(term_component); )
777         {
778             int j = 0;
779
780             while (j < i)
781                 string_rel_add_char(term_dict, term_component, &j);
782             wrbuf_putc(term_dict, '[');
783             
784             wrbuf_putc(term_dict, '^');
785             wrbuf_putc(term_dict, '-');
786             string_rel_add_char(term_dict, term_component, &i);
787
788             wrbuf_putc(term_dict, ']');
789             wrbuf_putc(term_dict, '.');
790             wrbuf_putc(term_dict, '*');
791
792             wrbuf_putc(term_dict, '|');
793         }
794         for (i = 0; i < wrbuf_len(term_component); )
795             string_rel_add_char(term_dict, term_component, &i);
796         wrbuf_putc(term_dict, '.');
797         wrbuf_putc(term_dict, '+');
798         wrbuf_putc(term_dict, ')');
799         break;
800     case 4:
801         if (!term_100(zm, term_sub, term_component, space_split, term_dst))
802         {
803             wrbuf_destroy(term_component);
804             return 0;
805         }
806         yaz_log(log_level_rpn, "Relation >=");
807
808         wrbuf_putc(term_dict, '(');
809         for (i = 0; i < wrbuf_len(term_component); )
810         {
811             int j = 0;
812
813             if (i)
814                 wrbuf_putc(term_dict, '|');
815             while (j < i)
816                 string_rel_add_char(term_dict, term_component, &j);
817             wrbuf_putc(term_dict, '[');
818
819             if (i < wrbuf_len(term_component)-1)
820             {
821                 wrbuf_putc(term_dict, '^');
822                 wrbuf_putc(term_dict, '-');
823                 string_rel_add_char(term_dict, term_component, &i);
824             }
825             else
826             {
827                 string_rel_add_char(term_dict, term_component, &i);
828                 wrbuf_putc(term_dict, '-');
829             }
830             wrbuf_putc(term_dict, ']');
831             wrbuf_putc(term_dict, '.');
832             wrbuf_putc(term_dict, '*');
833         }
834         wrbuf_putc(term_dict, ')');
835         break;
836     case 3:
837     case 102:
838     case -1:
839         if (!**term_sub)
840             return 1;
841         yaz_log(log_level_rpn, "Relation =");
842         if (!term_100(zm, term_sub, term_component, space_split, term_dst))
843         {
844             wrbuf_destroy(term_component);
845             return 0;
846         }
847         wrbuf_puts(term_dict, "(");
848         wrbuf_puts(term_dict, wrbuf_cstr(term_component));
849         wrbuf_puts(term_dict, ")");
850         break;
851     case 103:
852         yaz_log(log_level_rpn, "Relation always matches");
853         /* skip to end of term (we don't care what it is) */
854         while (**term_sub != '\0')
855             (*term_sub)++;
856         break;
857     default:
858         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
859         wrbuf_destroy(term_component);
860         return 0;
861     }
862     wrbuf_destroy(term_component);
863     return 1;
864 }
865
866 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
867                              const char **term_sub, 
868                              WRBUF term_dict,
869                              const Odr_oid *attributeSet, NMEM stream,
870                              struct grep_info *grep_info,
871                              const char *index_type, int complete_flag,
872                              char *term_dst,
873                              const char *xpath_use,
874                              struct ord_list **ol);
875
876 ZEBRA_RES zebra_term_limits_APT(ZebraHandle zh,
877                                 Z_AttributesPlusTerm *zapt,
878                                 zint *hits_limit_value,
879                                 const char **term_ref_id_str,
880                                 NMEM nmem)
881 {
882     AttrType term_ref_id_attr;
883     AttrType hits_limit_attr;
884     int term_ref_id_int;
885  
886     attr_init_APT(&hits_limit_attr, zapt, 11);
887     *hits_limit_value  = attr_find(&hits_limit_attr, NULL);
888
889     attr_init_APT(&term_ref_id_attr, zapt, 10);
890     term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
891     if (term_ref_id_int >= 0)
892     {
893         char *res = nmem_malloc(nmem, 20);
894         sprintf(res, "%d", term_ref_id_int);
895         *term_ref_id_str = res;
896     }
897
898     /* no limit given ? */
899     if (*hits_limit_value == -1)
900     {
901         if (*term_ref_id_str)
902         {
903             /* use global if term_ref is present */
904             *hits_limit_value = zh->approx_limit;
905         }
906         else
907         {
908             /* no counting if term_ref is not present */
909             *hits_limit_value = 0;
910         }
911     }
912     else if (*hits_limit_value == 0)
913     {
914         /* 0 is the same as global limit */
915         *hits_limit_value = zh->approx_limit;
916     }
917     yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
918             *term_ref_id_str ? *term_ref_id_str : "none",
919             *hits_limit_value);
920     return ZEBRA_OK;
921 }
922
923 static ZEBRA_RES term_trunc(ZebraHandle zh,
924                             Z_AttributesPlusTerm *zapt,
925                             const char **term_sub, 
926                             const Odr_oid *attributeSet, NMEM stream,
927                             struct grep_info *grep_info,
928                             const char *index_type, int complete_flag,
929                             char *term_dst,
930                             const char *rank_type, 
931                             const char *xpath_use,
932                             NMEM rset_nmem,
933                             RSET *rset,
934                             struct rset_key_control *kc)
935 {
936     ZEBRA_RES res;
937     struct ord_list *ol;
938     zint hits_limit_value;
939     const char *term_ref_id_str = 0;
940     WRBUF term_dict = wrbuf_alloc();
941
942     *rset = 0;
943     zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
944                           stream);
945     grep_info->isam_p_indx = 0;
946     res = string_term(zh, zapt, term_sub, term_dict,
947                       attributeSet, stream, grep_info,
948                       index_type, complete_flag,
949                       term_dst, xpath_use, &ol);
950     wrbuf_destroy(term_dict);
951     if (res != ZEBRA_OK)
952         return res;
953     if (!*term_sub)  /* no more terms ? */
954         return res;
955     yaz_log(log_level_rpn, "term: %s", term_dst);
956     *rset = rset_trunc(zh, grep_info->isam_p_buf,
957                        grep_info->isam_p_indx, term_dst,
958                        strlen(term_dst), rank_type, 1 /* preserve pos */,
959                        zapt->term->which, rset_nmem,
960                        kc, kc->scope, ol, index_type, hits_limit_value,
961                        term_ref_id_str);
962     if (!*rset)
963         return ZEBRA_FAIL;
964     return ZEBRA_OK;
965 }
966
967 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
968                              const char **term_sub, 
969                              WRBUF term_dict,
970                              const Odr_oid *attributeSet, NMEM stream,
971                              struct grep_info *grep_info,
972                              const char *index_type, int complete_flag,
973                              char *term_dst,
974                              const char *xpath_use,
975                              struct ord_list **ol)
976 {
977     int r;
978     AttrType truncation;
979     int truncation_value;
980     const char *termp;
981     struct rpn_char_map_info rcmi;
982
983     int space_split = complete_flag ? 0 : 1;
984     int ord = -1;
985     int regex_range = 0;
986     int max_pos, prefix_len = 0;
987     int relation_error;
988     char ord_buf[32];
989     int ord_len, i;
990     zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
991
992     *ol = ord_list_create(stream);
993
994     rpn_char_map_prepare(zh->reg, zm, &rcmi);
995     attr_init_APT(&truncation, zapt, 5);
996     truncation_value = attr_find(&truncation, NULL);
997     yaz_log(log_level_rpn, "truncation value %d", truncation_value);
998
999     termp = *term_sub; /* start of term for each database */
1000     
1001     if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1002                           attributeSet, &ord) != ZEBRA_OK)
1003     {
1004         *term_sub = 0;
1005         return ZEBRA_FAIL;
1006     }
1007     
1008     wrbuf_rewind(term_dict); /* new dictionary regexp term */
1009     
1010     *ol = ord_list_append(stream, *ol, ord);
1011     ord_len = key_SU_encode(ord, ord_buf);
1012     
1013     wrbuf_putc(term_dict, '(');
1014     
1015     for (i = 0; i<ord_len; i++)
1016     {
1017         wrbuf_putc(term_dict, 1);  /* our internal regexp escape char */
1018         wrbuf_putc(term_dict, ord_buf[i]);
1019     }
1020     wrbuf_putc(term_dict, ')');
1021     
1022     prefix_len = wrbuf_len(term_dict);
1023     
1024     switch (truncation_value)
1025     {
1026     case -1:         /* not specified */
1027     case 100:        /* do not truncate */
1028         if (!string_relation(zh, zapt, &termp, term_dict,
1029                              attributeSet,
1030                              zm, space_split, term_dst,
1031                              &relation_error))
1032         {
1033             if (relation_error)
1034             {
1035                 zebra_setError(zh, relation_error, 0);
1036                 return ZEBRA_FAIL;
1037             }
1038             *term_sub = 0;
1039             return ZEBRA_OK;
1040         }
1041         break;
1042     case 1:          /* right truncation */
1043         wrbuf_putc(term_dict, '(');
1044         if (!term_100(zm, &termp, term_dict, space_split, term_dst))
1045         {
1046             *term_sub = 0;
1047             return ZEBRA_OK;
1048         }
1049         wrbuf_puts(term_dict, ".*)");
1050         break;
1051     case 2:          /* keft truncation */
1052         wrbuf_puts(term_dict, "(.*");
1053         if (!term_100(zm, &termp, term_dict, space_split, term_dst))
1054         {
1055             *term_sub = 0;
1056             return ZEBRA_OK;
1057         }
1058         wrbuf_putc(term_dict, ')');
1059         break;
1060     case 3:          /* left&right truncation */
1061         wrbuf_puts(term_dict, "(.*");
1062         if (!term_100(zm, &termp, term_dict, space_split, term_dst))
1063         {
1064             *term_sub = 0;
1065             return ZEBRA_OK;
1066         }
1067         wrbuf_puts(term_dict, ".*)");
1068         break;
1069     case 101:        /* process # in term */
1070         wrbuf_putc(term_dict, '(');
1071         if (!term_101(zm, &termp, term_dict, space_split, term_dst))
1072         {
1073             *term_sub = 0;
1074             return ZEBRA_OK;
1075         }
1076         wrbuf_puts(term_dict, ")");
1077         break;
1078     case 102:        /* Regexp-1 */
1079         wrbuf_putc(term_dict, '(');
1080         if (!term_102(zm, &termp, term_dict, space_split, term_dst))
1081         {
1082             *term_sub = 0;
1083             return ZEBRA_OK;
1084         }
1085         wrbuf_putc(term_dict, ')');
1086         break;
1087     case 103:       /* Regexp-2 */
1088         regex_range = 1;
1089         wrbuf_putc(term_dict, '(');
1090         if (!term_103(zm, &termp, term_dict, &regex_range,
1091                       space_split, term_dst))
1092         {
1093             *term_sub = 0;
1094             return ZEBRA_OK;
1095         }
1096         wrbuf_putc(term_dict, ')');
1097         break;
1098     case 104:        /* process # and ! in term */
1099         wrbuf_putc(term_dict, '(');
1100         if (!term_104(zm, &termp, term_dict, space_split, term_dst))
1101         {
1102             *term_sub = 0;
1103             return ZEBRA_OK;
1104         }
1105         wrbuf_putc(term_dict, ')');
1106         break;
1107     case 105:        /* process * and ! in term */
1108         wrbuf_putc(term_dict, '(');
1109         if (!term_105(zm, &termp, term_dict, space_split, term_dst, 1))
1110         {
1111             *term_sub = 0;
1112             return ZEBRA_OK;
1113         }
1114         wrbuf_putc(term_dict, ')');
1115         break;
1116     case 106:        /* process * and ! in term */
1117         wrbuf_putc(term_dict, '(');
1118         if (!term_105(zm, &termp, term_dict, space_split, term_dst, 0))
1119         {
1120             *term_sub = 0;
1121             return ZEBRA_OK;
1122         }
1123         wrbuf_putc(term_dict, ')');
1124         break;
1125     default:
1126         zebra_setError_zint(zh,
1127                             YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1128                             truncation_value);
1129         return ZEBRA_FAIL;
1130     }
1131     if (1)
1132     {
1133         char buf[1000];
1134         const char *input = wrbuf_cstr(term_dict) + prefix_len;
1135         esc_str(buf, sizeof(buf), input, strlen(input));
1136     }
1137     {
1138         WRBUF pr_wr = wrbuf_alloc();
1139
1140         wrbuf_write_escaped(pr_wr, wrbuf_buf(term_dict), wrbuf_len(term_dict));
1141         yaz_log(YLOG_LOG, "dict_lookup_grep: %s", wrbuf_cstr(pr_wr));
1142         wrbuf_destroy(pr_wr);
1143     }
1144     r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), regex_range,
1145                          grep_info, &max_pos, 
1146                          ord_len /* number of "exact" chars */,
1147                          grep_handle);
1148     if (r == 1)
1149         zebra_set_partial_result(zh);
1150     else if (r)
1151         yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1152     *term_sub = termp;
1153     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1154     return ZEBRA_OK;
1155 }
1156
1157
1158
1159 static void grep_info_delete(struct grep_info *grep_info)
1160 {
1161 #ifdef TERM_COUNT
1162     xfree(grep_info->term_no);
1163 #endif
1164     xfree(grep_info->isam_p_buf);
1165 }
1166
1167 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1168                                    Z_AttributesPlusTerm *zapt,
1169                                    struct grep_info *grep_info,
1170                                    const char *index_type)
1171 {
1172 #ifdef TERM_COUNT
1173     grep_info->term_no = 0;
1174 #endif
1175     grep_info->trunc_max = atoi(res_get_def(zh->res, "truncmax", "10000"));
1176     grep_info->isam_p_size = 0;
1177     grep_info->isam_p_buf = NULL;
1178     grep_info->zh = zh;
1179     grep_info->index_type = index_type;
1180     grep_info->termset = 0;
1181     if (zapt)
1182     {
1183         AttrType truncmax;
1184         int truncmax_value;
1185
1186         attr_init_APT(&truncmax, zapt, 13);
1187         truncmax_value = attr_find(&truncmax, NULL);
1188         if (truncmax_value != -1)
1189             grep_info->trunc_max = truncmax_value;
1190     }
1191     if (zapt)
1192     {
1193         AttrType termset;
1194         int termset_value_numeric;
1195         const char *termset_value_string;
1196
1197         attr_init_APT(&termset, zapt, 8);
1198         termset_value_numeric =
1199             attr_find_ex(&termset, NULL, &termset_value_string);
1200         if (termset_value_numeric != -1)
1201         {
1202 #if TERMSET_DISABLE
1203             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1204             return ZEBRA_FAIL;
1205 #else
1206             char resname[32];
1207             const char *termset_name = 0;
1208             if (termset_value_numeric != -2)
1209             {
1210                 
1211                 sprintf(resname, "%d", termset_value_numeric);
1212                 termset_name = resname;
1213             }
1214             else
1215             termset_name = termset_value_string;
1216             yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1217             grep_info->termset = resultSetAdd(zh, termset_name, 1);
1218             if (!grep_info->termset)
1219             {
1220                 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1221                 return ZEBRA_FAIL;
1222             }
1223 #endif
1224         }
1225     }
1226     return ZEBRA_OK;
1227 }
1228                                
1229 /**
1230   \brief Create result set(s) for list of terms
1231   \param zh Zebra Handle
1232   \param zapt Attributes Plust Term (RPN leaf)
1233   \param termz term as used in query but converted to UTF-8
1234   \param attributeSet default attribute set
1235   \param stream memory for result
1236   \param index_type register type ("w", "p",..)
1237   \param complete_flag whether it's phrases or not
1238   \param rank_type term flags for ranking
1239   \param xpath_use use attribute for X-Path (-1 for no X-path)
1240   \param rset_nmem memory for result sets
1241   \param result_sets output result set for each term in list (output)
1242   \param num_result_sets number of output result sets
1243   \param kc rset key control to be used for created result sets
1244 */
1245 static ZEBRA_RES term_list_trunc(ZebraHandle zh,
1246                                  Z_AttributesPlusTerm *zapt,
1247                                  const char *termz,
1248                                  const Odr_oid *attributeSet,
1249                                  NMEM stream,
1250                                  const char *index_type, int complete_flag,
1251                                  const char *rank_type,
1252                                  const char *xpath_use,
1253                                  NMEM rset_nmem,
1254                                  RSET **result_sets, int *num_result_sets,
1255                                  struct rset_key_control *kc)
1256 {
1257     char term_dst[IT_MAX_WORD+1];
1258     struct grep_info grep_info;
1259     const char *termp = termz;
1260     int alloc_sets = 0;
1261
1262     *num_result_sets = 0;
1263     *term_dst = 0;
1264     if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1265         return ZEBRA_FAIL;
1266     while(1)
1267     { 
1268         ZEBRA_RES res;
1269
1270         if (alloc_sets == *num_result_sets)
1271         {
1272             int add = 10;
1273             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1274                                               sizeof(*rnew));
1275             if (alloc_sets)
1276                 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1277             alloc_sets = alloc_sets + add;
1278             *result_sets = rnew;
1279         }
1280         res = term_trunc(zh, zapt, &termp, attributeSet,
1281                          stream, &grep_info,
1282                          index_type, complete_flag,
1283                          term_dst, rank_type,
1284                          xpath_use, rset_nmem,
1285                          &(*result_sets)[*num_result_sets],
1286                          kc);
1287         if (res != ZEBRA_OK)
1288         {
1289             int i;
1290             for (i = 0; i < *num_result_sets; i++)
1291                 rset_delete((*result_sets)[i]);
1292             grep_info_delete(&grep_info);
1293             return res;
1294         }
1295         if ((*result_sets)[*num_result_sets] == 0)
1296             break;
1297         (*num_result_sets)++;
1298
1299         if (!*termp)
1300             break;
1301     }
1302     grep_info_delete(&grep_info);
1303     return ZEBRA_OK;
1304 }
1305
1306 static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh,
1307                                          Z_AttributesPlusTerm *zapt,
1308                                          const Odr_oid *attributeSet,
1309                                          const char *index_type,
1310                                          NMEM rset_nmem,
1311                                          RSET *rset,
1312                                          struct rset_key_control *kc)
1313 {
1314     int position_value;
1315     AttrType position;
1316     int ord = -1;
1317     char ord_buf[32];
1318     char term_dict[100];
1319     int ord_len;
1320     char *val;
1321     ISAM_P isam_p;
1322     zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1323     
1324     attr_init_APT(&position, zapt, 3);
1325     position_value = attr_find(&position, NULL);
1326     switch(position_value)
1327     {
1328     case 3:
1329     case -1:
1330         return ZEBRA_OK;
1331     case 1:
1332     case 2:
1333         break;
1334     default:
1335         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1336                             position_value);
1337         return ZEBRA_FAIL;
1338     }
1339
1340
1341     if (!zebra_maps_is_first_in_field(zm))
1342     {
1343         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1344                             position_value);
1345         return ZEBRA_FAIL;
1346     }
1347
1348     if (zebra_apt_get_ord(zh, zapt, index_type, 0,
1349                           attributeSet, &ord) != ZEBRA_OK)
1350     {
1351         return ZEBRA_FAIL;
1352     }
1353     ord_len = key_SU_encode(ord, ord_buf);
1354     memcpy(term_dict, ord_buf, ord_len);
1355     strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1356     val = dict_lookup(zh->reg->dict, term_dict);
1357     if (val)
1358     {
1359         assert(*val == sizeof(ISAM_P));
1360         memcpy(&isam_p, val+1, sizeof(isam_p));
1361
1362         *rset = zebra_create_rset_isam(zh, rset_nmem, kc, kc->scope, 
1363                                        isam_p, 0);
1364     }
1365     return ZEBRA_OK;
1366 }
1367                                          
1368 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1369                                        Z_AttributesPlusTerm *zapt,
1370                                        const char *termz_org,
1371                                        const Odr_oid *attributeSet,
1372                                        NMEM stream,
1373                                        const char *index_type, int complete_flag,
1374                                        const char *rank_type,
1375                                        const char *xpath_use,
1376                                        NMEM rset_nmem,
1377                                        RSET *rset,
1378                                        struct rset_key_control *kc)
1379 {
1380     RSET *result_sets = 0;
1381     int num_result_sets = 0;
1382     ZEBRA_RES res =
1383         term_list_trunc(zh, zapt, termz_org, attributeSet,
1384                         stream, index_type, complete_flag,
1385                         rank_type, xpath_use,
1386                         rset_nmem,
1387                         &result_sets, &num_result_sets, kc);
1388
1389     if (res != ZEBRA_OK)
1390         return res;
1391
1392     if (num_result_sets > 0)
1393     {
1394         RSET first_set = 0;
1395         res = rpn_search_APT_position(zh, zapt, attributeSet, 
1396                                       index_type,
1397                                       rset_nmem, &first_set,
1398                                       kc);
1399         if (res != ZEBRA_OK)
1400         {
1401             int i;
1402             for (i = 0; i<num_result_sets; i++)
1403                 rset_delete(result_sets[i]);
1404             return res;
1405         }
1406         if (first_set)
1407         {
1408             RSET *nsets = nmem_malloc(stream,
1409                                       sizeof(RSET) * (num_result_sets+1));
1410             nsets[0] = first_set;
1411             memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1412             result_sets = nsets;
1413             num_result_sets++;
1414         }
1415     }
1416     if (num_result_sets == 0)
1417         *rset = rset_create_null(rset_nmem, kc, 0); 
1418     else if (num_result_sets == 1)
1419         *rset = result_sets[0];
1420     else
1421         *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1422                                  num_result_sets, result_sets,
1423                                  1 /* ordered */, 0 /* exclusion */,
1424                                  3 /* relation */, 1 /* distance */);
1425     if (!*rset)
1426         return ZEBRA_FAIL;
1427     return ZEBRA_OK;
1428 }
1429
1430 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1431                                         Z_AttributesPlusTerm *zapt,
1432                                         const char *termz_org,
1433                                         const Odr_oid *attributeSet,
1434                                         NMEM stream,
1435                                         const char *index_type, 
1436                                         int complete_flag,
1437                                         const char *rank_type,
1438                                         const char *xpath_use,
1439                                         NMEM rset_nmem,
1440                                         RSET *rset,
1441                                         struct rset_key_control *kc)
1442 {
1443     RSET *result_sets = 0;
1444     int num_result_sets = 0;
1445     int i;
1446     ZEBRA_RES res =
1447         term_list_trunc(zh, zapt, termz_org, attributeSet,
1448                         stream, index_type, complete_flag,
1449                         rank_type, xpath_use,
1450                         rset_nmem,
1451                         &result_sets, &num_result_sets, kc);
1452     if (res != ZEBRA_OK)
1453         return res;
1454
1455     for (i = 0; i<num_result_sets; i++)
1456     {
1457         RSET first_set = 0;
1458         res = rpn_search_APT_position(zh, zapt, attributeSet, 
1459                                       index_type,
1460                                       rset_nmem, &first_set,
1461                                       kc);
1462         if (res != ZEBRA_OK)
1463         {
1464             for (i = 0; i<num_result_sets; i++)
1465                 rset_delete(result_sets[i]);
1466             return res;
1467         }
1468
1469         if (first_set)
1470         {
1471             RSET tmp_set[2];
1472
1473             tmp_set[0] = first_set;
1474             tmp_set[1] = result_sets[i];
1475             
1476             result_sets[i] = rset_create_prox(
1477                 rset_nmem, kc, kc->scope,
1478                 2, tmp_set,
1479                 1 /* ordered */, 0 /* exclusion */,
1480                 3 /* relation */, 1 /* distance */);
1481         }
1482     }
1483     if (num_result_sets == 0)
1484         *rset = rset_create_null(rset_nmem, kc, 0); 
1485     else if (num_result_sets == 1)
1486         *rset = result_sets[0];
1487     else
1488         *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1489                                num_result_sets, result_sets);
1490     if (!*rset)
1491         return ZEBRA_FAIL;
1492     return ZEBRA_OK;
1493 }
1494
1495 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1496                                          Z_AttributesPlusTerm *zapt,
1497                                          const char *termz_org,
1498                                          const Odr_oid *attributeSet,
1499                                          NMEM stream,
1500                                          const char *index_type, 
1501                                          int complete_flag,
1502                                          const char *rank_type, 
1503                                          const char *xpath_use,
1504                                          NMEM rset_nmem,
1505                                          RSET *rset,
1506                                          struct rset_key_control *kc)
1507 {
1508     RSET *result_sets = 0;
1509     int num_result_sets = 0;
1510     int i;
1511     ZEBRA_RES res =
1512         term_list_trunc(zh, zapt, termz_org, attributeSet,
1513                         stream, index_type, complete_flag,
1514                         rank_type, xpath_use,
1515                         rset_nmem,
1516                         &result_sets, &num_result_sets,
1517                         kc);
1518     if (res != ZEBRA_OK)
1519         return res;
1520     for (i = 0; i<num_result_sets; i++)
1521     {
1522         RSET first_set = 0;
1523         res = rpn_search_APT_position(zh, zapt, attributeSet, 
1524                                       index_type,
1525                                       rset_nmem, &first_set,
1526                                       kc);
1527         if (res != ZEBRA_OK)
1528         {
1529             for (i = 0; i<num_result_sets; i++)
1530                 rset_delete(result_sets[i]);
1531             return res;
1532         }
1533
1534         if (first_set)
1535         {
1536             RSET tmp_set[2];
1537
1538             tmp_set[0] = first_set;
1539             tmp_set[1] = result_sets[i];
1540             
1541             result_sets[i] = rset_create_prox(
1542                 rset_nmem, kc, kc->scope,
1543                 2, tmp_set,
1544                 1 /* ordered */, 0 /* exclusion */,
1545                 3 /* relation */, 1 /* distance */);
1546         }
1547     }
1548
1549
1550     if (num_result_sets == 0)
1551         *rset = rset_create_null(rset_nmem, kc, 0); 
1552     else if (num_result_sets == 1)
1553         *rset = result_sets[0];
1554     else
1555         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1556                                num_result_sets, result_sets);
1557     if (!*rset)
1558         return ZEBRA_FAIL;
1559     return ZEBRA_OK;
1560 }
1561
1562 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1563                             const char **term_sub,
1564                             WRBUF term_dict,
1565                             const Odr_oid *attributeSet,
1566                             struct grep_info *grep_info,
1567                             int *max_pos,
1568                             zebra_map_t zm,
1569                             char *term_dst,
1570                             int *error_code)
1571 {
1572     AttrType relation;
1573     int relation_value;
1574     int term_value;
1575     int r;
1576     WRBUF term_num = wrbuf_alloc();
1577
1578     *error_code = 0;
1579     attr_init_APT(&relation, zapt, 2);
1580     relation_value = attr_find(&relation, NULL);
1581
1582     yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1583
1584     switch (relation_value)
1585     {
1586     case 1:
1587         yaz_log(log_level_rpn, "Relation <");
1588         if (!term_100(zm, term_sub, term_num, 1, term_dst))
1589         { 
1590             wrbuf_destroy(term_num);
1591             return 0;
1592         }
1593         term_value = atoi(wrbuf_cstr(term_num));
1594         gen_regular_rel(term_dict, term_value-1, 1);
1595         break;
1596     case 2:
1597         yaz_log(log_level_rpn, "Relation <=");
1598         if (!term_100(zm, term_sub, term_num, 1, term_dst))
1599         {
1600             wrbuf_destroy(term_num);
1601             return 0;
1602         }
1603         term_value = atoi(wrbuf_cstr(term_num));
1604         gen_regular_rel(term_dict, term_value, 1);
1605         break;
1606     case 4:
1607         yaz_log(log_level_rpn, "Relation >=");
1608         if (!term_100(zm, term_sub, term_num, 1, term_dst))
1609         {
1610             wrbuf_destroy(term_num);
1611             return 0;
1612         }
1613         term_value = atoi(wrbuf_cstr(term_num));
1614         gen_regular_rel(term_dict, term_value, 0);
1615         break;
1616     case 5:
1617         yaz_log(log_level_rpn, "Relation >");
1618         if (!term_100(zm, term_sub, term_num, 1, term_dst))
1619         {
1620             wrbuf_destroy(term_num);
1621             return 0;
1622         }
1623         term_value = atoi(wrbuf_cstr(term_num));
1624         gen_regular_rel(term_dict, term_value+1, 0);
1625         break;
1626     case -1:
1627     case 3:
1628         yaz_log(log_level_rpn, "Relation =");
1629         if (!term_100(zm, term_sub, term_num, 1, term_dst))
1630         {
1631             wrbuf_destroy(term_num);
1632             return 0; 
1633         }
1634         term_value = atoi(wrbuf_cstr(term_num));
1635         wrbuf_printf(term_dict, "(0*%d)", term_value);
1636         break;
1637     case 103:
1638         /* term_tmp untouched.. */
1639         while (**term_sub != '\0')
1640             (*term_sub)++;
1641         break;
1642     default:
1643         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1644         wrbuf_destroy(term_num); 
1645         return 0;
1646     }
1647     r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 
1648                          0, grep_info, max_pos, 0, grep_handle);
1649
1650     if (r == 1)
1651         zebra_set_partial_result(zh);
1652     else if (r)
1653         yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1654     yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1655     wrbuf_destroy(term_num);
1656     return 1;
1657 }
1658
1659 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1660                               const char **term_sub, 
1661                               WRBUF term_dict,
1662                               const Odr_oid *attributeSet, NMEM stream,
1663                               struct grep_info *grep_info,
1664                               const char *index_type, int complete_flag,
1665                               char *term_dst, 
1666                               const char *xpath_use,
1667                               struct ord_list **ol)
1668 {
1669     const char *termp;
1670     struct rpn_char_map_info rcmi;
1671     int max_pos;
1672     int relation_error = 0;
1673     int ord, ord_len, i;
1674     char ord_buf[32];
1675     zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1676     
1677     *ol = ord_list_create(stream);
1678
1679     rpn_char_map_prepare(zh->reg, zm, &rcmi);
1680
1681     termp = *term_sub;
1682     
1683     if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1684                           attributeSet, &ord) != ZEBRA_OK)
1685     {
1686         return ZEBRA_FAIL;
1687     }
1688     
1689     wrbuf_rewind(term_dict);
1690     
1691     *ol = ord_list_append(stream, *ol, ord);
1692     
1693     ord_len = key_SU_encode(ord, ord_buf);
1694     
1695     wrbuf_putc(term_dict, '(');
1696     for (i = 0; i < ord_len; i++)
1697     {
1698         wrbuf_putc(term_dict, 1);
1699         wrbuf_putc(term_dict, ord_buf[i]);
1700     }
1701     wrbuf_putc(term_dict, ')');
1702     
1703     if (!numeric_relation(zh, zapt, &termp, term_dict,
1704                           attributeSet, grep_info, &max_pos, zm,
1705                           term_dst, &relation_error))
1706     {
1707         if (relation_error)
1708         {
1709             zebra_setError(zh, relation_error, 0);
1710             return ZEBRA_FAIL;
1711         }
1712         *term_sub = 0;
1713         return ZEBRA_OK;
1714     }
1715     *term_sub = termp;
1716     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1717     return ZEBRA_OK;
1718 }
1719
1720                                  
1721 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1722                                         Z_AttributesPlusTerm *zapt,
1723                                         const char *termz,
1724                                         const Odr_oid *attributeSet,
1725                                         NMEM stream,
1726                                         const char *index_type, 
1727                                         int complete_flag,
1728                                         const char *rank_type, 
1729                                         const char *xpath_use,
1730                                         NMEM rset_nmem,
1731                                         RSET *rset,
1732                                         struct rset_key_control *kc)
1733 {
1734     char term_dst[IT_MAX_WORD+1];
1735     const char *termp = termz;
1736     RSET *result_sets = 0;
1737     int num_result_sets = 0;
1738     ZEBRA_RES res;
1739     struct grep_info grep_info;
1740     int alloc_sets = 0;
1741     zint hits_limit_value;
1742     const char *term_ref_id_str = 0;
1743
1744     zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
1745                           stream);
1746
1747     yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1748     if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1749         return ZEBRA_FAIL;
1750     while (1)
1751     { 
1752         struct ord_list *ol;
1753         WRBUF term_dict = wrbuf_alloc();
1754         if (alloc_sets == num_result_sets)
1755         {
1756             int add = 10;
1757             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1758                                               sizeof(*rnew));
1759             if (alloc_sets)
1760                 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1761             alloc_sets = alloc_sets + add;
1762             result_sets = rnew;
1763         }
1764         yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1765         grep_info.isam_p_indx = 0;
1766         res = numeric_term(zh, zapt, &termp, term_dict,
1767                            attributeSet, stream, &grep_info,
1768                            index_type, complete_flag,
1769                            term_dst, xpath_use, &ol);
1770         wrbuf_destroy(term_dict);
1771         if (res == ZEBRA_FAIL || termp == 0)
1772             break;
1773         yaz_log(YLOG_DEBUG, "term: %s", term_dst);
1774         result_sets[num_result_sets] =
1775             rset_trunc(zh, grep_info.isam_p_buf,
1776                        grep_info.isam_p_indx, term_dst,
1777                        strlen(term_dst), rank_type,
1778                        0 /* preserve position */,
1779                        zapt->term->which, rset_nmem, 
1780                        kc, kc->scope, ol, index_type,
1781                        hits_limit_value,
1782                        term_ref_id_str);
1783         if (!result_sets[num_result_sets])
1784             break;
1785         num_result_sets++;
1786         if (!*termp)
1787             break;
1788     }
1789     grep_info_delete(&grep_info);
1790
1791     if (res != ZEBRA_OK)
1792         return res;
1793     if (num_result_sets == 0)
1794         *rset = rset_create_null(rset_nmem, kc, 0);
1795     else if (num_result_sets == 1)
1796         *rset = result_sets[0];
1797     else
1798         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1799                                 num_result_sets, result_sets);
1800     if (!*rset)
1801         return ZEBRA_FAIL;
1802     return ZEBRA_OK;
1803 }
1804
1805 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1806                                       Z_AttributesPlusTerm *zapt,
1807                                       const char *termz,
1808                                       const Odr_oid *attributeSet,
1809                                       NMEM stream,
1810                                       const char *rank_type, NMEM rset_nmem,
1811                                       RSET *rset,
1812                                       struct rset_key_control *kc)
1813 {
1814     Record rec;
1815     zint sysno = atozint(termz);
1816     
1817     if (sysno <= 0)
1818         sysno = 0;
1819     rec = rec_get(zh->reg->records, sysno);
1820     if (!rec)
1821         sysno = 0;
1822
1823     rec_free(&rec);
1824
1825     if (sysno <= 0)
1826     {
1827         *rset = rset_create_null(rset_nmem, kc, 0);
1828     }
1829     else
1830     {
1831         RSFD rsfd;
1832         struct it_key key;
1833         *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1834                                  res_get(zh->res, "setTmpDir"), 0);
1835         rsfd = rset_open(*rset, RSETF_WRITE);
1836         
1837         key.mem[0] = sysno;
1838         key.mem[1] = 1;
1839         key.len = 2;
1840         rset_write(rsfd, &key);
1841         rset_close(rsfd);
1842     }
1843     return ZEBRA_OK;
1844 }
1845
1846 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1847                                const Odr_oid *attributeSet, NMEM stream,
1848                                Z_SortKeySpecList *sort_sequence,
1849                                const char *rank_type,
1850                                NMEM rset_nmem,
1851                                RSET *rset,
1852                                struct rset_key_control *kc)
1853 {
1854     int i;
1855     int sort_relation_value;
1856     AttrType sort_relation_type;
1857     Z_SortKeySpec *sks;
1858     Z_SortKey *sk;
1859     char termz[20];
1860     
1861     attr_init_APT(&sort_relation_type, zapt, 7);
1862     sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
1863
1864     if (!sort_sequence->specs)
1865     {
1866         sort_sequence->num_specs = 10;
1867         sort_sequence->specs = (Z_SortKeySpec **)
1868             nmem_malloc(stream, sort_sequence->num_specs *
1869                          sizeof(*sort_sequence->specs));
1870         for (i = 0; i<sort_sequence->num_specs; i++)
1871             sort_sequence->specs[i] = 0;
1872     }
1873     if (zapt->term->which != Z_Term_general)
1874         i = 0;
1875     else
1876         i = atoi_n((char *) zapt->term->u.general->buf,
1877                     zapt->term->u.general->len);
1878     if (i >= sort_sequence->num_specs)
1879         i = 0;
1880     sprintf(termz, "%d", i);
1881
1882     sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1883     sks->sortElement = (Z_SortElement *)
1884         nmem_malloc(stream, sizeof(*sks->sortElement));
1885     sks->sortElement->which = Z_SortElement_generic;
1886     sk = sks->sortElement->u.generic = (Z_SortKey *)
1887         nmem_malloc(stream, sizeof(*sk));
1888     sk->which = Z_SortKey_sortAttributes;
1889     sk->u.sortAttributes = (Z_SortAttributes *)
1890         nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
1891
1892     sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet);
1893     sk->u.sortAttributes->list = zapt->attributes;
1894
1895     sks->sortRelation = (int *)
1896         nmem_malloc(stream, sizeof(*sks->sortRelation));
1897     if (sort_relation_value == 1)
1898         *sks->sortRelation = Z_SortKeySpec_ascending;
1899     else if (sort_relation_value == 2)
1900         *sks->sortRelation = Z_SortKeySpec_descending;
1901     else 
1902         *sks->sortRelation = Z_SortKeySpec_ascending;
1903
1904     sks->caseSensitivity = (int *)
1905         nmem_malloc(stream, sizeof(*sks->caseSensitivity));
1906     *sks->caseSensitivity = 0;
1907
1908     sks->which = Z_SortKeySpec_null;
1909     sks->u.null = odr_nullval ();
1910     sort_sequence->specs[i] = sks;
1911     *rset = rset_create_null(rset_nmem, kc, 0);
1912     return ZEBRA_OK;
1913 }
1914
1915
1916 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1917                            const Odr_oid *attributeSet,
1918                            struct xpath_location_step *xpath, int max,
1919                            NMEM mem)
1920 {
1921     const Odr_oid *curAttributeSet = attributeSet;
1922     AttrType use;
1923     const char *use_string = 0;
1924     
1925     attr_init_APT(&use, zapt, 1);
1926     attr_find_ex(&use, &curAttributeSet, &use_string);
1927
1928     if (!use_string || *use_string != '/')
1929         return -1;
1930
1931     return zebra_parse_xpath_str(use_string, xpath, max, mem);
1932 }
1933  
1934                
1935
1936 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
1937                         const char *index_type, const char *term, 
1938                         const char *xpath_use,
1939                         NMEM rset_nmem,
1940                         struct rset_key_control *kc)
1941 {
1942     struct grep_info grep_info;
1943     int ord = zebraExplain_lookup_attr_str(zh->reg->zei, 
1944                                            zinfo_index_category_index,
1945                                            index_type, xpath_use);
1946     if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, "0") == ZEBRA_FAIL)
1947         return rset_create_null(rset_nmem, kc, 0);
1948     
1949     if (ord < 0)
1950         return rset_create_null(rset_nmem, kc, 0);
1951     else
1952     {
1953         int i, r, max_pos;
1954         char ord_buf[32];
1955         RSET rset;
1956         WRBUF term_dict = wrbuf_alloc();
1957         int ord_len = key_SU_encode(ord, ord_buf);
1958         int term_type = Z_Term_characterString;
1959         const char *flags = "void";
1960
1961         wrbuf_putc(term_dict, '(');
1962         for (i = 0; i<ord_len; i++)
1963         {
1964             wrbuf_putc(term_dict, 1);
1965             wrbuf_putc(term_dict, ord_buf[i]);
1966         }
1967         wrbuf_putc(term_dict, ')');
1968         wrbuf_puts(term_dict, term);
1969         
1970         grep_info.isam_p_indx = 0;
1971         r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 0,
1972                              &grep_info, &max_pos, 0, grep_handle);
1973         yaz_log(YLOG_DEBUG, "%s %d positions", term,
1974                 grep_info.isam_p_indx);
1975         rset = rset_trunc(zh, grep_info.isam_p_buf,
1976                           grep_info.isam_p_indx, term, strlen(term),
1977                           flags, 1, term_type, rset_nmem,
1978                           kc, kc->scope, 0, index_type, 0 /* hits_limit */,
1979                           0 /* term_ref_id_str */);
1980         grep_info_delete(&grep_info);
1981         wrbuf_destroy(term_dict);
1982         return rset;
1983     }
1984 }
1985
1986 static
1987 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
1988                            NMEM stream, const char *rank_type, RSET rset,
1989                            int xpath_len, struct xpath_location_step *xpath,
1990                            NMEM rset_nmem,
1991                            RSET *rset_out,
1992                            struct rset_key_control *kc)
1993 {
1994     int i;
1995     int always_matches = rset ? 0 : 1;
1996
1997     if (xpath_len < 0)
1998     {
1999         *rset_out = rset;
2000         return ZEBRA_OK;
2001     }
2002
2003     yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2004     for (i = 0; i<xpath_len; i++)
2005     {
2006         yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2007
2008     }
2009
2010     /*
2011       //a    ->    a/.*
2012       //a/b  ->    b/a/.*
2013       /a     ->    a/
2014       /a/b   ->    b/a/
2015
2016       /      ->    none
2017
2018    a[@attr = value]/b[@other = othervalue]
2019
2020  /e/@a val      range(e/,range(@a,freetext(w,1015,val),@a),e/)
2021  /a/b val       range(b/a/,freetext(w,1016,val),b/a/)
2022  /a/b/@c val    range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2023  /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2024  /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2025  /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
2026       
2027     */
2028
2029     dict_grep_cmap(zh->reg->dict, 0, 0);
2030     
2031     {
2032         int level = xpath_len;
2033         int first_path = 1;
2034         
2035         while (--level >= 0)
2036         {
2037             WRBUF xpath_rev = wrbuf_alloc();
2038             int i;
2039             RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
2040
2041             for (i = level; i >= 1; --i)
2042             {
2043                 const char *cp = xpath[i].part;
2044                 if (*cp)
2045                 {
2046                     for (; *cp; cp++)
2047                     {
2048                         if (*cp == '*')
2049                             wrbuf_puts(xpath_rev, "[^/]*");
2050                         else if (*cp == ' ')
2051                             wrbuf_puts(xpath_rev, "\001 ");
2052                         else
2053                             wrbuf_putc(xpath_rev, *cp);
2054
2055                         /* wrbuf_putc does not null-terminate , but
2056                            wrbuf_puts below ensures it does.. so xpath_rev
2057                            is OK iff length is > 0 */
2058                     }
2059                     wrbuf_puts(xpath_rev, "/");
2060                 }
2061                 else if (i == 1)  /* // case */
2062                     wrbuf_puts(xpath_rev, ".*");
2063             }
2064             if (xpath[level].predicate &&
2065                 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2066                 xpath[level].predicate->u.relation.name[0])
2067             {
2068                 WRBUF wbuf = wrbuf_alloc();
2069                 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2070                 if (xpath[level].predicate->u.relation.value)
2071                 {
2072                     const char *cp = xpath[level].predicate->u.relation.value;
2073                     wrbuf_putc(wbuf, '=');
2074                     
2075                     while (*cp)
2076                     {
2077                         if (strchr(REGEX_CHARS, *cp))
2078                             wrbuf_putc(wbuf, '\\');
2079                         wrbuf_putc(wbuf, *cp);
2080                         cp++;
2081                     }
2082                 }
2083                 rset_attr = xpath_trunc(
2084                     zh, stream, "0", wrbuf_cstr(wbuf), 
2085                     ZEBRA_XPATH_ATTR_NAME, 
2086                     rset_nmem, kc);
2087                 wrbuf_destroy(wbuf);
2088             } 
2089             else 
2090             {
2091                 if (!first_path)
2092                 {
2093                     wrbuf_destroy(xpath_rev);
2094                     continue;
2095                 }
2096             }
2097             yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level, 
2098                     wrbuf_cstr(xpath_rev));
2099             if (wrbuf_len(xpath_rev))
2100             {
2101                 rset_start_tag = xpath_trunc(zh, stream, "0", 
2102                                              wrbuf_cstr(xpath_rev),
2103                                              ZEBRA_XPATH_ELM_BEGIN, 
2104                                              rset_nmem, kc);
2105                 if (always_matches)
2106                     rset = rset_start_tag;
2107                 else
2108                 {
2109                     rset_end_tag = xpath_trunc(zh, stream, "0", 
2110                                                wrbuf_cstr(xpath_rev),
2111                                                ZEBRA_XPATH_ELM_END, 
2112                                                rset_nmem, kc);
2113                     
2114                     rset = rset_create_between(rset_nmem, kc, kc->scope,
2115                                                rset_start_tag, rset,
2116                                                rset_end_tag, rset_attr);
2117                 }
2118             }
2119             wrbuf_destroy(xpath_rev);
2120             first_path = 0;
2121         }
2122     }
2123     *rset_out = rset;
2124     return ZEBRA_OK;
2125 }
2126
2127 #define MAX_XPATH_STEPS 10
2128
2129 static ZEBRA_RES rpn_search_database(ZebraHandle zh, 
2130                                      Z_AttributesPlusTerm *zapt,
2131                                      const Odr_oid *attributeSet, NMEM stream,
2132                                      Z_SortKeySpecList *sort_sequence,
2133                                      NMEM rset_nmem,
2134                                      RSET *rset,
2135                                      struct rset_key_control *kc);
2136
2137 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2138                                 const Odr_oid *attributeSet, NMEM stream,
2139                                 Z_SortKeySpecList *sort_sequence,
2140                                 int num_bases, const char **basenames, 
2141                                 NMEM rset_nmem,
2142                                 RSET *rset,
2143                                 struct rset_key_control *kc)
2144 {
2145     RSET *rsets = nmem_malloc(stream, num_bases * sizeof(*rsets));
2146     ZEBRA_RES res = ZEBRA_OK;
2147     int i;
2148     for (i = 0; i < num_bases; i++)
2149     {
2150
2151         if (zebraExplain_curDatabase(zh->reg->zei, basenames[i]))
2152         {
2153             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2154                            basenames[i]);
2155             res = ZEBRA_FAIL;
2156             break;
2157         }
2158         res = rpn_search_database(zh, zapt, attributeSet, stream,
2159                                   sort_sequence,
2160                                   rset_nmem, rsets+i, kc);
2161         if (res != ZEBRA_OK)
2162             break;
2163     }
2164     if (res != ZEBRA_OK)
2165     {   /* must clean up the already created sets */
2166         while (--i >= 0)
2167             rset_delete(rsets[i]);
2168         *rset = 0;
2169     }
2170     else 
2171     {
2172         if (num_bases == 1)
2173             *rset = rsets[0];
2174         else if (num_bases == 0)
2175             *rset = rset_create_null(rset_nmem, kc, 0); 
2176         else
2177             *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* TERMID */,
2178                                    num_bases, rsets);
2179     }
2180     return res;
2181 }
2182
2183 static ZEBRA_RES rpn_search_database(ZebraHandle zh, 
2184                                      Z_AttributesPlusTerm *zapt,
2185                                      const Odr_oid *attributeSet, NMEM stream,
2186                                      Z_SortKeySpecList *sort_sequence,
2187                                      NMEM rset_nmem,
2188                                      RSET *rset,
2189                                      struct rset_key_control *kc)
2190 {
2191     ZEBRA_RES res = ZEBRA_OK;
2192     const char *index_type;
2193     char *search_type = NULL;
2194     char rank_type[128];
2195     int complete_flag;
2196     int sort_flag;
2197     char termz[IT_MAX_WORD+1];
2198     int xpath_len;
2199     const char *xpath_use = 0;
2200     struct xpath_location_step xpath[MAX_XPATH_STEPS];
2201
2202     if (!log_level_set)
2203     {
2204         log_level_rpn = yaz_log_module_level("rpn");
2205         log_level_set = 1;
2206     }
2207     zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type,
2208                     rank_type, &complete_flag, &sort_flag);
2209     
2210     yaz_log(YLOG_DEBUG, "index_type=%s", index_type);
2211     yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2212     yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2213     yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2214
2215     if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2216         return ZEBRA_FAIL;
2217
2218     if (sort_flag)
2219         return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2220                              rank_type, rset_nmem, rset, kc);
2221     /* consider if an X-Path query is used */
2222     xpath_len = rpn_check_xpath(zh, zapt, attributeSet, 
2223                                 xpath, MAX_XPATH_STEPS, stream);
2224     if (xpath_len >= 0)
2225     {
2226         if (xpath[xpath_len-1].part[0] == '@') 
2227             xpath_use = ZEBRA_XPATH_ATTR_CDATA;  /* last step is attribute  */
2228         else
2229             xpath_use = ZEBRA_XPATH_CDATA;  /* searching for cdata */        
2230
2231         if (1)
2232         {
2233             AttrType relation;
2234             int relation_value;
2235
2236             attr_init_APT(&relation, zapt, 2);
2237             relation_value = attr_find(&relation, NULL);
2238
2239             if (relation_value == 103) /* alwaysmatches */
2240             {
2241                 *rset = 0; /* signal no "term" set */
2242                 return rpn_search_xpath(zh, stream, rank_type, *rset, 
2243                                         xpath_len, xpath, rset_nmem, rset, kc);
2244             }
2245         }
2246     }
2247
2248     /* search using one of the various search type strategies
2249        termz is our UTF-8 search term
2250        attributeSet is top-level default attribute set 
2251        stream is ODR for search
2252        reg_id is the register type
2253        complete_flag is 1 for complete subfield, 0 for incomplete
2254        xpath_use is use-attribute to be used for X-Path search, 0 for none
2255     */
2256     if (!strcmp(search_type, "phrase"))
2257     {
2258         res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2259                                     index_type, complete_flag, rank_type,
2260                                     xpath_use,
2261                                     rset_nmem,
2262                                     rset, kc);
2263     }
2264     else if (!strcmp(search_type, "and-list"))
2265     {
2266         res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2267                                       index_type, complete_flag, rank_type,
2268                                       xpath_use,
2269                                       rset_nmem,
2270                                       rset, kc);
2271     }
2272     else if (!strcmp(search_type, "or-list"))
2273     {
2274         res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2275                                      index_type, complete_flag, rank_type,
2276                                      xpath_use,
2277                                      rset_nmem,
2278                                      rset, kc);
2279     }
2280     else if (!strcmp(search_type, "local"))
2281     {
2282         res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2283                                    rank_type, rset_nmem, rset, kc);
2284     }
2285     else if (!strcmp(search_type, "numeric"))
2286     {
2287         res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2288                                      index_type, complete_flag, rank_type,
2289                                      xpath_use,
2290                                      rset_nmem,
2291                                      rset, kc);
2292     }
2293     else
2294     {
2295         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2296         res = ZEBRA_FAIL;
2297     }
2298     if (res != ZEBRA_OK)
2299         return res;
2300     if (!*rset)
2301         return ZEBRA_FAIL;
2302     return rpn_search_xpath(zh, stream, rank_type, *rset, 
2303                             xpath_len, xpath, rset_nmem, rset, kc);
2304 }
2305
2306 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2307                                       const Odr_oid *attributeSet, 
2308                                       NMEM stream, NMEM rset_nmem,
2309                                       Z_SortKeySpecList *sort_sequence,
2310                                       int num_bases, const char **basenames,
2311                                       RSET **result_sets, int *num_result_sets,
2312                                       Z_Operator *parent_op,
2313                                       struct rset_key_control *kc);
2314
2315 ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs,
2316                                    zint *approx_limit)
2317 {
2318     ZEBRA_RES res = ZEBRA_OK;
2319     if (zs->which == Z_RPNStructure_complex)
2320     {
2321         if (res == ZEBRA_OK)
2322             res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
2323                                            approx_limit);
2324         if (res == ZEBRA_OK)
2325             res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
2326                                            approx_limit);
2327     }
2328     else if (zs->which == Z_RPNStructure_simple)
2329     {
2330         if (zs->u.simple->which == Z_Operand_APT)
2331         {
2332             Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
2333             AttrType global_hits_limit_attr;
2334             int l;
2335             
2336             attr_init_APT(&global_hits_limit_attr, zapt, 12);
2337             
2338             l = attr_find(&global_hits_limit_attr, NULL);
2339             if (l != -1)
2340                 *approx_limit = l;
2341         }
2342     }
2343     return res;
2344 }
2345
2346 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2347                          const Odr_oid *attributeSet, 
2348                          NMEM stream, NMEM rset_nmem,
2349                          Z_SortKeySpecList *sort_sequence,
2350                          int num_bases, const char **basenames,
2351                          RSET *result_set)
2352 {
2353     RSET *result_sets = 0;
2354     int num_result_sets = 0;
2355     ZEBRA_RES res;
2356     struct rset_key_control *kc = zebra_key_control_create(zh);
2357
2358     res = rpn_search_structure(zh, zs, attributeSet,
2359                                stream, rset_nmem,
2360                                sort_sequence, 
2361                                num_bases, basenames,
2362                                &result_sets, &num_result_sets,
2363                                0 /* no parent op */,
2364                                kc);
2365     if (res != ZEBRA_OK)
2366     {
2367         int i;
2368         for (i = 0; i<num_result_sets; i++)
2369             rset_delete(result_sets[i]);
2370         *result_set = 0;
2371     }
2372     else
2373     {
2374         assert(num_result_sets == 1);
2375         assert(result_sets);
2376         assert(*result_sets);
2377         *result_set = *result_sets;
2378     }
2379     (*kc->dec)(kc);
2380     return res;
2381 }
2382
2383 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2384                                const Odr_oid *attributeSet, 
2385                                NMEM stream, NMEM rset_nmem,
2386                                Z_SortKeySpecList *sort_sequence,
2387                                int num_bases, const char **basenames,
2388                                RSET **result_sets, int *num_result_sets,
2389                                Z_Operator *parent_op,
2390                                struct rset_key_control *kc)
2391 {
2392     *num_result_sets = 0;
2393     if (zs->which == Z_RPNStructure_complex)
2394     {
2395         ZEBRA_RES res;
2396         Z_Operator *zop = zs->u.complex->roperator;
2397         RSET *result_sets_l = 0;
2398         int num_result_sets_l = 0;
2399         RSET *result_sets_r = 0;
2400         int num_result_sets_r = 0;
2401
2402         res = rpn_search_structure(zh, zs->u.complex->s1,
2403                                    attributeSet, stream, rset_nmem,
2404                                    sort_sequence,
2405                                    num_bases, basenames,
2406                                    &result_sets_l, &num_result_sets_l,
2407                                    zop, kc);
2408         if (res != ZEBRA_OK)
2409         {
2410             int i;
2411             for (i = 0; i<num_result_sets_l; i++)
2412                 rset_delete(result_sets_l[i]);
2413             return res;
2414         }
2415         res = rpn_search_structure(zh, zs->u.complex->s2,
2416                                    attributeSet, stream, rset_nmem,
2417                                    sort_sequence,
2418                                    num_bases, basenames,
2419                                    &result_sets_r, &num_result_sets_r,
2420                                    zop, kc);
2421         if (res != ZEBRA_OK)
2422         {
2423             int i;
2424             for (i = 0; i<num_result_sets_l; i++)
2425                 rset_delete(result_sets_l[i]);
2426             for (i = 0; i<num_result_sets_r; i++)
2427                 rset_delete(result_sets_r[i]);
2428             return res;
2429         }
2430
2431         /* make a new list of result for all children */
2432         *num_result_sets = num_result_sets_l + num_result_sets_r;
2433         *result_sets = nmem_malloc(stream, *num_result_sets * 
2434                                    sizeof(**result_sets));
2435         memcpy(*result_sets, result_sets_l, 
2436                num_result_sets_l * sizeof(**result_sets));
2437         memcpy(*result_sets + num_result_sets_l, result_sets_r, 
2438                num_result_sets_r * sizeof(**result_sets));
2439
2440         if (!parent_op || parent_op->which != zop->which
2441             || (zop->which != Z_Operator_and &&
2442                 zop->which != Z_Operator_or))
2443         {
2444             /* parent node different from this one (or non-present) */
2445             /* we must combine result sets now */
2446             RSET rset;
2447             switch (zop->which)
2448             {
2449             case Z_Operator_and:
2450                 rset = rset_create_and(rset_nmem, kc,
2451                                        kc->scope,
2452                                        *num_result_sets, *result_sets);
2453                 break;
2454             case Z_Operator_or:
2455                 rset = rset_create_or(rset_nmem, kc,
2456                                       kc->scope, 0, /* termid */
2457                                       *num_result_sets, *result_sets);
2458                 break;
2459             case Z_Operator_and_not:
2460                 rset = rset_create_not(rset_nmem, kc,
2461                                        kc->scope,
2462                                        (*result_sets)[0],
2463                                        (*result_sets)[1]);
2464                 break;
2465             case Z_Operator_prox:
2466                 if (zop->u.prox->which != Z_ProximityOperator_known)
2467                 {
2468                     zebra_setError(zh, 
2469                                    YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2470                                    0);
2471                     return ZEBRA_FAIL;
2472                 }
2473                 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2474                 {
2475                     zebra_setError_zint(zh,
2476                                         YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2477                                         *zop->u.prox->u.known);
2478                     return ZEBRA_FAIL;
2479                 }
2480                 else
2481                 {
2482                     rset = rset_create_prox(rset_nmem, kc,
2483                                             kc->scope,
2484                                             *num_result_sets, *result_sets, 
2485                                             *zop->u.prox->ordered,
2486                                             (!zop->u.prox->exclusion ? 
2487                                              0 : *zop->u.prox->exclusion),
2488                                             *zop->u.prox->relationType,
2489                                             *zop->u.prox->distance );
2490                 }
2491                 break;
2492             default:
2493                 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
2494                 return ZEBRA_FAIL;
2495             }
2496             *num_result_sets = 1;
2497             *result_sets = nmem_malloc(stream, *num_result_sets * 
2498                                        sizeof(**result_sets));
2499             (*result_sets)[0] = rset;
2500         }
2501     }
2502     else if (zs->which == Z_RPNStructure_simple)
2503     {
2504         RSET rset;
2505         ZEBRA_RES res;
2506
2507         if (zs->u.simple->which == Z_Operand_APT)
2508         {
2509             yaz_log(YLOG_DEBUG, "rpn_search_APT");
2510             res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2511                                  attributeSet, stream, sort_sequence,
2512                                  num_bases, basenames, rset_nmem, &rset,
2513                                  kc);
2514             if (res != ZEBRA_OK)
2515                 return res;
2516         }
2517         else if (zs->u.simple->which == Z_Operand_resultSetId)
2518         {
2519             yaz_log(YLOG_DEBUG, "rpn_search_ref");
2520             rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2521             if (!rset)
2522             {
2523                 zebra_setError(zh, 
2524                                YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2525                                zs->u.simple->u.resultSetId);
2526                 return ZEBRA_FAIL;
2527             }
2528             rset_dup(rset);
2529         }
2530         else
2531         {
2532             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2533             return ZEBRA_FAIL;
2534         }
2535         *num_result_sets = 1;
2536         *result_sets = nmem_malloc(stream, *num_result_sets * 
2537                                    sizeof(**result_sets));
2538         (*result_sets)[0] = rset;
2539     }
2540     else
2541     {
2542         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2543         return ZEBRA_FAIL;
2544     }
2545     return ZEBRA_OK;
2546 }
2547
2548
2549
2550 /*
2551  * Local variables:
2552  * c-basic-offset: 4
2553  * indent-tabs-mode: nil
2554  * End:
2555  * vim: shiftwidth=4 tabstop=8 expandtab
2556  */
2557