Handle right-truncation for ICU normalized terms.
[idzebra-moved-to-github.git] / index / rpnsearch.c
1 /* $Id: rpnsearch.c,v 1.31 2008-01-26 15:48:29 adam Exp $
2    Copyright (C) 1995-2007
3    Index Data ApS
4
5 This file is part of the Zebra server.
6
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
20
21 */
22
23 #include <stdio.h>
24 #include <assert.h>
25 #ifdef WIN32
26 #include <io.h>
27 #endif
28 #if HAVE_UNISTD_H
29 #include <unistd.h>
30 #endif
31 #include <ctype.h>
32
33 #include <yaz/diagbib1.h>
34 #include "index.h"
35 #include <zebra_xpath.h>
36 #include <attrfind.h>
37 #include <charmap.h>
38 #include <rset.h>
39
40 static int log_level_set = 0;
41 static int log_level_rpn = 0;
42
43 #define TERMSET_DISABLE 1
44
45 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
46 {
47     struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
48     const char **out = zebra_maps_input(p->zm, from, len, 0);
49 #if 0
50     if (out && *out)
51     {
52         const char *outp = *out;
53         yaz_log(YLOG_LOG, "---");
54         while (*outp)
55         {
56             yaz_log(YLOG_LOG, "%02X", *outp);
57             outp++;
58         }
59     }
60 #endif
61     return out;
62 }
63
64 void rpn_char_map_prepare(struct zebra_register *reg, zebra_map_t zm,
65                           struct rpn_char_map_info *map_info)
66 {
67     map_info->zm = zm;
68     if (zebra_maps_is_icu(zm))
69         dict_grep_cmap(reg->dict, 0, 0);
70     else
71         dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
72 }
73
74 #define TERM_COUNT        
75        
76 struct grep_info {        
77 #ifdef TERM_COUNT        
78     int *term_no;        
79 #endif        
80     ISAM_P *isam_p_buf;
81     int isam_p_size;        
82     int isam_p_indx;
83     int trunc_max;
84     ZebraHandle zh;
85     const char *index_type;
86     ZebraSet termset;
87 };        
88
89 static int add_isam_p(const char *name, const char *info,
90                       struct grep_info *p)
91 {
92     if (!log_level_set)
93     {
94         log_level_rpn = yaz_log_module_level("rpn");
95         log_level_set = 1;
96     }
97     /* we may have to stop this madness.. NOTE: -1 so that if
98        truncmax == trunxlimit we do *not* generate result sets */
99     if (p->isam_p_indx >= p->trunc_max - 1)
100         return 1;
101
102     if (p->isam_p_indx == p->isam_p_size)
103     {
104         ISAM_P *new_isam_p_buf;
105 #ifdef TERM_COUNT        
106         int *new_term_no;        
107 #endif
108         p->isam_p_size = 2*p->isam_p_size + 100;
109         new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
110                                             p->isam_p_size);
111         if (p->isam_p_buf)
112         {
113             memcpy(new_isam_p_buf, p->isam_p_buf,
114                    p->isam_p_indx * sizeof(*p->isam_p_buf));
115             xfree(p->isam_p_buf);
116         }
117         p->isam_p_buf = new_isam_p_buf;
118
119 #ifdef TERM_COUNT
120         new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
121         if (p->term_no)
122         {
123             memcpy(new_term_no, p->isam_p_buf,
124                    p->isam_p_indx * sizeof(*p->term_no));
125             xfree(p->term_no);
126         }
127         p->term_no = new_term_no;
128 #endif
129     }
130     assert(*info == sizeof(*p->isam_p_buf));
131     memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
132
133     if (p->termset)
134     {
135         const char *db;
136         char term_tmp[IT_MAX_WORD];
137         int ord = 0;
138         const char *index_name;
139         int len = key_SU_decode(&ord, (const unsigned char *) name);
140         
141         zebra_term_untrans (p->zh, p->index_type, term_tmp, name+len);
142         yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
143         zebraExplain_lookup_ord(p->zh->reg->zei,
144                                 ord, 0 /* index_type */, &db, &index_name);
145         yaz_log(log_level_rpn, "grep:  db=%s index=%s", db, index_name);
146         
147         resultSetAddTerm(p->zh, p->termset, name[len], db,
148                          index_name, term_tmp);
149     }
150     (p->isam_p_indx)++;
151     return 0;
152 }
153
/* Grep callback with an untyped client pointer: p is the
   struct grep_info that add_isam_p() should fill. */
static int grep_handle(char *name, const char *info, void *p)
{
    struct grep_info *grep_info = (struct grep_info *) p;

    return add_isam_p(name, info, grep_info);
}
158
159 static int term_pre(zebra_map_t zm, const char **src,
160                     const char *ct1, const char *ct2, int first)
161 {
162     const char *s1, *s0 = *src;
163     const char **map;
164
165     /* skip white space */
166     while (*s0)
167     {
168         if (ct1 && strchr(ct1, *s0))
169             break;
170         if (ct2 && strchr(ct2, *s0))
171             break;
172         s1 = s0;
173         map = zebra_maps_input(zm, &s1, strlen(s1), first);
174         if (**map != *CHR_SPACE)
175             break;
176         s0 = s1;
177     }
178     *src = s0;
179     return *s0;
180 }
181
182
/* esc_str - render bytes as "HH:c  " groups for diagnostics
 *  out_buf/out_size:  destination buffer; must be larger than 20 bytes
 *  in_buf/in_size:    bytes to render
 * Bytes outside 32..126 are shown as '?'.  Once the output grows past
 * out_size-20 characters, ".." is appended and rendering stops.
 */
static void esc_str(char *out_buf, size_t out_size,
                    const char *in_buf, int in_size)
{
    size_t used = 0;   /* current length of out_buf */
    int idx = 0;

    assert(out_buf);
    assert(in_buf);
    assert(out_size > 20);
    out_buf[0] = '\0';
    while (idx < in_size)
    {
        int byte = in_buf[idx++] & 0xff;
        int shown = (byte >= 32 && byte <= 126) ? byte : '?';

        used += sprintf(out_buf + used, "%02X:%c  ", byte, shown);
        if (used > out_size - 20)
        {
            strcat(out_buf, "..");
            break;
        }
    }
}
208
209 #define REGEX_CHARS " []()|.*+?!\"$"
210
211 static void add_non_space(const char *start, const char *end,
212                           WRBUF term_dict,
213                           WRBUF display_term,
214                           const char **map, int q_map_match)
215 {
216     size_t sz = end - start;
217
218     wrbuf_write(display_term, start, sz);
219     if (!q_map_match)
220     {
221         while (start < end)
222         {
223             if (strchr(REGEX_CHARS, *start))
224                 wrbuf_putc(term_dict, '\\');
225             wrbuf_putc(term_dict, *start);
226             start++;
227         }
228     }
229     else
230     {
231         char tmpbuf[80];
232         esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
233         
234         wrbuf_puts(term_dict, map[0]);
235     }
236 }
237
238
239 static int term_100_icu(zebra_map_t zm,
240                         const char **src, WRBUF term_dict, int space_split,
241                         WRBUF display_term,
242                         int right_trunc)
243 {
244     int i;
245     const char *res_buf = 0;
246     size_t res_len = 0;
247     const char *display_buf;
248     size_t display_len;
249     if (!zebra_map_tokenize_next(zm, &res_buf, &res_len,
250                                  &display_buf, &display_len))
251     {
252         *src += strlen(*src);
253         return 0;
254     }
255     wrbuf_write(display_term, display_buf, display_len);
256     if (right_trunc)
257     {
258         /* ICU sort keys seem to be of the form
259            basechars \x01 accents \x01 length
260            For now we'll just right truncate from basechars . This 
261            may give false hits due to accents not being used.
262         */
263         i = res_len;
264         while (--i >= 0 && res_buf[i] != '\x01')
265             ;
266         if (i > 0)
267         {
268             while (--i >= 0 && res_buf[i] != '\x01')
269                 ;
270         }
271         if (i == 0)
272         {  /* did not find base chars at all. Throw error */
273             return -1;
274         }
275         res_len = i; /* reduce res_len */
276     }
277     for (i = 0; i < res_len; i++)
278     {
279         if (strchr(REGEX_CHARS "\\", res_buf[i]))
280             wrbuf_putc(term_dict, '\\');
281         if (res_buf[i] < 32)
282             wrbuf_putc(term_dict, 1);
283             
284         wrbuf_putc(term_dict, res_buf[i]);
285     }
286     if (right_trunc)
287         wrbuf_puts(term_dict, ".*");
288     return 1;
289 }
290
291 /* term_100: handle term, where trunc = none(no operators at all) */
292 static int term_100(zebra_map_t zm,
293                     const char **src, WRBUF term_dict, int space_split,
294                     WRBUF display_term)
295 {
296     const char *s0;
297     const char **map;
298     int i = 0;
299
300     const char *space_start = 0;
301     const char *space_end = 0;
302
303     if (!term_pre(zm, src, NULL, NULL, !space_split))
304         return 0;
305     s0 = *src;
306     while (*s0)
307     {
308         const char *s1 = s0;
309         int q_map_match = 0;
310         map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
311         if (space_split)
312         {
313             if (**map == *CHR_SPACE)
314                 break;
315         }
316         else  /* complete subfield only. */
317         {
318             if (**map == *CHR_SPACE)
319             {   /* save space mapping for later  .. */
320                 space_start = s1;
321                 space_end = s0;
322                 continue;
323             }
324             else if (space_start)
325             {   /* reload last space */
326                 while (space_start < space_end)
327                 {
328                     if (strchr(REGEX_CHARS, *space_start))
329                         wrbuf_putc(term_dict, '\\');
330                     wrbuf_putc(display_term, *space_start);
331                     wrbuf_putc(term_dict, *space_start);
332                     space_start++;
333                                
334                 }
335                 /* and reset */
336                 space_start = space_end = 0;
337             }
338         }
339         i++;
340
341         add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
342     }
343     *src = s0;
344     return i;
345 }
346
347 /* term_101: handle term, where trunc = Process # */
348 static int term_101(zebra_map_t zm,
349                     const char **src, WRBUF term_dict, int space_split,
350                     WRBUF display_term)
351 {
352     const char *s0;
353     const char **map;
354     int i = 0;
355
356     if (!term_pre(zm, src, "#", "#", !space_split))
357         return 0;
358     s0 = *src;
359     while (*s0)
360     {
361         if (*s0 == '#')
362         {
363             i++;
364             wrbuf_puts(term_dict, ".*");
365             wrbuf_putc(display_term, *s0);
366             s0++;
367         }
368         else
369         {
370             const char *s1 = s0;
371             int q_map_match = 0;
372             map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
373             if (space_split && **map == *CHR_SPACE)
374                 break;
375
376             i++;
377             add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
378         }
379     }
380     *src = s0;
381     return i;
382 }
383
384 /* term_103: handle term, where trunc = re-2 (regular expressions) */
385 static int term_103(zebra_map_t zm, const char **src,
386                     WRBUF term_dict, int *errors, int space_split,
387                     WRBUF display_term)
388 {
389     int i = 0;
390     const char *s0;
391     const char **map;
392
393     if (!term_pre(zm, src, "^\\()[].*+?|", "(", !space_split))
394         return 0;
395     s0 = *src;
396     if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
397         isdigit(((const unsigned char *)s0)[1]))
398     {
399         *errors = s0[1] - '0';
400         s0 += 3;
401         if (*errors > 3)
402             *errors = 3;
403     }
404     while (*s0)
405     {
406         if (strchr("^\\()[].*+?|-", *s0))
407         {
408             wrbuf_putc(display_term, *s0);
409             wrbuf_putc(term_dict, *s0);
410             s0++;
411             i++;
412         }
413         else
414         {
415             const char *s1 = s0;
416             int q_map_match = 0;
417             map = zebra_maps_search(zm, &s0, strlen(s0),  &q_map_match);
418             if (space_split && **map == *CHR_SPACE)
419                 break;
420
421             i++;
422             add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
423         }
424     }
425     *src = s0;
426     
427     return i;
428 }
429
430 /* term_103: handle term, where trunc = re-1 (regular expressions) */
431 static int term_102(zebra_map_t zm, const char **src,
432                     WRBUF term_dict, int space_split, WRBUF display_term)
433 {
434     return term_103(zm, src, term_dict, NULL, space_split, display_term);
435 }
436
437
438 /* term_104: handle term, process # and ! */
439 static int term_104(zebra_map_t zm, const char **src, 
440                     WRBUF term_dict, int space_split, WRBUF display_term)
441 {
442     const char *s0;
443     const char **map;
444     int i = 0;
445
446     if (!term_pre(zm, src, "?*#", "?*#", !space_split))
447         return 0;
448     s0 = *src;
449     while (*s0)
450     {
451         if (*s0 == '?')
452         {
453             i++;
454             wrbuf_putc(display_term, *s0);
455             s0++;
456             if (*s0 >= '0' && *s0 <= '9')
457             {
458                 int limit = 0;
459                 while (*s0 >= '0' && *s0 <= '9')
460                 {
461                     limit = limit * 10 + (*s0 - '0');
462                     wrbuf_putc(display_term, *s0);
463                     s0++;
464                 }
465                 if (limit > 20)
466                     limit = 20;
467                 while (--limit >= 0)
468                 {
469                     wrbuf_puts(term_dict, ".?");
470                 }
471             }
472             else
473             {
474                 wrbuf_puts(term_dict, ".*");
475             }
476         }
477         else if (*s0 == '*')
478         {
479             i++;
480             wrbuf_puts(term_dict, ".*");
481             wrbuf_putc(display_term, *s0);
482             s0++;
483         }
484         else if (*s0 == '#')
485         {
486             i++;
487             wrbuf_puts(term_dict, ".");
488             wrbuf_putc(display_term, *s0);
489             s0++;
490         }
491         else
492         {
493             const char *s1 = s0;
494             int q_map_match = 0;
495             map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
496             if (space_split && **map == *CHR_SPACE)
497                 break;
498
499             i++;
500             add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
501         }
502     }
503     *src = s0;
504     return i;
505 }
506
507 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
508 static int term_105(zebra_map_t zm, const char **src, 
509                     WRBUF term_dict, int space_split,
510                     WRBUF display_term, int right_truncate)
511 {
512     const char *s0;
513     const char **map;
514     int i = 0;
515
516     if (!term_pre(zm, src, "*!", "*!", !space_split))
517         return 0;
518     s0 = *src;
519     while (*s0)
520     {
521         if (*s0 == '*')
522         {
523             i++;
524             wrbuf_puts(term_dict, ".*");
525             wrbuf_putc(display_term, *s0);
526             s0++;
527         }
528         else if (*s0 == '!')
529         {
530             i++;
531             wrbuf_putc(term_dict, '.');
532             wrbuf_putc(display_term, *s0);
533             s0++;
534         }
535         else
536         {
537             const char *s1 = s0;
538             int q_map_match = 0;
539             map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
540             if (space_split && **map == *CHR_SPACE)
541                 break;
542
543             i++;
544             add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
545         }
546     }
547     if (right_truncate)
548         wrbuf_puts(term_dict, ".*");
549     *src = s0;
550     return i;
551 }
552
553
554 /* gen_regular_rel - generate regular expression from relation
555  *  val:     border value (inclusive)
556  *  islt:    1 if <=; 0 if >=.
557  */
558 static void gen_regular_rel(WRBUF term_dict, int val, int islt)
559 {
560     char dst_buf[20*5*20]; /* assuming enough for expansion */
561     char *dst = dst_buf;
562     int dst_p;
563     int w, d, i;
564     int pos = 0;
565     char numstr[20];
566
567     yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
568     if (val >= 0)
569     {
570         if (islt)
571             strcpy(dst, "(-[0-9]+|(");
572         else
573             strcpy(dst, "((");
574     } 
575     else
576     {
577         if (!islt)
578         {
579             strcpy(dst, "([0-9]+|-(");
580             islt = 1;
581         }
582         else
583         {
584             strcpy(dst, "(-(");
585             islt = 0;
586         }
587         val = -val;
588     }
589     dst_p = strlen(dst);
590     sprintf(numstr, "%d", val);
591     for (w = strlen(numstr); --w >= 0; pos++)
592     {
593         d = numstr[w];
594         if (pos > 0)
595         {
596             if (islt)
597             {
598                 if (d == '0')
599                     continue;
600                 d--;
601             } 
602             else
603             {
604                 if (d == '9')
605                     continue;
606                 d++;
607             }
608         }
609         
610         strcpy(dst + dst_p, numstr);
611         dst_p = strlen(dst) - pos - 1;
612
613         if (islt)
614         {
615             if (d != '0')
616             {
617                 dst[dst_p++] = '[';
618                 dst[dst_p++] = '0';
619                 dst[dst_p++] = '-';
620                 dst[dst_p++] = d;
621                 dst[dst_p++] = ']';
622             }
623             else
624                 dst[dst_p++] = d;
625         }
626         else
627         {
628             if (d != '9')
629             { 
630                 dst[dst_p++] = '[';
631                 dst[dst_p++] = d;
632                 dst[dst_p++] = '-';
633                 dst[dst_p++] = '9';
634                 dst[dst_p++] = ']';
635             }
636             else
637                 dst[dst_p++] = d;
638         }
639         for (i = 0; i<pos; i++)
640         {
641             dst[dst_p++] = '[';
642             dst[dst_p++] = '0';
643             dst[dst_p++] = '-';
644             dst[dst_p++] = '9';
645             dst[dst_p++] = ']';
646         }
647         dst[dst_p++] = '|';
648     }
649     dst[dst_p] = '\0';
650     if (islt)
651     {
652         /* match everything less than 10^(pos-1) */
653         strcat(dst, "0*");
654         for (i = 1; i<pos; i++)
655             strcat(dst, "[0-9]?");
656     }
657     else
658     {
659         /* match everything greater than 10^pos */
660         for (i = 0; i <= pos; i++)
661             strcat(dst, "[0-9]");
662         strcat(dst, "[0-9]*");
663     }
664     strcat(dst, "))");
665     wrbuf_puts(term_dict, dst);
666 }
667
668 void string_rel_add_char(WRBUF term_p, WRBUF wsrc, int *indx)
669 {
670     const char *src = wrbuf_cstr(wsrc);
671     if (src[*indx] == '\\')
672     {
673         wrbuf_putc(term_p, src[*indx]);
674         (*indx)++;
675     }
676     wrbuf_putc(term_p, src[*indx]);
677     (*indx)++;
678 }
679
680 /*
681  *   >  abc     ([b-].*|a[c-].*|ab[d-].*|abc.+)
682  *              ([^-a].*|a[^-b].*ab[^-c].*|abc.+)
683  *   >= abc     ([b-].*|a[c-].*|ab[c-].*)
684  *              ([^-a].*|a[^-b].*|ab[c-].*)
685  *   <  abc     ([-0].*|a[-a].*|ab[-b].*)
686  *              ([^a-].*|a[^b-].*|ab[^c-].*)
687  *   <= abc     ([-0].*|a[-a].*|ab[-b].*|abc)
688  *              ([^a-].*|a[^b-].*|ab[^c-].*|abc)
689  */
690 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
691                            const char **term_sub, WRBUF term_dict,
692                            const Odr_oid *attributeSet,
693                            zebra_map_t zm, int space_split, 
694                            WRBUF display_term,
695                            int *error_code)
696 {
697     AttrType relation;
698     int relation_value;
699     int i;
700     WRBUF term_component = wrbuf_alloc();
701
702     attr_init_APT(&relation, zapt, 2);
703     relation_value = attr_find(&relation, NULL);
704
705     *error_code = 0;
706     yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
707     switch (relation_value)
708     {
709     case 1:
710         if (!term_100(zm, term_sub, term_component, space_split, display_term))
711         {
712             wrbuf_destroy(term_component);
713             return 0;
714         }
715         yaz_log(log_level_rpn, "Relation <");
716         
717         wrbuf_putc(term_dict, '(');
718         for (i = 0; i < wrbuf_len(term_component); )
719         {
720             int j = 0;
721             
722             if (i)
723                 wrbuf_putc(term_dict, '|');
724             while (j < i)
725                 string_rel_add_char(term_dict, term_component, &j);
726
727             wrbuf_putc(term_dict, '[');
728
729             wrbuf_putc(term_dict, '^');
730             
731             wrbuf_putc(term_dict, 1);
732             wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
733             
734             string_rel_add_char(term_dict, term_component, &i);
735             wrbuf_putc(term_dict, '-');
736             
737             wrbuf_putc(term_dict, ']');
738             wrbuf_putc(term_dict, '.');
739             wrbuf_putc(term_dict, '*');
740         }
741         wrbuf_putc(term_dict, ')');
742         break;
743     case 2:
744         if (!term_100(zm, term_sub, term_component, space_split, display_term))
745         {
746             wrbuf_destroy(term_component);
747             return 0;
748         }
749         yaz_log(log_level_rpn, "Relation <=");
750
751         wrbuf_putc(term_dict, '(');
752         for (i = 0; i < wrbuf_len(term_component); )
753         {
754             int j = 0;
755
756             while (j < i)
757                 string_rel_add_char(term_dict, term_component, &j);
758             wrbuf_putc(term_dict, '[');
759
760             wrbuf_putc(term_dict, '^');
761
762             wrbuf_putc(term_dict, 1);
763             wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
764
765             string_rel_add_char(term_dict, term_component, &i);
766             wrbuf_putc(term_dict, '-');
767
768             wrbuf_putc(term_dict, ']');
769             wrbuf_putc(term_dict, '.');
770             wrbuf_putc(term_dict, '*');
771
772             wrbuf_putc(term_dict, '|');
773         }
774         for (i = 0; i < wrbuf_len(term_component); )
775             string_rel_add_char(term_dict, term_component, &i);
776         wrbuf_putc(term_dict, ')');
777         break;
778     case 5:
779         if (!term_100(zm, term_sub, term_component, space_split, display_term))
780         {
781             wrbuf_destroy(term_component);
782             return 0;
783         }
784         yaz_log(log_level_rpn, "Relation >");
785
786         wrbuf_putc(term_dict, '(');
787         for (i = 0; i < wrbuf_len(term_component); )
788         {
789             int j = 0;
790
791             while (j < i)
792                 string_rel_add_char(term_dict, term_component, &j);
793             wrbuf_putc(term_dict, '[');
794             
795             wrbuf_putc(term_dict, '^');
796             wrbuf_putc(term_dict, '-');
797             string_rel_add_char(term_dict, term_component, &i);
798
799             wrbuf_putc(term_dict, ']');
800             wrbuf_putc(term_dict, '.');
801             wrbuf_putc(term_dict, '*');
802
803             wrbuf_putc(term_dict, '|');
804         }
805         for (i = 0; i < wrbuf_len(term_component); )
806             string_rel_add_char(term_dict, term_component, &i);
807         wrbuf_putc(term_dict, '.');
808         wrbuf_putc(term_dict, '+');
809         wrbuf_putc(term_dict, ')');
810         break;
811     case 4:
812         if (!term_100(zm, term_sub, term_component, space_split, display_term))
813         {
814             wrbuf_destroy(term_component);
815             return 0;
816         }
817         yaz_log(log_level_rpn, "Relation >=");
818
819         wrbuf_putc(term_dict, '(');
820         for (i = 0; i < wrbuf_len(term_component); )
821         {
822             int j = 0;
823
824             if (i)
825                 wrbuf_putc(term_dict, '|');
826             while (j < i)
827                 string_rel_add_char(term_dict, term_component, &j);
828             wrbuf_putc(term_dict, '[');
829
830             if (i < wrbuf_len(term_component)-1)
831             {
832                 wrbuf_putc(term_dict, '^');
833                 wrbuf_putc(term_dict, '-');
834                 string_rel_add_char(term_dict, term_component, &i);
835             }
836             else
837             {
838                 string_rel_add_char(term_dict, term_component, &i);
839                 wrbuf_putc(term_dict, '-');
840             }
841             wrbuf_putc(term_dict, ']');
842             wrbuf_putc(term_dict, '.');
843             wrbuf_putc(term_dict, '*');
844         }
845         wrbuf_putc(term_dict, ')');
846         break;
847     case 3:
848     case 102:
849     case -1:
850         if (!**term_sub)
851             return 1;
852         yaz_log(log_level_rpn, "Relation =");
853         if (!term_100(zm, term_sub, term_component, space_split, display_term))
854         {
855             wrbuf_destroy(term_component);
856             return 0;
857         }
858         wrbuf_puts(term_dict, "(");
859         wrbuf_puts(term_dict, wrbuf_cstr(term_component));
860         wrbuf_puts(term_dict, ")");
861         break;
862     case 103:
863         yaz_log(log_level_rpn, "Relation always matches");
864         /* skip to end of term (we don't care what it is) */
865         while (**term_sub != '\0')
866             (*term_sub)++;
867         break;
868     default:
869         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
870         wrbuf_destroy(term_component);
871         return 0;
872     }
873     wrbuf_destroy(term_component);
874     return 1;
875 }
876
877 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
878                              const char **term_sub, 
879                              WRBUF term_dict,
880                              const Odr_oid *attributeSet, NMEM stream,
881                              struct grep_info *grep_info,
882                              const char *index_type, int complete_flag,
883                              WRBUF display_term,
884                              const char *xpath_use,
885                              struct ord_list **ol,
886                              zebra_map_t zm);
887
888 ZEBRA_RES zebra_term_limits_APT(ZebraHandle zh,
889                                 Z_AttributesPlusTerm *zapt,
890                                 zint *hits_limit_value,
891                                 const char **term_ref_id_str,
892                                 NMEM nmem)
893 {
894     AttrType term_ref_id_attr;
895     AttrType hits_limit_attr;
896     int term_ref_id_int;
897  
898     attr_init_APT(&hits_limit_attr, zapt, 11);
899     *hits_limit_value  = attr_find(&hits_limit_attr, NULL);
900
901     attr_init_APT(&term_ref_id_attr, zapt, 10);
902     term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
903     if (term_ref_id_int >= 0)
904     {
905         char *res = nmem_malloc(nmem, 20);
906         sprintf(res, "%d", term_ref_id_int);
907         *term_ref_id_str = res;
908     }
909
910     /* no limit given ? */
911     if (*hits_limit_value == -1)
912     {
913         if (*term_ref_id_str)
914         {
915             /* use global if term_ref is present */
916             *hits_limit_value = zh->approx_limit;
917         }
918         else
919         {
920             /* no counting if term_ref is not present */
921             *hits_limit_value = 0;
922         }
923     }
924     else if (*hits_limit_value == 0)
925     {
926         /* 0 is the same as global limit */
927         *hits_limit_value = zh->approx_limit;
928     }
929     yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
930             *term_ref_id_str ? *term_ref_id_str : "none",
931             *hits_limit_value);
932     return ZEBRA_OK;
933 }
934
935 /** \brief search for term (which may be truncated)
936  */
937 static ZEBRA_RES search_term(ZebraHandle zh,
938                              Z_AttributesPlusTerm *zapt,
939                              const char **term_sub, 
940                              const Odr_oid *attributeSet, NMEM stream,
941                              struct grep_info *grep_info,
942                              const char *index_type, int complete_flag,
943                              const char *rank_type, 
944                              const char *xpath_use,
945                              NMEM rset_nmem,
946                              RSET *rset,
947                              struct rset_key_control *kc,
948                              zebra_map_t zm)
949 {
950     ZEBRA_RES res;
951     struct ord_list *ol;
952     zint hits_limit_value;
953     const char *term_ref_id_str = 0;
954     WRBUF term_dict = wrbuf_alloc();
955     WRBUF display_term = wrbuf_alloc();
956     *rset = 0;
957     zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
958                           stream);
959     grep_info->isam_p_indx = 0;
960     res = string_term(zh, zapt, term_sub, term_dict,
961                       attributeSet, stream, grep_info,
962                       index_type, complete_flag,
963                       display_term, xpath_use, &ol, zm);
964     wrbuf_destroy(term_dict);
965     if (res == ZEBRA_OK && *term_sub)
966     {
967         yaz_log(log_level_rpn, "term: %s", wrbuf_cstr(display_term));
968         *rset = rset_trunc(zh, grep_info->isam_p_buf,
969                            grep_info->isam_p_indx, wrbuf_buf(display_term),
970                            wrbuf_len(display_term), rank_type, 
971                            1 /* preserve pos */,
972                            zapt->term->which, rset_nmem,
973                            kc, kc->scope, ol, index_type, hits_limit_value,
974                            term_ref_id_str);
975         if (!*rset)
976             res = ZEBRA_FAIL;
977     }
978     wrbuf_destroy(display_term);
979     return res;
980 }
981
982 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
983                              const char **term_sub, 
984                              WRBUF term_dict,
985                              const Odr_oid *attributeSet, NMEM stream,
986                              struct grep_info *grep_info,
987                              const char *index_type, int complete_flag,
988                              WRBUF display_term,
989                              const char *xpath_use,
990                              struct ord_list **ol,
991                              zebra_map_t zm)
992 {
993     int r;
994     AttrType truncation;
995     int truncation_value;
996     const char *termp;
997     struct rpn_char_map_info rcmi;
998
999     int space_split = complete_flag ? 0 : 1;
1000     int ord = -1;
1001     int regex_range = 0;
1002     int max_pos, prefix_len = 0;
1003     int relation_error;
1004     char ord_buf[32];
1005     int ord_len, i;
1006
1007     *ol = ord_list_create(stream);
1008
1009     rpn_char_map_prepare(zh->reg, zm, &rcmi);
1010     attr_init_APT(&truncation, zapt, 5);
1011     truncation_value = attr_find(&truncation, NULL);
1012     yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1013
1014     termp = *term_sub; /* start of term for each database */
1015     
1016     if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1017                           attributeSet, &ord) != ZEBRA_OK)
1018     {
1019         *term_sub = 0;
1020         return ZEBRA_FAIL;
1021     }
1022     
1023     wrbuf_rewind(term_dict); /* new dictionary regexp term */
1024     
1025     *ol = ord_list_append(stream, *ol, ord);
1026     ord_len = key_SU_encode(ord, ord_buf);
1027     
1028     wrbuf_putc(term_dict, '(');
1029     
1030     for (i = 0; i<ord_len; i++)
1031     {
1032         wrbuf_putc(term_dict, 1);  /* our internal regexp escape char */
1033         wrbuf_putc(term_dict, ord_buf[i]);
1034     }
1035     wrbuf_putc(term_dict, ')');
1036     
1037     prefix_len = wrbuf_len(term_dict);
1038
1039     if (zebra_maps_is_icu(zm))
1040     {
1041         /* ICU case */
1042         switch (truncation_value)
1043         {
1044         case -1:         /* not specified */
1045         case 100:        /* do not truncate */
1046             if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 0))
1047             {
1048                 *term_sub = 0;
1049                 return ZEBRA_OK;
1050             }
1051             break;
1052         case 1:          /* right truncation */
1053             if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 1))
1054             {
1055                 *term_sub = 0;
1056                 return ZEBRA_OK;
1057             }
1058             break;
1059         default:
1060             zebra_setError_zint(zh,
1061                                 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1062                                 truncation_value);
1063             return ZEBRA_FAIL;
1064         }
1065     }
1066     else
1067     {
1068         /* non-ICU case. using string.chr and friends */
1069         switch (truncation_value)
1070         {
1071         case -1:         /* not specified */
1072         case 100:        /* do not truncate */
1073             if (!string_relation(zh, zapt, &termp, term_dict,
1074                                  attributeSet,
1075                                  zm, space_split, display_term,
1076                                  &relation_error))
1077             {
1078                 if (relation_error)
1079                 {
1080                     zebra_setError(zh, relation_error, 0);
1081                     return ZEBRA_FAIL;
1082                 }
1083                 *term_sub = 0;
1084                 return ZEBRA_OK;
1085             }
1086             break;
1087         case 1:          /* right truncation */
1088             wrbuf_putc(term_dict, '(');
1089             if (!term_100(zm, &termp, term_dict, space_split, display_term))
1090             {
1091                 *term_sub = 0;
1092                 return ZEBRA_OK;
1093             }
1094             wrbuf_puts(term_dict, ".*)");
1095             break;
1096         case 2:          /* left truncation */
1097             wrbuf_puts(term_dict, "(.*");
1098             if (!term_100(zm, &termp, term_dict, space_split, display_term))
1099             {
1100                 *term_sub = 0;
1101                 return ZEBRA_OK;
1102             }
1103             wrbuf_putc(term_dict, ')');
1104             break;
1105         case 3:          /* left&right truncation */
1106             wrbuf_puts(term_dict, "(.*");
1107             if (!term_100(zm, &termp, term_dict, space_split, display_term))
1108             {
1109                 *term_sub = 0;
1110                 return ZEBRA_OK;
1111             }
1112             wrbuf_puts(term_dict, ".*)");
1113             break;
1114         case 101:        /* process # in term */
1115             wrbuf_putc(term_dict, '(');
1116             if (!term_101(zm, &termp, term_dict, space_split, display_term))
1117             {
1118                 *term_sub = 0;
1119                 return ZEBRA_OK;
1120             }
1121             wrbuf_puts(term_dict, ")");
1122             break;
1123         case 102:        /* Regexp-1 */
1124             wrbuf_putc(term_dict, '(');
1125             if (!term_102(zm, &termp, term_dict, space_split, display_term))
1126             {
1127                 *term_sub = 0;
1128                 return ZEBRA_OK;
1129             }
1130             wrbuf_putc(term_dict, ')');
1131             break;
1132         case 103:       /* Regexp-2 */
1133             regex_range = 1;
1134             wrbuf_putc(term_dict, '(');
1135             if (!term_103(zm, &termp, term_dict, &regex_range,
1136                           space_split, display_term))
1137             {
1138                 *term_sub = 0;
1139                 return ZEBRA_OK;
1140             }
1141             wrbuf_putc(term_dict, ')');
1142             break;
1143         case 104:        /* process # and ! in term */
1144             wrbuf_putc(term_dict, '(');
1145             if (!term_104(zm, &termp, term_dict, space_split, display_term))
1146             {
1147                 *term_sub = 0;
1148                 return ZEBRA_OK;
1149             }
1150             wrbuf_putc(term_dict, ')');
1151             break;
1152         case 105:        /* process * and ! in term */
1153             wrbuf_putc(term_dict, '(');
1154             if (!term_105(zm, &termp, term_dict, space_split, display_term, 1))
1155             {
1156                 *term_sub = 0;
1157                 return ZEBRA_OK;
1158             }
1159             wrbuf_putc(term_dict, ')');
1160             break;
1161         case 106:        /* process * and ! in term */
1162             wrbuf_putc(term_dict, '(');
1163             if (!term_105(zm, &termp, term_dict, space_split, display_term, 0))
1164             {
1165                 *term_sub = 0;
1166                 return ZEBRA_OK;
1167             }
1168             wrbuf_putc(term_dict, ')');
1169             break;
1170         default:
1171             zebra_setError_zint(zh,
1172                                 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1173                                 truncation_value);
1174             return ZEBRA_FAIL;
1175         }
1176     }
1177     if (1)
1178     {
1179         char buf[1000];
1180         const char *input = wrbuf_cstr(term_dict) + prefix_len;
1181         esc_str(buf, sizeof(buf), input, strlen(input));
1182     }
1183     {
1184         WRBUF pr_wr = wrbuf_alloc();
1185
1186         wrbuf_write_escaped(pr_wr, wrbuf_buf(term_dict), wrbuf_len(term_dict));
1187         yaz_log(YLOG_LOG, "dict_lookup_grep: %s", wrbuf_cstr(pr_wr));
1188         wrbuf_destroy(pr_wr);
1189     }
1190     r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), regex_range,
1191                          grep_info, &max_pos, 
1192                          ord_len /* number of "exact" chars */,
1193                          grep_handle);
1194     if (r == 1)
1195         zebra_set_partial_result(zh);
1196     else if (r)
1197         yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1198     *term_sub = termp;
1199     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1200     return ZEBRA_OK;
1201 }
1202
1203
1204
1205 static void grep_info_delete(struct grep_info *grep_info)
1206 {
1207 #ifdef TERM_COUNT
1208     xfree(grep_info->term_no);
1209 #endif
1210     xfree(grep_info->isam_p_buf);
1211 }
1212
1213 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1214                                    Z_AttributesPlusTerm *zapt,
1215                                    struct grep_info *grep_info,
1216                                    const char *index_type)
1217 {
1218 #ifdef TERM_COUNT
1219     grep_info->term_no = 0;
1220 #endif
1221     grep_info->trunc_max = atoi(res_get_def(zh->res, "truncmax", "10000"));
1222     grep_info->isam_p_size = 0;
1223     grep_info->isam_p_buf = NULL;
1224     grep_info->zh = zh;
1225     grep_info->index_type = index_type;
1226     grep_info->termset = 0;
1227     if (zapt)
1228     {
1229         AttrType truncmax;
1230         int truncmax_value;
1231
1232         attr_init_APT(&truncmax, zapt, 13);
1233         truncmax_value = attr_find(&truncmax, NULL);
1234         if (truncmax_value != -1)
1235             grep_info->trunc_max = truncmax_value;
1236     }
1237     if (zapt)
1238     {
1239         AttrType termset;
1240         int termset_value_numeric;
1241         const char *termset_value_string;
1242
1243         attr_init_APT(&termset, zapt, 8);
1244         termset_value_numeric =
1245             attr_find_ex(&termset, NULL, &termset_value_string);
1246         if (termset_value_numeric != -1)
1247         {
1248 #if TERMSET_DISABLE
1249             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1250             return ZEBRA_FAIL;
1251 #else
1252             char resname[32];
1253             const char *termset_name = 0;
1254             if (termset_value_numeric != -2)
1255             {
1256                 
1257                 sprintf(resname, "%d", termset_value_numeric);
1258                 termset_name = resname;
1259             }
1260             else
1261                 termset_name = termset_value_string;
1262             yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1263             grep_info->termset = resultSetAdd(zh, termset_name, 1);
1264             if (!grep_info->termset)
1265             {
1266                 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1267                 return ZEBRA_FAIL;
1268             }
1269 #endif
1270         }
1271     }
1272     return ZEBRA_OK;
1273 }
1274
1275 static ZEBRA_RES search_terms_chrmap(ZebraHandle zh,
1276                                      Z_AttributesPlusTerm *zapt,
1277                                      const char *termz,
1278                                      const Odr_oid *attributeSet,
1279                                      NMEM stream,
1280                                      const char *index_type, int complete_flag,
1281                                      const char *rank_type,
1282                                      const char *xpath_use,
1283                                      NMEM rset_nmem,
1284                                      RSET **result_sets, int *num_result_sets,
1285                                      struct rset_key_control *kc,
1286                                      zebra_map_t zm)
1287 {
1288     struct grep_info grep_info;
1289     const char *termp = termz;
1290     int alloc_sets = 0;
1291     
1292     *num_result_sets = 0;
1293     if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1294         return ZEBRA_FAIL;
1295     while (1)
1296     { 
1297         ZEBRA_RES res;
1298
1299         if (alloc_sets == *num_result_sets)
1300         {
1301             int add = 10;
1302             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1303                                               sizeof(*rnew));
1304             if (alloc_sets)
1305                 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1306             alloc_sets = alloc_sets + add;
1307             *result_sets = rnew;
1308         }
1309         res = search_term(zh, zapt, &termp, attributeSet,
1310                           stream, &grep_info,
1311                           index_type, complete_flag,
1312                           rank_type,
1313                           xpath_use, rset_nmem,
1314                           &(*result_sets)[*num_result_sets],
1315                           kc, zm);
1316         if (res != ZEBRA_OK)
1317         {
1318             int i;
1319             for (i = 0; i < *num_result_sets; i++)
1320                 rset_delete((*result_sets)[i]);
1321             grep_info_delete(&grep_info);
1322             return res;
1323         }
1324         if ((*result_sets)[*num_result_sets] == 0)
1325             break;
1326         (*num_result_sets)++;
1327
1328         if (!*termp)
1329             break;
1330     }
1331     grep_info_delete(&grep_info);
1332     return ZEBRA_OK;
1333 }
1334                                
1335 /**
1336    \brief Create result set(s) for list of terms
1337    \param zh Zebra Handle
1338    \param zapt Attributes Plust Term (RPN leaf)
1339    \param termz term as used in query but converted to UTF-8
1340    \param attributeSet default attribute set
1341    \param stream memory for result
1342    \param index_type register type ("w", "p",..)
1343    \param complete_flag whether it's phrases or not
1344    \param rank_type term flags for ranking
1345    \param xpath_use use attribute for X-Path (-1 for no X-path)
1346    \param rset_nmem memory for result sets
1347    \param result_sets output result set for each term in list (output)
1348    \param num_result_sets number of output result sets
1349    \param kc rset key control to be used for created result sets
1350 */
1351 static ZEBRA_RES search_terms_list(ZebraHandle zh,
1352                                    Z_AttributesPlusTerm *zapt,
1353                                    const char *termz,
1354                                    const Odr_oid *attributeSet,
1355                                    NMEM stream,
1356                                    const char *index_type, int complete_flag,
1357                                    const char *rank_type,
1358                                    const char *xpath_use,
1359                                    NMEM rset_nmem,
1360                                    RSET **result_sets, int *num_result_sets,
1361                                    struct rset_key_control *kc)
1362 {
1363     zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1364     if (zebra_maps_is_icu(zm))
1365         zebra_map_tokenize_start(zm, termz, strlen(termz));
1366     return search_terms_chrmap(zh, zapt, termz, attributeSet,
1367                                stream, index_type, complete_flag,
1368                                rank_type, xpath_use,
1369                                rset_nmem, result_sets, num_result_sets,
1370                                kc, zm);
1371 }
1372
1373
1374 /** \brief limit a search by position - returns result set
1375  */
1376 static ZEBRA_RES search_position(ZebraHandle zh,
1377                                  Z_AttributesPlusTerm *zapt,
1378                                  const Odr_oid *attributeSet,
1379                                  const char *index_type,
1380                                  NMEM rset_nmem,
1381                                  RSET *rset,
1382                                  struct rset_key_control *kc)
1383 {
1384     int position_value;
1385     AttrType position;
1386     int ord = -1;
1387     char ord_buf[32];
1388     char term_dict[100];
1389     int ord_len;
1390     char *val;
1391     ISAM_P isam_p;
1392     zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1393     
1394     attr_init_APT(&position, zapt, 3);
1395     position_value = attr_find(&position, NULL);
1396     switch(position_value)
1397     {
1398     case 3:
1399     case -1:
1400         return ZEBRA_OK;
1401     case 1:
1402     case 2:
1403         break;
1404     default:
1405         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1406                             position_value);
1407         return ZEBRA_FAIL;
1408     }
1409
1410
1411     if (!zebra_maps_is_first_in_field(zm))
1412     {
1413         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1414                             position_value);
1415         return ZEBRA_FAIL;
1416     }
1417
1418     if (zebra_apt_get_ord(zh, zapt, index_type, 0,
1419                           attributeSet, &ord) != ZEBRA_OK)
1420     {
1421         return ZEBRA_FAIL;
1422     }
1423     ord_len = key_SU_encode(ord, ord_buf);
1424     memcpy(term_dict, ord_buf, ord_len);
1425     strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1426     val = dict_lookup(zh->reg->dict, term_dict);
1427     if (val)
1428     {
1429         assert(*val == sizeof(ISAM_P));
1430         memcpy(&isam_p, val+1, sizeof(isam_p));
1431
1432         *rset = zebra_create_rset_isam(zh, rset_nmem, kc, kc->scope, 
1433                                        isam_p, 0);
1434     }
1435     return ZEBRA_OK;
1436 }
1437
1438 /** \brief returns result set for phrase search
1439  */
1440 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1441                                        Z_AttributesPlusTerm *zapt,
1442                                        const char *termz_org,
1443                                        const Odr_oid *attributeSet,
1444                                        NMEM stream,
1445                                        const char *index_type,
1446                                        int complete_flag,
1447                                        const char *rank_type,
1448                                        const char *xpath_use,
1449                                        NMEM rset_nmem,
1450                                        RSET *rset,
1451                                        struct rset_key_control *kc)
1452 {
1453     RSET *result_sets = 0;
1454     int num_result_sets = 0;
1455     ZEBRA_RES res =
1456         search_terms_list(zh, zapt, termz_org, attributeSet,
1457                           stream, index_type, complete_flag,
1458                           rank_type, xpath_use,
1459                           rset_nmem,
1460                           &result_sets, &num_result_sets, kc);
1461     
1462     if (res != ZEBRA_OK)
1463         return res;
1464
1465     if (num_result_sets > 0)
1466     {
1467         RSET first_set = 0;
1468         res = search_position(zh, zapt, attributeSet, 
1469                               index_type,
1470                               rset_nmem, &first_set,
1471                               kc);
1472         if (res != ZEBRA_OK)
1473         {
1474             int i;
1475             for (i = 0; i<num_result_sets; i++)
1476                 rset_delete(result_sets[i]);
1477             return res;
1478         }
1479         if (first_set)
1480         {
1481             RSET *nsets = nmem_malloc(stream,
1482                                       sizeof(RSET) * (num_result_sets+1));
1483             nsets[0] = first_set;
1484             memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1485             result_sets = nsets;
1486             num_result_sets++;
1487         }
1488     }
1489     if (num_result_sets == 0)
1490         *rset = rset_create_null(rset_nmem, kc, 0); 
1491     else if (num_result_sets == 1)
1492         *rset = result_sets[0];
1493     else
1494         *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1495                                  num_result_sets, result_sets,
1496                                  1 /* ordered */, 0 /* exclusion */,
1497                                  3 /* relation */, 1 /* distance */);
1498     if (!*rset)
1499         return ZEBRA_FAIL;
1500     return ZEBRA_OK;
1501 }
1502
1503 /** \brief returns result set for or-list search
1504  */
1505 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1506                                         Z_AttributesPlusTerm *zapt,
1507                                         const char *termz_org,
1508                                         const Odr_oid *attributeSet,
1509                                         NMEM stream,
1510                                         const char *index_type, 
1511                                         int complete_flag,
1512                                         const char *rank_type,
1513                                         const char *xpath_use,
1514                                         NMEM rset_nmem,
1515                                         RSET *rset,
1516                                         struct rset_key_control *kc)
1517 {
1518     RSET *result_sets = 0;
1519     int num_result_sets = 0;
1520     int i;
1521     ZEBRA_RES res =
1522         search_terms_list(zh, zapt, termz_org, attributeSet,
1523                           stream, index_type, complete_flag,
1524                           rank_type, xpath_use,
1525                           rset_nmem,
1526                           &result_sets, &num_result_sets, kc);
1527     if (res != ZEBRA_OK)
1528         return res;
1529
1530     for (i = 0; i<num_result_sets; i++)
1531     {
1532         RSET first_set = 0;
1533         res = search_position(zh, zapt, attributeSet, 
1534                               index_type,
1535                               rset_nmem, &first_set,
1536                               kc);
1537         if (res != ZEBRA_OK)
1538         {
1539             for (i = 0; i<num_result_sets; i++)
1540                 rset_delete(result_sets[i]);
1541             return res;
1542         }
1543
1544         if (first_set)
1545         {
1546             RSET tmp_set[2];
1547
1548             tmp_set[0] = first_set;
1549             tmp_set[1] = result_sets[i];
1550             
1551             result_sets[i] = rset_create_prox(
1552                 rset_nmem, kc, kc->scope,
1553                 2, tmp_set,
1554                 1 /* ordered */, 0 /* exclusion */,
1555                 3 /* relation */, 1 /* distance */);
1556         }
1557     }
1558     if (num_result_sets == 0)
1559         *rset = rset_create_null(rset_nmem, kc, 0); 
1560     else if (num_result_sets == 1)
1561         *rset = result_sets[0];
1562     else
1563         *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1564                                num_result_sets, result_sets);
1565     if (!*rset)
1566         return ZEBRA_FAIL;
1567     return ZEBRA_OK;
1568 }
1569
1570 /** \brief returns result set for and-list search
1571  */
1572 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1573                                          Z_AttributesPlusTerm *zapt,
1574                                          const char *termz_org,
1575                                          const Odr_oid *attributeSet,
1576                                          NMEM stream,
1577                                          const char *index_type, 
1578                                          int complete_flag,
1579                                          const char *rank_type, 
1580                                          const char *xpath_use,
1581                                          NMEM rset_nmem,
1582                                          RSET *rset,
1583                                          struct rset_key_control *kc)
1584 {
1585     RSET *result_sets = 0;
1586     int num_result_sets = 0;
1587     int i;
1588     ZEBRA_RES res =
1589         search_terms_list(zh, zapt, termz_org, attributeSet,
1590                           stream, index_type, complete_flag,
1591                           rank_type, xpath_use,
1592                           rset_nmem,
1593                           &result_sets, &num_result_sets,
1594                           kc);
1595     if (res != ZEBRA_OK)
1596         return res;
1597     for (i = 0; i<num_result_sets; i++)
1598     {
1599         RSET first_set = 0;
1600         res = search_position(zh, zapt, attributeSet, 
1601                               index_type,
1602                               rset_nmem, &first_set,
1603                               kc);
1604         if (res != ZEBRA_OK)
1605         {
1606             for (i = 0; i<num_result_sets; i++)
1607                 rset_delete(result_sets[i]);
1608             return res;
1609         }
1610
1611         if (first_set)
1612         {
1613             RSET tmp_set[2];
1614
1615             tmp_set[0] = first_set;
1616             tmp_set[1] = result_sets[i];
1617             
1618             result_sets[i] = rset_create_prox(
1619                 rset_nmem, kc, kc->scope,
1620                 2, tmp_set,
1621                 1 /* ordered */, 0 /* exclusion */,
1622                 3 /* relation */, 1 /* distance */);
1623         }
1624     }
1625
1626
1627     if (num_result_sets == 0)
1628         *rset = rset_create_null(rset_nmem, kc, 0); 
1629     else if (num_result_sets == 1)
1630         *rset = result_sets[0];
1631     else
1632         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1633                                 num_result_sets, result_sets);
1634     if (!*rset)
1635         return ZEBRA_FAIL;
1636     return ZEBRA_OK;
1637 }
1638
1639 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1640                             const char **term_sub,
1641                             WRBUF term_dict,
1642                             const Odr_oid *attributeSet,
1643                             struct grep_info *grep_info,
1644                             int *max_pos,
1645                             zebra_map_t zm,
1646                             WRBUF display_term,
1647                             int *error_code)
1648 {
1649     AttrType relation;
1650     int relation_value;
1651     int term_value;
1652     int r;
1653     WRBUF term_num = wrbuf_alloc();
1654
1655     *error_code = 0;
1656     attr_init_APT(&relation, zapt, 2);
1657     relation_value = attr_find(&relation, NULL);
1658
1659     yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1660
1661     switch (relation_value)
1662     {
1663     case 1:
1664         yaz_log(log_level_rpn, "Relation <");
1665         if (!term_100(zm, term_sub, term_num, 1, display_term))
1666         { 
1667             wrbuf_destroy(term_num);
1668             return 0;
1669         }
1670         term_value = atoi(wrbuf_cstr(term_num));
1671         gen_regular_rel(term_dict, term_value-1, 1);
1672         break;
1673     case 2:
1674         yaz_log(log_level_rpn, "Relation <=");
1675         if (!term_100(zm, term_sub, term_num, 1, display_term))
1676         {
1677             wrbuf_destroy(term_num);
1678             return 0;
1679         }
1680         term_value = atoi(wrbuf_cstr(term_num));
1681         gen_regular_rel(term_dict, term_value, 1);
1682         break;
1683     case 4:
1684         yaz_log(log_level_rpn, "Relation >=");
1685         if (!term_100(zm, term_sub, term_num, 1, display_term))
1686         {
1687             wrbuf_destroy(term_num);
1688             return 0;
1689         }
1690         term_value = atoi(wrbuf_cstr(term_num));
1691         gen_regular_rel(term_dict, term_value, 0);
1692         break;
1693     case 5:
1694         yaz_log(log_level_rpn, "Relation >");
1695         if (!term_100(zm, term_sub, term_num, 1, display_term))
1696         {
1697             wrbuf_destroy(term_num);
1698             return 0;
1699         }
1700         term_value = atoi(wrbuf_cstr(term_num));
1701         gen_regular_rel(term_dict, term_value+1, 0);
1702         break;
1703     case -1:
1704     case 3:
1705         yaz_log(log_level_rpn, "Relation =");
1706         if (!term_100(zm, term_sub, term_num, 1, display_term))
1707         {
1708             wrbuf_destroy(term_num);
1709             return 0; 
1710         }
1711         term_value = atoi(wrbuf_cstr(term_num));
1712         wrbuf_printf(term_dict, "(0*%d)", term_value);
1713         break;
1714     case 103:
1715         /* term_tmp untouched.. */
1716         while (**term_sub != '\0')
1717             (*term_sub)++;
1718         break;
1719     default:
1720         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1721         wrbuf_destroy(term_num); 
1722         return 0;
1723     }
1724     r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 
1725                          0, grep_info, max_pos, 0, grep_handle);
1726
1727     if (r == 1)
1728         zebra_set_partial_result(zh);
1729     else if (r)
1730         yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1731     yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1732     wrbuf_destroy(term_num);
1733     return 1;
1734 }
1735
1736 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1737                               const char **term_sub, 
1738                               WRBUF term_dict,
1739                               const Odr_oid *attributeSet, NMEM stream,
1740                               struct grep_info *grep_info,
1741                               const char *index_type, int complete_flag,
1742                               WRBUF display_term,
1743                               const char *xpath_use,
1744                               struct ord_list **ol)
1745 {
1746     const char *termp;
1747     struct rpn_char_map_info rcmi;
1748     int max_pos;
1749     int relation_error = 0;
1750     int ord, ord_len, i;
1751     char ord_buf[32];
1752     zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1753     
1754     *ol = ord_list_create(stream);
1755
1756     rpn_char_map_prepare(zh->reg, zm, &rcmi);
1757
1758     termp = *term_sub;
1759     
1760     if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1761                           attributeSet, &ord) != ZEBRA_OK)
1762     {
1763         return ZEBRA_FAIL;
1764     }
1765     
1766     wrbuf_rewind(term_dict);
1767     
1768     *ol = ord_list_append(stream, *ol, ord);
1769     
1770     ord_len = key_SU_encode(ord, ord_buf);
1771     
1772     wrbuf_putc(term_dict, '(');
1773     for (i = 0; i < ord_len; i++)
1774     {
1775         wrbuf_putc(term_dict, 1);
1776         wrbuf_putc(term_dict, ord_buf[i]);
1777     }
1778     wrbuf_putc(term_dict, ')');
1779     
1780     if (!numeric_relation(zh, zapt, &termp, term_dict,
1781                           attributeSet, grep_info, &max_pos, zm,
1782                           display_term, &relation_error))
1783     {
1784         if (relation_error)
1785         {
1786             zebra_setError(zh, relation_error, 0);
1787             return ZEBRA_FAIL;
1788         }
1789         *term_sub = 0;
1790         return ZEBRA_OK;
1791     }
1792     *term_sub = termp;
1793     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1794     return ZEBRA_OK;
1795 }
1796
1797                                  
1798 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1799                                         Z_AttributesPlusTerm *zapt,
1800                                         const char *termz,
1801                                         const Odr_oid *attributeSet,
1802                                         NMEM stream,
1803                                         const char *index_type, 
1804                                         int complete_flag,
1805                                         const char *rank_type, 
1806                                         const char *xpath_use,
1807                                         NMEM rset_nmem,
1808                                         RSET *rset,
1809                                         struct rset_key_control *kc)
1810 {
1811     const char *termp = termz;
1812     RSET *result_sets = 0;
1813     int num_result_sets = 0;
1814     ZEBRA_RES res;
1815     struct grep_info grep_info;
1816     int alloc_sets = 0;
1817     zint hits_limit_value;
1818     const char *term_ref_id_str = 0;
1819
1820     zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
1821                           stream);
1822
1823     yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1824     if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1825         return ZEBRA_FAIL;
1826     while (1)
1827     { 
1828         struct ord_list *ol;
1829         WRBUF term_dict = wrbuf_alloc();
1830         WRBUF display_term = wrbuf_alloc();
1831         if (alloc_sets == num_result_sets)
1832         {
1833             int add = 10;
1834             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1835                                               sizeof(*rnew));
1836             if (alloc_sets)
1837                 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1838             alloc_sets = alloc_sets + add;
1839             result_sets = rnew;
1840         }
1841         yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1842         grep_info.isam_p_indx = 0;
1843         res = numeric_term(zh, zapt, &termp, term_dict,
1844                            attributeSet, stream, &grep_info,
1845                            index_type, complete_flag,
1846                            display_term, xpath_use, &ol);
1847         wrbuf_destroy(term_dict);
1848         if (res == ZEBRA_FAIL || termp == 0)
1849         {
1850             wrbuf_destroy(display_term);
1851             break;
1852         }
1853         yaz_log(YLOG_DEBUG, "term: %s", wrbuf_cstr(display_term));
1854         result_sets[num_result_sets] =
1855             rset_trunc(zh, grep_info.isam_p_buf,
1856                        grep_info.isam_p_indx, wrbuf_buf(display_term),
1857                        wrbuf_len(display_term), rank_type,
1858                        0 /* preserve position */,
1859                        zapt->term->which, rset_nmem, 
1860                        kc, kc->scope, ol, index_type,
1861                        hits_limit_value,
1862                        term_ref_id_str);
1863         wrbuf_destroy(display_term);
1864         if (!result_sets[num_result_sets])
1865             break;
1866         num_result_sets++;
1867         if (!*termp)
1868             break;
1869     }
1870     grep_info_delete(&grep_info);
1871
1872     if (res != ZEBRA_OK)
1873         return res;
1874     if (num_result_sets == 0)
1875         *rset = rset_create_null(rset_nmem, kc, 0);
1876     else if (num_result_sets == 1)
1877         *rset = result_sets[0];
1878     else
1879         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1880                                 num_result_sets, result_sets);
1881     if (!*rset)
1882         return ZEBRA_FAIL;
1883     return ZEBRA_OK;
1884 }
1885
1886 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1887                                       Z_AttributesPlusTerm *zapt,
1888                                       const char *termz,
1889                                       const Odr_oid *attributeSet,
1890                                       NMEM stream,
1891                                       const char *rank_type, NMEM rset_nmem,
1892                                       RSET *rset,
1893                                       struct rset_key_control *kc)
1894 {
1895     Record rec;
1896     zint sysno = atozint(termz);
1897     
1898     if (sysno <= 0)
1899         sysno = 0;
1900     rec = rec_get(zh->reg->records, sysno);
1901     if (!rec)
1902         sysno = 0;
1903
1904     rec_free(&rec);
1905
1906     if (sysno <= 0)
1907     {
1908         *rset = rset_create_null(rset_nmem, kc, 0);
1909     }
1910     else
1911     {
1912         RSFD rsfd;
1913         struct it_key key;
1914         *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1915                                  res_get(zh->res, "setTmpDir"), 0);
1916         rsfd = rset_open(*rset, RSETF_WRITE);
1917         
1918         key.mem[0] = sysno;
1919         key.mem[1] = 1;
1920         key.len = 2;
1921         rset_write(rsfd, &key);
1922         rset_close(rsfd);
1923     }
1924     return ZEBRA_OK;
1925 }
1926
1927 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1928                                const Odr_oid *attributeSet, NMEM stream,
1929                                Z_SortKeySpecList *sort_sequence,
1930                                const char *rank_type,
1931                                NMEM rset_nmem,
1932                                RSET *rset,
1933                                struct rset_key_control *kc)
1934 {
1935     int i;
1936     int sort_relation_value;
1937     AttrType sort_relation_type;
1938     Z_SortKeySpec *sks;
1939     Z_SortKey *sk;
1940     char termz[20];
1941     
1942     attr_init_APT(&sort_relation_type, zapt, 7);
1943     sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
1944
1945     if (!sort_sequence->specs)
1946     {
1947         sort_sequence->num_specs = 10;
1948         sort_sequence->specs = (Z_SortKeySpec **)
1949             nmem_malloc(stream, sort_sequence->num_specs *
1950                         sizeof(*sort_sequence->specs));
1951         for (i = 0; i<sort_sequence->num_specs; i++)
1952             sort_sequence->specs[i] = 0;
1953     }
1954     if (zapt->term->which != Z_Term_general)
1955         i = 0;
1956     else
1957         i = atoi_n((char *) zapt->term->u.general->buf,
1958                    zapt->term->u.general->len);
1959     if (i >= sort_sequence->num_specs)
1960         i = 0;
1961     sprintf(termz, "%d", i);
1962
1963     sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1964     sks->sortElement = (Z_SortElement *)
1965         nmem_malloc(stream, sizeof(*sks->sortElement));
1966     sks->sortElement->which = Z_SortElement_generic;
1967     sk = sks->sortElement->u.generic = (Z_SortKey *)
1968         nmem_malloc(stream, sizeof(*sk));
1969     sk->which = Z_SortKey_sortAttributes;
1970     sk->u.sortAttributes = (Z_SortAttributes *)
1971         nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
1972
1973     sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet);
1974     sk->u.sortAttributes->list = zapt->attributes;
1975
1976     sks->sortRelation = (int *)
1977         nmem_malloc(stream, sizeof(*sks->sortRelation));
1978     if (sort_relation_value == 1)
1979         *sks->sortRelation = Z_SortKeySpec_ascending;
1980     else if (sort_relation_value == 2)
1981         *sks->sortRelation = Z_SortKeySpec_descending;
1982     else 
1983         *sks->sortRelation = Z_SortKeySpec_ascending;
1984
1985     sks->caseSensitivity = (int *)
1986         nmem_malloc(stream, sizeof(*sks->caseSensitivity));
1987     *sks->caseSensitivity = 0;
1988
1989     sks->which = Z_SortKeySpec_null;
1990     sks->u.null = odr_nullval ();
1991     sort_sequence->specs[i] = sks;
1992     *rset = rset_create_null(rset_nmem, kc, 0);
1993     return ZEBRA_OK;
1994 }
1995
1996
1997 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1998                            const Odr_oid *attributeSet,
1999                            struct xpath_location_step *xpath, int max,
2000                            NMEM mem)
2001 {
2002     const Odr_oid *curAttributeSet = attributeSet;
2003     AttrType use;
2004     const char *use_string = 0;
2005     
2006     attr_init_APT(&use, zapt, 1);
2007     attr_find_ex(&use, &curAttributeSet, &use_string);
2008
2009     if (!use_string || *use_string != '/')
2010         return -1;
2011
2012     return zebra_parse_xpath_str(use_string, xpath, max, mem);
2013 }
2014  
2015                
2016
2017 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
2018                         const char *index_type, const char *term, 
2019                         const char *xpath_use,
2020                         NMEM rset_nmem,
2021                         struct rset_key_control *kc)
2022 {
2023     struct grep_info grep_info;
2024     int ord = zebraExplain_lookup_attr_str(zh->reg->zei, 
2025                                            zinfo_index_category_index,
2026                                            index_type, xpath_use);
2027     if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, "0") == ZEBRA_FAIL)
2028         return rset_create_null(rset_nmem, kc, 0);
2029     
2030     if (ord < 0)
2031         return rset_create_null(rset_nmem, kc, 0);
2032     else
2033     {
2034         int i, r, max_pos;
2035         char ord_buf[32];
2036         RSET rset;
2037         WRBUF term_dict = wrbuf_alloc();
2038         int ord_len = key_SU_encode(ord, ord_buf);
2039         int term_type = Z_Term_characterString;
2040         const char *flags = "void";
2041
2042         wrbuf_putc(term_dict, '(');
2043         for (i = 0; i<ord_len; i++)
2044         {
2045             wrbuf_putc(term_dict, 1);
2046             wrbuf_putc(term_dict, ord_buf[i]);
2047         }
2048         wrbuf_putc(term_dict, ')');
2049         wrbuf_puts(term_dict, term);
2050         
2051         grep_info.isam_p_indx = 0;
2052         r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 0,
2053                              &grep_info, &max_pos, 0, grep_handle);
2054         yaz_log(YLOG_DEBUG, "%s %d positions", term,
2055                 grep_info.isam_p_indx);
2056         rset = rset_trunc(zh, grep_info.isam_p_buf,
2057                           grep_info.isam_p_indx, term, strlen(term),
2058                           flags, 1, term_type, rset_nmem,
2059                           kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2060                           0 /* term_ref_id_str */);
2061         grep_info_delete(&grep_info);
2062         wrbuf_destroy(term_dict);
2063         return rset;
2064     }
2065 }
2066
2067 static
2068 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2069                            NMEM stream, const char *rank_type, RSET rset,
2070                            int xpath_len, struct xpath_location_step *xpath,
2071                            NMEM rset_nmem,
2072                            RSET *rset_out,
2073                            struct rset_key_control *kc)
2074 {
2075     int i;
2076     int always_matches = rset ? 0 : 1;
2077
2078     if (xpath_len < 0)
2079     {
2080         *rset_out = rset;
2081         return ZEBRA_OK;
2082     }
2083
2084     yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2085     for (i = 0; i<xpath_len; i++)
2086     {
2087         yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2088
2089     }
2090
2091     /*
2092     //a    ->    a/.*
2093     //a/b  ->    b/a/.*
2094     /a     ->    a/
2095     /a/b   ->    b/a/
2096
2097     /      ->    none
2098
2099     a[@attr = value]/b[@other = othervalue]
2100
2101     /e/@a val      range(e/,range(@a,freetext(w,1015,val),@a),e/)
2102     /a/b val       range(b/a/,freetext(w,1016,val),b/a/)
2103     /a/b/@c val    range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2104     /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2105     /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2106     /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
2107       
2108     */
2109
2110     dict_grep_cmap(zh->reg->dict, 0, 0);
2111     
2112     {
2113         int level = xpath_len;
2114         int first_path = 1;
2115         
2116         while (--level >= 0)
2117         {
2118             WRBUF xpath_rev = wrbuf_alloc();
2119             int i;
2120             RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
2121
2122             for (i = level; i >= 1; --i)
2123             {
2124                 const char *cp = xpath[i].part;
2125                 if (*cp)
2126                 {
2127                     for (; *cp; cp++)
2128                     {
2129                         if (*cp == '*')
2130                             wrbuf_puts(xpath_rev, "[^/]*");
2131                         else if (*cp == ' ')
2132                             wrbuf_puts(xpath_rev, "\001 ");
2133                         else
2134                             wrbuf_putc(xpath_rev, *cp);
2135
2136                         /* wrbuf_putc does not null-terminate , but
2137                            wrbuf_puts below ensures it does.. so xpath_rev
2138                            is OK iff length is > 0 */
2139                     }
2140                     wrbuf_puts(xpath_rev, "/");
2141                 }
2142                 else if (i == 1)  /* // case */
2143                     wrbuf_puts(xpath_rev, ".*");
2144             }
2145             if (xpath[level].predicate &&
2146                 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2147                 xpath[level].predicate->u.relation.name[0])
2148             {
2149                 WRBUF wbuf = wrbuf_alloc();
2150                 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2151                 if (xpath[level].predicate->u.relation.value)
2152                 {
2153                     const char *cp = xpath[level].predicate->u.relation.value;
2154                     wrbuf_putc(wbuf, '=');
2155                     
2156                     while (*cp)
2157                     {
2158                         if (strchr(REGEX_CHARS, *cp))
2159                             wrbuf_putc(wbuf, '\\');
2160                         wrbuf_putc(wbuf, *cp);
2161                         cp++;
2162                     }
2163                 }
2164                 rset_attr = xpath_trunc(
2165                     zh, stream, "0", wrbuf_cstr(wbuf), 
2166                     ZEBRA_XPATH_ATTR_NAME, 
2167                     rset_nmem, kc);
2168                 wrbuf_destroy(wbuf);
2169             } 
2170             else 
2171             {
2172                 if (!first_path)
2173                 {
2174                     wrbuf_destroy(xpath_rev);
2175                     continue;
2176                 }
2177             }
2178             yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level, 
2179                     wrbuf_cstr(xpath_rev));
2180             if (wrbuf_len(xpath_rev))
2181             {
2182                 rset_start_tag = xpath_trunc(zh, stream, "0", 
2183                                              wrbuf_cstr(xpath_rev),
2184                                              ZEBRA_XPATH_ELM_BEGIN, 
2185                                              rset_nmem, kc);
2186                 if (always_matches)
2187                     rset = rset_start_tag;
2188                 else
2189                 {
2190                     rset_end_tag = xpath_trunc(zh, stream, "0", 
2191                                                wrbuf_cstr(xpath_rev),
2192                                                ZEBRA_XPATH_ELM_END, 
2193                                                rset_nmem, kc);
2194                     
2195                     rset = rset_create_between(rset_nmem, kc, kc->scope,
2196                                                rset_start_tag, rset,
2197                                                rset_end_tag, rset_attr);
2198                 }
2199             }
2200             wrbuf_destroy(xpath_rev);
2201             first_path = 0;
2202         }
2203     }
2204     *rset_out = rset;
2205     return ZEBRA_OK;
2206 }
2207
/* Capacity of the xpath step array declared in rpn_search_database() and
   passed as the "max" argument to rpn_check_xpath(). */
2208 #define MAX_XPATH_STEPS 10
2209
/* Forward declaration: rpn_search_APT() below calls this once per database;
   the definition follows it. */
2210 static ZEBRA_RES rpn_search_database(ZebraHandle zh, 
2211                                      Z_AttributesPlusTerm *zapt,
2212                                      const Odr_oid *attributeSet, NMEM stream,
2213                                      Z_SortKeySpecList *sort_sequence,
2214                                      NMEM rset_nmem,
2215                                      RSET *rset,
2216                                      struct rset_key_control *kc);
2217
2218 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2219                                 const Odr_oid *attributeSet, NMEM stream,
2220                                 Z_SortKeySpecList *sort_sequence,
2221                                 int num_bases, const char **basenames, 
2222                                 NMEM rset_nmem,
2223                                 RSET *rset,
2224                                 struct rset_key_control *kc)
2225 {
2226     RSET *rsets = nmem_malloc(stream, num_bases * sizeof(*rsets));
2227     ZEBRA_RES res = ZEBRA_OK;
2228     int i;
2229     for (i = 0; i < num_bases; i++)
2230     {
2231
2232         if (zebraExplain_curDatabase(zh->reg->zei, basenames[i]))
2233         {
2234             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2235                            basenames[i]);
2236             res = ZEBRA_FAIL;
2237             break;
2238         }
2239         res = rpn_search_database(zh, zapt, attributeSet, stream,
2240                                   sort_sequence,
2241                                   rset_nmem, rsets+i, kc);
2242         if (res != ZEBRA_OK)
2243             break;
2244     }
2245     if (res != ZEBRA_OK)
2246     {   /* must clean up the already created sets */
2247         while (--i >= 0)
2248             rset_delete(rsets[i]);
2249         *rset = 0;
2250     }
2251     else 
2252     {
2253         if (num_bases == 1)
2254             *rset = rsets[0];
2255         else if (num_bases == 0)
2256             *rset = rset_create_null(rset_nmem, kc, 0); 
2257         else
2258             *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* TERMID */,
2259                                    num_bases, rsets);
2260     }
2261     return res;
2262 }
2263
2264 static ZEBRA_RES rpn_search_database(ZebraHandle zh, 
2265                                      Z_AttributesPlusTerm *zapt,
2266                                      const Odr_oid *attributeSet, NMEM stream,
2267                                      Z_SortKeySpecList *sort_sequence,
2268                                      NMEM rset_nmem,
2269                                      RSET *rset,
2270                                      struct rset_key_control *kc)
2271 {
2272     ZEBRA_RES res = ZEBRA_OK;
2273     const char *index_type;
2274     char *search_type = NULL;
2275     char rank_type[128];
2276     int complete_flag;
2277     int sort_flag;
2278     char termz[IT_MAX_WORD+1];
2279     int xpath_len;
2280     const char *xpath_use = 0;
2281     struct xpath_location_step xpath[MAX_XPATH_STEPS];
2282
2283     if (!log_level_set)
2284     {
2285         log_level_rpn = yaz_log_module_level("rpn");
2286         log_level_set = 1;
2287     }
2288     zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type,
2289                     rank_type, &complete_flag, &sort_flag);
2290     
2291     yaz_log(YLOG_DEBUG, "index_type=%s", index_type);
2292     yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2293     yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2294     yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2295
2296     if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2297         return ZEBRA_FAIL;
2298
2299     if (sort_flag)
2300         return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2301                              rank_type, rset_nmem, rset, kc);
2302     /* consider if an X-Path query is used */
2303     xpath_len = rpn_check_xpath(zh, zapt, attributeSet, 
2304                                 xpath, MAX_XPATH_STEPS, stream);
2305     if (xpath_len >= 0)
2306     {
2307         if (xpath[xpath_len-1].part[0] == '@') 
2308             xpath_use = ZEBRA_XPATH_ATTR_CDATA;  /* last step is attribute  */
2309         else
2310             xpath_use = ZEBRA_XPATH_CDATA;  /* searching for cdata */        
2311
2312         if (1)
2313         {
2314             AttrType relation;
2315             int relation_value;
2316
2317             attr_init_APT(&relation, zapt, 2);
2318             relation_value = attr_find(&relation, NULL);
2319
2320             if (relation_value == 103) /* alwaysmatches */
2321             {
2322                 *rset = 0; /* signal no "term" set */
2323                 return rpn_search_xpath(zh, stream, rank_type, *rset, 
2324                                         xpath_len, xpath, rset_nmem, rset, kc);
2325             }
2326         }
2327     }
2328
2329     /* search using one of the various search type strategies
2330        termz is our UTF-8 search term
2331        attributeSet is top-level default attribute set 
2332        stream is ODR for search
2333        reg_id is the register type
2334        complete_flag is 1 for complete subfield, 0 for incomplete
2335        xpath_use is use-attribute to be used for X-Path search, 0 for none
2336     */
2337     if (!strcmp(search_type, "phrase"))
2338     {
2339         res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2340                                     index_type, complete_flag, rank_type,
2341                                     xpath_use,
2342                                     rset_nmem,
2343                                     rset, kc);
2344     }
2345     else if (!strcmp(search_type, "and-list"))
2346     {
2347         res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2348                                       index_type, complete_flag, rank_type,
2349                                       xpath_use,
2350                                       rset_nmem,
2351                                       rset, kc);
2352     }
2353     else if (!strcmp(search_type, "or-list"))
2354     {
2355         res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2356                                      index_type, complete_flag, rank_type,
2357                                      xpath_use,
2358                                      rset_nmem,
2359                                      rset, kc);
2360     }
2361     else if (!strcmp(search_type, "local"))
2362     {
2363         res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2364                                    rank_type, rset_nmem, rset, kc);
2365     }
2366     else if (!strcmp(search_type, "numeric"))
2367     {
2368         res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2369                                      index_type, complete_flag, rank_type,
2370                                      xpath_use,
2371                                      rset_nmem,
2372                                      rset, kc);
2373     }
2374     else
2375     {
2376         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2377         res = ZEBRA_FAIL;
2378     }
2379     if (res != ZEBRA_OK)
2380         return res;
2381     if (!*rset)
2382         return ZEBRA_FAIL;
2383     return rpn_search_xpath(zh, stream, rank_type, *rset, 
2384                             xpath_len, xpath, rset_nmem, rset, kc);
2385 }
2386
/* Forward declaration: rpn_search_top() below calls this before its
   definition appears. */
2387 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2388                                       const Odr_oid *attributeSet, 
2389                                       NMEM stream, NMEM rset_nmem,
2390                                       Z_SortKeySpecList *sort_sequence,
2391                                       int num_bases, const char **basenames,
2392                                       RSET **result_sets, int *num_result_sets,
2393                                       Z_Operator *parent_op,
2394                                       struct rset_key_control *kc);
2395
2396 ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs,
2397                                    zint *approx_limit)
2398 {
2399     ZEBRA_RES res = ZEBRA_OK;
2400     if (zs->which == Z_RPNStructure_complex)
2401     {
2402         if (res == ZEBRA_OK)
2403             res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
2404                                            approx_limit);
2405         if (res == ZEBRA_OK)
2406             res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
2407                                            approx_limit);
2408     }
2409     else if (zs->which == Z_RPNStructure_simple)
2410     {
2411         if (zs->u.simple->which == Z_Operand_APT)
2412         {
2413             Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
2414             AttrType global_hits_limit_attr;
2415             int l;
2416             
2417             attr_init_APT(&global_hits_limit_attr, zapt, 12);
2418             
2419             l = attr_find(&global_hits_limit_attr, NULL);
2420             if (l != -1)
2421                 *approx_limit = l;
2422         }
2423     }
2424     return res;
2425 }
2426
2427 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2428                          const Odr_oid *attributeSet, 
2429                          NMEM stream, NMEM rset_nmem,
2430                          Z_SortKeySpecList *sort_sequence,
2431                          int num_bases, const char **basenames,
2432                          RSET *result_set)
2433 {
2434     RSET *result_sets = 0;
2435     int num_result_sets = 0;
2436     ZEBRA_RES res;
2437     struct rset_key_control *kc = zebra_key_control_create(zh);
2438
2439     res = rpn_search_structure(zh, zs, attributeSet,
2440                                stream, rset_nmem,
2441                                sort_sequence, 
2442                                num_bases, basenames,
2443                                &result_sets, &num_result_sets,
2444                                0 /* no parent op */,
2445                                kc);
2446     if (res != ZEBRA_OK)
2447     {
2448         int i;
2449         for (i = 0; i<num_result_sets; i++)
2450             rset_delete(result_sets[i]);
2451         *result_set = 0;
2452     }
2453     else
2454     {
2455         assert(num_result_sets == 1);
2456         assert(result_sets);
2457         assert(*result_sets);
2458         *result_set = *result_sets;
2459     }
2460     (*kc->dec)(kc);
2461     return res;
2462 }
2463
2464 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2465                                const Odr_oid *attributeSet, 
2466                                NMEM stream, NMEM rset_nmem,
2467                                Z_SortKeySpecList *sort_sequence,
2468                                int num_bases, const char **basenames,
2469                                RSET **result_sets, int *num_result_sets,
2470                                Z_Operator *parent_op,
2471                                struct rset_key_control *kc)
2472 {
2473     *num_result_sets = 0;
2474     if (zs->which == Z_RPNStructure_complex)
2475     {
2476         ZEBRA_RES res;
2477         Z_Operator *zop = zs->u.complex->roperator;
2478         RSET *result_sets_l = 0;
2479         int num_result_sets_l = 0;
2480         RSET *result_sets_r = 0;
2481         int num_result_sets_r = 0;
2482
2483         res = rpn_search_structure(zh, zs->u.complex->s1,
2484                                    attributeSet, stream, rset_nmem,
2485                                    sort_sequence,
2486                                    num_bases, basenames,
2487                                    &result_sets_l, &num_result_sets_l,
2488                                    zop, kc);
2489         if (res != ZEBRA_OK)
2490         {
2491             int i;
2492             for (i = 0; i<num_result_sets_l; i++)
2493                 rset_delete(result_sets_l[i]);
2494             return res;
2495         }
2496         res = rpn_search_structure(zh, zs->u.complex->s2,
2497                                    attributeSet, stream, rset_nmem,
2498                                    sort_sequence,
2499                                    num_bases, basenames,
2500                                    &result_sets_r, &num_result_sets_r,
2501                                    zop, kc);
2502         if (res != ZEBRA_OK)
2503         {
2504             int i;
2505             for (i = 0; i<num_result_sets_l; i++)
2506                 rset_delete(result_sets_l[i]);
2507             for (i = 0; i<num_result_sets_r; i++)
2508                 rset_delete(result_sets_r[i]);
2509             return res;
2510         }
2511
2512         /* make a new list of result for all children */
2513         *num_result_sets = num_result_sets_l + num_result_sets_r;
2514         *result_sets = nmem_malloc(stream, *num_result_sets * 
2515                                    sizeof(**result_sets));
2516         memcpy(*result_sets, result_sets_l, 
2517                num_result_sets_l * sizeof(**result_sets));
2518         memcpy(*result_sets + num_result_sets_l, result_sets_r, 
2519                num_result_sets_r * sizeof(**result_sets));
2520
2521         if (!parent_op || parent_op->which != zop->which
2522             || (zop->which != Z_Operator_and &&
2523                 zop->which != Z_Operator_or))
2524         {
2525             /* parent node different from this one (or non-present) */
2526             /* we must combine result sets now */
2527             RSET rset;
2528             switch (zop->which)
2529             {
2530             case Z_Operator_and:
2531                 rset = rset_create_and(rset_nmem, kc,
2532                                        kc->scope,
2533                                        *num_result_sets, *result_sets);
2534                 break;
2535             case Z_Operator_or:
2536                 rset = rset_create_or(rset_nmem, kc,
2537                                       kc->scope, 0, /* termid */
2538                                       *num_result_sets, *result_sets);
2539                 break;
2540             case Z_Operator_and_not:
2541                 rset = rset_create_not(rset_nmem, kc,
2542                                        kc->scope,
2543                                        (*result_sets)[0],
2544                                        (*result_sets)[1]);
2545                 break;
2546             case Z_Operator_prox:
2547                 if (zop->u.prox->which != Z_ProximityOperator_known)
2548                 {
2549                     zebra_setError(zh, 
2550                                    YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2551                                    0);
2552                     return ZEBRA_FAIL;
2553                 }
2554                 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2555                 {
2556                     zebra_setError_zint(zh,
2557                                         YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2558                                         *zop->u.prox->u.known);
2559                     return ZEBRA_FAIL;
2560                 }
2561                 else
2562                 {
2563                     rset = rset_create_prox(rset_nmem, kc,
2564                                             kc->scope,
2565                                             *num_result_sets, *result_sets, 
2566                                             *zop->u.prox->ordered,
2567                                             (!zop->u.prox->exclusion ? 
2568                                              0 : *zop->u.prox->exclusion),
2569                                             *zop->u.prox->relationType,
2570                                             *zop->u.prox->distance );
2571                 }
2572                 break;
2573             default:
2574                 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
2575                 return ZEBRA_FAIL;
2576             }
2577             *num_result_sets = 1;
2578             *result_sets = nmem_malloc(stream, *num_result_sets * 
2579                                        sizeof(**result_sets));
2580             (*result_sets)[0] = rset;
2581         }
2582     }
2583     else if (zs->which == Z_RPNStructure_simple)
2584     {
2585         RSET rset;
2586         ZEBRA_RES res;
2587
2588         if (zs->u.simple->which == Z_Operand_APT)
2589         {
2590             yaz_log(YLOG_DEBUG, "rpn_search_APT");
2591             res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2592                                  attributeSet, stream, sort_sequence,
2593                                  num_bases, basenames, rset_nmem, &rset,
2594                                  kc);
2595             if (res != ZEBRA_OK)
2596                 return res;
2597         }
2598         else if (zs->u.simple->which == Z_Operand_resultSetId)
2599         {
2600             yaz_log(YLOG_DEBUG, "rpn_search_ref");
2601             rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2602             if (!rset)
2603             {
2604                 zebra_setError(zh, 
2605                                YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2606                                zs->u.simple->u.resultSetId);
2607                 return ZEBRA_FAIL;
2608             }
2609             rset_dup(rset);
2610         }
2611         else
2612         {
2613             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2614             return ZEBRA_FAIL;
2615         }
2616         *num_result_sets = 1;
2617         *result_sets = nmem_malloc(stream, *num_result_sets * 
2618                                    sizeof(**result_sets));
2619         (*result_sets)[0] = rset;
2620     }
2621     else
2622     {
2623         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2624         return ZEBRA_FAIL;
2625     }
2626     return ZEBRA_OK;
2627 }
2628
2629
2630
2631 /*
2632  * Local variables:
2633  * c-basic-offset: 4
2634  * indent-tabs-mode: nil
2635  * End:
2636  * vim: shiftwidth=4 tabstop=8 expandtab
2637  */
2638