Omit CVS Id. Update copyright year.
[idzebra-moved-to-github.git] / index / rpnsearch.c
1 /* This file is part of the Zebra server.
2    Copyright (C) 1995-2008 Index Data
3
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17
18 */
19
20 #include <stdio.h>
21 #include <assert.h>
22 #ifdef WIN32
23 #include <io.h>
24 #endif
25 #if HAVE_UNISTD_H
26 #include <unistd.h>
27 #endif
28 #include <ctype.h>
29
30 #include <yaz/diagbib1.h>
31 #include "index.h"
32 #include <zebra_xpath.h>
33 #include <attrfind.h>
34 #include <charmap.h>
35 #include <rset.h>
36
37 static int log_level_set = 0;
38 static int log_level_rpn = 0;
39
40 #define TERMSET_DISABLE 1
41
42 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
43 {
44     struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
45     const char **out = zebra_maps_input(p->zm, from, len, 0);
46 #if 0
47     if (out && *out)
48     {
49         const char *outp = *out;
50         yaz_log(YLOG_LOG, "---");
51         while (*outp)
52         {
53             yaz_log(YLOG_LOG, "%02X", *outp);
54             outp++;
55         }
56     }
57 #endif
58     return out;
59 }
60
61 void rpn_char_map_prepare(struct zebra_register *reg, zebra_map_t zm,
62                           struct rpn_char_map_info *map_info)
63 {
64     map_info->zm = zm;
65     if (zebra_maps_is_icu(zm))
66         dict_grep_cmap(reg->dict, 0, 0);
67     else
68         dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
69 }
70
71 #define TERM_COUNT        
72        
73 struct grep_info {        
74 #ifdef TERM_COUNT        
75     int *term_no;        
76 #endif        
77     ISAM_P *isam_p_buf;
78     int isam_p_size;        
79     int isam_p_indx;
80     int trunc_max;
81     ZebraHandle zh;
82     const char *index_type;
83     ZebraSet termset;
84 };        
85
86 static int add_isam_p(const char *name, const char *info,
87                       struct grep_info *p)
88 {
89     if (!log_level_set)
90     {
91         log_level_rpn = yaz_log_module_level("rpn");
92         log_level_set = 1;
93     }
94     /* we may have to stop this madness.. NOTE: -1 so that if
95        truncmax == trunxlimit we do *not* generate result sets */
96     if (p->isam_p_indx >= p->trunc_max - 1)
97         return 1;
98
99     if (p->isam_p_indx == p->isam_p_size)
100     {
101         ISAM_P *new_isam_p_buf;
102 #ifdef TERM_COUNT        
103         int *new_term_no;        
104 #endif
105         p->isam_p_size = 2*p->isam_p_size + 100;
106         new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
107                                             p->isam_p_size);
108         if (p->isam_p_buf)
109         {
110             memcpy(new_isam_p_buf, p->isam_p_buf,
111                    p->isam_p_indx * sizeof(*p->isam_p_buf));
112             xfree(p->isam_p_buf);
113         }
114         p->isam_p_buf = new_isam_p_buf;
115
116 #ifdef TERM_COUNT
117         new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
118         if (p->term_no)
119         {
120             memcpy(new_term_no, p->isam_p_buf,
121                    p->isam_p_indx * sizeof(*p->term_no));
122             xfree(p->term_no);
123         }
124         p->term_no = new_term_no;
125 #endif
126     }
127     assert(*info == sizeof(*p->isam_p_buf));
128     memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
129
130     if (p->termset)
131     {
132         const char *db;
133         char term_tmp[IT_MAX_WORD];
134         int ord = 0;
135         const char *index_name;
136         int len = key_SU_decode(&ord, (const unsigned char *) name);
137         
138         zebra_term_untrans (p->zh, p->index_type, term_tmp, name+len);
139         yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
140         zebraExplain_lookup_ord(p->zh->reg->zei,
141                                 ord, 0 /* index_type */, &db, &index_name);
142         yaz_log(log_level_rpn, "grep:  db=%s index=%s", db, index_name);
143         
144         resultSetAddTerm(p->zh, p->termset, name[len], db,
145                          index_name, term_tmp);
146     }
147     (p->isam_p_indx)++;
148     return 0;
149 }
150
/* Grep callback adapter: the opaque client pointer is the grep_info
   accumulator that add_isam_p() fills. */
static int grep_handle(char *name, const char *info, void *p)
{
    struct grep_info *gi = (struct grep_info *) p;

    return add_isam_p(name, info, gi);
}
155
156 static int term_pre(zebra_map_t zm, const char **src,
157                     const char *ct1, const char *ct2, int first)
158 {
159     const char *s1, *s0 = *src;
160     const char **map;
161
162     /* skip white space */
163     while (*s0)
164     {
165         if (ct1 && strchr(ct1, *s0))
166             break;
167         if (ct2 && strchr(ct2, *s0))
168             break;
169         s1 = s0;
170         map = zebra_maps_input(zm, &s1, strlen(s1), first);
171         if (**map != *CHR_SPACE)
172             break;
173         s0 = s1;
174     }
175     *src = s0;
176     return *s0;
177 }
178
179
/* Format in_buf[0..in_size) into out_buf as a debug dump: each byte
   becomes "HH:c  " (hex value, then the character itself or '?' when it is
   outside 32..126). When fewer than 20 bytes of out_buf remain, ".." is
   appended and the dump stops. out_size must exceed 20. */
static void esc_str(char *out_buf, size_t out_size,
                    const char *in_buf, int in_size)
{
    size_t used = 0;   /* current length of the string in out_buf */
    int idx = 0;

    assert(out_buf);
    assert(in_buf);
    assert(out_size > 20);
    out_buf[0] = '\0';
    while (idx < in_size)
    {
        int byte = in_buf[idx] & 0xff;
        int shown = (byte >= 32 && byte <= 126) ? byte : '?';

        used += (size_t) sprintf(out_buf + used, "%02X:%c  ", byte, shown);
        if (used > out_size-20)
        {
            strcat(out_buf, "..");
            break;
        }
        idx++;
    }
}
205
206 #define REGEX_CHARS " []()|.*+?!\"$"
207
208 static void add_non_space(const char *start, const char *end,
209                           WRBUF term_dict,
210                           WRBUF display_term,
211                           const char **map, int q_map_match)
212 {
213     size_t sz = end - start;
214
215     wrbuf_write(display_term, start, sz);
216     if (!q_map_match)
217     {
218         while (start < end)
219         {
220             if (strchr(REGEX_CHARS, *start))
221                 wrbuf_putc(term_dict, '\\');
222             wrbuf_putc(term_dict, *start);
223             start++;
224         }
225     }
226     else
227     {
228         char tmpbuf[80];
229         esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
230         
231         wrbuf_puts(term_dict, map[0]);
232     }
233 }
234
235
236 static int term_100_icu(zebra_map_t zm,
237                         const char **src, WRBUF term_dict, int space_split,
238                         WRBUF display_term,
239                         int right_trunc)
240 {
241     int i;
242     const char *res_buf = 0;
243     size_t res_len = 0;
244     const char *display_buf;
245     size_t display_len;
246     if (!zebra_map_tokenize_next(zm, &res_buf, &res_len,
247                                  &display_buf, &display_len))
248     {
249         *src += strlen(*src);
250         return 0;
251     }
252     wrbuf_write(display_term, display_buf, display_len);
253     if (right_trunc)
254     {
255         /* ICU sort keys seem to be of the form
256            basechars \x01 accents \x01 length
257            For now we'll just right truncate from basechars . This 
258            may give false hits due to accents not being used.
259         */
260         i = res_len;
261         while (--i >= 0 && res_buf[i] != '\x01')
262             ;
263         if (i > 0)
264         {
265             while (--i >= 0 && res_buf[i] != '\x01')
266                 ;
267         }
268         if (i == 0)
269         {  /* did not find base chars at all. Throw error */
270             return -1;
271         }
272         res_len = i; /* reduce res_len */
273     }
274     for (i = 0; i < res_len; i++)
275     {
276         if (strchr(REGEX_CHARS "\\", res_buf[i]))
277             wrbuf_putc(term_dict, '\\');
278         if (res_buf[i] < 32)
279             wrbuf_putc(term_dict, 1);
280             
281         wrbuf_putc(term_dict, res_buf[i]);
282     }
283     if (right_trunc)
284         wrbuf_puts(term_dict, ".*");
285     return 1;
286 }
287
288 /* term_100: handle term, where trunc = none(no operators at all) */
289 static int term_100(zebra_map_t zm,
290                     const char **src, WRBUF term_dict, int space_split,
291                     WRBUF display_term)
292 {
293     const char *s0;
294     const char **map;
295     int i = 0;
296
297     const char *space_start = 0;
298     const char *space_end = 0;
299
300     if (!term_pre(zm, src, NULL, NULL, !space_split))
301         return 0;
302     s0 = *src;
303     while (*s0)
304     {
305         const char *s1 = s0;
306         int q_map_match = 0;
307         map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
308         if (space_split)
309         {
310             if (**map == *CHR_SPACE)
311                 break;
312         }
313         else  /* complete subfield only. */
314         {
315             if (**map == *CHR_SPACE)
316             {   /* save space mapping for later  .. */
317                 space_start = s1;
318                 space_end = s0;
319                 continue;
320             }
321             else if (space_start)
322             {   /* reload last space */
323                 while (space_start < space_end)
324                 {
325                     if (strchr(REGEX_CHARS, *space_start))
326                         wrbuf_putc(term_dict, '\\');
327                     wrbuf_putc(display_term, *space_start);
328                     wrbuf_putc(term_dict, *space_start);
329                     space_start++;
330                                
331                 }
332                 /* and reset */
333                 space_start = space_end = 0;
334             }
335         }
336         i++;
337
338         add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
339     }
340     *src = s0;
341     return i;
342 }
343
344 /* term_101: handle term, where trunc = Process # */
345 static int term_101(zebra_map_t zm,
346                     const char **src, WRBUF term_dict, int space_split,
347                     WRBUF display_term)
348 {
349     const char *s0;
350     const char **map;
351     int i = 0;
352
353     if (!term_pre(zm, src, "#", "#", !space_split))
354         return 0;
355     s0 = *src;
356     while (*s0)
357     {
358         if (*s0 == '#')
359         {
360             i++;
361             wrbuf_puts(term_dict, ".*");
362             wrbuf_putc(display_term, *s0);
363             s0++;
364         }
365         else
366         {
367             const char *s1 = s0;
368             int q_map_match = 0;
369             map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
370             if (space_split && **map == *CHR_SPACE)
371                 break;
372
373             i++;
374             add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
375         }
376     }
377     *src = s0;
378     return i;
379 }
380
381 /* term_103: handle term, where trunc = re-2 (regular expressions) */
382 static int term_103(zebra_map_t zm, const char **src,
383                     WRBUF term_dict, int *errors, int space_split,
384                     WRBUF display_term)
385 {
386     int i = 0;
387     const char *s0;
388     const char **map;
389
390     if (!term_pre(zm, src, "^\\()[].*+?|", "(", !space_split))
391         return 0;
392     s0 = *src;
393     if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
394         isdigit(((const unsigned char *)s0)[1]))
395     {
396         *errors = s0[1] - '0';
397         s0 += 3;
398         if (*errors > 3)
399             *errors = 3;
400     }
401     while (*s0)
402     {
403         if (strchr("^\\()[].*+?|-", *s0))
404         {
405             wrbuf_putc(display_term, *s0);
406             wrbuf_putc(term_dict, *s0);
407             s0++;
408             i++;
409         }
410         else
411         {
412             const char *s1 = s0;
413             int q_map_match = 0;
414             map = zebra_maps_search(zm, &s0, strlen(s0),  &q_map_match);
415             if (space_split && **map == *CHR_SPACE)
416                 break;
417
418             i++;
419             add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
420         }
421     }
422     *src = s0;
423     
424     return i;
425 }
426
427 /* term_103: handle term, where trunc = re-1 (regular expressions) */
428 static int term_102(zebra_map_t zm, const char **src,
429                     WRBUF term_dict, int space_split, WRBUF display_term)
430 {
431     return term_103(zm, src, term_dict, NULL, space_split, display_term);
432 }
433
434
435 /* term_104: handle term, process # and ! */
436 static int term_104(zebra_map_t zm, const char **src, 
437                     WRBUF term_dict, int space_split, WRBUF display_term)
438 {
439     const char *s0;
440     const char **map;
441     int i = 0;
442
443     if (!term_pre(zm, src, "?*#", "?*#", !space_split))
444         return 0;
445     s0 = *src;
446     while (*s0)
447     {
448         if (*s0 == '?')
449         {
450             i++;
451             wrbuf_putc(display_term, *s0);
452             s0++;
453             if (*s0 >= '0' && *s0 <= '9')
454             {
455                 int limit = 0;
456                 while (*s0 >= '0' && *s0 <= '9')
457                 {
458                     limit = limit * 10 + (*s0 - '0');
459                     wrbuf_putc(display_term, *s0);
460                     s0++;
461                 }
462                 if (limit > 20)
463                     limit = 20;
464                 while (--limit >= 0)
465                 {
466                     wrbuf_puts(term_dict, ".?");
467                 }
468             }
469             else
470             {
471                 wrbuf_puts(term_dict, ".*");
472             }
473         }
474         else if (*s0 == '*')
475         {
476             i++;
477             wrbuf_puts(term_dict, ".*");
478             wrbuf_putc(display_term, *s0);
479             s0++;
480         }
481         else if (*s0 == '#')
482         {
483             i++;
484             wrbuf_puts(term_dict, ".");
485             wrbuf_putc(display_term, *s0);
486             s0++;
487         }
488         else
489         {
490             const char *s1 = s0;
491             int q_map_match = 0;
492             map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
493             if (space_split && **map == *CHR_SPACE)
494                 break;
495
496             i++;
497             add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
498         }
499     }
500     *src = s0;
501     return i;
502 }
503
504 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
505 static int term_105(zebra_map_t zm, const char **src, 
506                     WRBUF term_dict, int space_split,
507                     WRBUF display_term, int right_truncate)
508 {
509     const char *s0;
510     const char **map;
511     int i = 0;
512
513     if (!term_pre(zm, src, "*!", "*!", !space_split))
514         return 0;
515     s0 = *src;
516     while (*s0)
517     {
518         if (*s0 == '*')
519         {
520             i++;
521             wrbuf_puts(term_dict, ".*");
522             wrbuf_putc(display_term, *s0);
523             s0++;
524         }
525         else if (*s0 == '!')
526         {
527             i++;
528             wrbuf_putc(term_dict, '.');
529             wrbuf_putc(display_term, *s0);
530             s0++;
531         }
532         else
533         {
534             const char *s1 = s0;
535             int q_map_match = 0;
536             map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
537             if (space_split && **map == *CHR_SPACE)
538                 break;
539
540             i++;
541             add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
542         }
543     }
544     if (right_truncate)
545         wrbuf_puts(term_dict, ".*");
546     *src = s0;
547     return i;
548 }
549
550
551 /* gen_regular_rel - generate regular expression from relation
552  *  val:     border value (inclusive)
553  *  islt:    1 if <=; 0 if >=.
554  */
555 static void gen_regular_rel(WRBUF term_dict, int val, int islt)
556 {
557     char dst_buf[20*5*20]; /* assuming enough for expansion */
558     char *dst = dst_buf;
559     int dst_p;
560     int w, d, i;
561     int pos = 0;
562     char numstr[20];
563
564     yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
565     if (val >= 0)
566     {
567         if (islt)
568             strcpy(dst, "(-[0-9]+|(");
569         else
570             strcpy(dst, "((");
571     } 
572     else
573     {
574         if (!islt)
575         {
576             strcpy(dst, "([0-9]+|-(");
577             islt = 1;
578         }
579         else
580         {
581             strcpy(dst, "(-(");
582             islt = 0;
583         }
584         val = -val;
585     }
586     dst_p = strlen(dst);
587     sprintf(numstr, "%d", val);
588     for (w = strlen(numstr); --w >= 0; pos++)
589     {
590         d = numstr[w];
591         if (pos > 0)
592         {
593             if (islt)
594             {
595                 if (d == '0')
596                     continue;
597                 d--;
598             } 
599             else
600             {
601                 if (d == '9')
602                     continue;
603                 d++;
604             }
605         }
606         
607         strcpy(dst + dst_p, numstr);
608         dst_p = strlen(dst) - pos - 1;
609
610         if (islt)
611         {
612             if (d != '0')
613             {
614                 dst[dst_p++] = '[';
615                 dst[dst_p++] = '0';
616                 dst[dst_p++] = '-';
617                 dst[dst_p++] = d;
618                 dst[dst_p++] = ']';
619             }
620             else
621                 dst[dst_p++] = d;
622         }
623         else
624         {
625             if (d != '9')
626             { 
627                 dst[dst_p++] = '[';
628                 dst[dst_p++] = d;
629                 dst[dst_p++] = '-';
630                 dst[dst_p++] = '9';
631                 dst[dst_p++] = ']';
632             }
633             else
634                 dst[dst_p++] = d;
635         }
636         for (i = 0; i<pos; i++)
637         {
638             dst[dst_p++] = '[';
639             dst[dst_p++] = '0';
640             dst[dst_p++] = '-';
641             dst[dst_p++] = '9';
642             dst[dst_p++] = ']';
643         }
644         dst[dst_p++] = '|';
645     }
646     dst[dst_p] = '\0';
647     if (islt)
648     {
649         /* match everything less than 10^(pos-1) */
650         strcat(dst, "0*");
651         for (i = 1; i<pos; i++)
652             strcat(dst, "[0-9]?");
653     }
654     else
655     {
656         /* match everything greater than 10^pos */
657         for (i = 0; i <= pos; i++)
658             strcat(dst, "[0-9]");
659         strcat(dst, "[0-9]*");
660     }
661     strcat(dst, "))");
662     wrbuf_puts(term_dict, dst);
663 }
664
665 void string_rel_add_char(WRBUF term_p, WRBUF wsrc, int *indx)
666 {
667     const char *src = wrbuf_cstr(wsrc);
668     if (src[*indx] == '\\')
669     {
670         wrbuf_putc(term_p, src[*indx]);
671         (*indx)++;
672     }
673     wrbuf_putc(term_p, src[*indx]);
674     (*indx)++;
675 }
676
677 /*
678  *   >  abc     ([b-].*|a[c-].*|ab[d-].*|abc.+)
679  *              ([^-a].*|a[^-b].*ab[^-c].*|abc.+)
680  *   >= abc     ([b-].*|a[c-].*|ab[c-].*)
681  *              ([^-a].*|a[^-b].*|ab[c-].*)
682  *   <  abc     ([-0].*|a[-a].*|ab[-b].*)
683  *              ([^a-].*|a[^b-].*|ab[^c-].*)
684  *   <= abc     ([-0].*|a[-a].*|ab[-b].*|abc)
685  *              ([^a-].*|a[^b-].*|ab[^c-].*|abc)
686  */
687 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
688                            const char **term_sub, WRBUF term_dict,
689                            const Odr_oid *attributeSet,
690                            zebra_map_t zm, int space_split, 
691                            WRBUF display_term,
692                            int *error_code)
693 {
694     AttrType relation;
695     int relation_value;
696     int i;
697     WRBUF term_component = wrbuf_alloc();
698
699     attr_init_APT(&relation, zapt, 2);
700     relation_value = attr_find(&relation, NULL);
701
702     *error_code = 0;
703     yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
704     switch (relation_value)
705     {
706     case 1:
707         if (!term_100(zm, term_sub, term_component, space_split, display_term))
708         {
709             wrbuf_destroy(term_component);
710             return 0;
711         }
712         yaz_log(log_level_rpn, "Relation <");
713         
714         wrbuf_putc(term_dict, '(');
715         for (i = 0; i < wrbuf_len(term_component); )
716         {
717             int j = 0;
718             
719             if (i)
720                 wrbuf_putc(term_dict, '|');
721             while (j < i)
722                 string_rel_add_char(term_dict, term_component, &j);
723
724             wrbuf_putc(term_dict, '[');
725
726             wrbuf_putc(term_dict, '^');
727             
728             wrbuf_putc(term_dict, 1);
729             wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
730             
731             string_rel_add_char(term_dict, term_component, &i);
732             wrbuf_putc(term_dict, '-');
733             
734             wrbuf_putc(term_dict, ']');
735             wrbuf_putc(term_dict, '.');
736             wrbuf_putc(term_dict, '*');
737         }
738         wrbuf_putc(term_dict, ')');
739         break;
740     case 2:
741         if (!term_100(zm, term_sub, term_component, space_split, display_term))
742         {
743             wrbuf_destroy(term_component);
744             return 0;
745         }
746         yaz_log(log_level_rpn, "Relation <=");
747
748         wrbuf_putc(term_dict, '(');
749         for (i = 0; i < wrbuf_len(term_component); )
750         {
751             int j = 0;
752
753             while (j < i)
754                 string_rel_add_char(term_dict, term_component, &j);
755             wrbuf_putc(term_dict, '[');
756
757             wrbuf_putc(term_dict, '^');
758
759             wrbuf_putc(term_dict, 1);
760             wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
761
762             string_rel_add_char(term_dict, term_component, &i);
763             wrbuf_putc(term_dict, '-');
764
765             wrbuf_putc(term_dict, ']');
766             wrbuf_putc(term_dict, '.');
767             wrbuf_putc(term_dict, '*');
768
769             wrbuf_putc(term_dict, '|');
770         }
771         for (i = 0; i < wrbuf_len(term_component); )
772             string_rel_add_char(term_dict, term_component, &i);
773         wrbuf_putc(term_dict, ')');
774         break;
775     case 5:
776         if (!term_100(zm, term_sub, term_component, space_split, display_term))
777         {
778             wrbuf_destroy(term_component);
779             return 0;
780         }
781         yaz_log(log_level_rpn, "Relation >");
782
783         wrbuf_putc(term_dict, '(');
784         for (i = 0; i < wrbuf_len(term_component); )
785         {
786             int j = 0;
787
788             while (j < i)
789                 string_rel_add_char(term_dict, term_component, &j);
790             wrbuf_putc(term_dict, '[');
791             
792             wrbuf_putc(term_dict, '^');
793             wrbuf_putc(term_dict, '-');
794             string_rel_add_char(term_dict, term_component, &i);
795
796             wrbuf_putc(term_dict, ']');
797             wrbuf_putc(term_dict, '.');
798             wrbuf_putc(term_dict, '*');
799
800             wrbuf_putc(term_dict, '|');
801         }
802         for (i = 0; i < wrbuf_len(term_component); )
803             string_rel_add_char(term_dict, term_component, &i);
804         wrbuf_putc(term_dict, '.');
805         wrbuf_putc(term_dict, '+');
806         wrbuf_putc(term_dict, ')');
807         break;
808     case 4:
809         if (!term_100(zm, term_sub, term_component, space_split, display_term))
810         {
811             wrbuf_destroy(term_component);
812             return 0;
813         }
814         yaz_log(log_level_rpn, "Relation >=");
815
816         wrbuf_putc(term_dict, '(');
817         for (i = 0; i < wrbuf_len(term_component); )
818         {
819             int j = 0;
820
821             if (i)
822                 wrbuf_putc(term_dict, '|');
823             while (j < i)
824                 string_rel_add_char(term_dict, term_component, &j);
825             wrbuf_putc(term_dict, '[');
826
827             if (i < wrbuf_len(term_component)-1)
828             {
829                 wrbuf_putc(term_dict, '^');
830                 wrbuf_putc(term_dict, '-');
831                 string_rel_add_char(term_dict, term_component, &i);
832             }
833             else
834             {
835                 string_rel_add_char(term_dict, term_component, &i);
836                 wrbuf_putc(term_dict, '-');
837             }
838             wrbuf_putc(term_dict, ']');
839             wrbuf_putc(term_dict, '.');
840             wrbuf_putc(term_dict, '*');
841         }
842         wrbuf_putc(term_dict, ')');
843         break;
844     case 3:
845     case 102:
846     case -1:
847         if (!**term_sub)
848             return 1;
849         yaz_log(log_level_rpn, "Relation =");
850         if (!term_100(zm, term_sub, term_component, space_split, display_term))
851         {
852             wrbuf_destroy(term_component);
853             return 0;
854         }
855         wrbuf_puts(term_dict, "(");
856         wrbuf_puts(term_dict, wrbuf_cstr(term_component));
857         wrbuf_puts(term_dict, ")");
858         break;
859     case 103:
860         yaz_log(log_level_rpn, "Relation always matches");
861         /* skip to end of term (we don't care what it is) */
862         while (**term_sub != '\0')
863             (*term_sub)++;
864         break;
865     default:
866         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
867         wrbuf_destroy(term_component);
868         return 0;
869     }
870     wrbuf_destroy(term_component);
871     return 1;
872 }
873
874 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
875                              const char **term_sub, 
876                              WRBUF term_dict,
877                              const Odr_oid *attributeSet, NMEM stream,
878                              struct grep_info *grep_info,
879                              const char *index_type, int complete_flag,
880                              WRBUF display_term,
881                              const char *xpath_use,
882                              struct ord_list **ol,
883                              zebra_map_t zm);
884
885 ZEBRA_RES zebra_term_limits_APT(ZebraHandle zh,
886                                 Z_AttributesPlusTerm *zapt,
887                                 zint *hits_limit_value,
888                                 const char **term_ref_id_str,
889                                 NMEM nmem)
890 {
891     AttrType term_ref_id_attr;
892     AttrType hits_limit_attr;
893     int term_ref_id_int;
894  
895     attr_init_APT(&hits_limit_attr, zapt, 11);
896     *hits_limit_value  = attr_find(&hits_limit_attr, NULL);
897
898     attr_init_APT(&term_ref_id_attr, zapt, 10);
899     term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
900     if (term_ref_id_int >= 0)
901     {
902         char *res = nmem_malloc(nmem, 20);
903         sprintf(res, "%d", term_ref_id_int);
904         *term_ref_id_str = res;
905     }
906
907     /* no limit given ? */
908     if (*hits_limit_value == -1)
909     {
910         if (*term_ref_id_str)
911         {
912             /* use global if term_ref is present */
913             *hits_limit_value = zh->approx_limit;
914         }
915         else
916         {
917             /* no counting if term_ref is not present */
918             *hits_limit_value = 0;
919         }
920     }
921     else if (*hits_limit_value == 0)
922     {
923         /* 0 is the same as global limit */
924         *hits_limit_value = zh->approx_limit;
925     }
926     yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
927             *term_ref_id_str ? *term_ref_id_str : "none",
928             *hits_limit_value);
929     return ZEBRA_OK;
930 }
931
932 /** \brief search for term (which may be truncated)
933  */
934 static ZEBRA_RES search_term(ZebraHandle zh,
935                              Z_AttributesPlusTerm *zapt,
936                              const char **term_sub, 
937                              const Odr_oid *attributeSet, NMEM stream,
938                              struct grep_info *grep_info,
939                              const char *index_type, int complete_flag,
940                              const char *rank_type, 
941                              const char *xpath_use,
942                              NMEM rset_nmem,
943                              RSET *rset,
944                              struct rset_key_control *kc,
945                              zebra_map_t zm)
946 {
947     ZEBRA_RES res;
948     struct ord_list *ol;
949     zint hits_limit_value;
950     const char *term_ref_id_str = 0;
951     WRBUF term_dict = wrbuf_alloc();
952     WRBUF display_term = wrbuf_alloc();
953     *rset = 0;
954     zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
955                           stream);
956     grep_info->isam_p_indx = 0;
957     res = string_term(zh, zapt, term_sub, term_dict,
958                       attributeSet, stream, grep_info,
959                       index_type, complete_flag,
960                       display_term, xpath_use, &ol, zm);
961     wrbuf_destroy(term_dict);
962     if (res == ZEBRA_OK && *term_sub)
963     {
964         yaz_log(log_level_rpn, "term: %s", wrbuf_cstr(display_term));
965         *rset = rset_trunc(zh, grep_info->isam_p_buf,
966                            grep_info->isam_p_indx, wrbuf_buf(display_term),
967                            wrbuf_len(display_term), rank_type, 
968                            1 /* preserve pos */,
969                            zapt->term->which, rset_nmem,
970                            kc, kc->scope, ol, index_type, hits_limit_value,
971                            term_ref_id_str);
972         if (!*rset)
973             res = ZEBRA_FAIL;
974     }
975     wrbuf_destroy(display_term);
976     return res;
977 }
978
979 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
980                              const char **term_sub, 
981                              WRBUF term_dict,
982                              const Odr_oid *attributeSet, NMEM stream,
983                              struct grep_info *grep_info,
984                              const char *index_type, int complete_flag,
985                              WRBUF display_term,
986                              const char *xpath_use,
987                              struct ord_list **ol,
988                              zebra_map_t zm)
989 {
990     int r;
991     AttrType truncation;
992     int truncation_value;
993     const char *termp;
994     struct rpn_char_map_info rcmi;
995
996     int space_split = complete_flag ? 0 : 1;
997     int ord = -1;
998     int regex_range = 0;
999     int max_pos, prefix_len = 0;
1000     int relation_error;
1001     char ord_buf[32];
1002     int ord_len, i;
1003
1004     *ol = ord_list_create(stream);
1005
1006     rpn_char_map_prepare(zh->reg, zm, &rcmi);
1007     attr_init_APT(&truncation, zapt, 5);
1008     truncation_value = attr_find(&truncation, NULL);
1009     yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1010
1011     termp = *term_sub; /* start of term for each database */
1012     
1013     if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1014                           attributeSet, &ord) != ZEBRA_OK)
1015     {
1016         *term_sub = 0;
1017         return ZEBRA_FAIL;
1018     }
1019     
1020     wrbuf_rewind(term_dict); /* new dictionary regexp term */
1021     
1022     *ol = ord_list_append(stream, *ol, ord);
1023     ord_len = key_SU_encode(ord, ord_buf);
1024     
1025     wrbuf_putc(term_dict, '(');
1026     
1027     for (i = 0; i<ord_len; i++)
1028     {
1029         wrbuf_putc(term_dict, 1);  /* our internal regexp escape char */
1030         wrbuf_putc(term_dict, ord_buf[i]);
1031     }
1032     wrbuf_putc(term_dict, ')');
1033     
1034     prefix_len = wrbuf_len(term_dict);
1035
1036     if (zebra_maps_is_icu(zm))
1037     {
1038         /* ICU case */
1039         switch (truncation_value)
1040         {
1041         case -1:         /* not specified */
1042         case 100:        /* do not truncate */
1043             if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 0))
1044             {
1045                 *term_sub = 0;
1046                 return ZEBRA_OK;
1047             }
1048             break;
1049         case 1:          /* right truncation */
1050             if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 1))
1051             {
1052                 *term_sub = 0;
1053                 return ZEBRA_OK;
1054             }
1055             break;
1056         default:
1057             zebra_setError_zint(zh,
1058                                 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1059                                 truncation_value);
1060             return ZEBRA_FAIL;
1061         }
1062     }
1063     else
1064     {
1065         /* non-ICU case. using string.chr and friends */
1066         switch (truncation_value)
1067         {
1068         case -1:         /* not specified */
1069         case 100:        /* do not truncate */
1070             if (!string_relation(zh, zapt, &termp, term_dict,
1071                                  attributeSet,
1072                                  zm, space_split, display_term,
1073                                  &relation_error))
1074             {
1075                 if (relation_error)
1076                 {
1077                     zebra_setError(zh, relation_error, 0);
1078                     return ZEBRA_FAIL;
1079                 }
1080                 *term_sub = 0;
1081                 return ZEBRA_OK;
1082             }
1083             break;
1084         case 1:          /* right truncation */
1085             wrbuf_putc(term_dict, '(');
1086             if (!term_100(zm, &termp, term_dict, space_split, display_term))
1087             {
1088                 *term_sub = 0;
1089                 return ZEBRA_OK;
1090             }
1091             wrbuf_puts(term_dict, ".*)");
1092             break;
1093         case 2:          /* left truncation */
1094             wrbuf_puts(term_dict, "(.*");
1095             if (!term_100(zm, &termp, term_dict, space_split, display_term))
1096             {
1097                 *term_sub = 0;
1098                 return ZEBRA_OK;
1099             }
1100             wrbuf_putc(term_dict, ')');
1101             break;
1102         case 3:          /* left&right truncation */
1103             wrbuf_puts(term_dict, "(.*");
1104             if (!term_100(zm, &termp, term_dict, space_split, display_term))
1105             {
1106                 *term_sub = 0;
1107                 return ZEBRA_OK;
1108             }
1109             wrbuf_puts(term_dict, ".*)");
1110             break;
1111         case 101:        /* process # in term */
1112             wrbuf_putc(term_dict, '(');
1113             if (!term_101(zm, &termp, term_dict, space_split, display_term))
1114             {
1115                 *term_sub = 0;
1116                 return ZEBRA_OK;
1117             }
1118             wrbuf_puts(term_dict, ")");
1119             break;
1120         case 102:        /* Regexp-1 */
1121             wrbuf_putc(term_dict, '(');
1122             if (!term_102(zm, &termp, term_dict, space_split, display_term))
1123             {
1124                 *term_sub = 0;
1125                 return ZEBRA_OK;
1126             }
1127             wrbuf_putc(term_dict, ')');
1128             break;
1129         case 103:       /* Regexp-2 */
1130             regex_range = 1;
1131             wrbuf_putc(term_dict, '(');
1132             if (!term_103(zm, &termp, term_dict, &regex_range,
1133                           space_split, display_term))
1134             {
1135                 *term_sub = 0;
1136                 return ZEBRA_OK;
1137             }
1138             wrbuf_putc(term_dict, ')');
1139             break;
1140         case 104:        /* process # and ! in term */
1141             wrbuf_putc(term_dict, '(');
1142             if (!term_104(zm, &termp, term_dict, space_split, display_term))
1143             {
1144                 *term_sub = 0;
1145                 return ZEBRA_OK;
1146             }
1147             wrbuf_putc(term_dict, ')');
1148             break;
1149         case 105:        /* process * and ! in term */
1150             wrbuf_putc(term_dict, '(');
1151             if (!term_105(zm, &termp, term_dict, space_split, display_term, 1))
1152             {
1153                 *term_sub = 0;
1154                 return ZEBRA_OK;
1155             }
1156             wrbuf_putc(term_dict, ')');
1157             break;
1158         case 106:        /* process * and ! in term */
1159             wrbuf_putc(term_dict, '(');
1160             if (!term_105(zm, &termp, term_dict, space_split, display_term, 0))
1161             {
1162                 *term_sub = 0;
1163                 return ZEBRA_OK;
1164             }
1165             wrbuf_putc(term_dict, ')');
1166             break;
1167         default:
1168             zebra_setError_zint(zh,
1169                                 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1170                                 truncation_value);
1171             return ZEBRA_FAIL;
1172         }
1173     }
1174     if (1)
1175     {
1176         char buf[1000];
1177         const char *input = wrbuf_cstr(term_dict) + prefix_len;
1178         esc_str(buf, sizeof(buf), input, strlen(input));
1179     }
1180     {
1181         WRBUF pr_wr = wrbuf_alloc();
1182
1183         wrbuf_write_escaped(pr_wr, wrbuf_buf(term_dict), wrbuf_len(term_dict));
1184         yaz_log(YLOG_LOG, "dict_lookup_grep: %s", wrbuf_cstr(pr_wr));
1185         wrbuf_destroy(pr_wr);
1186     }
1187     r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), regex_range,
1188                          grep_info, &max_pos, 
1189                          ord_len /* number of "exact" chars */,
1190                          grep_handle);
1191     if (r == 1)
1192         zebra_set_partial_result(zh);
1193     else if (r)
1194         yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1195     *term_sub = termp;
1196     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1197     return ZEBRA_OK;
1198 }
1199
1200
1201
1202 static void grep_info_delete(struct grep_info *grep_info)
1203 {
1204 #ifdef TERM_COUNT
1205     xfree(grep_info->term_no);
1206 #endif
1207     xfree(grep_info->isam_p_buf);
1208 }
1209
1210 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1211                                    Z_AttributesPlusTerm *zapt,
1212                                    struct grep_info *grep_info,
1213                                    const char *index_type)
1214 {
1215 #ifdef TERM_COUNT
1216     grep_info->term_no = 0;
1217 #endif
1218     grep_info->trunc_max = atoi(res_get_def(zh->res, "truncmax", "10000"));
1219     grep_info->isam_p_size = 0;
1220     grep_info->isam_p_buf = NULL;
1221     grep_info->zh = zh;
1222     grep_info->index_type = index_type;
1223     grep_info->termset = 0;
1224     if (zapt)
1225     {
1226         AttrType truncmax;
1227         int truncmax_value;
1228
1229         attr_init_APT(&truncmax, zapt, 13);
1230         truncmax_value = attr_find(&truncmax, NULL);
1231         if (truncmax_value != -1)
1232             grep_info->trunc_max = truncmax_value;
1233     }
1234     if (zapt)
1235     {
1236         AttrType termset;
1237         int termset_value_numeric;
1238         const char *termset_value_string;
1239
1240         attr_init_APT(&termset, zapt, 8);
1241         termset_value_numeric =
1242             attr_find_ex(&termset, NULL, &termset_value_string);
1243         if (termset_value_numeric != -1)
1244         {
1245 #if TERMSET_DISABLE
1246             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1247             return ZEBRA_FAIL;
1248 #else
1249             char resname[32];
1250             const char *termset_name = 0;
1251             if (termset_value_numeric != -2)
1252             {
1253                 
1254                 sprintf(resname, "%d", termset_value_numeric);
1255                 termset_name = resname;
1256             }
1257             else
1258                 termset_name = termset_value_string;
1259             yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1260             grep_info->termset = resultSetAdd(zh, termset_name, 1);
1261             if (!grep_info->termset)
1262             {
1263                 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1264                 return ZEBRA_FAIL;
1265             }
1266 #endif
1267         }
1268     }
1269     return ZEBRA_OK;
1270 }
1271
1272 static ZEBRA_RES search_terms_chrmap(ZebraHandle zh,
1273                                      Z_AttributesPlusTerm *zapt,
1274                                      const char *termz,
1275                                      const Odr_oid *attributeSet,
1276                                      NMEM stream,
1277                                      const char *index_type, int complete_flag,
1278                                      const char *rank_type,
1279                                      const char *xpath_use,
1280                                      NMEM rset_nmem,
1281                                      RSET **result_sets, int *num_result_sets,
1282                                      struct rset_key_control *kc,
1283                                      zebra_map_t zm)
1284 {
1285     struct grep_info grep_info;
1286     const char *termp = termz;
1287     int alloc_sets = 0;
1288     
1289     *num_result_sets = 0;
1290     if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1291         return ZEBRA_FAIL;
1292     while (1)
1293     { 
1294         ZEBRA_RES res;
1295
1296         if (alloc_sets == *num_result_sets)
1297         {
1298             int add = 10;
1299             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1300                                               sizeof(*rnew));
1301             if (alloc_sets)
1302                 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1303             alloc_sets = alloc_sets + add;
1304             *result_sets = rnew;
1305         }
1306         res = search_term(zh, zapt, &termp, attributeSet,
1307                           stream, &grep_info,
1308                           index_type, complete_flag,
1309                           rank_type,
1310                           xpath_use, rset_nmem,
1311                           &(*result_sets)[*num_result_sets],
1312                           kc, zm);
1313         if (res != ZEBRA_OK)
1314         {
1315             int i;
1316             for (i = 0; i < *num_result_sets; i++)
1317                 rset_delete((*result_sets)[i]);
1318             grep_info_delete(&grep_info);
1319             return res;
1320         }
1321         if ((*result_sets)[*num_result_sets] == 0)
1322             break;
1323         (*num_result_sets)++;
1324
1325         if (!*termp)
1326             break;
1327     }
1328     grep_info_delete(&grep_info);
1329     return ZEBRA_OK;
1330 }
1331                                
1332 /**
1333    \brief Create result set(s) for list of terms
1334    \param zh Zebra Handle
1335    \param zapt Attributes Plust Term (RPN leaf)
1336    \param termz term as used in query but converted to UTF-8
1337    \param attributeSet default attribute set
1338    \param stream memory for result
1339    \param index_type register type ("w", "p",..)
1340    \param complete_flag whether it's phrases or not
1341    \param rank_type term flags for ranking
1342    \param xpath_use use attribute for X-Path (-1 for no X-path)
1343    \param rset_nmem memory for result sets
1344    \param result_sets output result set for each term in list (output)
1345    \param num_result_sets number of output result sets
1346    \param kc rset key control to be used for created result sets
1347 */
1348 static ZEBRA_RES search_terms_list(ZebraHandle zh,
1349                                    Z_AttributesPlusTerm *zapt,
1350                                    const char *termz,
1351                                    const Odr_oid *attributeSet,
1352                                    NMEM stream,
1353                                    const char *index_type, int complete_flag,
1354                                    const char *rank_type,
1355                                    const char *xpath_use,
1356                                    NMEM rset_nmem,
1357                                    RSET **result_sets, int *num_result_sets,
1358                                    struct rset_key_control *kc)
1359 {
1360     zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1361     if (zebra_maps_is_icu(zm))
1362         zebra_map_tokenize_start(zm, termz, strlen(termz));
1363     return search_terms_chrmap(zh, zapt, termz, attributeSet,
1364                                stream, index_type, complete_flag,
1365                                rank_type, xpath_use,
1366                                rset_nmem, result_sets, num_result_sets,
1367                                kc, zm);
1368 }
1369
1370
1371 /** \brief limit a search by position - returns result set
1372  */
1373 static ZEBRA_RES search_position(ZebraHandle zh,
1374                                  Z_AttributesPlusTerm *zapt,
1375                                  const Odr_oid *attributeSet,
1376                                  const char *index_type,
1377                                  NMEM rset_nmem,
1378                                  RSET *rset,
1379                                  struct rset_key_control *kc)
1380 {
1381     int position_value;
1382     AttrType position;
1383     int ord = -1;
1384     char ord_buf[32];
1385     char term_dict[100];
1386     int ord_len;
1387     char *val;
1388     ISAM_P isam_p;
1389     zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1390     
1391     attr_init_APT(&position, zapt, 3);
1392     position_value = attr_find(&position, NULL);
1393     switch(position_value)
1394     {
1395     case 3:
1396     case -1:
1397         return ZEBRA_OK;
1398     case 1:
1399     case 2:
1400         break;
1401     default:
1402         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1403                             position_value);
1404         return ZEBRA_FAIL;
1405     }
1406
1407
1408     if (!zebra_maps_is_first_in_field(zm))
1409     {
1410         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1411                             position_value);
1412         return ZEBRA_FAIL;
1413     }
1414
1415     if (zebra_apt_get_ord(zh, zapt, index_type, 0,
1416                           attributeSet, &ord) != ZEBRA_OK)
1417     {
1418         return ZEBRA_FAIL;
1419     }
1420     ord_len = key_SU_encode(ord, ord_buf);
1421     memcpy(term_dict, ord_buf, ord_len);
1422     strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1423     val = dict_lookup(zh->reg->dict, term_dict);
1424     if (val)
1425     {
1426         assert(*val == sizeof(ISAM_P));
1427         memcpy(&isam_p, val+1, sizeof(isam_p));
1428
1429         *rset = zebra_create_rset_isam(zh, rset_nmem, kc, kc->scope, 
1430                                        isam_p, 0);
1431     }
1432     return ZEBRA_OK;
1433 }
1434
1435 /** \brief returns result set for phrase search
1436  */
1437 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1438                                        Z_AttributesPlusTerm *zapt,
1439                                        const char *termz_org,
1440                                        const Odr_oid *attributeSet,
1441                                        NMEM stream,
1442                                        const char *index_type,
1443                                        int complete_flag,
1444                                        const char *rank_type,
1445                                        const char *xpath_use,
1446                                        NMEM rset_nmem,
1447                                        RSET *rset,
1448                                        struct rset_key_control *kc)
1449 {
1450     RSET *result_sets = 0;
1451     int num_result_sets = 0;
1452     ZEBRA_RES res =
1453         search_terms_list(zh, zapt, termz_org, attributeSet,
1454                           stream, index_type, complete_flag,
1455                           rank_type, xpath_use,
1456                           rset_nmem,
1457                           &result_sets, &num_result_sets, kc);
1458     
1459     if (res != ZEBRA_OK)
1460         return res;
1461
1462     if (num_result_sets > 0)
1463     {
1464         RSET first_set = 0;
1465         res = search_position(zh, zapt, attributeSet, 
1466                               index_type,
1467                               rset_nmem, &first_set,
1468                               kc);
1469         if (res != ZEBRA_OK)
1470         {
1471             int i;
1472             for (i = 0; i<num_result_sets; i++)
1473                 rset_delete(result_sets[i]);
1474             return res;
1475         }
1476         if (first_set)
1477         {
1478             RSET *nsets = nmem_malloc(stream,
1479                                       sizeof(RSET) * (num_result_sets+1));
1480             nsets[0] = first_set;
1481             memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1482             result_sets = nsets;
1483             num_result_sets++;
1484         }
1485     }
1486     if (num_result_sets == 0)
1487         *rset = rset_create_null(rset_nmem, kc, 0); 
1488     else if (num_result_sets == 1)
1489         *rset = result_sets[0];
1490     else
1491         *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1492                                  num_result_sets, result_sets,
1493                                  1 /* ordered */, 0 /* exclusion */,
1494                                  3 /* relation */, 1 /* distance */);
1495     if (!*rset)
1496         return ZEBRA_FAIL;
1497     return ZEBRA_OK;
1498 }
1499
1500 /** \brief returns result set for or-list search
1501  */
1502 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1503                                         Z_AttributesPlusTerm *zapt,
1504                                         const char *termz_org,
1505                                         const Odr_oid *attributeSet,
1506                                         NMEM stream,
1507                                         const char *index_type, 
1508                                         int complete_flag,
1509                                         const char *rank_type,
1510                                         const char *xpath_use,
1511                                         NMEM rset_nmem,
1512                                         RSET *rset,
1513                                         struct rset_key_control *kc)
1514 {
1515     RSET *result_sets = 0;
1516     int num_result_sets = 0;
1517     int i;
1518     ZEBRA_RES res =
1519         search_terms_list(zh, zapt, termz_org, attributeSet,
1520                           stream, index_type, complete_flag,
1521                           rank_type, xpath_use,
1522                           rset_nmem,
1523                           &result_sets, &num_result_sets, kc);
1524     if (res != ZEBRA_OK)
1525         return res;
1526
1527     for (i = 0; i<num_result_sets; i++)
1528     {
1529         RSET first_set = 0;
1530         res = search_position(zh, zapt, attributeSet, 
1531                               index_type,
1532                               rset_nmem, &first_set,
1533                               kc);
1534         if (res != ZEBRA_OK)
1535         {
1536             for (i = 0; i<num_result_sets; i++)
1537                 rset_delete(result_sets[i]);
1538             return res;
1539         }
1540
1541         if (first_set)
1542         {
1543             RSET tmp_set[2];
1544
1545             tmp_set[0] = first_set;
1546             tmp_set[1] = result_sets[i];
1547             
1548             result_sets[i] = rset_create_prox(
1549                 rset_nmem, kc, kc->scope,
1550                 2, tmp_set,
1551                 1 /* ordered */, 0 /* exclusion */,
1552                 3 /* relation */, 1 /* distance */);
1553         }
1554     }
1555     if (num_result_sets == 0)
1556         *rset = rset_create_null(rset_nmem, kc, 0); 
1557     else if (num_result_sets == 1)
1558         *rset = result_sets[0];
1559     else
1560         *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1561                                num_result_sets, result_sets);
1562     if (!*rset)
1563         return ZEBRA_FAIL;
1564     return ZEBRA_OK;
1565 }
1566
1567 /** \brief returns result set for and-list search
1568  */
1569 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1570                                          Z_AttributesPlusTerm *zapt,
1571                                          const char *termz_org,
1572                                          const Odr_oid *attributeSet,
1573                                          NMEM stream,
1574                                          const char *index_type, 
1575                                          int complete_flag,
1576                                          const char *rank_type, 
1577                                          const char *xpath_use,
1578                                          NMEM rset_nmem,
1579                                          RSET *rset,
1580                                          struct rset_key_control *kc)
1581 {
1582     RSET *result_sets = 0;
1583     int num_result_sets = 0;
1584     int i;
1585     ZEBRA_RES res =
1586         search_terms_list(zh, zapt, termz_org, attributeSet,
1587                           stream, index_type, complete_flag,
1588                           rank_type, xpath_use,
1589                           rset_nmem,
1590                           &result_sets, &num_result_sets,
1591                           kc);
1592     if (res != ZEBRA_OK)
1593         return res;
1594     for (i = 0; i<num_result_sets; i++)
1595     {
1596         RSET first_set = 0;
1597         res = search_position(zh, zapt, attributeSet, 
1598                               index_type,
1599                               rset_nmem, &first_set,
1600                               kc);
1601         if (res != ZEBRA_OK)
1602         {
1603             for (i = 0; i<num_result_sets; i++)
1604                 rset_delete(result_sets[i]);
1605             return res;
1606         }
1607
1608         if (first_set)
1609         {
1610             RSET tmp_set[2];
1611
1612             tmp_set[0] = first_set;
1613             tmp_set[1] = result_sets[i];
1614             
1615             result_sets[i] = rset_create_prox(
1616                 rset_nmem, kc, kc->scope,
1617                 2, tmp_set,
1618                 1 /* ordered */, 0 /* exclusion */,
1619                 3 /* relation */, 1 /* distance */);
1620         }
1621     }
1622
1623
1624     if (num_result_sets == 0)
1625         *rset = rset_create_null(rset_nmem, kc, 0); 
1626     else if (num_result_sets == 1)
1627         *rset = result_sets[0];
1628     else
1629         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1630                                 num_result_sets, result_sets);
1631     if (!*rset)
1632         return ZEBRA_FAIL;
1633     return ZEBRA_OK;
1634 }
1635
1636 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1637                             const char **term_sub,
1638                             WRBUF term_dict,
1639                             const Odr_oid *attributeSet,
1640                             struct grep_info *grep_info,
1641                             int *max_pos,
1642                             zebra_map_t zm,
1643                             WRBUF display_term,
1644                             int *error_code)
1645 {
1646     AttrType relation;
1647     int relation_value;
1648     int term_value;
1649     int r;
1650     WRBUF term_num = wrbuf_alloc();
1651
1652     *error_code = 0;
1653     attr_init_APT(&relation, zapt, 2);
1654     relation_value = attr_find(&relation, NULL);
1655
1656     yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1657
1658     switch (relation_value)
1659     {
1660     case 1:
1661         yaz_log(log_level_rpn, "Relation <");
1662         if (!term_100(zm, term_sub, term_num, 1, display_term))
1663         { 
1664             wrbuf_destroy(term_num);
1665             return 0;
1666         }
1667         term_value = atoi(wrbuf_cstr(term_num));
1668         gen_regular_rel(term_dict, term_value-1, 1);
1669         break;
1670     case 2:
1671         yaz_log(log_level_rpn, "Relation <=");
1672         if (!term_100(zm, term_sub, term_num, 1, display_term))
1673         {
1674             wrbuf_destroy(term_num);
1675             return 0;
1676         }
1677         term_value = atoi(wrbuf_cstr(term_num));
1678         gen_regular_rel(term_dict, term_value, 1);
1679         break;
1680     case 4:
1681         yaz_log(log_level_rpn, "Relation >=");
1682         if (!term_100(zm, term_sub, term_num, 1, display_term))
1683         {
1684             wrbuf_destroy(term_num);
1685             return 0;
1686         }
1687         term_value = atoi(wrbuf_cstr(term_num));
1688         gen_regular_rel(term_dict, term_value, 0);
1689         break;
1690     case 5:
1691         yaz_log(log_level_rpn, "Relation >");
1692         if (!term_100(zm, term_sub, term_num, 1, display_term))
1693         {
1694             wrbuf_destroy(term_num);
1695             return 0;
1696         }
1697         term_value = atoi(wrbuf_cstr(term_num));
1698         gen_regular_rel(term_dict, term_value+1, 0);
1699         break;
1700     case -1:
1701     case 3:
1702         yaz_log(log_level_rpn, "Relation =");
1703         if (!term_100(zm, term_sub, term_num, 1, display_term))
1704         {
1705             wrbuf_destroy(term_num);
1706             return 0; 
1707         }
1708         term_value = atoi(wrbuf_cstr(term_num));
1709         wrbuf_printf(term_dict, "(0*%d)", term_value);
1710         break;
1711     case 103:
1712         /* term_tmp untouched.. */
1713         while (**term_sub != '\0')
1714             (*term_sub)++;
1715         break;
1716     default:
1717         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1718         wrbuf_destroy(term_num); 
1719         return 0;
1720     }
1721     r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 
1722                          0, grep_info, max_pos, 0, grep_handle);
1723
1724     if (r == 1)
1725         zebra_set_partial_result(zh);
1726     else if (r)
1727         yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1728     yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1729     wrbuf_destroy(term_num);
1730     return 1;
1731 }
1732
1733 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1734                               const char **term_sub, 
1735                               WRBUF term_dict,
1736                               const Odr_oid *attributeSet, NMEM stream,
1737                               struct grep_info *grep_info,
1738                               const char *index_type, int complete_flag,
1739                               WRBUF display_term,
1740                               const char *xpath_use,
1741                               struct ord_list **ol)
1742 {
1743     const char *termp;
1744     struct rpn_char_map_info rcmi;
1745     int max_pos;
1746     int relation_error = 0;
1747     int ord, ord_len, i;
1748     char ord_buf[32];
1749     zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1750     
1751     *ol = ord_list_create(stream);
1752
1753     rpn_char_map_prepare(zh->reg, zm, &rcmi);
1754
1755     termp = *term_sub;
1756     
1757     if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1758                           attributeSet, &ord) != ZEBRA_OK)
1759     {
1760         return ZEBRA_FAIL;
1761     }
1762     
1763     wrbuf_rewind(term_dict);
1764     
1765     *ol = ord_list_append(stream, *ol, ord);
1766     
1767     ord_len = key_SU_encode(ord, ord_buf);
1768     
1769     wrbuf_putc(term_dict, '(');
1770     for (i = 0; i < ord_len; i++)
1771     {
1772         wrbuf_putc(term_dict, 1);
1773         wrbuf_putc(term_dict, ord_buf[i]);
1774     }
1775     wrbuf_putc(term_dict, ')');
1776     
1777     if (!numeric_relation(zh, zapt, &termp, term_dict,
1778                           attributeSet, grep_info, &max_pos, zm,
1779                           display_term, &relation_error))
1780     {
1781         if (relation_error)
1782         {
1783             zebra_setError(zh, relation_error, 0);
1784             return ZEBRA_FAIL;
1785         }
1786         *term_sub = 0;
1787         return ZEBRA_OK;
1788     }
1789     *term_sub = termp;
1790     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1791     return ZEBRA_OK;
1792 }
1793
1794                                  
1795 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1796                                         Z_AttributesPlusTerm *zapt,
1797                                         const char *termz,
1798                                         const Odr_oid *attributeSet,
1799                                         NMEM stream,
1800                                         const char *index_type, 
1801                                         int complete_flag,
1802                                         const char *rank_type, 
1803                                         const char *xpath_use,
1804                                         NMEM rset_nmem,
1805                                         RSET *rset,
1806                                         struct rset_key_control *kc)
1807 {
1808     const char *termp = termz;
1809     RSET *result_sets = 0;
1810     int num_result_sets = 0;
1811     ZEBRA_RES res;
1812     struct grep_info grep_info;
1813     int alloc_sets = 0;
1814     zint hits_limit_value;
1815     const char *term_ref_id_str = 0;
1816
1817     zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
1818                           stream);
1819
1820     yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1821     if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1822         return ZEBRA_FAIL;
1823     while (1)
1824     { 
1825         struct ord_list *ol;
1826         WRBUF term_dict = wrbuf_alloc();
1827         WRBUF display_term = wrbuf_alloc();
1828         if (alloc_sets == num_result_sets)
1829         {
1830             int add = 10;
1831             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1832                                               sizeof(*rnew));
1833             if (alloc_sets)
1834                 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1835             alloc_sets = alloc_sets + add;
1836             result_sets = rnew;
1837         }
1838         yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1839         grep_info.isam_p_indx = 0;
1840         res = numeric_term(zh, zapt, &termp, term_dict,
1841                            attributeSet, stream, &grep_info,
1842                            index_type, complete_flag,
1843                            display_term, xpath_use, &ol);
1844         wrbuf_destroy(term_dict);
1845         if (res == ZEBRA_FAIL || termp == 0)
1846         {
1847             wrbuf_destroy(display_term);
1848             break;
1849         }
1850         yaz_log(YLOG_DEBUG, "term: %s", wrbuf_cstr(display_term));
1851         result_sets[num_result_sets] =
1852             rset_trunc(zh, grep_info.isam_p_buf,
1853                        grep_info.isam_p_indx, wrbuf_buf(display_term),
1854                        wrbuf_len(display_term), rank_type,
1855                        0 /* preserve position */,
1856                        zapt->term->which, rset_nmem, 
1857                        kc, kc->scope, ol, index_type,
1858                        hits_limit_value,
1859                        term_ref_id_str);
1860         wrbuf_destroy(display_term);
1861         if (!result_sets[num_result_sets])
1862             break;
1863         num_result_sets++;
1864         if (!*termp)
1865             break;
1866     }
1867     grep_info_delete(&grep_info);
1868
1869     if (res != ZEBRA_OK)
1870         return res;
1871     if (num_result_sets == 0)
1872         *rset = rset_create_null(rset_nmem, kc, 0);
1873     else if (num_result_sets == 1)
1874         *rset = result_sets[0];
1875     else
1876         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1877                                 num_result_sets, result_sets);
1878     if (!*rset)
1879         return ZEBRA_FAIL;
1880     return ZEBRA_OK;
1881 }
1882
1883 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1884                                       Z_AttributesPlusTerm *zapt,
1885                                       const char *termz,
1886                                       const Odr_oid *attributeSet,
1887                                       NMEM stream,
1888                                       const char *rank_type, NMEM rset_nmem,
1889                                       RSET *rset,
1890                                       struct rset_key_control *kc)
1891 {
1892     Record rec;
1893     zint sysno = atozint(termz);
1894     
1895     if (sysno <= 0)
1896         sysno = 0;
1897     rec = rec_get(zh->reg->records, sysno);
1898     if (!rec)
1899         sysno = 0;
1900
1901     rec_free(&rec);
1902
1903     if (sysno <= 0)
1904     {
1905         *rset = rset_create_null(rset_nmem, kc, 0);
1906     }
1907     else
1908     {
1909         RSFD rsfd;
1910         struct it_key key;
1911         *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1912                                  res_get(zh->res, "setTmpDir"), 0);
1913         rsfd = rset_open(*rset, RSETF_WRITE);
1914         
1915         key.mem[0] = sysno;
1916         key.mem[1] = 1;
1917         key.len = 2;
1918         rset_write(rsfd, &key);
1919         rset_close(rsfd);
1920     }
1921     return ZEBRA_OK;
1922 }
1923
1924 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1925                                const Odr_oid *attributeSet, NMEM stream,
1926                                Z_SortKeySpecList *sort_sequence,
1927                                const char *rank_type,
1928                                NMEM rset_nmem,
1929                                RSET *rset,
1930                                struct rset_key_control *kc)
1931 {
1932     int i;
1933     int sort_relation_value;
1934     AttrType sort_relation_type;
1935     Z_SortKeySpec *sks;
1936     Z_SortKey *sk;
1937     char termz[20];
1938     
1939     attr_init_APT(&sort_relation_type, zapt, 7);
1940     sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
1941
1942     if (!sort_sequence->specs)
1943     {
1944         sort_sequence->num_specs = 10;
1945         sort_sequence->specs = (Z_SortKeySpec **)
1946             nmem_malloc(stream, sort_sequence->num_specs *
1947                         sizeof(*sort_sequence->specs));
1948         for (i = 0; i<sort_sequence->num_specs; i++)
1949             sort_sequence->specs[i] = 0;
1950     }
1951     if (zapt->term->which != Z_Term_general)
1952         i = 0;
1953     else
1954         i = atoi_n((char *) zapt->term->u.general->buf,
1955                    zapt->term->u.general->len);
1956     if (i >= sort_sequence->num_specs)
1957         i = 0;
1958     sprintf(termz, "%d", i);
1959
1960     sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1961     sks->sortElement = (Z_SortElement *)
1962         nmem_malloc(stream, sizeof(*sks->sortElement));
1963     sks->sortElement->which = Z_SortElement_generic;
1964     sk = sks->sortElement->u.generic = (Z_SortKey *)
1965         nmem_malloc(stream, sizeof(*sk));
1966     sk->which = Z_SortKey_sortAttributes;
1967     sk->u.sortAttributes = (Z_SortAttributes *)
1968         nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
1969
1970     sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet);
1971     sk->u.sortAttributes->list = zapt->attributes;
1972
1973     sks->sortRelation = (int *)
1974         nmem_malloc(stream, sizeof(*sks->sortRelation));
1975     if (sort_relation_value == 1)
1976         *sks->sortRelation = Z_SortKeySpec_ascending;
1977     else if (sort_relation_value == 2)
1978         *sks->sortRelation = Z_SortKeySpec_descending;
1979     else 
1980         *sks->sortRelation = Z_SortKeySpec_ascending;
1981
1982     sks->caseSensitivity = (int *)
1983         nmem_malloc(stream, sizeof(*sks->caseSensitivity));
1984     *sks->caseSensitivity = 0;
1985
1986     sks->which = Z_SortKeySpec_null;
1987     sks->u.null = odr_nullval ();
1988     sort_sequence->specs[i] = sks;
1989     *rset = rset_create_null(rset_nmem, kc, 0);
1990     return ZEBRA_OK;
1991 }
1992
1993
1994 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1995                            const Odr_oid *attributeSet,
1996                            struct xpath_location_step *xpath, int max,
1997                            NMEM mem)
1998 {
1999     const Odr_oid *curAttributeSet = attributeSet;
2000     AttrType use;
2001     const char *use_string = 0;
2002     
2003     attr_init_APT(&use, zapt, 1);
2004     attr_find_ex(&use, &curAttributeSet, &use_string);
2005
2006     if (!use_string || *use_string != '/')
2007         return -1;
2008
2009     return zebra_parse_xpath_str(use_string, xpath, max, mem);
2010 }
2011  
2012                
2013
2014 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
2015                         const char *index_type, const char *term, 
2016                         const char *xpath_use,
2017                         NMEM rset_nmem,
2018                         struct rset_key_control *kc)
2019 {
2020     struct grep_info grep_info;
2021     int ord = zebraExplain_lookup_attr_str(zh->reg->zei, 
2022                                            zinfo_index_category_index,
2023                                            index_type, xpath_use);
2024     if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, "0") == ZEBRA_FAIL)
2025         return rset_create_null(rset_nmem, kc, 0);
2026     
2027     if (ord < 0)
2028         return rset_create_null(rset_nmem, kc, 0);
2029     else
2030     {
2031         int i, r, max_pos;
2032         char ord_buf[32];
2033         RSET rset;
2034         WRBUF term_dict = wrbuf_alloc();
2035         int ord_len = key_SU_encode(ord, ord_buf);
2036         int term_type = Z_Term_characterString;
2037         const char *flags = "void";
2038
2039         wrbuf_putc(term_dict, '(');
2040         for (i = 0; i<ord_len; i++)
2041         {
2042             wrbuf_putc(term_dict, 1);
2043             wrbuf_putc(term_dict, ord_buf[i]);
2044         }
2045         wrbuf_putc(term_dict, ')');
2046         wrbuf_puts(term_dict, term);
2047         
2048         grep_info.isam_p_indx = 0;
2049         r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 0,
2050                              &grep_info, &max_pos, 0, grep_handle);
2051         yaz_log(YLOG_DEBUG, "%s %d positions", term,
2052                 grep_info.isam_p_indx);
2053         rset = rset_trunc(zh, grep_info.isam_p_buf,
2054                           grep_info.isam_p_indx, term, strlen(term),
2055                           flags, 1, term_type, rset_nmem,
2056                           kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2057                           0 /* term_ref_id_str */);
2058         grep_info_delete(&grep_info);
2059         wrbuf_destroy(term_dict);
2060         return rset;
2061     }
2062 }
2063
2064 static
2065 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2066                            NMEM stream, const char *rank_type, RSET rset,
2067                            int xpath_len, struct xpath_location_step *xpath,
2068                            NMEM rset_nmem,
2069                            RSET *rset_out,
2070                            struct rset_key_control *kc)
2071 {
2072     int i;
2073     int always_matches = rset ? 0 : 1;
2074
2075     if (xpath_len < 0)
2076     {
2077         *rset_out = rset;
2078         return ZEBRA_OK;
2079     }
2080
2081     yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2082     for (i = 0; i<xpath_len; i++)
2083     {
2084         yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2085
2086     }
2087
2088     /*
2089     //a    ->    a/.*
2090     //a/b  ->    b/a/.*
2091     /a     ->    a/
2092     /a/b   ->    b/a/
2093
2094     /      ->    none
2095
2096     a[@attr = value]/b[@other = othervalue]
2097
2098     /e/@a val      range(e/,range(@a,freetext(w,1015,val),@a),e/)
2099     /a/b val       range(b/a/,freetext(w,1016,val),b/a/)
2100     /a/b/@c val    range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2101     /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2102     /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2103     /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
2104       
2105     */
2106
2107     dict_grep_cmap(zh->reg->dict, 0, 0);
2108     
2109     {
2110         int level = xpath_len;
2111         int first_path = 1;
2112         
2113         while (--level >= 0)
2114         {
2115             WRBUF xpath_rev = wrbuf_alloc();
2116             int i;
2117             RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
2118
2119             for (i = level; i >= 1; --i)
2120             {
2121                 const char *cp = xpath[i].part;
2122                 if (*cp)
2123                 {
2124                     for (; *cp; cp++)
2125                     {
2126                         if (*cp == '*')
2127                             wrbuf_puts(xpath_rev, "[^/]*");
2128                         else if (*cp == ' ')
2129                             wrbuf_puts(xpath_rev, "\001 ");
2130                         else
2131                             wrbuf_putc(xpath_rev, *cp);
2132
2133                         /* wrbuf_putc does not null-terminate , but
2134                            wrbuf_puts below ensures it does.. so xpath_rev
2135                            is OK iff length is > 0 */
2136                     }
2137                     wrbuf_puts(xpath_rev, "/");
2138                 }
2139                 else if (i == 1)  /* // case */
2140                     wrbuf_puts(xpath_rev, ".*");
2141             }
2142             if (xpath[level].predicate &&
2143                 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2144                 xpath[level].predicate->u.relation.name[0])
2145             {
2146                 WRBUF wbuf = wrbuf_alloc();
2147                 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2148                 if (xpath[level].predicate->u.relation.value)
2149                 {
2150                     const char *cp = xpath[level].predicate->u.relation.value;
2151                     wrbuf_putc(wbuf, '=');
2152                     
2153                     while (*cp)
2154                     {
2155                         if (strchr(REGEX_CHARS, *cp))
2156                             wrbuf_putc(wbuf, '\\');
2157                         wrbuf_putc(wbuf, *cp);
2158                         cp++;
2159                     }
2160                 }
2161                 rset_attr = xpath_trunc(
2162                     zh, stream, "0", wrbuf_cstr(wbuf), 
2163                     ZEBRA_XPATH_ATTR_NAME, 
2164                     rset_nmem, kc);
2165                 wrbuf_destroy(wbuf);
2166             } 
2167             else 
2168             {
2169                 if (!first_path)
2170                 {
2171                     wrbuf_destroy(xpath_rev);
2172                     continue;
2173                 }
2174             }
2175             yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level, 
2176                     wrbuf_cstr(xpath_rev));
2177             if (wrbuf_len(xpath_rev))
2178             {
2179                 rset_start_tag = xpath_trunc(zh, stream, "0", 
2180                                              wrbuf_cstr(xpath_rev),
2181                                              ZEBRA_XPATH_ELM_BEGIN, 
2182                                              rset_nmem, kc);
2183                 if (always_matches)
2184                     rset = rset_start_tag;
2185                 else
2186                 {
2187                     rset_end_tag = xpath_trunc(zh, stream, "0", 
2188                                                wrbuf_cstr(xpath_rev),
2189                                                ZEBRA_XPATH_ELM_END, 
2190                                                rset_nmem, kc);
2191                     
2192                     rset = rset_create_between(rset_nmem, kc, kc->scope,
2193                                                rset_start_tag, rset,
2194                                                rset_end_tag, rset_attr);
2195                 }
2196             }
2197             wrbuf_destroy(xpath_rev);
2198             first_path = 0;
2199         }
2200     }
2201     *rset_out = rset;
2202     return ZEBRA_OK;
2203 }
2204
2205 #define MAX_XPATH_STEPS 10
2206
2207 static ZEBRA_RES rpn_search_database(ZebraHandle zh, 
2208                                      Z_AttributesPlusTerm *zapt,
2209                                      const Odr_oid *attributeSet, NMEM stream,
2210                                      Z_SortKeySpecList *sort_sequence,
2211                                      NMEM rset_nmem,
2212                                      RSET *rset,
2213                                      struct rset_key_control *kc);
2214
2215 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2216                                 const Odr_oid *attributeSet, NMEM stream,
2217                                 Z_SortKeySpecList *sort_sequence,
2218                                 int num_bases, const char **basenames, 
2219                                 NMEM rset_nmem,
2220                                 RSET *rset,
2221                                 struct rset_key_control *kc)
2222 {
2223     RSET *rsets = nmem_malloc(stream, num_bases * sizeof(*rsets));
2224     ZEBRA_RES res = ZEBRA_OK;
2225     int i;
2226     for (i = 0; i < num_bases; i++)
2227     {
2228
2229         if (zebraExplain_curDatabase(zh->reg->zei, basenames[i]))
2230         {
2231             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2232                            basenames[i]);
2233             res = ZEBRA_FAIL;
2234             break;
2235         }
2236         res = rpn_search_database(zh, zapt, attributeSet, stream,
2237                                   sort_sequence,
2238                                   rset_nmem, rsets+i, kc);
2239         if (res != ZEBRA_OK)
2240             break;
2241     }
2242     if (res != ZEBRA_OK)
2243     {   /* must clean up the already created sets */
2244         while (--i >= 0)
2245             rset_delete(rsets[i]);
2246         *rset = 0;
2247     }
2248     else 
2249     {
2250         if (num_bases == 1)
2251             *rset = rsets[0];
2252         else if (num_bases == 0)
2253             *rset = rset_create_null(rset_nmem, kc, 0); 
2254         else
2255             *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* TERMID */,
2256                                    num_bases, rsets);
2257     }
2258     return res;
2259 }
2260
2261 static ZEBRA_RES rpn_search_database(ZebraHandle zh, 
2262                                      Z_AttributesPlusTerm *zapt,
2263                                      const Odr_oid *attributeSet, NMEM stream,
2264                                      Z_SortKeySpecList *sort_sequence,
2265                                      NMEM rset_nmem,
2266                                      RSET *rset,
2267                                      struct rset_key_control *kc)
2268 {
2269     ZEBRA_RES res = ZEBRA_OK;
2270     const char *index_type;
2271     char *search_type = NULL;
2272     char rank_type[128];
2273     int complete_flag;
2274     int sort_flag;
2275     char termz[IT_MAX_WORD+1];
2276     int xpath_len;
2277     const char *xpath_use = 0;
2278     struct xpath_location_step xpath[MAX_XPATH_STEPS];
2279
2280     if (!log_level_set)
2281     {
2282         log_level_rpn = yaz_log_module_level("rpn");
2283         log_level_set = 1;
2284     }
2285     zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type,
2286                     rank_type, &complete_flag, &sort_flag);
2287     
2288     yaz_log(YLOG_DEBUG, "index_type=%s", index_type);
2289     yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2290     yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2291     yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2292
2293     if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2294         return ZEBRA_FAIL;
2295
2296     if (sort_flag)
2297         return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2298                              rank_type, rset_nmem, rset, kc);
2299     /* consider if an X-Path query is used */
2300     xpath_len = rpn_check_xpath(zh, zapt, attributeSet, 
2301                                 xpath, MAX_XPATH_STEPS, stream);
2302     if (xpath_len >= 0)
2303     {
2304         if (xpath[xpath_len-1].part[0] == '@') 
2305             xpath_use = ZEBRA_XPATH_ATTR_CDATA;  /* last step is attribute  */
2306         else
2307             xpath_use = ZEBRA_XPATH_CDATA;  /* searching for cdata */        
2308
2309         if (1)
2310         {
2311             AttrType relation;
2312             int relation_value;
2313
2314             attr_init_APT(&relation, zapt, 2);
2315             relation_value = attr_find(&relation, NULL);
2316
2317             if (relation_value == 103) /* alwaysmatches */
2318             {
2319                 *rset = 0; /* signal no "term" set */
2320                 return rpn_search_xpath(zh, stream, rank_type, *rset, 
2321                                         xpath_len, xpath, rset_nmem, rset, kc);
2322             }
2323         }
2324     }
2325
2326     /* search using one of the various search type strategies
2327        termz is our UTF-8 search term
2328        attributeSet is top-level default attribute set 
2329        stream is ODR for search
2330        reg_id is the register type
2331        complete_flag is 1 for complete subfield, 0 for incomplete
2332        xpath_use is use-attribute to be used for X-Path search, 0 for none
2333     */
2334     if (!strcmp(search_type, "phrase"))
2335     {
2336         res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2337                                     index_type, complete_flag, rank_type,
2338                                     xpath_use,
2339                                     rset_nmem,
2340                                     rset, kc);
2341     }
2342     else if (!strcmp(search_type, "and-list"))
2343     {
2344         res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2345                                       index_type, complete_flag, rank_type,
2346                                       xpath_use,
2347                                       rset_nmem,
2348                                       rset, kc);
2349     }
2350     else if (!strcmp(search_type, "or-list"))
2351     {
2352         res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2353                                      index_type, complete_flag, rank_type,
2354                                      xpath_use,
2355                                      rset_nmem,
2356                                      rset, kc);
2357     }
2358     else if (!strcmp(search_type, "local"))
2359     {
2360         res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2361                                    rank_type, rset_nmem, rset, kc);
2362     }
2363     else if (!strcmp(search_type, "numeric"))
2364     {
2365         res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2366                                      index_type, complete_flag, rank_type,
2367                                      xpath_use,
2368                                      rset_nmem,
2369                                      rset, kc);
2370     }
2371     else
2372     {
2373         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2374         res = ZEBRA_FAIL;
2375     }
2376     if (res != ZEBRA_OK)
2377         return res;
2378     if (!*rset)
2379         return ZEBRA_FAIL;
2380     return rpn_search_xpath(zh, stream, rank_type, *rset, 
2381                             xpath_len, xpath, rset_nmem, rset, kc);
2382 }
2383
2384 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2385                                       const Odr_oid *attributeSet, 
2386                                       NMEM stream, NMEM rset_nmem,
2387                                       Z_SortKeySpecList *sort_sequence,
2388                                       int num_bases, const char **basenames,
2389                                       RSET **result_sets, int *num_result_sets,
2390                                       Z_Operator *parent_op,
2391                                       struct rset_key_control *kc);
2392
2393 ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs,
2394                                    zint *approx_limit)
2395 {
2396     ZEBRA_RES res = ZEBRA_OK;
2397     if (zs->which == Z_RPNStructure_complex)
2398     {
2399         if (res == ZEBRA_OK)
2400             res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
2401                                            approx_limit);
2402         if (res == ZEBRA_OK)
2403             res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
2404                                            approx_limit);
2405     }
2406     else if (zs->which == Z_RPNStructure_simple)
2407     {
2408         if (zs->u.simple->which == Z_Operand_APT)
2409         {
2410             Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
2411             AttrType global_hits_limit_attr;
2412             int l;
2413             
2414             attr_init_APT(&global_hits_limit_attr, zapt, 12);
2415             
2416             l = attr_find(&global_hits_limit_attr, NULL);
2417             if (l != -1)
2418                 *approx_limit = l;
2419         }
2420     }
2421     return res;
2422 }
2423
2424 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2425                          const Odr_oid *attributeSet, 
2426                          NMEM stream, NMEM rset_nmem,
2427                          Z_SortKeySpecList *sort_sequence,
2428                          int num_bases, const char **basenames,
2429                          RSET *result_set)
2430 {
2431     RSET *result_sets = 0;
2432     int num_result_sets = 0;
2433     ZEBRA_RES res;
2434     struct rset_key_control *kc = zebra_key_control_create(zh);
2435
2436     res = rpn_search_structure(zh, zs, attributeSet,
2437                                stream, rset_nmem,
2438                                sort_sequence, 
2439                                num_bases, basenames,
2440                                &result_sets, &num_result_sets,
2441                                0 /* no parent op */,
2442                                kc);
2443     if (res != ZEBRA_OK)
2444     {
2445         int i;
2446         for (i = 0; i<num_result_sets; i++)
2447             rset_delete(result_sets[i]);
2448         *result_set = 0;
2449     }
2450     else
2451     {
2452         assert(num_result_sets == 1);
2453         assert(result_sets);
2454         assert(*result_sets);
2455         *result_set = *result_sets;
2456     }
2457     (*kc->dec)(kc);
2458     return res;
2459 }
2460
2461 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2462                                const Odr_oid *attributeSet, 
2463                                NMEM stream, NMEM rset_nmem,
2464                                Z_SortKeySpecList *sort_sequence,
2465                                int num_bases, const char **basenames,
2466                                RSET **result_sets, int *num_result_sets,
2467                                Z_Operator *parent_op,
2468                                struct rset_key_control *kc)
2469 {
2470     *num_result_sets = 0;
2471     if (zs->which == Z_RPNStructure_complex)
2472     {
2473         ZEBRA_RES res;
2474         Z_Operator *zop = zs->u.complex->roperator;
2475         RSET *result_sets_l = 0;
2476         int num_result_sets_l = 0;
2477         RSET *result_sets_r = 0;
2478         int num_result_sets_r = 0;
2479
2480         res = rpn_search_structure(zh, zs->u.complex->s1,
2481                                    attributeSet, stream, rset_nmem,
2482                                    sort_sequence,
2483                                    num_bases, basenames,
2484                                    &result_sets_l, &num_result_sets_l,
2485                                    zop, kc);
2486         if (res != ZEBRA_OK)
2487         {
2488             int i;
2489             for (i = 0; i<num_result_sets_l; i++)
2490                 rset_delete(result_sets_l[i]);
2491             return res;
2492         }
2493         res = rpn_search_structure(zh, zs->u.complex->s2,
2494                                    attributeSet, stream, rset_nmem,
2495                                    sort_sequence,
2496                                    num_bases, basenames,
2497                                    &result_sets_r, &num_result_sets_r,
2498                                    zop, kc);
2499         if (res != ZEBRA_OK)
2500         {
2501             int i;
2502             for (i = 0; i<num_result_sets_l; i++)
2503                 rset_delete(result_sets_l[i]);
2504             for (i = 0; i<num_result_sets_r; i++)
2505                 rset_delete(result_sets_r[i]);
2506             return res;
2507         }
2508
2509         /* make a new list of result for all children */
2510         *num_result_sets = num_result_sets_l + num_result_sets_r;
2511         *result_sets = nmem_malloc(stream, *num_result_sets * 
2512                                    sizeof(**result_sets));
2513         memcpy(*result_sets, result_sets_l, 
2514                num_result_sets_l * sizeof(**result_sets));
2515         memcpy(*result_sets + num_result_sets_l, result_sets_r, 
2516                num_result_sets_r * sizeof(**result_sets));
2517
2518         if (!parent_op || parent_op->which != zop->which
2519             || (zop->which != Z_Operator_and &&
2520                 zop->which != Z_Operator_or))
2521         {
2522             /* parent node different from this one (or non-present) */
2523             /* we must combine result sets now */
2524             RSET rset;
2525             switch (zop->which)
2526             {
2527             case Z_Operator_and:
2528                 rset = rset_create_and(rset_nmem, kc,
2529                                        kc->scope,
2530                                        *num_result_sets, *result_sets);
2531                 break;
2532             case Z_Operator_or:
2533                 rset = rset_create_or(rset_nmem, kc,
2534                                       kc->scope, 0, /* termid */
2535                                       *num_result_sets, *result_sets);
2536                 break;
2537             case Z_Operator_and_not:
2538                 rset = rset_create_not(rset_nmem, kc,
2539                                        kc->scope,
2540                                        (*result_sets)[0],
2541                                        (*result_sets)[1]);
2542                 break;
2543             case Z_Operator_prox:
2544                 if (zop->u.prox->which != Z_ProximityOperator_known)
2545                 {
2546                     zebra_setError(zh, 
2547                                    YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2548                                    0);
2549                     return ZEBRA_FAIL;
2550                 }
2551                 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2552                 {
2553                     zebra_setError_zint(zh,
2554                                         YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2555                                         *zop->u.prox->u.known);
2556                     return ZEBRA_FAIL;
2557                 }
2558                 else
2559                 {
2560                     rset = rset_create_prox(rset_nmem, kc,
2561                                             kc->scope,
2562                                             *num_result_sets, *result_sets, 
2563                                             *zop->u.prox->ordered,
2564                                             (!zop->u.prox->exclusion ? 
2565                                              0 : *zop->u.prox->exclusion),
2566                                             *zop->u.prox->relationType,
2567                                             *zop->u.prox->distance );
2568                 }
2569                 break;
2570             default:
2571                 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
2572                 return ZEBRA_FAIL;
2573             }
2574             *num_result_sets = 1;
2575             *result_sets = nmem_malloc(stream, *num_result_sets * 
2576                                        sizeof(**result_sets));
2577             (*result_sets)[0] = rset;
2578         }
2579     }
2580     else if (zs->which == Z_RPNStructure_simple)
2581     {
2582         RSET rset;
2583         ZEBRA_RES res;
2584
2585         if (zs->u.simple->which == Z_Operand_APT)
2586         {
2587             yaz_log(YLOG_DEBUG, "rpn_search_APT");
2588             res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2589                                  attributeSet, stream, sort_sequence,
2590                                  num_bases, basenames, rset_nmem, &rset,
2591                                  kc);
2592             if (res != ZEBRA_OK)
2593                 return res;
2594         }
2595         else if (zs->u.simple->which == Z_Operand_resultSetId)
2596         {
2597             yaz_log(YLOG_DEBUG, "rpn_search_ref");
2598             rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2599             if (!rset)
2600             {
2601                 zebra_setError(zh, 
2602                                YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2603                                zs->u.simple->u.resultSetId);
2604                 return ZEBRA_FAIL;
2605             }
2606             rset_dup(rset);
2607         }
2608         else
2609         {
2610             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2611             return ZEBRA_FAIL;
2612         }
2613         *num_result_sets = 1;
2614         *result_sets = nmem_malloc(stream, *num_result_sets * 
2615                                    sizeof(**result_sets));
2616         (*result_sets)[0] = rset;
2617     }
2618     else
2619     {
2620         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2621         return ZEBRA_FAIL;
2622     }
2623     return ZEBRA_OK;
2624 }
2625
2626
2627
2628 /*
2629  * Local variables:
2630  * c-basic-offset: 4
2631  * indent-tabs-mode: nil
2632  * End:
2633  * vim: shiftwidth=4 tabstop=8 expandtab
2634  */
2635