b0c0ce3cb1d487ebb3fc6cf1cae051d94f589ba8
[idzebra-moved-to-github.git] / index / rpnsearch.c
1 /* This file is part of the Zebra server.
2    Copyright (C) 1994-2009 Index Data
3
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17
18 */
19
20 #include <stdio.h>
21 #include <assert.h>
22 #ifdef WIN32
23 #include <io.h>
24 #endif
25 #if HAVE_UNISTD_H
26 #include <unistd.h>
27 #endif
28 #include <ctype.h>
29
30 #include <yaz/diagbib1.h>
31 #include "index.h"
32 #include <zebra_xpath.h>
33 #include <attrfind.h>
34 #include <charmap.h>
35 #include <rset.h>
36
37 static int log_level_set = 0; /* becomes 1 once log_level_rpn has been looked up (done in add_isam_p) */
38 static int log_level_rpn = 0; /* yaz log level of the "rpn" module; 0 until add_isam_p initialises it */
39
40 #define TERMSET_DISABLE 1 /* NOTE(review): not referenced in the visible part of this file */
41
42 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
43 {
44     struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
45     const char **out = zebra_maps_input(p->zm, from, len, 0);
46 #if 0
47     if (out && *out)
48     {
49         const char *outp = *out;
50         yaz_log(YLOG_LOG, "---");
51         while (*outp)
52         {
53             yaz_log(YLOG_LOG, "%02X", *outp);
54             outp++;
55         }
56     }
57 #endif
58     return out;
59 }
60
61 void rpn_char_map_prepare(struct zebra_register *reg, zebra_map_t zm,
62                           struct rpn_char_map_info *map_info)
63 {
64     map_info->zm = zm;
65     if (zebra_maps_is_icu(zm))
66         dict_grep_cmap(reg->dict, 0, 0);
67     else
68         dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
69 }
70
71 #define TERM_COUNT /* when defined, struct grep_info carries the term_no array */
72
73 struct grep_info { /* accumulator filled by add_isam_p() */
74 #ifdef TERM_COUNT
75     int *term_no; /* array reallocated together with isam_p_buf, with the same capacity */
76 #endif
77     ISAM_P *isam_p_buf; /* growable array of ISAM_P values, one per accepted entry */
78     int isam_p_size; /* allocated capacity of isam_p_buf, in elements */
79     int isam_p_indx; /* number of elements currently stored in isam_p_buf */
80     int trunc_max; /* add_isam_p() rejects entries once isam_p_indx >= trunc_max - 1 */
81     ZebraHandle zh; /* handle passed to zebra_term_untrans() and resultSetAddTerm() */
82     const char *index_type; /* passed to zebra_term_untrans() */
83     ZebraSet termset; /* when non-null, every accepted term is added to it via resultSetAddTerm() */
84 };
85
86 static int add_isam_p(const char *name, const char *info,
87                       struct grep_info *p)
88 {
89     if (!log_level_set)
90     {
91         log_level_rpn = yaz_log_module_level("rpn");
92         log_level_set = 1;
93     }
94     /* we may have to stop this madness.. NOTE: -1 so that if
95        truncmax == trunxlimit we do *not* generate result sets */
96     if (p->isam_p_indx >= p->trunc_max - 1)
97         return 1;
98
99     if (p->isam_p_indx == p->isam_p_size)
100     {
101         ISAM_P *new_isam_p_buf;
102 #ifdef TERM_COUNT        
103         int *new_term_no;        
104 #endif
105         p->isam_p_size = 2*p->isam_p_size + 100;
106         new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
107                                             p->isam_p_size);
108         if (p->isam_p_buf)
109         {
110             memcpy(new_isam_p_buf, p->isam_p_buf,
111                    p->isam_p_indx * sizeof(*p->isam_p_buf));
112             xfree(p->isam_p_buf);
113         }
114         p->isam_p_buf = new_isam_p_buf;
115
116 #ifdef TERM_COUNT
117         new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
118         if (p->term_no)
119         {
120             memcpy(new_term_no, p->isam_p_buf,
121                    p->isam_p_indx * sizeof(*p->term_no));
122             xfree(p->term_no);
123         }
124         p->term_no = new_term_no;
125 #endif
126     }
127     assert(*info == sizeof(*p->isam_p_buf));
128     memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
129
130     if (p->termset)
131     {
132         const char *db;
133         char term_tmp[IT_MAX_WORD];
134         int ord = 0;
135         const char *index_name;
136         int len = key_SU_decode(&ord, (const unsigned char *) name);
137         
138         zebra_term_untrans (p->zh, p->index_type, term_tmp, name+len);
139         yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
140         zebraExplain_lookup_ord(p->zh->reg->zei,
141                                 ord, 0 /* index_type */, &db, &index_name);
142         yaz_log(log_level_rpn, "grep:  db=%s index=%s", db, index_name);
143         
144         resultSetAddTerm(p->zh, p->termset, name[len], db,
145                          index_name, term_tmp);
146     }
147     (p->isam_p_indx)++;
148     return 0;
149 }
150
/* Callback adapter: converts the opaque client pointer back to a
   struct grep_info and passes the entry on to add_isam_p(). */
static int grep_handle(char *name, const char *info, void *p)
{
    struct grep_info *gi = (struct grep_info *) p;

    return add_isam_p(name, info, gi);
}
155
156 static int term_pre(zebra_map_t zm, const char **src,
157                     const char *ct1, const char *ct2, int first)
158 {
159     const char *s1, *s0 = *src;
160     const char **map;
161
162     /* skip white space */
163     while (*s0)
164     {
165         if (ct1 && strchr(ct1, *s0))
166             break;
167         if (ct2 && strchr(ct2, *s0))
168             break;
169         s1 = s0;
170         map = zebra_maps_input(zm, &s1, strlen(s1), first);
171         if (**map != *CHR_SPACE)
172             break;
173         s0 = s1;
174     }
175     *src = s0;
176     return *s0;
177 }
178
179
180 static void esc_str(char *out_buf, size_t out_size,
181                     const char *in_buf, int in_size)
182 {
183     int k;
184
185     assert(out_buf);
186     assert(in_buf);
187     assert(out_size > 20);
188     *out_buf = '\0';
189     for (k = 0; k<in_size; k++)
190     {
191         int c = in_buf[k] & 0xff;
192         int pc;
193         if (c < 32 || c > 126)
194             pc = '?';
195         else
196             pc = c;
197         sprintf(out_buf +strlen(out_buf), "%02X:%c  ", c, pc);
198         if (strlen(out_buf) > out_size-20)
199         {
200             strcat(out_buf, "..");
201             break;
202         }
203     }
204 }
205
206 #define REGEX_CHARS " ^[]()|.*+?!\"$" /* characters that get a backslash prefix when copied literally into term_dict */
207
208 static void add_non_space(const char *start, const char *end,
209                           WRBUF term_dict,
210                           WRBUF display_term,
211                           const char **map, int q_map_match)
212 {
213     size_t sz = end - start;
214
215     wrbuf_write(display_term, start, sz);
216     if (!q_map_match)
217     {
218         while (start < end)
219         {
220             if (strchr(REGEX_CHARS, *start))
221                 wrbuf_putc(term_dict, '\\');
222             wrbuf_putc(term_dict, *start);
223             start++;
224         }
225     }
226     else
227     {
228         char tmpbuf[80];
229         esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
230         
231         wrbuf_puts(term_dict, map[0]);
232     }
233 }
234
235
236 static int term_100_icu(zebra_map_t zm,
237                         const char **src, WRBUF term_dict, int space_split,
238                         WRBUF display_term,
239                         int right_trunc)
240 {
241     int i;
242     const char *res_buf = 0;
243     size_t res_len = 0;
244     const char *display_buf;
245     size_t display_len;
246     if (!zebra_map_tokenize_next(zm, &res_buf, &res_len,
247                                  &display_buf, &display_len))
248     {
249         *src += strlen(*src);
250         return 0;
251     }
252     wrbuf_write(display_term, display_buf, display_len);
253     if (right_trunc)
254     {
255         /* ICU sort keys seem to be of the form
256            basechars \x01 accents \x01 length
257            For now we'll just right truncate from basechars . This 
258            may give false hits due to accents not being used.
259         */
260         i = res_len;
261         while (--i >= 0 && res_buf[i] != '\x01')
262             ;
263         if (i > 0)
264         {
265             while (--i >= 0 && res_buf[i] != '\x01')
266                 ;
267         }
268         if (i == 0)
269         {  /* did not find base chars at all. Throw error */
270             return -1;
271         }
272         res_len = i; /* reduce res_len */
273     }
274     for (i = 0; i < res_len; i++)
275     {
276         if (strchr(REGEX_CHARS "\\", res_buf[i]))
277             wrbuf_putc(term_dict, '\\');
278         if (res_buf[i] < 32)
279             wrbuf_putc(term_dict, 1);
280             
281         wrbuf_putc(term_dict, res_buf[i]);
282     }
283     if (right_trunc)
284         wrbuf_puts(term_dict, ".*");
285     return 1;
286 }
287
288 /* term_100: handle term, where trunc = none(no operators at all) */
289 static int term_100(zebra_map_t zm,
290                     const char **src, WRBUF term_dict, int space_split,
291                     WRBUF display_term)
292 {
293     const char *s0;
294     const char **map;
295     int i = 0;
296
297     const char *space_start = 0;
298     const char *space_end = 0;
299
300     if (!term_pre(zm, src, NULL, NULL, !space_split))
301         return 0;
302     s0 = *src;
303     while (*s0)
304     {
305         const char *s1 = s0;
306         int q_map_match = 0;
307         map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
308         if (space_split)
309         {
310             if (**map == *CHR_SPACE)
311                 break;
312         }
313         else  /* complete subfield only. */
314         {
315             if (**map == *CHR_SPACE)
316             {   /* save space mapping for later  .. */
317                 space_start = s1;
318                 space_end = s0;
319                 continue;
320             }
321             else if (space_start)
322             {   /* reload last space */
323                 while (space_start < space_end)
324                 {
325                     if (strchr(REGEX_CHARS, *space_start))
326                         wrbuf_putc(term_dict, '\\');
327                     wrbuf_putc(display_term, *space_start);
328                     wrbuf_putc(term_dict, *space_start);
329                     space_start++;
330                                
331                 }
332                 /* and reset */
333                 space_start = space_end = 0;
334             }
335         }
336         i++;
337
338         add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
339     }
340     *src = s0;
341     return i;
342 }
343
344 /* term_101: handle term, where trunc = Process # */
345 static int term_101(zebra_map_t zm,
346                     const char **src, WRBUF term_dict, int space_split,
347                     WRBUF display_term)
348 {
349     const char *s0;
350     const char **map;
351     int i = 0;
352
353     if (!term_pre(zm, src, "#", "#", !space_split))
354         return 0;
355     s0 = *src;
356     while (*s0)
357     {
358         if (*s0 == '#')
359         {
360             i++;
361             wrbuf_puts(term_dict, ".*");
362             wrbuf_putc(display_term, *s0);
363             s0++;
364         }
365         else
366         {
367             const char *s1 = s0;
368             int q_map_match = 0;
369             map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
370             if (space_split && **map == *CHR_SPACE)
371                 break;
372
373             i++;
374             add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
375         }
376     }
377     *src = s0;
378     return i;
379 }
380
381 /* term_103: handle term, where trunc = re-2 (regular expressions) */
382 static int term_103(zebra_map_t zm, const char **src,
383                     WRBUF term_dict, int *errors, int space_split,
384                     WRBUF display_term)
385 {
386     int i = 0;
387     const char *s0;
388     const char **map;
389
390     if (!term_pre(zm, src, "^\\()[].*+?|", "(", !space_split))
391         return 0;
392     s0 = *src;
393     if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
394         isdigit(((const unsigned char *)s0)[1]))
395     {
396         *errors = s0[1] - '0';
397         s0 += 3;
398         if (*errors > 3)
399             *errors = 3;
400     }
401     while (*s0)
402     {
403         if (strchr("^\\()[].*+?|-", *s0))
404         {
405             wrbuf_putc(display_term, *s0);
406             wrbuf_putc(term_dict, *s0);
407             s0++;
408             i++;
409         }
410         else
411         {
412             const char *s1 = s0;
413             int q_map_match = 0;
414             map = zebra_maps_search(zm, &s0, strlen(s0),  &q_map_match);
415             if (space_split && **map == *CHR_SPACE)
416                 break;
417
418             i++;
419             add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
420         }
421     }
422     *src = s0;
423     
424     return i;
425 }
426
427 /* term_103: handle term, where trunc = re-1 (regular expressions) */
428 static int term_102(zebra_map_t zm, const char **src,
429                     WRBUF term_dict, int space_split, WRBUF display_term)
430 {
431     return term_103(zm, src, term_dict, NULL, space_split, display_term);
432 }
433
434
435 /* term_104: handle term, process # and ! */
436 static int term_104(zebra_map_t zm, const char **src, 
437                     WRBUF term_dict, int space_split, WRBUF display_term)
438 {
439     const char *s0;
440     const char **map;
441     int i = 0;
442
443     if (!term_pre(zm, src, "?*#", "?*#", !space_split))
444         return 0;
445     s0 = *src;
446     while (*s0)
447     {
448         if (*s0 == '?')
449         {
450             i++;
451             wrbuf_putc(display_term, *s0);
452             s0++;
453             if (*s0 >= '0' && *s0 <= '9')
454             {
455                 int limit = 0;
456                 while (*s0 >= '0' && *s0 <= '9')
457                 {
458                     limit = limit * 10 + (*s0 - '0');
459                     wrbuf_putc(display_term, *s0);
460                     s0++;
461                 }
462                 if (limit > 20)
463                     limit = 20;
464                 while (--limit >= 0)
465                 {
466                     wrbuf_puts(term_dict, ".?");
467                 }
468             }
469             else
470             {
471                 wrbuf_puts(term_dict, ".*");
472             }
473         }
474         else if (*s0 == '*')
475         {
476             i++;
477             wrbuf_puts(term_dict, ".*");
478             wrbuf_putc(display_term, *s0);
479             s0++;
480         }
481         else if (*s0 == '#')
482         {
483             i++;
484             wrbuf_puts(term_dict, ".");
485             wrbuf_putc(display_term, *s0);
486             s0++;
487         }
488         else
489         {
490             const char *s1 = s0;
491             int q_map_match = 0;
492             map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
493             if (space_split && **map == *CHR_SPACE)
494                 break;
495
496             i++;
497             add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
498         }
499     }
500     *src = s0;
501     return i;
502 }
503
504 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
505 static int term_105(zebra_map_t zm, const char **src, 
506                     WRBUF term_dict, int space_split,
507                     WRBUF display_term, int right_truncate)
508 {
509     const char *s0;
510     const char **map;
511     int i = 0;
512
513     if (!term_pre(zm, src, "*!", "*!", !space_split))
514         return 0;
515     s0 = *src;
516     while (*s0)
517     {
518         if (*s0 == '*')
519         {
520             i++;
521             wrbuf_puts(term_dict, ".*");
522             wrbuf_putc(display_term, *s0);
523             s0++;
524         }
525         else if (*s0 == '!')
526         {
527             i++;
528             wrbuf_putc(term_dict, '.');
529             wrbuf_putc(display_term, *s0);
530             s0++;
531         }
532         else
533         {
534             const char *s1 = s0;
535             int q_map_match = 0;
536             map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
537             if (space_split && **map == *CHR_SPACE)
538                 break;
539
540             i++;
541             add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
542         }
543     }
544     if (right_truncate)
545         wrbuf_puts(term_dict, ".*");
546     *src = s0;
547     return i;
548 }
549
550
551 /* gen_regular_rel - generate regular expression from relation
552  *  val:     border value (inclusive)
553  *  islt:    1 if <=; 0 if >=.
554  */
555 static void gen_regular_rel(WRBUF term_dict, int val, int islt)
556 {
557     char dst_buf[20*5*20]; /* assuming enough for expansion */
558     char *dst = dst_buf;
559     int dst_p;
560     int w, d, i;
561     int pos = 0;
562     char numstr[20];
563
564     yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
565     if (val >= 0)
566     {
567         if (islt)
568             strcpy(dst, "(-[0-9]+|(");
569         else
570             strcpy(dst, "((");
571     } 
572     else
573     {
574         if (!islt)
575         {
576             strcpy(dst, "([0-9]+|-(");
577             islt = 1;
578         }
579         else
580         {
581             strcpy(dst, "(-(");
582             islt = 0;
583         }
584         val = -val;
585     }
586     dst_p = strlen(dst);
587     sprintf(numstr, "%d", val);
588     for (w = strlen(numstr); --w >= 0; pos++)
589     {
590         d = numstr[w];
591         if (pos > 0)
592         {
593             if (islt)
594             {
595                 if (d == '0')
596                     continue;
597                 d--;
598             } 
599             else
600             {
601                 if (d == '9')
602                     continue;
603                 d++;
604             }
605         }
606         
607         strcpy(dst + dst_p, numstr);
608         dst_p = strlen(dst) - pos - 1;
609
610         if (islt)
611         {
612             if (d != '0')
613             {
614                 dst[dst_p++] = '[';
615                 dst[dst_p++] = '0';
616                 dst[dst_p++] = '-';
617                 dst[dst_p++] = d;
618                 dst[dst_p++] = ']';
619             }
620             else
621                 dst[dst_p++] = d;
622         }
623         else
624         {
625             if (d != '9')
626             { 
627                 dst[dst_p++] = '[';
628                 dst[dst_p++] = d;
629                 dst[dst_p++] = '-';
630                 dst[dst_p++] = '9';
631                 dst[dst_p++] = ']';
632             }
633             else
634                 dst[dst_p++] = d;
635         }
636         for (i = 0; i<pos; i++)
637         {
638             dst[dst_p++] = '[';
639             dst[dst_p++] = '0';
640             dst[dst_p++] = '-';
641             dst[dst_p++] = '9';
642             dst[dst_p++] = ']';
643         }
644         dst[dst_p++] = '|';
645     }
646     dst[dst_p] = '\0';
647     if (islt)
648     {
649         /* match everything less than 10^(pos-1) */
650         strcat(dst, "0*");
651         for (i = 1; i<pos; i++)
652             strcat(dst, "[0-9]?");
653     }
654     else
655     {
656         /* match everything greater than 10^pos */
657         for (i = 0; i <= pos; i++)
658             strcat(dst, "[0-9]");
659         strcat(dst, "[0-9]*");
660     }
661     strcat(dst, "))");
662     wrbuf_puts(term_dict, dst);
663 }
664
665 void string_rel_add_char(WRBUF term_p, WRBUF wsrc, int *indx)
666 {
667     const char *src = wrbuf_cstr(wsrc);
668     if (src[*indx] == '\\')
669     {
670         wrbuf_putc(term_p, src[*indx]);
671         (*indx)++;
672     }
673     wrbuf_putc(term_p, src[*indx]);
674     (*indx)++;
675 }
676
677 /*
678  *   >  abc     ([b-].*|a[c-].*|ab[d-].*|abc.+)
679  *              ([^-a].*|a[^-b].*ab[^-c].*|abc.+)
680  *   >= abc     ([b-].*|a[c-].*|ab[c-].*)
681  *              ([^-a].*|a[^-b].*|ab[c-].*)
682  *   <  abc     ([-0].*|a[-a].*|ab[-b].*)
683  *              ([^a-].*|a[^b-].*|ab[^c-].*)
684  *   <= abc     ([-0].*|a[-a].*|ab[-b].*|abc)
685  *              ([^a-].*|a[^b-].*|ab[^c-].*|abc)
686  */
687 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
688                            const char **term_sub, WRBUF term_dict,
689                            const Odr_oid *attributeSet,
690                            zebra_map_t zm, int space_split, 
691                            WRBUF display_term,
692                            int *error_code)
693 {
694     AttrType relation;
695     int relation_value;
696     int i;
697     WRBUF term_component = wrbuf_alloc();
698
699     attr_init_APT(&relation, zapt, 2);
700     relation_value = attr_find(&relation, NULL);
701
702     *error_code = 0;
703     yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
704     switch (relation_value)
705     {
706     case 1:
707         if (!term_100(zm, term_sub, term_component, space_split, display_term))
708         {
709             wrbuf_destroy(term_component);
710             return 0;
711         }
712         yaz_log(log_level_rpn, "Relation <");
713         
714         wrbuf_putc(term_dict, '(');
715         for (i = 0; i < wrbuf_len(term_component); )
716         {
717             int j = 0;
718             
719             if (i)
720                 wrbuf_putc(term_dict, '|');
721             while (j < i)
722                 string_rel_add_char(term_dict, term_component, &j);
723
724             wrbuf_putc(term_dict, '[');
725
726             wrbuf_putc(term_dict, '^');
727             
728             wrbuf_putc(term_dict, 1);
729             wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
730             
731             string_rel_add_char(term_dict, term_component, &i);
732             wrbuf_putc(term_dict, '-');
733             
734             wrbuf_putc(term_dict, ']');
735             wrbuf_putc(term_dict, '.');
736             wrbuf_putc(term_dict, '*');
737         }
738         wrbuf_putc(term_dict, ')');
739         break;
740     case 2:
741         if (!term_100(zm, term_sub, term_component, space_split, display_term))
742         {
743             wrbuf_destroy(term_component);
744             return 0;
745         }
746         yaz_log(log_level_rpn, "Relation <=");
747
748         wrbuf_putc(term_dict, '(');
749         for (i = 0; i < wrbuf_len(term_component); )
750         {
751             int j = 0;
752
753             while (j < i)
754                 string_rel_add_char(term_dict, term_component, &j);
755             wrbuf_putc(term_dict, '[');
756
757             wrbuf_putc(term_dict, '^');
758
759             wrbuf_putc(term_dict, 1);
760             wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
761
762             string_rel_add_char(term_dict, term_component, &i);
763             wrbuf_putc(term_dict, '-');
764
765             wrbuf_putc(term_dict, ']');
766             wrbuf_putc(term_dict, '.');
767             wrbuf_putc(term_dict, '*');
768
769             wrbuf_putc(term_dict, '|');
770         }
771         for (i = 0; i < wrbuf_len(term_component); )
772             string_rel_add_char(term_dict, term_component, &i);
773         wrbuf_putc(term_dict, ')');
774         break;
775     case 5:
776         if (!term_100(zm, term_sub, term_component, space_split, display_term))
777         {
778             wrbuf_destroy(term_component);
779             return 0;
780         }
781         yaz_log(log_level_rpn, "Relation >");
782
783         wrbuf_putc(term_dict, '(');
784         for (i = 0; i < wrbuf_len(term_component); )
785         {
786             int j = 0;
787
788             while (j < i)
789                 string_rel_add_char(term_dict, term_component, &j);
790             wrbuf_putc(term_dict, '[');
791             
792             wrbuf_putc(term_dict, '^');
793             wrbuf_putc(term_dict, '-');
794             string_rel_add_char(term_dict, term_component, &i);
795
796             wrbuf_putc(term_dict, ']');
797             wrbuf_putc(term_dict, '.');
798             wrbuf_putc(term_dict, '*');
799
800             wrbuf_putc(term_dict, '|');
801         }
802         for (i = 0; i < wrbuf_len(term_component); )
803             string_rel_add_char(term_dict, term_component, &i);
804         wrbuf_putc(term_dict, '.');
805         wrbuf_putc(term_dict, '+');
806         wrbuf_putc(term_dict, ')');
807         break;
808     case 4:
809         if (!term_100(zm, term_sub, term_component, space_split, display_term))
810         {
811             wrbuf_destroy(term_component);
812             return 0;
813         }
814         yaz_log(log_level_rpn, "Relation >=");
815
816         wrbuf_putc(term_dict, '(');
817         for (i = 0; i < wrbuf_len(term_component); )
818         {
819             int j = 0;
820
821             if (i)
822                 wrbuf_putc(term_dict, '|');
823             while (j < i)
824                 string_rel_add_char(term_dict, term_component, &j);
825             wrbuf_putc(term_dict, '[');
826
827             if (i < wrbuf_len(term_component)-1)
828             {
829                 wrbuf_putc(term_dict, '^');
830                 wrbuf_putc(term_dict, '-');
831                 string_rel_add_char(term_dict, term_component, &i);
832             }
833             else
834             {
835                 string_rel_add_char(term_dict, term_component, &i);
836                 wrbuf_putc(term_dict, '-');
837             }
838             wrbuf_putc(term_dict, ']');
839             wrbuf_putc(term_dict, '.');
840             wrbuf_putc(term_dict, '*');
841         }
842         wrbuf_putc(term_dict, ')');
843         break;
844     case 3:
845     case 102:
846     case -1:
847         if (!**term_sub)
848             return 1;
849         yaz_log(log_level_rpn, "Relation =");
850         if (!term_100(zm, term_sub, term_component, space_split, display_term))
851         {
852             wrbuf_destroy(term_component);
853             return 0;
854         }
855         wrbuf_puts(term_dict, "(");
856         wrbuf_puts(term_dict, wrbuf_cstr(term_component));
857         wrbuf_puts(term_dict, ")");
858         break;
859     case 103:
860         yaz_log(log_level_rpn, "Relation always matches");
861         /* skip to end of term (we don't care what it is) */
862         while (**term_sub != '\0')
863             (*term_sub)++;
864         break;
865     default:
866         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
867         wrbuf_destroy(term_component);
868         return 0;
869     }
870     wrbuf_destroy(term_component);
871     return 1;
872 }
873
874 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
875                              const char **term_sub, 
876                              WRBUF term_dict,
877                              const Odr_oid *attributeSet, NMEM stream,
878                              struct grep_info *grep_info,
879                              const char *index_type, int complete_flag,
880                              WRBUF display_term,
881                              const char *xpath_use,
882                              struct ord_list **ol,
883                              zebra_map_t zm); /* forward declaration: search_term() calls string_term() ahead of its definition */
884
885 ZEBRA_RES zebra_term_limits_APT(ZebraHandle zh,
886                                 Z_AttributesPlusTerm *zapt,
887                                 zint *hits_limit_value,
888                                 const char **term_ref_id_str,
889                                 NMEM nmem)
890 {
891     AttrType term_ref_id_attr;
892     AttrType hits_limit_attr;
893     int term_ref_id_int;
894     zint hits_limit_from_attr;
895  
896     attr_init_APT(&hits_limit_attr, zapt, 11);
897     hits_limit_from_attr  = attr_find(&hits_limit_attr, NULL);
898
899     attr_init_APT(&term_ref_id_attr, zapt, 10);
900     term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
901     if (term_ref_id_int >= 0)
902     {
903         char *res = nmem_malloc(nmem, 20);
904         sprintf(res, "%d", term_ref_id_int);
905         *term_ref_id_str = res;
906     }
907     if (hits_limit_from_attr != -1)
908         *hits_limit_value = hits_limit_from_attr;
909
910     yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
911             *term_ref_id_str ? *term_ref_id_str : "none",
912             *hits_limit_value);
913     return ZEBRA_OK;
914 }
915
916 /** \brief search for term (which may be truncated)
917  */
918 static ZEBRA_RES search_term(ZebraHandle zh,
919                              Z_AttributesPlusTerm *zapt,
920                              const char **term_sub, 
921                              const Odr_oid *attributeSet,
922                              zint hits_limit, NMEM stream,
923                              struct grep_info *grep_info,
924                              const char *index_type, int complete_flag,
925                              const char *rank_type, 
926                              const char *xpath_use,
927                              NMEM rset_nmem,
928                              RSET *rset,
929                              struct rset_key_control *kc,
930                              zebra_map_t zm)
931 {
932     ZEBRA_RES res;
933     struct ord_list *ol;
934     zint hits_limit_value = hits_limit;
935     const char *term_ref_id_str = 0;
936     WRBUF term_dict = wrbuf_alloc();
937     WRBUF display_term = wrbuf_alloc();
938     *rset = 0;
939     zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
940                           stream);
941     grep_info->isam_p_indx = 0;
942     res = string_term(zh, zapt, term_sub, term_dict,
943                       attributeSet, stream, grep_info,
944                       index_type, complete_flag,
945                       display_term, xpath_use, &ol, zm);
946     wrbuf_destroy(term_dict);
947     if (res == ZEBRA_OK && *term_sub)
948     {
949         yaz_log(log_level_rpn, "term: %s", wrbuf_cstr(display_term));
950         *rset = rset_trunc(zh, grep_info->isam_p_buf,
951                            grep_info->isam_p_indx, wrbuf_buf(display_term),
952                            wrbuf_len(display_term), rank_type, 
953                            1 /* preserve pos */,
954                            zapt->term->which, rset_nmem,
955                            kc, kc->scope, ol, index_type, hits_limit_value,
956                            term_ref_id_str);
957         if (!*rset)
958             res = ZEBRA_FAIL;
959     }
960     wrbuf_destroy(display_term);
961     return res;
962 }
963
964 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
965                              const char **term_sub, 
966                              WRBUF term_dict,
967                              const Odr_oid *attributeSet, NMEM stream,
968                              struct grep_info *grep_info,
969                              const char *index_type, int complete_flag,
970                              WRBUF display_term,
971                              const char *xpath_use,
972                              struct ord_list **ol,
973                              zebra_map_t zm)
974 {
975     int r;
976     AttrType truncation;
977     int truncation_value;
978     const char *termp;
979     struct rpn_char_map_info rcmi;
980
981     int space_split = complete_flag ? 0 : 1;
982     int ord = -1;
983     int regex_range = 0;
984     int max_pos, prefix_len = 0;
985     int relation_error;
986     char ord_buf[32];
987     int ord_len, i;
988
989     *ol = ord_list_create(stream);
990
991     rpn_char_map_prepare(zh->reg, zm, &rcmi);
992     attr_init_APT(&truncation, zapt, 5);
993     truncation_value = attr_find(&truncation, NULL);
994     yaz_log(log_level_rpn, "truncation value %d", truncation_value);
995
996     termp = *term_sub; /* start of term for each database */
997     
998     if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
999                           attributeSet, &ord) != ZEBRA_OK)
1000     {
1001         *term_sub = 0;
1002         return ZEBRA_FAIL;
1003     }
1004     
1005     wrbuf_rewind(term_dict); /* new dictionary regexp term */
1006     
1007     *ol = ord_list_append(stream, *ol, ord);
1008     ord_len = key_SU_encode(ord, ord_buf);
1009     
1010     wrbuf_putc(term_dict, '(');
1011     
1012     for (i = 0; i<ord_len; i++)
1013     {
1014         wrbuf_putc(term_dict, 1);  /* our internal regexp escape char */
1015         wrbuf_putc(term_dict, ord_buf[i]);
1016     }
1017     wrbuf_putc(term_dict, ')');
1018     
1019     prefix_len = wrbuf_len(term_dict);
1020
1021     if (zebra_maps_is_icu(zm))
1022     {
1023         int relation_value;
1024         AttrType relation;
1025         
1026         attr_init_APT(&relation, zapt, 2);
1027         relation_value = attr_find(&relation, NULL);
1028         if (relation_value == 103) /* always matches */
1029             termp += strlen(termp); /* move to end of term */
1030         else if (relation_value == 3 || relation_value == 102 || relation_value == -1)
1031         {
1032             /* ICU case */
1033             switch (truncation_value)
1034             {
1035             case -1:         /* not specified */
1036             case 100:        /* do not truncate */
1037                 if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 0))
1038                 {
1039                     *term_sub = 0;
1040                     return ZEBRA_OK;
1041                 }
1042                 break;
1043             case 1:          /* right truncation */
1044                 if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 1))
1045                 {
1046                     *term_sub = 0;
1047                     return ZEBRA_OK;
1048                 }
1049                 break;
1050             default:
1051                 zebra_setError_zint(zh,
1052                                     YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1053                                     truncation_value);
1054                 return ZEBRA_FAIL;
1055             }
1056         }
1057         else
1058         {
1059             zebra_setError_zint(zh,
1060                                 YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE,
1061                                 relation_value);
1062             return ZEBRA_FAIL;
1063         }
1064     }
1065     else
1066     {
1067         /* non-ICU case. using string.chr and friends */
1068         switch (truncation_value)
1069         {
1070         case -1:         /* not specified */
1071         case 100:        /* do not truncate */
1072             if (!string_relation(zh, zapt, &termp, term_dict,
1073                                  attributeSet,
1074                                  zm, space_split, display_term,
1075                                  &relation_error))
1076             {
1077                 if (relation_error)
1078                 {
1079                     zebra_setError(zh, relation_error, 0);
1080                     return ZEBRA_FAIL;
1081                 }
1082                 *term_sub = 0;
1083                 return ZEBRA_OK;
1084             }
1085             break;
1086         case 1:          /* right truncation */
1087             wrbuf_putc(term_dict, '(');
1088             if (!term_100(zm, &termp, term_dict, space_split, display_term))
1089             {
1090                 *term_sub = 0;
1091                 return ZEBRA_OK;
1092             }
1093             wrbuf_puts(term_dict, ".*)");
1094             break;
1095         case 2:          /* left truncation */
1096             wrbuf_puts(term_dict, "(.*");
1097             if (!term_100(zm, &termp, term_dict, space_split, display_term))
1098             {
1099                 *term_sub = 0;
1100                 return ZEBRA_OK;
1101             }
1102             wrbuf_putc(term_dict, ')');
1103             break;
1104         case 3:          /* left&right truncation */
1105             wrbuf_puts(term_dict, "(.*");
1106             if (!term_100(zm, &termp, term_dict, space_split, display_term))
1107             {
1108                 *term_sub = 0;
1109                 return ZEBRA_OK;
1110             }
1111             wrbuf_puts(term_dict, ".*)");
1112             break;
1113         case 101:        /* process # in term */
1114             wrbuf_putc(term_dict, '(');
1115             if (!term_101(zm, &termp, term_dict, space_split, display_term))
1116             {
1117                 *term_sub = 0;
1118                 return ZEBRA_OK;
1119             }
1120             wrbuf_puts(term_dict, ")");
1121             break;
1122         case 102:        /* Regexp-1 */
1123             wrbuf_putc(term_dict, '(');
1124             if (!term_102(zm, &termp, term_dict, space_split, display_term))
1125             {
1126                 *term_sub = 0;
1127                 return ZEBRA_OK;
1128             }
1129             wrbuf_putc(term_dict, ')');
1130             break;
1131         case 103:       /* Regexp-2 */
1132             regex_range = 1;
1133             wrbuf_putc(term_dict, '(');
1134             if (!term_103(zm, &termp, term_dict, &regex_range,
1135                           space_split, display_term))
1136             {
1137                 *term_sub = 0;
1138                 return ZEBRA_OK;
1139             }
1140             wrbuf_putc(term_dict, ')');
1141             break;
1142         case 104:        /* process # and ! in term */
1143             wrbuf_putc(term_dict, '(');
1144             if (!term_104(zm, &termp, term_dict, space_split, display_term))
1145             {
1146                 *term_sub = 0;
1147                 return ZEBRA_OK;
1148             }
1149             wrbuf_putc(term_dict, ')');
1150             break;
1151         case 105:        /* process * and ! in term */
1152             wrbuf_putc(term_dict, '(');
1153             if (!term_105(zm, &termp, term_dict, space_split, display_term, 1))
1154             {
1155                 *term_sub = 0;
1156                 return ZEBRA_OK;
1157             }
1158             wrbuf_putc(term_dict, ')');
1159             break;
1160         case 106:        /* process * and ! in term */
1161             wrbuf_putc(term_dict, '(');
1162             if (!term_105(zm, &termp, term_dict, space_split, display_term, 0))
1163             {
1164                 *term_sub = 0;
1165                 return ZEBRA_OK;
1166             }
1167             wrbuf_putc(term_dict, ')');
1168             break;
1169         default:
1170             zebra_setError_zint(zh,
1171                                 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1172                                 truncation_value);
1173             return ZEBRA_FAIL;
1174         }
1175     }
1176     if (1)
1177     {
1178         char buf[1000];
1179         const char *input = wrbuf_cstr(term_dict) + prefix_len;
1180         esc_str(buf, sizeof(buf), input, strlen(input));
1181     }
1182     {
1183         WRBUF pr_wr = wrbuf_alloc();
1184
1185         wrbuf_write_escaped(pr_wr, wrbuf_buf(term_dict), wrbuf_len(term_dict));
1186         yaz_log(YLOG_LOG, "dict_lookup_grep: %s", wrbuf_cstr(pr_wr));
1187         wrbuf_destroy(pr_wr);
1188     }
1189     r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), regex_range,
1190                          grep_info, &max_pos, 
1191                          ord_len /* number of "exact" chars */,
1192                          grep_handle);
1193     if (r == 1)
1194         zebra_set_partial_result(zh);
1195     else if (r)
1196         yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1197     *term_sub = termp;
1198     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1199     return ZEBRA_OK;
1200 }
1201
1202
1203
1204 static void grep_info_delete(struct grep_info *grep_info)
1205 {
1206 #ifdef TERM_COUNT
1207     xfree(grep_info->term_no);
1208 #endif
1209     xfree(grep_info->isam_p_buf);
1210 }
1211
1212 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1213                                    Z_AttributesPlusTerm *zapt,
1214                                    struct grep_info *grep_info,
1215                                    const char *index_type)
1216 {
1217 #ifdef TERM_COUNT
1218     grep_info->term_no = 0;
1219 #endif
1220     grep_info->trunc_max = atoi(res_get_def(zh->res, "truncmax", "10000"));
1221     grep_info->isam_p_size = 0;
1222     grep_info->isam_p_buf = NULL;
1223     grep_info->zh = zh;
1224     grep_info->index_type = index_type;
1225     grep_info->termset = 0;
1226     if (zapt)
1227     {
1228         AttrType truncmax;
1229         int truncmax_value;
1230
1231         attr_init_APT(&truncmax, zapt, 13);
1232         truncmax_value = attr_find(&truncmax, NULL);
1233         if (truncmax_value != -1)
1234             grep_info->trunc_max = truncmax_value;
1235     }
1236     if (zapt)
1237     {
1238         AttrType termset;
1239         int termset_value_numeric;
1240         const char *termset_value_string;
1241
1242         attr_init_APT(&termset, zapt, 8);
1243         termset_value_numeric =
1244             attr_find_ex(&termset, NULL, &termset_value_string);
1245         if (termset_value_numeric != -1)
1246         {
1247 #if TERMSET_DISABLE
1248             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1249             return ZEBRA_FAIL;
1250 #else
1251             char resname[32];
1252             const char *termset_name = 0;
1253             if (termset_value_numeric != -2)
1254             {
1255                 
1256                 sprintf(resname, "%d", termset_value_numeric);
1257                 termset_name = resname;
1258             }
1259             else
1260                 termset_name = termset_value_string;
1261             yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1262             grep_info->termset = resultSetAdd(zh, termset_name, 1);
1263             if (!grep_info->termset)
1264             {
1265                 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1266                 return ZEBRA_FAIL;
1267             }
1268 #endif
1269         }
1270     }
1271     return ZEBRA_OK;
1272 }
1273
1274 static ZEBRA_RES search_terms_chrmap(ZebraHandle zh,
1275                                      Z_AttributesPlusTerm *zapt,
1276                                      const char *termz,
1277                                      const Odr_oid *attributeSet,
1278                                      zint hits_limit,
1279                                      NMEM stream,
1280                                      const char *index_type, int complete_flag,
1281                                      const char *rank_type,
1282                                      const char *xpath_use,
1283                                      NMEM rset_nmem,
1284                                      RSET **result_sets, int *num_result_sets,
1285                                      struct rset_key_control *kc,
1286                                      zebra_map_t zm)
1287 {
1288     struct grep_info grep_info;
1289     const char *termp = termz;
1290     int alloc_sets = 0;
1291     
1292     *num_result_sets = 0;
1293     if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1294         return ZEBRA_FAIL;
1295     while (1)
1296     { 
1297         ZEBRA_RES res;
1298
1299         if (alloc_sets == *num_result_sets)
1300         {
1301             int add = 10;
1302             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1303                                               sizeof(*rnew));
1304             if (alloc_sets)
1305                 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1306             alloc_sets = alloc_sets + add;
1307             *result_sets = rnew;
1308         }
1309         res = search_term(zh, zapt, &termp, attributeSet, hits_limit,
1310                           stream, &grep_info,
1311                           index_type, complete_flag,
1312                           rank_type,
1313                           xpath_use, rset_nmem,
1314                           &(*result_sets)[*num_result_sets],
1315                           kc, zm);
1316         if (res != ZEBRA_OK)
1317         {
1318             int i;
1319             for (i = 0; i < *num_result_sets; i++)
1320                 rset_delete((*result_sets)[i]);
1321             grep_info_delete(&grep_info);
1322             return res;
1323         }
1324         if ((*result_sets)[*num_result_sets] == 0)
1325             break;
1326         (*num_result_sets)++;
1327
1328         if (!*termp)
1329             break;
1330     }
1331     grep_info_delete(&grep_info);
1332     return ZEBRA_OK;
1333 }
1334                                
1335 /**
1336    \brief Create result set(s) for list of terms
1337    \param zh Zebra Handle
1338    \param zapt Attributes Plust Term (RPN leaf)
1339    \param termz term as used in query but converted to UTF-8
1340    \param attributeSet default attribute set
1341    \param stream memory for result
1342    \param index_type register type ("w", "p",..)
1343    \param complete_flag whether it's phrases or not
1344    \param rank_type term flags for ranking
1345    \param xpath_use use attribute for X-Path (-1 for no X-path)
1346    \param rset_nmem memory for result sets
1347    \param result_sets output result set for each term in list (output)
1348    \param num_result_sets number of output result sets
1349    \param kc rset key control to be used for created result sets
1350 */
1351 static ZEBRA_RES search_terms_list(ZebraHandle zh,
1352                                    Z_AttributesPlusTerm *zapt,
1353                                    const char *termz,
1354                                    const Odr_oid *attributeSet,
1355                                    zint hits_limit,
1356                                    NMEM stream,
1357                                    const char *index_type, int complete_flag,
1358                                    const char *rank_type,
1359                                    const char *xpath_use,
1360                                    NMEM rset_nmem,
1361                                    RSET **result_sets, int *num_result_sets,
1362                                    struct rset_key_control *kc)
1363 {
1364     zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1365     if (zebra_maps_is_icu(zm))
1366         zebra_map_tokenize_start(zm, termz, strlen(termz));
1367     return search_terms_chrmap(zh, zapt, termz, attributeSet, hits_limit,
1368                                stream, index_type, complete_flag,
1369                                rank_type, xpath_use,
1370                                rset_nmem, result_sets, num_result_sets,
1371                                kc, zm);
1372 }
1373
1374
1375 /** \brief limit a search by position - returns result set
1376  */
1377 static ZEBRA_RES search_position(ZebraHandle zh,
1378                                  Z_AttributesPlusTerm *zapt,
1379                                  const Odr_oid *attributeSet,
1380                                  const char *index_type,
1381                                  NMEM rset_nmem,
1382                                  RSET *rset,
1383                                  struct rset_key_control *kc)
1384 {
1385     int position_value;
1386     AttrType position;
1387     int ord = -1;
1388     char ord_buf[32];
1389     char term_dict[100];
1390     int ord_len;
1391     char *val;
1392     ISAM_P isam_p;
1393     zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1394     
1395     attr_init_APT(&position, zapt, 3);
1396     position_value = attr_find(&position, NULL);
1397     switch(position_value)
1398     {
1399     case 3:
1400     case -1:
1401         return ZEBRA_OK;
1402     case 1:
1403     case 2:
1404         break;
1405     default:
1406         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1407                             position_value);
1408         return ZEBRA_FAIL;
1409     }
1410
1411
1412     if (!zebra_maps_is_first_in_field(zm))
1413     {
1414         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1415                             position_value);
1416         return ZEBRA_FAIL;
1417     }
1418
1419     if (zebra_apt_get_ord(zh, zapt, index_type, 0,
1420                           attributeSet, &ord) != ZEBRA_OK)
1421     {
1422         return ZEBRA_FAIL;
1423     }
1424     ord_len = key_SU_encode(ord, ord_buf);
1425     memcpy(term_dict, ord_buf, ord_len);
1426     strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1427     val = dict_lookup(zh->reg->dict, term_dict);
1428     if (val)
1429     {
1430         assert(*val == sizeof(ISAM_P));
1431         memcpy(&isam_p, val+1, sizeof(isam_p));
1432
1433         *rset = zebra_create_rset_isam(zh, rset_nmem, kc, kc->scope, 
1434                                        isam_p, 0);
1435     }
1436     return ZEBRA_OK;
1437 }
1438
1439 /** \brief returns result set for phrase search
1440  */
1441 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1442                                        Z_AttributesPlusTerm *zapt,
1443                                        const char *termz_org,
1444                                        const Odr_oid *attributeSet,
1445                                        zint hits_limit,
1446                                        NMEM stream,
1447                                        const char *index_type,
1448                                        int complete_flag,
1449                                        const char *rank_type,
1450                                        const char *xpath_use,
1451                                        NMEM rset_nmem,
1452                                        RSET *rset,
1453                                        struct rset_key_control *kc)
1454 {
1455     RSET *result_sets = 0;
1456     int num_result_sets = 0;
1457     ZEBRA_RES res =
1458         search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1459                           stream, index_type, complete_flag,
1460                           rank_type, xpath_use,
1461                           rset_nmem,
1462                           &result_sets, &num_result_sets, kc);
1463     
1464     if (res != ZEBRA_OK)
1465         return res;
1466
1467     if (num_result_sets > 0)
1468     {
1469         RSET first_set = 0;
1470         res = search_position(zh, zapt, attributeSet, 
1471                               index_type,
1472                               rset_nmem, &first_set,
1473                               kc);
1474         if (res != ZEBRA_OK)
1475         {
1476             int i;
1477             for (i = 0; i<num_result_sets; i++)
1478                 rset_delete(result_sets[i]);
1479             return res;
1480         }
1481         if (first_set)
1482         {
1483             RSET *nsets = nmem_malloc(stream,
1484                                       sizeof(RSET) * (num_result_sets+1));
1485             nsets[0] = first_set;
1486             memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1487             result_sets = nsets;
1488             num_result_sets++;
1489         }
1490     }
1491     if (num_result_sets == 0)
1492         *rset = rset_create_null(rset_nmem, kc, 0); 
1493     else if (num_result_sets == 1)
1494         *rset = result_sets[0];
1495     else
1496         *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1497                                  num_result_sets, result_sets,
1498                                  1 /* ordered */, 0 /* exclusion */,
1499                                  3 /* relation */, 1 /* distance */);
1500     if (!*rset)
1501         return ZEBRA_FAIL;
1502     return ZEBRA_OK;
1503 }
1504
1505 /** \brief returns result set for or-list search
1506  */
1507 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1508                                         Z_AttributesPlusTerm *zapt,
1509                                         const char *termz_org,
1510                                         const Odr_oid *attributeSet,
1511                                         zint hits_limit,
1512                                         NMEM stream,
1513                                         const char *index_type, 
1514                                         int complete_flag,
1515                                         const char *rank_type,
1516                                         const char *xpath_use,
1517                                         NMEM rset_nmem,
1518                                         RSET *rset,
1519                                         struct rset_key_control *kc)
1520 {
1521     RSET *result_sets = 0;
1522     int num_result_sets = 0;
1523     int i;
1524     ZEBRA_RES res =
1525         search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1526                           stream, index_type, complete_flag,
1527                           rank_type, xpath_use,
1528                           rset_nmem,
1529                           &result_sets, &num_result_sets, kc);
1530     if (res != ZEBRA_OK)
1531         return res;
1532
1533     for (i = 0; i<num_result_sets; i++)
1534     {
1535         RSET first_set = 0;
1536         res = search_position(zh, zapt, attributeSet, 
1537                               index_type,
1538                               rset_nmem, &first_set,
1539                               kc);
1540         if (res != ZEBRA_OK)
1541         {
1542             for (i = 0; i<num_result_sets; i++)
1543                 rset_delete(result_sets[i]);
1544             return res;
1545         }
1546
1547         if (first_set)
1548         {
1549             RSET tmp_set[2];
1550
1551             tmp_set[0] = first_set;
1552             tmp_set[1] = result_sets[i];
1553             
1554             result_sets[i] = rset_create_prox(
1555                 rset_nmem, kc, kc->scope,
1556                 2, tmp_set,
1557                 1 /* ordered */, 0 /* exclusion */,
1558                 3 /* relation */, 1 /* distance */);
1559         }
1560     }
1561     if (num_result_sets == 0)
1562         *rset = rset_create_null(rset_nmem, kc, 0); 
1563     else if (num_result_sets == 1)
1564         *rset = result_sets[0];
1565     else
1566         *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1567                                num_result_sets, result_sets);
1568     if (!*rset)
1569         return ZEBRA_FAIL;
1570     return ZEBRA_OK;
1571 }
1572
1573 /** \brief returns result set for and-list search
1574  */
1575 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1576                                          Z_AttributesPlusTerm *zapt,
1577                                          const char *termz_org,
1578                                          const Odr_oid *attributeSet,
1579                                          zint hits_limit,
1580                                          NMEM stream,
1581                                          const char *index_type, 
1582                                          int complete_flag,
1583                                          const char *rank_type, 
1584                                          const char *xpath_use,
1585                                          NMEM rset_nmem,
1586                                          RSET *rset,
1587                                          struct rset_key_control *kc)
1588 {
1589     RSET *result_sets = 0;
1590     int num_result_sets = 0;
1591     int i;
1592     ZEBRA_RES res =
1593         search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1594                           stream, index_type, complete_flag,
1595                           rank_type, xpath_use,
1596                           rset_nmem,
1597                           &result_sets, &num_result_sets,
1598                           kc);
1599     if (res != ZEBRA_OK)
1600         return res;
1601     for (i = 0; i<num_result_sets; i++)
1602     {
1603         RSET first_set = 0;
1604         res = search_position(zh, zapt, attributeSet, 
1605                               index_type,
1606                               rset_nmem, &first_set,
1607                               kc);
1608         if (res != ZEBRA_OK)
1609         {
1610             for (i = 0; i<num_result_sets; i++)
1611                 rset_delete(result_sets[i]);
1612             return res;
1613         }
1614
1615         if (first_set)
1616         {
1617             RSET tmp_set[2];
1618
1619             tmp_set[0] = first_set;
1620             tmp_set[1] = result_sets[i];
1621             
1622             result_sets[i] = rset_create_prox(
1623                 rset_nmem, kc, kc->scope,
1624                 2, tmp_set,
1625                 1 /* ordered */, 0 /* exclusion */,
1626                 3 /* relation */, 1 /* distance */);
1627         }
1628     }
1629
1630
1631     if (num_result_sets == 0)
1632         *rset = rset_create_null(rset_nmem, kc, 0); 
1633     else if (num_result_sets == 1)
1634         *rset = result_sets[0];
1635     else
1636         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1637                                 num_result_sets, result_sets);
1638     if (!*rset)
1639         return ZEBRA_FAIL;
1640     return ZEBRA_OK;
1641 }
1642
1643 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1644                             const char **term_sub,
1645                             WRBUF term_dict,
1646                             const Odr_oid *attributeSet,
1647                             struct grep_info *grep_info,
1648                             int *max_pos,
1649                             zebra_map_t zm,
1650                             WRBUF display_term,
1651                             int *error_code)
1652 {
1653     AttrType relation;
1654     int relation_value;
1655     int term_value;
1656     int r;
1657     WRBUF term_num = wrbuf_alloc();
1658
1659     *error_code = 0;
1660     attr_init_APT(&relation, zapt, 2);
1661     relation_value = attr_find(&relation, NULL);
1662
1663     yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1664
1665     switch (relation_value)
1666     {
1667     case 1:
1668         yaz_log(log_level_rpn, "Relation <");
1669         if (!term_100(zm, term_sub, term_num, 1, display_term))
1670         { 
1671             wrbuf_destroy(term_num);
1672             return 0;
1673         }
1674         term_value = atoi(wrbuf_cstr(term_num));
1675         gen_regular_rel(term_dict, term_value-1, 1);
1676         break;
1677     case 2:
1678         yaz_log(log_level_rpn, "Relation <=");
1679         if (!term_100(zm, term_sub, term_num, 1, display_term))
1680         {
1681             wrbuf_destroy(term_num);
1682             return 0;
1683         }
1684         term_value = atoi(wrbuf_cstr(term_num));
1685         gen_regular_rel(term_dict, term_value, 1);
1686         break;
1687     case 4:
1688         yaz_log(log_level_rpn, "Relation >=");
1689         if (!term_100(zm, term_sub, term_num, 1, display_term))
1690         {
1691             wrbuf_destroy(term_num);
1692             return 0;
1693         }
1694         term_value = atoi(wrbuf_cstr(term_num));
1695         gen_regular_rel(term_dict, term_value, 0);
1696         break;
1697     case 5:
1698         yaz_log(log_level_rpn, "Relation >");
1699         if (!term_100(zm, term_sub, term_num, 1, display_term))
1700         {
1701             wrbuf_destroy(term_num);
1702             return 0;
1703         }
1704         term_value = atoi(wrbuf_cstr(term_num));
1705         gen_regular_rel(term_dict, term_value+1, 0);
1706         break;
1707     case -1:
1708     case 3:
1709         yaz_log(log_level_rpn, "Relation =");
1710         if (!term_100(zm, term_sub, term_num, 1, display_term))
1711         {
1712             wrbuf_destroy(term_num);
1713             return 0; 
1714         }
1715         term_value = atoi(wrbuf_cstr(term_num));
1716         wrbuf_printf(term_dict, "(0*%d)", term_value);
1717         break;
1718     case 103:
1719         /* term_tmp untouched.. */
1720         while (**term_sub != '\0')
1721             (*term_sub)++;
1722         break;
1723     default:
1724         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1725         wrbuf_destroy(term_num); 
1726         return 0;
1727     }
1728     r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 
1729                          0, grep_info, max_pos, 0, grep_handle);
1730
1731     if (r == 1)
1732         zebra_set_partial_result(zh);
1733     else if (r)
1734         yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1735     yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1736     wrbuf_destroy(term_num);
1737     return 1;
1738 }
1739
1740 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1741                               const char **term_sub, 
1742                               WRBUF term_dict,
1743                               const Odr_oid *attributeSet, NMEM stream,
1744                               struct grep_info *grep_info,
1745                               const char *index_type, int complete_flag,
1746                               WRBUF display_term,
1747                               const char *xpath_use,
1748                               struct ord_list **ol)
1749 {
1750     const char *termp;
1751     struct rpn_char_map_info rcmi;
1752     int max_pos;
1753     int relation_error = 0;
1754     int ord, ord_len, i;
1755     char ord_buf[32];
1756     zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1757     
1758     *ol = ord_list_create(stream);
1759
1760     rpn_char_map_prepare(zh->reg, zm, &rcmi);
1761
1762     termp = *term_sub;
1763     
1764     if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1765                           attributeSet, &ord) != ZEBRA_OK)
1766     {
1767         return ZEBRA_FAIL;
1768     }
1769     
1770     wrbuf_rewind(term_dict);
1771     
1772     *ol = ord_list_append(stream, *ol, ord);
1773     
1774     ord_len = key_SU_encode(ord, ord_buf);
1775     
1776     wrbuf_putc(term_dict, '(');
1777     for (i = 0; i < ord_len; i++)
1778     {
1779         wrbuf_putc(term_dict, 1);
1780         wrbuf_putc(term_dict, ord_buf[i]);
1781     }
1782     wrbuf_putc(term_dict, ')');
1783     
1784     if (!numeric_relation(zh, zapt, &termp, term_dict,
1785                           attributeSet, grep_info, &max_pos, zm,
1786                           display_term, &relation_error))
1787     {
1788         if (relation_error)
1789         {
1790             zebra_setError(zh, relation_error, 0);
1791             return ZEBRA_FAIL;
1792         }
1793         *term_sub = 0;
1794         return ZEBRA_OK;
1795     }
1796     *term_sub = termp;
1797     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1798     return ZEBRA_OK;
1799 }
1800
1801                                  
1802 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1803                                         Z_AttributesPlusTerm *zapt,
1804                                         const char *termz,
1805                                         const Odr_oid *attributeSet,
1806                                         NMEM stream,
1807                                         const char *index_type, 
1808                                         int complete_flag,
1809                                         const char *rank_type, 
1810                                         const char *xpath_use,
1811                                         NMEM rset_nmem,
1812                                         RSET *rset,
1813                                         struct rset_key_control *kc)
1814 {
1815     const char *termp = termz;
1816     RSET *result_sets = 0;
1817     int num_result_sets = 0;
1818     ZEBRA_RES res;
1819     struct grep_info grep_info;
1820     int alloc_sets = 0;
1821     zint hits_limit_value;
1822     const char *term_ref_id_str = 0;
1823
1824     zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
1825                           stream);
1826
1827     yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1828     if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1829         return ZEBRA_FAIL;
1830     while (1)
1831     { 
1832         struct ord_list *ol;
1833         WRBUF term_dict = wrbuf_alloc();
1834         WRBUF display_term = wrbuf_alloc();
1835         if (alloc_sets == num_result_sets)
1836         {
1837             int add = 10;
1838             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1839                                               sizeof(*rnew));
1840             if (alloc_sets)
1841                 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1842             alloc_sets = alloc_sets + add;
1843             result_sets = rnew;
1844         }
1845         yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1846         grep_info.isam_p_indx = 0;
1847         res = numeric_term(zh, zapt, &termp, term_dict,
1848                            attributeSet, stream, &grep_info,
1849                            index_type, complete_flag,
1850                            display_term, xpath_use, &ol);
1851         wrbuf_destroy(term_dict);
1852         if (res == ZEBRA_FAIL || termp == 0)
1853         {
1854             wrbuf_destroy(display_term);
1855             break;
1856         }
1857         yaz_log(YLOG_DEBUG, "term: %s", wrbuf_cstr(display_term));
1858         result_sets[num_result_sets] =
1859             rset_trunc(zh, grep_info.isam_p_buf,
1860                        grep_info.isam_p_indx, wrbuf_buf(display_term),
1861                        wrbuf_len(display_term), rank_type,
1862                        0 /* preserve position */,
1863                        zapt->term->which, rset_nmem, 
1864                        kc, kc->scope, ol, index_type,
1865                        hits_limit_value,
1866                        term_ref_id_str);
1867         wrbuf_destroy(display_term);
1868         if (!result_sets[num_result_sets])
1869             break;
1870         num_result_sets++;
1871         if (!*termp)
1872             break;
1873     }
1874     grep_info_delete(&grep_info);
1875
1876     if (res != ZEBRA_OK)
1877         return res;
1878     if (num_result_sets == 0)
1879         *rset = rset_create_null(rset_nmem, kc, 0);
1880     else if (num_result_sets == 1)
1881         *rset = result_sets[0];
1882     else
1883         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1884                                 num_result_sets, result_sets);
1885     if (!*rset)
1886         return ZEBRA_FAIL;
1887     return ZEBRA_OK;
1888 }
1889
1890 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1891                                       Z_AttributesPlusTerm *zapt,
1892                                       const char *termz,
1893                                       const Odr_oid *attributeSet,
1894                                       NMEM stream,
1895                                       const char *rank_type, NMEM rset_nmem,
1896                                       RSET *rset,
1897                                       struct rset_key_control *kc)
1898 {
1899     Record rec;
1900     zint sysno = atozint(termz);
1901     
1902     if (sysno <= 0)
1903         sysno = 0;
1904     rec = rec_get(zh->reg->records, sysno);
1905     if (!rec)
1906         sysno = 0;
1907
1908     rec_free(&rec);
1909
1910     if (sysno <= 0)
1911     {
1912         *rset = rset_create_null(rset_nmem, kc, 0);
1913     }
1914     else
1915     {
1916         RSFD rsfd;
1917         struct it_key key;
1918         *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1919                                  res_get(zh->res, "setTmpDir"), 0);
1920         rsfd = rset_open(*rset, RSETF_WRITE);
1921         
1922         key.mem[0] = sysno;
1923         key.mem[1] = 1;
1924         key.len = 2;
1925         rset_write(rsfd, &key);
1926         rset_close(rsfd);
1927     }
1928     return ZEBRA_OK;
1929 }
1930
1931 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1932                                const Odr_oid *attributeSet, NMEM stream,
1933                                Z_SortKeySpecList *sort_sequence,
1934                                const char *rank_type,
1935                                NMEM rset_nmem,
1936                                RSET *rset,
1937                                struct rset_key_control *kc)
1938 {
1939     int i;
1940     int sort_relation_value;
1941     AttrType sort_relation_type;
1942     Z_SortKeySpec *sks;
1943     Z_SortKey *sk;
1944     char termz[20];
1945     
1946     attr_init_APT(&sort_relation_type, zapt, 7);
1947     sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
1948
1949     if (!sort_sequence->specs)
1950     {
1951         sort_sequence->num_specs = 10;
1952         sort_sequence->specs = (Z_SortKeySpec **)
1953             nmem_malloc(stream, sort_sequence->num_specs *
1954                         sizeof(*sort_sequence->specs));
1955         for (i = 0; i<sort_sequence->num_specs; i++)
1956             sort_sequence->specs[i] = 0;
1957     }
1958     if (zapt->term->which != Z_Term_general)
1959         i = 0;
1960     else
1961         i = atoi_n((char *) zapt->term->u.general->buf,
1962                    zapt->term->u.general->len);
1963     if (i >= sort_sequence->num_specs)
1964         i = 0;
1965     sprintf(termz, "%d", i);
1966
1967     sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1968     sks->sortElement = (Z_SortElement *)
1969         nmem_malloc(stream, sizeof(*sks->sortElement));
1970     sks->sortElement->which = Z_SortElement_generic;
1971     sk = sks->sortElement->u.generic = (Z_SortKey *)
1972         nmem_malloc(stream, sizeof(*sk));
1973     sk->which = Z_SortKey_sortAttributes;
1974     sk->u.sortAttributes = (Z_SortAttributes *)
1975         nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
1976
1977     sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet);
1978     sk->u.sortAttributes->list = zapt->attributes;
1979
1980     sks->sortRelation = (int *)
1981         nmem_malloc(stream, sizeof(*sks->sortRelation));
1982     if (sort_relation_value == 1)
1983         *sks->sortRelation = Z_SortKeySpec_ascending;
1984     else if (sort_relation_value == 2)
1985         *sks->sortRelation = Z_SortKeySpec_descending;
1986     else 
1987         *sks->sortRelation = Z_SortKeySpec_ascending;
1988
1989     sks->caseSensitivity = (int *)
1990         nmem_malloc(stream, sizeof(*sks->caseSensitivity));
1991     *sks->caseSensitivity = 0;
1992
1993     sks->which = Z_SortKeySpec_null;
1994     sks->u.null = odr_nullval ();
1995     sort_sequence->specs[i] = sks;
1996     *rset = rset_create_null(rset_nmem, kc, 0);
1997     return ZEBRA_OK;
1998 }
1999
2000
2001 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2002                            const Odr_oid *attributeSet,
2003                            struct xpath_location_step *xpath, int max,
2004                            NMEM mem)
2005 {
2006     const Odr_oid *curAttributeSet = attributeSet;
2007     AttrType use;
2008     const char *use_string = 0;
2009     
2010     attr_init_APT(&use, zapt, 1);
2011     attr_find_ex(&use, &curAttributeSet, &use_string);
2012
2013     if (!use_string || *use_string != '/')
2014         return -1;
2015
2016     return zebra_parse_xpath_str(use_string, xpath, max, mem);
2017 }
2018  
2019                
2020
2021 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
2022                         const char *index_type, const char *term, 
2023                         const char *xpath_use,
2024                         NMEM rset_nmem,
2025                         struct rset_key_control *kc)
2026 {
2027     struct grep_info grep_info;
2028     int ord = zebraExplain_lookup_attr_str(zh->reg->zei, 
2029                                            zinfo_index_category_index,
2030                                            index_type, xpath_use);
2031     if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, "0") == ZEBRA_FAIL)
2032         return rset_create_null(rset_nmem, kc, 0);
2033     
2034     if (ord < 0)
2035         return rset_create_null(rset_nmem, kc, 0);
2036     else
2037     {
2038         int i, r, max_pos;
2039         char ord_buf[32];
2040         RSET rset;
2041         WRBUF term_dict = wrbuf_alloc();
2042         int ord_len = key_SU_encode(ord, ord_buf);
2043         int term_type = Z_Term_characterString;
2044         const char *flags = "void";
2045
2046         wrbuf_putc(term_dict, '(');
2047         for (i = 0; i<ord_len; i++)
2048         {
2049             wrbuf_putc(term_dict, 1);
2050             wrbuf_putc(term_dict, ord_buf[i]);
2051         }
2052         wrbuf_putc(term_dict, ')');
2053         wrbuf_puts(term_dict, term);
2054         
2055         grep_info.isam_p_indx = 0;
2056         r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 0,
2057                              &grep_info, &max_pos, 0, grep_handle);
2058         yaz_log(YLOG_DEBUG, "%s %d positions", term,
2059                 grep_info.isam_p_indx);
2060         rset = rset_trunc(zh, grep_info.isam_p_buf,
2061                           grep_info.isam_p_indx, term, strlen(term),
2062                           flags, 1, term_type, rset_nmem,
2063                           kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2064                           0 /* term_ref_id_str */);
2065         grep_info_delete(&grep_info);
2066         wrbuf_destroy(term_dict);
2067         return rset;
2068     }
2069 }
2070
2071 static
2072 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2073                            NMEM stream, const char *rank_type, RSET rset,
2074                            int xpath_len, struct xpath_location_step *xpath,
2075                            NMEM rset_nmem,
2076                            RSET *rset_out,
2077                            struct rset_key_control *kc)
2078 {
2079     int i;
2080     int always_matches = rset ? 0 : 1;
2081
2082     if (xpath_len < 0)
2083     {
2084         *rset_out = rset;
2085         return ZEBRA_OK;
2086     }
2087
2088     yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2089     for (i = 0; i<xpath_len; i++)
2090     {
2091         yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2092
2093     }
2094
2095     /*
2096     //a    ->    a/.*
2097     //a/b  ->    b/a/.*
2098     /a     ->    a/
2099     /a/b   ->    b/a/
2100
2101     /      ->    none
2102
2103     a[@attr = value]/b[@other = othervalue]
2104
2105     /e/@a val      range(e/,range(@a,freetext(w,1015,val),@a),e/)
2106     /a/b val       range(b/a/,freetext(w,1016,val),b/a/)
2107     /a/b/@c val    range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2108     /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2109     /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2110     /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
2111       
2112     */
2113
2114     dict_grep_cmap(zh->reg->dict, 0, 0);
2115     
2116     {
2117         int level = xpath_len;
2118         int first_path = 1;
2119         
2120         while (--level >= 0)
2121         {
2122             WRBUF xpath_rev = wrbuf_alloc();
2123             int i;
2124             RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
2125
2126             for (i = level; i >= 1; --i)
2127             {
2128                 const char *cp = xpath[i].part;
2129                 if (*cp)
2130                 {
2131                     for (; *cp; cp++)
2132                     {
2133                         if (*cp == '*')
2134                             wrbuf_puts(xpath_rev, "[^/]*");
2135                         else if (*cp == ' ')
2136                             wrbuf_puts(xpath_rev, "\001 ");
2137                         else
2138                             wrbuf_putc(xpath_rev, *cp);
2139
2140                         /* wrbuf_putc does not null-terminate , but
2141                            wrbuf_puts below ensures it does.. so xpath_rev
2142                            is OK iff length is > 0 */
2143                     }
2144                     wrbuf_puts(xpath_rev, "/");
2145                 }
2146                 else if (i == 1)  /* // case */
2147                     wrbuf_puts(xpath_rev, ".*");
2148             }
2149             if (xpath[level].predicate &&
2150                 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2151                 xpath[level].predicate->u.relation.name[0])
2152             {
2153                 WRBUF wbuf = wrbuf_alloc();
2154                 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2155                 if (xpath[level].predicate->u.relation.value)
2156                 {
2157                     const char *cp = xpath[level].predicate->u.relation.value;
2158                     wrbuf_putc(wbuf, '=');
2159                     
2160                     while (*cp)
2161                     {
2162                         if (strchr(REGEX_CHARS, *cp))
2163                             wrbuf_putc(wbuf, '\\');
2164                         wrbuf_putc(wbuf, *cp);
2165                         cp++;
2166                     }
2167                 }
2168                 rset_attr = xpath_trunc(
2169                     zh, stream, "0", wrbuf_cstr(wbuf), 
2170                     ZEBRA_XPATH_ATTR_NAME, 
2171                     rset_nmem, kc);
2172                 wrbuf_destroy(wbuf);
2173             } 
2174             else 
2175             {
2176                 if (!first_path)
2177                 {
2178                     wrbuf_destroy(xpath_rev);
2179                     continue;
2180                 }
2181             }
2182             yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level, 
2183                     wrbuf_cstr(xpath_rev));
2184             if (wrbuf_len(xpath_rev))
2185             {
2186                 rset_start_tag = xpath_trunc(zh, stream, "0", 
2187                                              wrbuf_cstr(xpath_rev),
2188                                              ZEBRA_XPATH_ELM_BEGIN, 
2189                                              rset_nmem, kc);
2190                 if (always_matches)
2191                     rset = rset_start_tag;
2192                 else
2193                 {
2194                     rset_end_tag = xpath_trunc(zh, stream, "0", 
2195                                                wrbuf_cstr(xpath_rev),
2196                                                ZEBRA_XPATH_ELM_END, 
2197                                                rset_nmem, kc);
2198                     
2199                     rset = rset_create_between(rset_nmem, kc, kc->scope,
2200                                                rset_start_tag, rset,
2201                                                rset_end_tag, rset_attr);
2202                 }
2203             }
2204             wrbuf_destroy(xpath_rev);
2205             first_path = 0;
2206         }
2207     }
2208     *rset_out = rset;
2209     return ZEBRA_OK;
2210 }
2211
/* Capacity of the xpath[] step array declared in rpn_search_database;
   also passed to rpn_check_xpath as the maximum number of steps. */
2212 #define MAX_XPATH_STEPS 10
2213
/* Forward declaration: rpn_search_APT (below) calls rpn_search_database
   before its definition appears in the file. */
2214 static ZEBRA_RES rpn_search_database(ZebraHandle zh, 
2215                                      Z_AttributesPlusTerm *zapt,
2216                                      const Odr_oid *attributeSet,
2217                                      zint hits_limit, NMEM stream,
2218                                      Z_SortKeySpecList *sort_sequence,
2219                                      NMEM rset_nmem,
2220                                      RSET *rset,
2221                                      struct rset_key_control *kc);
2222
2223 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2224                                 const Odr_oid *attributeSet,
2225                                 zint hits_limit, NMEM stream,
2226                                 Z_SortKeySpecList *sort_sequence,
2227                                 int num_bases, const char **basenames, 
2228                                 NMEM rset_nmem,
2229                                 RSET *rset,
2230                                 struct rset_key_control *kc)
2231 {
2232     RSET *rsets = nmem_malloc(stream, num_bases * sizeof(*rsets));
2233     ZEBRA_RES res = ZEBRA_OK;
2234     int i;
2235     for (i = 0; i < num_bases; i++)
2236     {
2237
2238         if (zebraExplain_curDatabase(zh->reg->zei, basenames[i]))
2239         {
2240             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2241                            basenames[i]);
2242             res = ZEBRA_FAIL;
2243             break;
2244         }
2245         res = rpn_search_database(zh, zapt, attributeSet, hits_limit, stream,
2246                                   sort_sequence,
2247                                   rset_nmem, rsets+i, kc);
2248         if (res != ZEBRA_OK)
2249             break;
2250     }
2251     if (res != ZEBRA_OK)
2252     {   /* must clean up the already created sets */
2253         while (--i >= 0)
2254             rset_delete(rsets[i]);
2255         *rset = 0;
2256     }
2257     else 
2258     {
2259         if (num_bases == 1)
2260             *rset = rsets[0];
2261         else if (num_bases == 0)
2262             *rset = rset_create_null(rset_nmem, kc, 0); 
2263         else
2264             *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* TERMID */,
2265                                    num_bases, rsets);
2266     }
2267     return res;
2268 }
2269
2270 static ZEBRA_RES rpn_search_database(ZebraHandle zh, 
2271                                      Z_AttributesPlusTerm *zapt,
2272                                      const Odr_oid *attributeSet,
2273                                      zint hits_limit, NMEM stream,
2274                                      Z_SortKeySpecList *sort_sequence,
2275                                      NMEM rset_nmem,
2276                                      RSET *rset,
2277                                      struct rset_key_control *kc)
2278 {
2279     ZEBRA_RES res = ZEBRA_OK;
2280     const char *index_type;
2281     char *search_type = NULL;
2282     char rank_type[128];
2283     int complete_flag;
2284     int sort_flag;
2285     char termz[IT_MAX_WORD+1];
2286     int xpath_len;
2287     const char *xpath_use = 0;
2288     struct xpath_location_step xpath[MAX_XPATH_STEPS];
2289
2290     if (!log_level_set)
2291     {
2292         log_level_rpn = yaz_log_module_level("rpn");
2293         log_level_set = 1;
2294     }
2295     zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type,
2296                     rank_type, &complete_flag, &sort_flag);
2297     
2298     yaz_log(YLOG_DEBUG, "index_type=%s", index_type);
2299     yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2300     yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2301     yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2302
2303     if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2304         return ZEBRA_FAIL;
2305
2306     if (sort_flag)
2307         return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2308                              rank_type, rset_nmem, rset, kc);
2309     /* consider if an X-Path query is used */
2310     xpath_len = rpn_check_xpath(zh, zapt, attributeSet, 
2311                                 xpath, MAX_XPATH_STEPS, stream);
2312     if (xpath_len >= 0)
2313     {
2314         if (xpath[xpath_len-1].part[0] == '@') 
2315             xpath_use = ZEBRA_XPATH_ATTR_CDATA;  /* last step is attribute  */
2316         else
2317             xpath_use = ZEBRA_XPATH_CDATA;  /* searching for cdata */        
2318
2319         if (1)
2320         {
2321             AttrType relation;
2322             int relation_value;
2323
2324             attr_init_APT(&relation, zapt, 2);
2325             relation_value = attr_find(&relation, NULL);
2326
2327             if (relation_value == 103) /* alwaysmatches */
2328             {
2329                 *rset = 0; /* signal no "term" set */
2330                 return rpn_search_xpath(zh, stream, rank_type, *rset, 
2331                                         xpath_len, xpath, rset_nmem, rset, kc);
2332             }
2333         }
2334     }
2335
2336     /* search using one of the various search type strategies
2337        termz is our UTF-8 search term
2338        attributeSet is top-level default attribute set 
2339        stream is ODR for search
2340        reg_id is the register type
2341        complete_flag is 1 for complete subfield, 0 for incomplete
2342        xpath_use is use-attribute to be used for X-Path search, 0 for none
2343     */
2344     if (!strcmp(search_type, "phrase"))
2345     {
2346         res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, hits_limit,
2347                                     stream,
2348                                     index_type, complete_flag, rank_type,
2349                                     xpath_use,
2350                                     rset_nmem,
2351                                     rset, kc);
2352     }
2353     else if (!strcmp(search_type, "and-list"))
2354     {
2355         res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, hits_limit,
2356                                       stream,
2357                                       index_type, complete_flag, rank_type,
2358                                       xpath_use,
2359                                       rset_nmem,
2360                                       rset, kc);
2361     }
2362     else if (!strcmp(search_type, "or-list"))
2363     {
2364         res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, hits_limit,
2365                                      stream,
2366                                      index_type, complete_flag, rank_type,
2367                                      xpath_use,
2368                                      rset_nmem,
2369                                      rset, kc);
2370     }
2371     else if (!strcmp(search_type, "local"))
2372     {
2373         res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2374                                    rank_type, rset_nmem, rset, kc);
2375     }
2376     else if (!strcmp(search_type, "numeric"))
2377     {
2378         res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2379                                      index_type, complete_flag, rank_type,
2380                                      xpath_use,
2381                                      rset_nmem,
2382                                      rset, kc);
2383     }
2384     else
2385     {
2386         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2387         res = ZEBRA_FAIL;
2388     }
2389     if (res != ZEBRA_OK)
2390         return res;
2391     if (!*rset)
2392         return ZEBRA_FAIL;
2393     return rpn_search_xpath(zh, stream, rank_type, *rset, 
2394                             xpath_len, xpath, rset_nmem, rset, kc);
2395 }
2396
/* Forward declaration: rpn_search_top (below) calls rpn_search_structure
   before its definition, and the definition calls itself for the two
   operands of a complex node. */
2397 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2398                                       const Odr_oid *attributeSet,
2399                                       zint hits_limit,
2400                                       NMEM stream, NMEM rset_nmem,
2401                                       Z_SortKeySpecList *sort_sequence,
2402                                       int num_bases, const char **basenames,
2403                                       RSET **result_sets, int *num_result_sets,
2404                                       Z_Operator *parent_op,
2405                                       struct rset_key_control *kc);
2406
2407 ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs,
2408                                    zint *approx_limit)
2409 {
2410     ZEBRA_RES res = ZEBRA_OK;
2411     if (zs->which == Z_RPNStructure_complex)
2412     {
2413         if (res == ZEBRA_OK)
2414             res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
2415                                            approx_limit);
2416         if (res == ZEBRA_OK)
2417             res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
2418                                            approx_limit);
2419     }
2420     else if (zs->which == Z_RPNStructure_simple)
2421     {
2422         if (zs->u.simple->which == Z_Operand_APT)
2423         {
2424             Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
2425             AttrType global_hits_limit_attr;
2426             int l;
2427             
2428             attr_init_APT(&global_hits_limit_attr, zapt, 12);
2429             
2430             l = attr_find(&global_hits_limit_attr, NULL);
2431             if (l != -1)
2432                 *approx_limit = l;
2433         }
2434     }
2435     return res;
2436 }
2437
2438 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2439                          const Odr_oid *attributeSet, 
2440                          zint hits_limit,
2441                          NMEM stream, NMEM rset_nmem,
2442                          Z_SortKeySpecList *sort_sequence,
2443                          int num_bases, const char **basenames,
2444                          RSET *result_set)
2445 {
2446     RSET *result_sets = 0;
2447     int num_result_sets = 0;
2448     ZEBRA_RES res;
2449     struct rset_key_control *kc = zebra_key_control_create(zh);
2450
2451     res = rpn_search_structure(zh, zs, attributeSet, hits_limit,
2452                                stream, rset_nmem,
2453                                sort_sequence, 
2454                                num_bases, basenames,
2455                                &result_sets, &num_result_sets,
2456                                0 /* no parent op */,
2457                                kc);
2458     if (res != ZEBRA_OK)
2459     {
2460         int i;
2461         for (i = 0; i<num_result_sets; i++)
2462             rset_delete(result_sets[i]);
2463         *result_set = 0;
2464     }
2465     else
2466     {
2467         assert(num_result_sets == 1);
2468         assert(result_sets);
2469         assert(*result_sets);
2470         *result_set = *result_sets;
2471     }
2472     (*kc->dec)(kc);
2473     return res;
2474 }
2475
2476 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2477                                const Odr_oid *attributeSet, zint hits_limit,
2478                                NMEM stream, NMEM rset_nmem,
2479                                Z_SortKeySpecList *sort_sequence,
2480                                int num_bases, const char **basenames,
2481                                RSET **result_sets, int *num_result_sets,
2482                                Z_Operator *parent_op,
2483                                struct rset_key_control *kc)
2484 {
2485     *num_result_sets = 0;
2486     if (zs->which == Z_RPNStructure_complex)
2487     {
2488         ZEBRA_RES res;
2489         Z_Operator *zop = zs->u.complex->roperator;
2490         RSET *result_sets_l = 0;
2491         int num_result_sets_l = 0;
2492         RSET *result_sets_r = 0;
2493         int num_result_sets_r = 0;
2494
2495         res = rpn_search_structure(zh, zs->u.complex->s1,
2496                                    attributeSet, hits_limit, stream, rset_nmem,
2497                                    sort_sequence,
2498                                    num_bases, basenames,
2499                                    &result_sets_l, &num_result_sets_l,
2500                                    zop, kc);
2501         if (res != ZEBRA_OK)
2502         {
2503             int i;
2504             for (i = 0; i<num_result_sets_l; i++)
2505                 rset_delete(result_sets_l[i]);
2506             return res;
2507         }
2508         res = rpn_search_structure(zh, zs->u.complex->s2,
2509                                    attributeSet, hits_limit, stream, rset_nmem,
2510                                    sort_sequence,
2511                                    num_bases, basenames,
2512                                    &result_sets_r, &num_result_sets_r,
2513                                    zop, kc);
2514         if (res != ZEBRA_OK)
2515         {
2516             int i;
2517             for (i = 0; i<num_result_sets_l; i++)
2518                 rset_delete(result_sets_l[i]);
2519             for (i = 0; i<num_result_sets_r; i++)
2520                 rset_delete(result_sets_r[i]);
2521             return res;
2522         }
2523
2524         /* make a new list of result for all children */
2525         *num_result_sets = num_result_sets_l + num_result_sets_r;
2526         *result_sets = nmem_malloc(stream, *num_result_sets * 
2527                                    sizeof(**result_sets));
2528         memcpy(*result_sets, result_sets_l, 
2529                num_result_sets_l * sizeof(**result_sets));
2530         memcpy(*result_sets + num_result_sets_l, result_sets_r, 
2531                num_result_sets_r * sizeof(**result_sets));
2532
2533         if (!parent_op || parent_op->which != zop->which
2534             || (zop->which != Z_Operator_and &&
2535                 zop->which != Z_Operator_or))
2536         {
2537             /* parent node different from this one (or non-present) */
2538             /* we must combine result sets now */
2539             RSET rset;
2540             switch (zop->which)
2541             {
2542             case Z_Operator_and:
2543                 rset = rset_create_and(rset_nmem, kc,
2544                                        kc->scope,
2545                                        *num_result_sets, *result_sets);
2546                 break;
2547             case Z_Operator_or:
2548                 rset = rset_create_or(rset_nmem, kc,
2549                                       kc->scope, 0, /* termid */
2550                                       *num_result_sets, *result_sets);
2551                 break;
2552             case Z_Operator_and_not:
2553                 rset = rset_create_not(rset_nmem, kc,
2554                                        kc->scope,
2555                                        (*result_sets)[0],
2556                                        (*result_sets)[1]);
2557                 break;
2558             case Z_Operator_prox:
2559                 if (zop->u.prox->which != Z_ProximityOperator_known)
2560                 {
2561                     zebra_setError(zh, 
2562                                    YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2563                                    0);
2564                     return ZEBRA_FAIL;
2565                 }
2566                 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2567                 {
2568                     zebra_setError_zint(zh,
2569                                         YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2570                                         *zop->u.prox->u.known);
2571                     return ZEBRA_FAIL;
2572                 }
2573                 else
2574                 {
2575                     rset = rset_create_prox(rset_nmem, kc,
2576                                             kc->scope,
2577                                             *num_result_sets, *result_sets, 
2578                                             *zop->u.prox->ordered,
2579                                             (!zop->u.prox->exclusion ? 
2580                                              0 : *zop->u.prox->exclusion),
2581                                             *zop->u.prox->relationType,
2582                                             *zop->u.prox->distance );
2583                 }
2584                 break;
2585             default:
2586                 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
2587                 return ZEBRA_FAIL;
2588             }
2589             *num_result_sets = 1;
2590             *result_sets = nmem_malloc(stream, *num_result_sets * 
2591                                        sizeof(**result_sets));
2592             (*result_sets)[0] = rset;
2593         }
2594     }
2595     else if (zs->which == Z_RPNStructure_simple)
2596     {
2597         RSET rset;
2598         ZEBRA_RES res;
2599
2600         if (zs->u.simple->which == Z_Operand_APT)
2601         {
2602             yaz_log(YLOG_DEBUG, "rpn_search_APT");
2603             res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2604                                  attributeSet, hits_limit,
2605                                  stream, sort_sequence,
2606                                  num_bases, basenames, rset_nmem, &rset,
2607                                  kc);
2608             if (res != ZEBRA_OK)
2609                 return res;
2610         }
2611         else if (zs->u.simple->which == Z_Operand_resultSetId)
2612         {
2613             yaz_log(YLOG_DEBUG, "rpn_search_ref");
2614             rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2615             if (!rset)
2616             {
2617                 zebra_setError(zh, 
2618                                YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2619                                zs->u.simple->u.resultSetId);
2620                 return ZEBRA_FAIL;
2621             }
2622             rset_dup(rset);
2623         }
2624         else
2625         {
2626             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2627             return ZEBRA_FAIL;
2628         }
2629         *num_result_sets = 1;
2630         *result_sets = nmem_malloc(stream, *num_result_sets * 
2631                                    sizeof(**result_sets));
2632         (*result_sets)[0] = rset;
2633     }
2634     else
2635     {
2636         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2637         return ZEBRA_FAIL;
2638     }
2639     return ZEBRA_OK;
2640 }
2641
2642
2643
2644 /*
2645  * Local variables:
2646  * c-basic-offset: 4
2647  * c-file-style: "Stroustrup"
2648  * indent-tabs-mode: nil
2649  * End:
2650  * vim: shiftwidth=4 tabstop=8 expandtab
2651  */
2652