RPN: avoid \-handling for truncation=105/106.
[idzebra-moved-to-github.git] / index / rpnsearch.c
1 /* This file is part of the Zebra server.
2    Copyright (C) 1994-2010 Index Data
3
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17
18 */
19
20 #include <stdio.h>
21 #include <assert.h>
22 #ifdef WIN32
23 #include <io.h>
24 #endif
25 #if HAVE_UNISTD_H
26 #include <unistd.h>
27 #endif
28 #include <ctype.h>
29
30 #include <yaz/diagbib1.h>
31 #include "index.h"
32 #include <zebra_xpath.h>
33 #include <attrfind.h>
34 #include <charmap.h>
35 #include <rset.h>
36
/* becomes 1 once log_level_rpn has been looked up (done lazily in
   add_isam_p) */
static int log_level_set = 0;
/* yaz log level of the "rpn" module; 0 until it has been looked up */
static int log_level_rpn = 0;

/* NOTE(review): not referenced in the visible part of this file -
   confirm whether it is still needed */
#define TERMSET_DISABLE 1
41
42 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
43 {
44     struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
45     const char **out = zebra_maps_input(p->zm, from, len, 0);
46 #if 0
47     if (out && *out)
48     {
49         const char *outp = *out;
50         yaz_log(YLOG_LOG, "---");
51         while (*outp)
52         {
53             yaz_log(YLOG_LOG, "%02X", *outp);
54             outp++;
55         }
56     }
57 #endif
58     return out;
59 }
60
61 void rpn_char_map_prepare(struct zebra_register *reg, zebra_map_t zm,
62                           struct rpn_char_map_info *map_info)
63 {
64     map_info->zm = zm;
65     if (zebra_maps_is_icu(zm))
66         dict_grep_cmap(reg->dict, 0, 0);
67     else
68         dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
69 }
70
#define TERM_COUNT

/* State collected while grepping the dictionary for one term;
   filled in by add_isam_p(). */
struct grep_info {
#ifdef TERM_COUNT
    int *term_no;           /* array (re)allocated together with isam_p_buf */
#endif
    ISAM_P *isam_p_buf;     /* ISAM positions collected so far */
    int isam_p_size;        /* number of entries allocated in isam_p_buf */
    int isam_p_indx;        /* number of entries used in isam_p_buf */
    int trunc_max;          /* add_isam_p() stops at trunc_max - 1 entries */
    ZebraHandle zh;
    const char *index_type;
    ZebraSet termset;       /* if set, every hit is also added to this set */
};
85
86 static int add_isam_p(const char *name, const char *info,
87                       struct grep_info *p)
88 {
89     if (!log_level_set)
90     {
91         log_level_rpn = yaz_log_module_level("rpn");
92         log_level_set = 1;
93     }
94     /* we may have to stop this madness.. NOTE: -1 so that if
95        truncmax == trunxlimit we do *not* generate result sets */
96     if (p->isam_p_indx >= p->trunc_max - 1)
97         return 1;
98
99     if (p->isam_p_indx == p->isam_p_size)
100     {
101         ISAM_P *new_isam_p_buf;
102 #ifdef TERM_COUNT        
103         int *new_term_no;        
104 #endif
105         p->isam_p_size = 2*p->isam_p_size + 100;
106         new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
107                                             p->isam_p_size);
108         if (p->isam_p_buf)
109         {
110             memcpy(new_isam_p_buf, p->isam_p_buf,
111                    p->isam_p_indx * sizeof(*p->isam_p_buf));
112             xfree(p->isam_p_buf);
113         }
114         p->isam_p_buf = new_isam_p_buf;
115
116 #ifdef TERM_COUNT
117         new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
118         if (p->term_no)
119         {
120             memcpy(new_term_no, p->isam_p_buf,
121                    p->isam_p_indx * sizeof(*p->term_no));
122             xfree(p->term_no);
123         }
124         p->term_no = new_term_no;
125 #endif
126     }
127     assert(*info == sizeof(*p->isam_p_buf));
128     memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
129
130     if (p->termset)
131     {
132         const char *db;
133         char term_tmp[IT_MAX_WORD];
134         int ord = 0;
135         const char *index_name;
136         int len = key_SU_decode(&ord, (const unsigned char *) name);
137         
138         zebra_term_untrans (p->zh, p->index_type, term_tmp, name+len);
139         yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
140         zebraExplain_lookup_ord(p->zh->reg->zei,
141                                 ord, 0 /* index_type */, &db, &index_name);
142         yaz_log(log_level_rpn, "grep:  db=%s index=%s", db, index_name);
143         
144         resultSetAddTerm(p->zh, p->termset, name[len], db,
145                          index_name, term_tmp);
146     }
147     (p->isam_p_indx)++;
148     return 0;
149 }
150
/* Callback with an untyped handle: p is the struct grep_info that
   add_isam_p() collects into. */
static int grep_handle(char *name, const char *info, void *p)
{
    struct grep_info *collect = (struct grep_info *) p;

    return add_isam_p(name, info, collect);
}
155
156 static int term_pre(zebra_map_t zm, const char **src,
157                     const char *ct1, int first)
158 {
159     const char *s1, *s0 = *src;
160     const char **map;
161
162     /* skip white space */
163     while (*s0)
164     {
165         if (ct1 && strchr(ct1, *s0))
166             break;
167         s1 = s0;
168         map = zebra_maps_input(zm, &s1, strlen(s1), first);
169         if (**map != *CHR_SPACE)
170             break;
171         s0 = s1;
172     }
173     *src = s0;
174     return *s0;
175 }
176
177
/* esc_str: make a printable dump of a buffer
 *  out_buf/out_size:  destination; out_size must be larger than 20
 *  in_buf/in_size:    bytes to dump
 * Every byte becomes "XX:c  " (hex value, then the character itself or
 * '?' if it is outside 32..126). When the output gets within 20 bytes
 * of out_size, ".." is appended and the rest of the input is dropped.
 */
static void esc_str(char *out_buf, size_t out_size,
                    const char *in_buf, int in_size)
{
    size_t used = 0;
    int idx;

    assert(out_buf);
    assert(in_buf);
    assert(out_size > 20);
    out_buf[0] = '\0';
    for (idx = 0; idx < in_size; idx++)
    {
        int byte = in_buf[idx] & 0xff;
        int shown = (byte >= 32 && byte <= 126) ? byte : '?';

        used += sprintf(out_buf + used, "%02X:%c  ", byte, shown);
        if (used > out_size-20)
        {
            strcpy(out_buf + used, "..");
            break;
        }
    }
}
203
/* characters that get a backslash in front when literal text is copied
   into a dictionary regular expression */
#define REGEX_CHARS " ^[]()|.*+?!\"$"
205
206 static void add_non_space(const char *start, const char *end,
207                           WRBUF term_dict,
208                           WRBUF display_term,
209                           const char **map, int q_map_match)
210 {
211     size_t sz = end - start;
212
213     wrbuf_write(display_term, start, sz);
214     if (!q_map_match)
215     {
216         while (start < end)
217         {
218             if (strchr(REGEX_CHARS, *start))
219                 wrbuf_putc(term_dict, '\\');
220             wrbuf_putc(term_dict, *start);
221             start++;
222         }
223     }
224     else
225     {
226         char tmpbuf[80];
227         esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
228         
229         wrbuf_puts(term_dict, map[0]);
230     }
231 }
232
233
234 static int term_100_icu(zebra_map_t zm,
235                         const char **src, WRBUF term_dict, int space_split,
236                         WRBUF display_term,
237                         int right_trunc)
238 {
239     int i;
240     const char *res_buf = 0;
241     size_t res_len = 0;
242     const char *display_buf;
243     size_t display_len;
244     if (!zebra_map_tokenize_next(zm, &res_buf, &res_len,
245                                  &display_buf, &display_len))
246     {
247         *src += strlen(*src);
248         return 0;
249     }
250     wrbuf_write(display_term, display_buf, display_len);
251     if (right_trunc)
252     {
253         /* ICU sort keys seem to be of the form
254            basechars \x01 accents \x01 length
255            For now we'll just right truncate from basechars . This 
256            may give false hits due to accents not being used.
257         */
258         i = res_len;
259         while (--i >= 0 && res_buf[i] != '\x01')
260             ;
261         if (i > 0)
262         {
263             while (--i >= 0 && res_buf[i] != '\x01')
264                 ;
265         }
266         if (i == 0)
267         {  /* did not find base chars at all. Throw error */
268             return -1;
269         }
270         res_len = i; /* reduce res_len */
271     }
272     for (i = 0; i < res_len; i++)
273     {
274         if (strchr(REGEX_CHARS "\\", res_buf[i]))
275             wrbuf_putc(term_dict, '\\');
276         if (res_buf[i] < 32)
277             wrbuf_putc(term_dict, 1);
278             
279         wrbuf_putc(term_dict, res_buf[i]);
280     }
281     if (right_trunc)
282         wrbuf_puts(term_dict, ".*");
283     return 1;
284 }
285
286 /* term_100: handle term, where trunc = none(no operators at all) */
287 static int term_100(zebra_map_t zm,
288                     const char **src, WRBUF term_dict, int space_split,
289                     WRBUF display_term)
290 {
291     const char *s0;
292     const char **map;
293     int i = 0;
294
295     const char *space_start = 0;
296     const char *space_end = 0;
297
298     if (!term_pre(zm, src, 0, !space_split))
299         return 0;
300     s0 = *src;
301     while (*s0)
302     {
303         const char *s1 = s0;
304         int q_map_match = 0;
305         map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
306         if (space_split)
307         {
308             if (**map == *CHR_SPACE)
309                 break;
310         }
311         else  /* complete subfield only. */
312         {
313             if (**map == *CHR_SPACE)
314             {   /* save space mapping for later  .. */
315                 space_start = s1;
316                 space_end = s0;
317                 continue;
318             }
319             else if (space_start)
320             {   /* reload last space */
321                 while (space_start < space_end)
322                 {
323                     if (strchr(REGEX_CHARS, *space_start))
324                         wrbuf_putc(term_dict, '\\');
325                     wrbuf_putc(display_term, *space_start);
326                     wrbuf_putc(term_dict, *space_start);
327                     space_start++;
328                                
329                 }
330                 /* and reset */
331                 space_start = space_end = 0;
332             }
333         }
334         i++;
335
336         add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
337     }
338     *src = s0;
339     return i;
340 }
341
342 /* term_101: handle term, where trunc = Process # */
343 static int term_101(zebra_map_t zm,
344                     const char **src, WRBUF term_dict, int space_split,
345                     WRBUF display_term)
346 {
347     const char *s0;
348     const char **map;
349     int i = 0;
350
351     if (!term_pre(zm, src, "#", !space_split))
352         return 0;
353     s0 = *src;
354     while (*s0)
355     {
356         if (*s0 == '#')
357         {
358             i++;
359             wrbuf_puts(term_dict, ".*");
360             wrbuf_putc(display_term, *s0);
361             s0++;
362         }
363         else
364         {
365             const char *s1 = s0;
366             int q_map_match = 0;
367             map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
368             if (space_split && **map == *CHR_SPACE)
369                 break;
370
371             i++;
372             add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
373         }
374     }
375     *src = s0;
376     return i;
377 }
378
379 /* term_103: handle term, where trunc = re-2 (regular expressions) */
380 static int term_103(zebra_map_t zm, const char **src,
381                     WRBUF term_dict, int *errors, int space_split,
382                     WRBUF display_term)
383 {
384     int i = 0;
385     const char *s0;
386     const char **map;
387
388     if (!term_pre(zm, src, "^\\()[].*+?|", !space_split))
389         return 0;
390     s0 = *src;
391     if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
392         isdigit(((const unsigned char *)s0)[1]))
393     {
394         *errors = s0[1] - '0';
395         s0 += 3;
396         if (*errors > 3)
397             *errors = 3;
398     }
399     while (*s0)
400     {
401         if (strchr("^\\()[].*+?|-", *s0))
402         {
403             wrbuf_putc(display_term, *s0);
404             wrbuf_putc(term_dict, *s0);
405             s0++;
406             i++;
407         }
408         else
409         {
410             const char *s1 = s0;
411             int q_map_match = 0;
412             map = zebra_maps_search(zm, &s0, strlen(s0),  &q_map_match);
413             if (space_split && **map == *CHR_SPACE)
414                 break;
415
416             i++;
417             add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
418         }
419     }
420     *src = s0;
421     
422     return i;
423 }
424
425 /* term_103: handle term, where trunc = re-1 (regular expressions) */
426 static int term_102(zebra_map_t zm, const char **src,
427                     WRBUF term_dict, int space_split, WRBUF display_term)
428 {
429     return term_103(zm, src, term_dict, NULL, space_split, display_term);
430 }
431
432
433 /* term_104: handle term, process # and ! */
434 static int term_104(zebra_map_t zm, const char **src, 
435                     WRBUF term_dict, int space_split, WRBUF display_term)
436 {
437     const char *s0;
438     const char **map;
439     int i = 0;
440
441     if (!term_pre(zm, src, "?*#", !space_split))
442         return 0;
443     s0 = *src;
444     while (*s0)
445     {
446         if (*s0 == '?')
447         {
448             i++;
449             wrbuf_putc(display_term, *s0);
450             s0++;
451             if (*s0 >= '0' && *s0 <= '9')
452             {
453                 int limit = 0;
454                 while (*s0 >= '0' && *s0 <= '9')
455                 {
456                     limit = limit * 10 + (*s0 - '0');
457                     wrbuf_putc(display_term, *s0);
458                     s0++;
459                 }
460                 if (limit > 20)
461                     limit = 20;
462                 while (--limit >= 0)
463                 {
464                     wrbuf_puts(term_dict, ".?");
465                 }
466             }
467             else
468             {
469                 wrbuf_puts(term_dict, ".*");
470             }
471         }
472         else if (*s0 == '*')
473         {
474             i++;
475             wrbuf_puts(term_dict, ".*");
476             wrbuf_putc(display_term, *s0);
477             s0++;
478         }
479         else if (*s0 == '#')
480         {
481             i++;
482             wrbuf_puts(term_dict, ".");
483             wrbuf_putc(display_term, *s0);
484             s0++;
485         }
486         else
487         {
488             const char *s1 = s0;
489             int q_map_match = 0;
490             map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
491             if (space_split && **map == *CHR_SPACE)
492                 break;
493
494             i++;
495             add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
496         }
497     }
498     *src = s0;
499     return i;
500 }
501
502 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
503 static int term_105(zebra_map_t zm, const char **src, 
504                     WRBUF term_dict, int space_split,
505                     WRBUF display_term, int right_truncate)
506 {
507     const char *s0;
508     const char **map;
509     int i = 0;
510
511     if (!term_pre(zm, src, "\\*!", !space_split))
512         return 0;
513     s0 = *src;
514     while (*s0)
515     {
516         if (*s0 == '*')
517         {
518             i++;
519             wrbuf_puts(term_dict, ".*");
520             wrbuf_putc(display_term, *s0);
521             s0++;
522         }
523         else if (*s0 == '!')
524         {
525             i++;
526             wrbuf_putc(term_dict, '.');
527             wrbuf_putc(display_term, *s0);
528             s0++;
529         }
530         else if (*s0 == '\\')
531         {
532             i++;
533             wrbuf_puts(term_dict, "\\\\");
534             wrbuf_putc(display_term, *s0);
535             s0++;
536         }
537         else
538         {
539             const char *s1 = s0;
540             int q_map_match = 0;
541             map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
542             if (space_split && **map == *CHR_SPACE)
543                 break;
544
545             i++;
546             add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
547         }
548     }
549     if (right_truncate)
550         wrbuf_puts(term_dict, ".*");
551     *src = s0;
552     return i;
553 }
554
555
556 /* gen_regular_rel - generate regular expression from relation
557  *  val:     border value (inclusive)
558  *  islt:    1 if <=; 0 if >=.
559  */
560 static void gen_regular_rel(WRBUF term_dict, int val, int islt)
561 {
562     char dst_buf[20*5*20]; /* assuming enough for expansion */
563     char *dst = dst_buf;
564     int dst_p;
565     int w, d, i;
566     int pos = 0;
567     char numstr[20];
568
569     yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
570     if (val >= 0)
571     {
572         if (islt)
573             strcpy(dst, "(-[0-9]+|(");
574         else
575             strcpy(dst, "((");
576     } 
577     else
578     {
579         if (!islt)
580         {
581             strcpy(dst, "([0-9]+|-(");
582             islt = 1;
583         }
584         else
585         {
586             strcpy(dst, "(-(");
587             islt = 0;
588         }
589         val = -val;
590     }
591     dst_p = strlen(dst);
592     sprintf(numstr, "%d", val);
593     for (w = strlen(numstr); --w >= 0; pos++)
594     {
595         d = numstr[w];
596         if (pos > 0)
597         {
598             if (islt)
599             {
600                 if (d == '0')
601                     continue;
602                 d--;
603             } 
604             else
605             {
606                 if (d == '9')
607                     continue;
608                 d++;
609             }
610         }
611         
612         strcpy(dst + dst_p, numstr);
613         dst_p = strlen(dst) - pos - 1;
614
615         if (islt)
616         {
617             if (d != '0')
618             {
619                 dst[dst_p++] = '[';
620                 dst[dst_p++] = '0';
621                 dst[dst_p++] = '-';
622                 dst[dst_p++] = d;
623                 dst[dst_p++] = ']';
624             }
625             else
626                 dst[dst_p++] = d;
627         }
628         else
629         {
630             if (d != '9')
631             { 
632                 dst[dst_p++] = '[';
633                 dst[dst_p++] = d;
634                 dst[dst_p++] = '-';
635                 dst[dst_p++] = '9';
636                 dst[dst_p++] = ']';
637             }
638             else
639                 dst[dst_p++] = d;
640         }
641         for (i = 0; i<pos; i++)
642         {
643             dst[dst_p++] = '[';
644             dst[dst_p++] = '0';
645             dst[dst_p++] = '-';
646             dst[dst_p++] = '9';
647             dst[dst_p++] = ']';
648         }
649         dst[dst_p++] = '|';
650     }
651     dst[dst_p] = '\0';
652     if (islt)
653     {
654         /* match everything less than 10^(pos-1) */
655         strcat(dst, "0*");
656         for (i = 1; i<pos; i++)
657             strcat(dst, "[0-9]?");
658     }
659     else
660     {
661         /* match everything greater than 10^pos */
662         for (i = 0; i <= pos; i++)
663             strcat(dst, "[0-9]");
664         strcat(dst, "[0-9]*");
665     }
666     strcat(dst, "))");
667     wrbuf_puts(term_dict, dst);
668 }
669
670 void string_rel_add_char(WRBUF term_p, WRBUF wsrc, int *indx)
671 {
672     const char *src = wrbuf_cstr(wsrc);
673     if (src[*indx] == '\\')
674     {
675         wrbuf_putc(term_p, src[*indx]);
676         (*indx)++;
677     }
678     wrbuf_putc(term_p, src[*indx]);
679     (*indx)++;
680 }
681
682 /*
683  *   >  abc     ([b-].*|a[c-].*|ab[d-].*|abc.+)
684  *              ([^-a].*|a[^-b].*ab[^-c].*|abc.+)
685  *   >= abc     ([b-].*|a[c-].*|ab[c-].*)
686  *              ([^-a].*|a[^-b].*|ab[c-].*)
687  *   <  abc     ([-0].*|a[-a].*|ab[-b].*)
688  *              ([^a-].*|a[^b-].*|ab[^c-].*)
689  *   <= abc     ([-0].*|a[-a].*|ab[-b].*|abc)
690  *              ([^a-].*|a[^b-].*|ab[^c-].*|abc)
691  */
692 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
693                            const char **term_sub, WRBUF term_dict,
694                            const Odr_oid *attributeSet,
695                            zebra_map_t zm, int space_split, 
696                            WRBUF display_term,
697                            int *error_code)
698 {
699     AttrType relation;
700     int relation_value;
701     int i;
702     WRBUF term_component = wrbuf_alloc();
703
704     attr_init_APT(&relation, zapt, 2);
705     relation_value = attr_find(&relation, NULL);
706
707     *error_code = 0;
708     yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
709     switch (relation_value)
710     {
711     case 1:
712         if (!term_100(zm, term_sub, term_component, space_split, display_term))
713         {
714             wrbuf_destroy(term_component);
715             return 0;
716         }
717         yaz_log(log_level_rpn, "Relation <");
718         
719         wrbuf_putc(term_dict, '(');
720         for (i = 0; i < wrbuf_len(term_component); )
721         {
722             int j = 0;
723             
724             if (i)
725                 wrbuf_putc(term_dict, '|');
726             while (j < i)
727                 string_rel_add_char(term_dict, term_component, &j);
728
729             wrbuf_putc(term_dict, '[');
730
731             wrbuf_putc(term_dict, '^');
732             
733             wrbuf_putc(term_dict, 1);
734             wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
735             
736             string_rel_add_char(term_dict, term_component, &i);
737             wrbuf_putc(term_dict, '-');
738             
739             wrbuf_putc(term_dict, ']');
740             wrbuf_putc(term_dict, '.');
741             wrbuf_putc(term_dict, '*');
742         }
743         wrbuf_putc(term_dict, ')');
744         break;
745     case 2:
746         if (!term_100(zm, term_sub, term_component, space_split, display_term))
747         {
748             wrbuf_destroy(term_component);
749             return 0;
750         }
751         yaz_log(log_level_rpn, "Relation <=");
752
753         wrbuf_putc(term_dict, '(');
754         for (i = 0; i < wrbuf_len(term_component); )
755         {
756             int j = 0;
757
758             while (j < i)
759                 string_rel_add_char(term_dict, term_component, &j);
760             wrbuf_putc(term_dict, '[');
761
762             wrbuf_putc(term_dict, '^');
763
764             wrbuf_putc(term_dict, 1);
765             wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
766
767             string_rel_add_char(term_dict, term_component, &i);
768             wrbuf_putc(term_dict, '-');
769
770             wrbuf_putc(term_dict, ']');
771             wrbuf_putc(term_dict, '.');
772             wrbuf_putc(term_dict, '*');
773
774             wrbuf_putc(term_dict, '|');
775         }
776         for (i = 0; i < wrbuf_len(term_component); )
777             string_rel_add_char(term_dict, term_component, &i);
778         wrbuf_putc(term_dict, ')');
779         break;
780     case 5:
781         if (!term_100(zm, term_sub, term_component, space_split, display_term))
782         {
783             wrbuf_destroy(term_component);
784             return 0;
785         }
786         yaz_log(log_level_rpn, "Relation >");
787
788         wrbuf_putc(term_dict, '(');
789         for (i = 0; i < wrbuf_len(term_component); )
790         {
791             int j = 0;
792
793             while (j < i)
794                 string_rel_add_char(term_dict, term_component, &j);
795             wrbuf_putc(term_dict, '[');
796             
797             wrbuf_putc(term_dict, '^');
798             wrbuf_putc(term_dict, '-');
799             string_rel_add_char(term_dict, term_component, &i);
800
801             wrbuf_putc(term_dict, ']');
802             wrbuf_putc(term_dict, '.');
803             wrbuf_putc(term_dict, '*');
804
805             wrbuf_putc(term_dict, '|');
806         }
807         for (i = 0; i < wrbuf_len(term_component); )
808             string_rel_add_char(term_dict, term_component, &i);
809         wrbuf_putc(term_dict, '.');
810         wrbuf_putc(term_dict, '+');
811         wrbuf_putc(term_dict, ')');
812         break;
813     case 4:
814         if (!term_100(zm, term_sub, term_component, space_split, display_term))
815         {
816             wrbuf_destroy(term_component);
817             return 0;
818         }
819         yaz_log(log_level_rpn, "Relation >=");
820
821         wrbuf_putc(term_dict, '(');
822         for (i = 0; i < wrbuf_len(term_component); )
823         {
824             int j = 0;
825
826             if (i)
827                 wrbuf_putc(term_dict, '|');
828             while (j < i)
829                 string_rel_add_char(term_dict, term_component, &j);
830             wrbuf_putc(term_dict, '[');
831
832             if (i < wrbuf_len(term_component)-1)
833             {
834                 wrbuf_putc(term_dict, '^');
835                 wrbuf_putc(term_dict, '-');
836                 string_rel_add_char(term_dict, term_component, &i);
837             }
838             else
839             {
840                 string_rel_add_char(term_dict, term_component, &i);
841                 wrbuf_putc(term_dict, '-');
842             }
843             wrbuf_putc(term_dict, ']');
844             wrbuf_putc(term_dict, '.');
845             wrbuf_putc(term_dict, '*');
846         }
847         wrbuf_putc(term_dict, ')');
848         break;
849     case 3:
850     case 102:
851     case -1:
852         if (!**term_sub)
853             return 1;
854         yaz_log(log_level_rpn, "Relation =");
855         if (!term_100(zm, term_sub, term_component, space_split, display_term))
856         {
857             wrbuf_destroy(term_component);
858             return 0;
859         }
860         wrbuf_puts(term_dict, "(");
861         wrbuf_puts(term_dict, wrbuf_cstr(term_component));
862         wrbuf_puts(term_dict, ")");
863         break;
864     case 103:
865         yaz_log(log_level_rpn, "Relation always matches");
866         /* skip to end of term (we don't care what it is) */
867         while (**term_sub != '\0')
868             (*term_sub)++;
869         break;
870     default:
871         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
872         wrbuf_destroy(term_component);
873         return 0;
874     }
875     wrbuf_destroy(term_component);
876     return 1;
877 }
878
/* forward declaration; string_term is defined after search_term,
   which calls it */
static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
                             const char **term_sub,
                             WRBUF term_dict,
                             const Odr_oid *attributeSet, NMEM stream,
                             struct grep_info *grep_info,
                             const char *index_type, int complete_flag,
                             WRBUF display_term,
                             const char *xpath_use,
                             struct ord_list **ol,
                             zebra_map_t zm);
889
890 ZEBRA_RES zebra_term_limits_APT(ZebraHandle zh,
891                                 Z_AttributesPlusTerm *zapt,
892                                 zint *hits_limit_value,
893                                 const char **term_ref_id_str,
894                                 NMEM nmem)
895 {
896     AttrType term_ref_id_attr;
897     AttrType hits_limit_attr;
898     int term_ref_id_int;
899     zint hits_limit_from_attr;
900  
901     attr_init_APT(&hits_limit_attr, zapt, 11);
902     hits_limit_from_attr  = attr_find(&hits_limit_attr, NULL);
903
904     attr_init_APT(&term_ref_id_attr, zapt, 10);
905     term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
906     if (term_ref_id_int >= 0)
907     {
908         char *res = nmem_malloc(nmem, 20);
909         sprintf(res, "%d", term_ref_id_int);
910         *term_ref_id_str = res;
911     }
912     if (hits_limit_from_attr != -1)
913         *hits_limit_value = hits_limit_from_attr;
914
915     yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
916             *term_ref_id_str ? *term_ref_id_str : "none",
917             *hits_limit_value);
918     return ZEBRA_OK;
919 }
920
921 /** \brief search for term (which may be truncated)
922  */
923 static ZEBRA_RES search_term(ZebraHandle zh,
924                              Z_AttributesPlusTerm *zapt,
925                              const char **term_sub, 
926                              const Odr_oid *attributeSet,
927                              zint hits_limit, NMEM stream,
928                              struct grep_info *grep_info,
929                              const char *index_type, int complete_flag,
930                              const char *rank_type, 
931                              const char *xpath_use,
932                              NMEM rset_nmem,
933                              RSET *rset,
934                              struct rset_key_control *kc,
935                              zebra_map_t zm)
936 {
937     ZEBRA_RES res;
938     struct ord_list *ol;
939     zint hits_limit_value = hits_limit;
940     const char *term_ref_id_str = 0;
941     WRBUF term_dict = wrbuf_alloc();
942     WRBUF display_term = wrbuf_alloc();
943     *rset = 0;
944     zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
945                           stream);
946     grep_info->isam_p_indx = 0;
947     res = string_term(zh, zapt, term_sub, term_dict,
948                       attributeSet, stream, grep_info,
949                       index_type, complete_flag,
950                       display_term, xpath_use, &ol, zm);
951     wrbuf_destroy(term_dict);
952     if (res == ZEBRA_OK && *term_sub)
953     {
954         yaz_log(log_level_rpn, "term: %s", wrbuf_cstr(display_term));
955         *rset = rset_trunc(zh, grep_info->isam_p_buf,
956                            grep_info->isam_p_indx, wrbuf_buf(display_term),
957                            wrbuf_len(display_term), rank_type, 
958                            1 /* preserve pos */,
959                            zapt->term->which, rset_nmem,
960                            kc, kc->scope, ol, index_type, hits_limit_value,
961                            term_ref_id_str);
962         if (!*rset)
963             res = ZEBRA_FAIL;
964     }
965     wrbuf_destroy(display_term);
966     return res;
967 }
968
969 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
970                              const char **term_sub, 
971                              WRBUF term_dict,
972                              const Odr_oid *attributeSet, NMEM stream,
973                              struct grep_info *grep_info,
974                              const char *index_type, int complete_flag,
975                              WRBUF display_term,
976                              const char *xpath_use,
977                              struct ord_list **ol,
978                              zebra_map_t zm)
979 {
980     int r;
981     AttrType truncation;
982     int truncation_value;
983     const char *termp;
984     struct rpn_char_map_info rcmi;
985
986     int space_split = complete_flag ? 0 : 1;
987     int ord = -1;
988     int regex_range = 0;
989     int max_pos, prefix_len = 0;
990     int relation_error;
991     char ord_buf[32];
992     int ord_len, i;
993
994     *ol = ord_list_create(stream);
995
996     rpn_char_map_prepare(zh->reg, zm, &rcmi);
997     attr_init_APT(&truncation, zapt, 5);
998     truncation_value = attr_find(&truncation, NULL);
999     yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1000
1001     termp = *term_sub; /* start of term for each database */
1002     
1003     if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1004                           attributeSet, &ord) != ZEBRA_OK)
1005     {
1006         *term_sub = 0;
1007         return ZEBRA_FAIL;
1008     }
1009     
1010     wrbuf_rewind(term_dict); /* new dictionary regexp term */
1011     
1012     *ol = ord_list_append(stream, *ol, ord);
1013     ord_len = key_SU_encode(ord, ord_buf);
1014     
1015     wrbuf_putc(term_dict, '(');
1016     
1017     for (i = 0; i<ord_len; i++)
1018     {
1019         wrbuf_putc(term_dict, 1);  /* our internal regexp escape char */
1020         wrbuf_putc(term_dict, ord_buf[i]);
1021     }
1022     wrbuf_putc(term_dict, ')');
1023     
1024     prefix_len = wrbuf_len(term_dict);
1025
1026     if (zebra_maps_is_icu(zm))
1027     {
1028         int relation_value;
1029         AttrType relation;
1030         
1031         attr_init_APT(&relation, zapt, 2);
1032         relation_value = attr_find(&relation, NULL);
1033         if (relation_value == 103) /* always matches */
1034             termp += strlen(termp); /* move to end of term */
1035         else if (relation_value == 3 || relation_value == 102 || relation_value == -1)
1036         {
1037             /* ICU case */
1038             switch (truncation_value)
1039             {
1040             case -1:         /* not specified */
1041             case 100:        /* do not truncate */
1042                 if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 0))
1043                 {
1044                     *term_sub = 0;
1045                     return ZEBRA_OK;
1046                 }
1047                 break;
1048             case 1:          /* right truncation */
1049                 if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 1))
1050                 {
1051                     *term_sub = 0;
1052                     return ZEBRA_OK;
1053                 }
1054                 break;
1055             default:
1056                 zebra_setError_zint(zh,
1057                                     YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1058                                     truncation_value);
1059                 return ZEBRA_FAIL;
1060             }
1061         }
1062         else
1063         {
1064             zebra_setError_zint(zh,
1065                                 YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE,
1066                                 relation_value);
1067             return ZEBRA_FAIL;
1068         }
1069     }
1070     else
1071     {
1072         /* non-ICU case. using string.chr and friends */
1073         switch (truncation_value)
1074         {
1075         case -1:         /* not specified */
1076         case 100:        /* do not truncate */
1077             if (!string_relation(zh, zapt, &termp, term_dict,
1078                                  attributeSet,
1079                                  zm, space_split, display_term,
1080                                  &relation_error))
1081             {
1082                 if (relation_error)
1083                 {
1084                     zebra_setError(zh, relation_error, 0);
1085                     return ZEBRA_FAIL;
1086                 }
1087                 *term_sub = 0;
1088                 return ZEBRA_OK;
1089             }
1090             break;
1091         case 1:          /* right truncation */
1092             wrbuf_putc(term_dict, '(');
1093             if (!term_100(zm, &termp, term_dict, space_split, display_term))
1094             {
1095                 *term_sub = 0;
1096                 return ZEBRA_OK;
1097             }
1098             wrbuf_puts(term_dict, ".*)");
1099             break;
1100         case 2:          /* left truncation */
1101             wrbuf_puts(term_dict, "(.*");
1102             if (!term_100(zm, &termp, term_dict, space_split, display_term))
1103             {
1104                 *term_sub = 0;
1105                 return ZEBRA_OK;
1106             }
1107             wrbuf_putc(term_dict, ')');
1108             break;
1109         case 3:          /* left&right truncation */
1110             wrbuf_puts(term_dict, "(.*");
1111             if (!term_100(zm, &termp, term_dict, space_split, display_term))
1112             {
1113                 *term_sub = 0;
1114                 return ZEBRA_OK;
1115             }
1116             wrbuf_puts(term_dict, ".*)");
1117             break;
1118         case 101:        /* process # in term */
1119             wrbuf_putc(term_dict, '(');
1120             if (!term_101(zm, &termp, term_dict, space_split, display_term))
1121             {
1122                 *term_sub = 0;
1123                 return ZEBRA_OK;
1124             }
1125             wrbuf_puts(term_dict, ")");
1126             break;
1127         case 102:        /* Regexp-1 */
1128             wrbuf_putc(term_dict, '(');
1129             if (!term_102(zm, &termp, term_dict, space_split, display_term))
1130             {
1131                 *term_sub = 0;
1132                 return ZEBRA_OK;
1133             }
1134             wrbuf_putc(term_dict, ')');
1135             break;
1136         case 103:       /* Regexp-2 */
1137             regex_range = 1;
1138             wrbuf_putc(term_dict, '(');
1139             if (!term_103(zm, &termp, term_dict, &regex_range,
1140                           space_split, display_term))
1141             {
1142                 *term_sub = 0;
1143                 return ZEBRA_OK;
1144             }
1145             wrbuf_putc(term_dict, ')');
1146             break;
1147         case 104:        /* process # and ! in term */
1148             wrbuf_putc(term_dict, '(');
1149             if (!term_104(zm, &termp, term_dict, space_split, display_term))
1150             {
1151                 *term_sub = 0;
1152                 return ZEBRA_OK;
1153             }
1154             wrbuf_putc(term_dict, ')');
1155             break;
1156         case 105:        /* process * and ! in term */
1157             wrbuf_putc(term_dict, '(');
1158             if (!term_105(zm, &termp, term_dict, space_split, display_term, 1))
1159             {
1160                 *term_sub = 0;
1161                 return ZEBRA_OK;
1162             }
1163             wrbuf_putc(term_dict, ')');
1164             break;
1165         case 106:        /* process * and ! in term */
1166             wrbuf_putc(term_dict, '(');
1167             if (!term_105(zm, &termp, term_dict, space_split, display_term, 0))
1168             {
1169                 *term_sub = 0;
1170                 return ZEBRA_OK;
1171             }
1172             wrbuf_putc(term_dict, ')');
1173             break;
1174         default:
1175             zebra_setError_zint(zh,
1176                                 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1177                                 truncation_value);
1178             return ZEBRA_FAIL;
1179         }
1180     }
1181     if (1)
1182     {
1183         char buf[1000];
1184         const char *input = wrbuf_cstr(term_dict) + prefix_len;
1185         esc_str(buf, sizeof(buf), input, strlen(input));
1186     }
1187     {
1188         WRBUF pr_wr = wrbuf_alloc();
1189
1190         wrbuf_write_escaped(pr_wr, wrbuf_buf(term_dict), wrbuf_len(term_dict));
1191         yaz_log(YLOG_LOG, "dict_lookup_grep: %s", wrbuf_cstr(pr_wr));
1192         wrbuf_destroy(pr_wr);
1193     }
1194     r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), regex_range,
1195                          grep_info, &max_pos, 
1196                          ord_len /* number of "exact" chars */,
1197                          grep_handle);
1198     if (r == 1)
1199         zebra_set_partial_result(zh);
1200     else if (r)
1201         yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1202     *term_sub = termp;
1203     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1204     return ZEBRA_OK;
1205 }
1206
1207
1208
1209 static void grep_info_delete(struct grep_info *grep_info)
1210 {
1211 #ifdef TERM_COUNT
1212     xfree(grep_info->term_no);
1213 #endif
1214     xfree(grep_info->isam_p_buf);
1215 }
1216
1217 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1218                                    Z_AttributesPlusTerm *zapt,
1219                                    struct grep_info *grep_info,
1220                                    const char *index_type)
1221 {
1222 #ifdef TERM_COUNT
1223     grep_info->term_no = 0;
1224 #endif
1225     grep_info->trunc_max = atoi(res_get_def(zh->res, "truncmax", "10000"));
1226     grep_info->isam_p_size = 0;
1227     grep_info->isam_p_buf = NULL;
1228     grep_info->zh = zh;
1229     grep_info->index_type = index_type;
1230     grep_info->termset = 0;
1231     if (zapt)
1232     {
1233         AttrType truncmax;
1234         int truncmax_value;
1235
1236         attr_init_APT(&truncmax, zapt, 13);
1237         truncmax_value = attr_find(&truncmax, NULL);
1238         if (truncmax_value != -1)
1239             grep_info->trunc_max = truncmax_value;
1240     }
1241     if (zapt)
1242     {
1243         AttrType termset;
1244         int termset_value_numeric;
1245         const char *termset_value_string;
1246
1247         attr_init_APT(&termset, zapt, 8);
1248         termset_value_numeric =
1249             attr_find_ex(&termset, NULL, &termset_value_string);
1250         if (termset_value_numeric != -1)
1251         {
1252 #if TERMSET_DISABLE
1253             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1254             return ZEBRA_FAIL;
1255 #else
1256             char resname[32];
1257             const char *termset_name = 0;
1258             if (termset_value_numeric != -2)
1259             {
1260                 
1261                 sprintf(resname, "%d", termset_value_numeric);
1262                 termset_name = resname;
1263             }
1264             else
1265                 termset_name = termset_value_string;
1266             yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1267             grep_info->termset = resultSetAdd(zh, termset_name, 1);
1268             if (!grep_info->termset)
1269             {
1270                 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1271                 return ZEBRA_FAIL;
1272             }
1273 #endif
1274         }
1275     }
1276     return ZEBRA_OK;
1277 }
1278
1279 static ZEBRA_RES search_terms_chrmap(ZebraHandle zh,
1280                                      Z_AttributesPlusTerm *zapt,
1281                                      const char *termz,
1282                                      const Odr_oid *attributeSet,
1283                                      zint hits_limit,
1284                                      NMEM stream,
1285                                      const char *index_type, int complete_flag,
1286                                      const char *rank_type,
1287                                      const char *xpath_use,
1288                                      NMEM rset_nmem,
1289                                      RSET **result_sets, int *num_result_sets,
1290                                      struct rset_key_control *kc,
1291                                      zebra_map_t zm)
1292 {
1293     struct grep_info grep_info;
1294     const char *termp = termz;
1295     int alloc_sets = 0;
1296     
1297     *num_result_sets = 0;
1298     if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1299         return ZEBRA_FAIL;
1300     while (1)
1301     { 
1302         ZEBRA_RES res;
1303
1304         if (alloc_sets == *num_result_sets)
1305         {
1306             int add = 10;
1307             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1308                                               sizeof(*rnew));
1309             if (alloc_sets)
1310                 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1311             alloc_sets = alloc_sets + add;
1312             *result_sets = rnew;
1313         }
1314         res = search_term(zh, zapt, &termp, attributeSet, hits_limit,
1315                           stream, &grep_info,
1316                           index_type, complete_flag,
1317                           rank_type,
1318                           xpath_use, rset_nmem,
1319                           &(*result_sets)[*num_result_sets],
1320                           kc, zm);
1321         if (res != ZEBRA_OK)
1322         {
1323             int i;
1324             for (i = 0; i < *num_result_sets; i++)
1325                 rset_delete((*result_sets)[i]);
1326             grep_info_delete(&grep_info);
1327             return res;
1328         }
1329         if ((*result_sets)[*num_result_sets] == 0)
1330             break;
1331         (*num_result_sets)++;
1332
1333         if (!*termp)
1334             break;
1335     }
1336     grep_info_delete(&grep_info);
1337     return ZEBRA_OK;
1338 }
1339                                
1340 /**
1341    \brief Create result set(s) for list of terms
1342    \param zh Zebra Handle
1343    \param zapt Attributes Plust Term (RPN leaf)
1344    \param termz term as used in query but converted to UTF-8
1345    \param attributeSet default attribute set
1346    \param stream memory for result
1347    \param index_type register type ("w", "p",..)
1348    \param complete_flag whether it's phrases or not
1349    \param rank_type term flags for ranking
1350    \param xpath_use use attribute for X-Path (-1 for no X-path)
1351    \param rset_nmem memory for result sets
1352    \param result_sets output result set for each term in list (output)
1353    \param num_result_sets number of output result sets
1354    \param kc rset key control to be used for created result sets
1355 */
1356 static ZEBRA_RES search_terms_list(ZebraHandle zh,
1357                                    Z_AttributesPlusTerm *zapt,
1358                                    const char *termz,
1359                                    const Odr_oid *attributeSet,
1360                                    zint hits_limit,
1361                                    NMEM stream,
1362                                    const char *index_type, int complete_flag,
1363                                    const char *rank_type,
1364                                    const char *xpath_use,
1365                                    NMEM rset_nmem,
1366                                    RSET **result_sets, int *num_result_sets,
1367                                    struct rset_key_control *kc)
1368 {
1369     zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1370     if (zebra_maps_is_icu(zm))
1371         zebra_map_tokenize_start(zm, termz, strlen(termz));
1372     return search_terms_chrmap(zh, zapt, termz, attributeSet, hits_limit,
1373                                stream, index_type, complete_flag,
1374                                rank_type, xpath_use,
1375                                rset_nmem, result_sets, num_result_sets,
1376                                kc, zm);
1377 }
1378
1379
1380 /** \brief limit a search by position - returns result set
1381  */
1382 static ZEBRA_RES search_position(ZebraHandle zh,
1383                                  Z_AttributesPlusTerm *zapt,
1384                                  const Odr_oid *attributeSet,
1385                                  const char *index_type,
1386                                  NMEM rset_nmem,
1387                                  RSET *rset,
1388                                  struct rset_key_control *kc)
1389 {
1390     int position_value;
1391     AttrType position;
1392     int ord = -1;
1393     char ord_buf[32];
1394     char term_dict[100];
1395     int ord_len;
1396     char *val;
1397     ISAM_P isam_p;
1398     zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1399     
1400     attr_init_APT(&position, zapt, 3);
1401     position_value = attr_find(&position, NULL);
1402     switch(position_value)
1403     {
1404     case 3:
1405     case -1:
1406         return ZEBRA_OK;
1407     case 1:
1408     case 2:
1409         break;
1410     default:
1411         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1412                             position_value);
1413         return ZEBRA_FAIL;
1414     }
1415
1416
1417     if (!zebra_maps_is_first_in_field(zm))
1418     {
1419         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1420                             position_value);
1421         return ZEBRA_FAIL;
1422     }
1423
1424     if (zebra_apt_get_ord(zh, zapt, index_type, 0,
1425                           attributeSet, &ord) != ZEBRA_OK)
1426     {
1427         return ZEBRA_FAIL;
1428     }
1429     ord_len = key_SU_encode(ord, ord_buf);
1430     memcpy(term_dict, ord_buf, ord_len);
1431     strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1432     val = dict_lookup(zh->reg->dict, term_dict);
1433     if (val)
1434     {
1435         assert(*val == sizeof(ISAM_P));
1436         memcpy(&isam_p, val+1, sizeof(isam_p));
1437
1438         *rset = zebra_create_rset_isam(zh, rset_nmem, kc, kc->scope, 
1439                                        isam_p, 0);
1440     }
1441     return ZEBRA_OK;
1442 }
1443
1444 /** \brief returns result set for phrase search
1445  */
1446 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1447                                        Z_AttributesPlusTerm *zapt,
1448                                        const char *termz_org,
1449                                        const Odr_oid *attributeSet,
1450                                        zint hits_limit,
1451                                        NMEM stream,
1452                                        const char *index_type,
1453                                        int complete_flag,
1454                                        const char *rank_type,
1455                                        const char *xpath_use,
1456                                        NMEM rset_nmem,
1457                                        RSET *rset,
1458                                        struct rset_key_control *kc)
1459 {
1460     RSET *result_sets = 0;
1461     int num_result_sets = 0;
1462     ZEBRA_RES res =
1463         search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1464                           stream, index_type, complete_flag,
1465                           rank_type, xpath_use,
1466                           rset_nmem,
1467                           &result_sets, &num_result_sets, kc);
1468     
1469     if (res != ZEBRA_OK)
1470         return res;
1471
1472     if (num_result_sets > 0)
1473     {
1474         RSET first_set = 0;
1475         res = search_position(zh, zapt, attributeSet, 
1476                               index_type,
1477                               rset_nmem, &first_set,
1478                               kc);
1479         if (res != ZEBRA_OK)
1480         {
1481             int i;
1482             for (i = 0; i<num_result_sets; i++)
1483                 rset_delete(result_sets[i]);
1484             return res;
1485         }
1486         if (first_set)
1487         {
1488             RSET *nsets = nmem_malloc(stream,
1489                                       sizeof(RSET) * (num_result_sets+1));
1490             nsets[0] = first_set;
1491             memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1492             result_sets = nsets;
1493             num_result_sets++;
1494         }
1495     }
1496     if (num_result_sets == 0)
1497         *rset = rset_create_null(rset_nmem, kc, 0); 
1498     else if (num_result_sets == 1)
1499         *rset = result_sets[0];
1500     else
1501         *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1502                                  num_result_sets, result_sets,
1503                                  1 /* ordered */, 0 /* exclusion */,
1504                                  3 /* relation */, 1 /* distance */);
1505     if (!*rset)
1506         return ZEBRA_FAIL;
1507     return ZEBRA_OK;
1508 }
1509
1510 /** \brief returns result set for or-list search
1511  */
1512 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1513                                         Z_AttributesPlusTerm *zapt,
1514                                         const char *termz_org,
1515                                         const Odr_oid *attributeSet,
1516                                         zint hits_limit,
1517                                         NMEM stream,
1518                                         const char *index_type, 
1519                                         int complete_flag,
1520                                         const char *rank_type,
1521                                         const char *xpath_use,
1522                                         NMEM rset_nmem,
1523                                         RSET *rset,
1524                                         struct rset_key_control *kc)
1525 {
1526     RSET *result_sets = 0;
1527     int num_result_sets = 0;
1528     int i;
1529     ZEBRA_RES res =
1530         search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1531                           stream, index_type, complete_flag,
1532                           rank_type, xpath_use,
1533                           rset_nmem,
1534                           &result_sets, &num_result_sets, kc);
1535     if (res != ZEBRA_OK)
1536         return res;
1537
1538     for (i = 0; i<num_result_sets; i++)
1539     {
1540         RSET first_set = 0;
1541         res = search_position(zh, zapt, attributeSet, 
1542                               index_type,
1543                               rset_nmem, &first_set,
1544                               kc);
1545         if (res != ZEBRA_OK)
1546         {
1547             for (i = 0; i<num_result_sets; i++)
1548                 rset_delete(result_sets[i]);
1549             return res;
1550         }
1551
1552         if (first_set)
1553         {
1554             RSET tmp_set[2];
1555
1556             tmp_set[0] = first_set;
1557             tmp_set[1] = result_sets[i];
1558             
1559             result_sets[i] = rset_create_prox(
1560                 rset_nmem, kc, kc->scope,
1561                 2, tmp_set,
1562                 1 /* ordered */, 0 /* exclusion */,
1563                 3 /* relation */, 1 /* distance */);
1564         }
1565     }
1566     if (num_result_sets == 0)
1567         *rset = rset_create_null(rset_nmem, kc, 0); 
1568     else if (num_result_sets == 1)
1569         *rset = result_sets[0];
1570     else
1571         *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1572                                num_result_sets, result_sets);
1573     if (!*rset)
1574         return ZEBRA_FAIL;
1575     return ZEBRA_OK;
1576 }
1577
1578 /** \brief returns result set for and-list search
1579  */
1580 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1581                                          Z_AttributesPlusTerm *zapt,
1582                                          const char *termz_org,
1583                                          const Odr_oid *attributeSet,
1584                                          zint hits_limit,
1585                                          NMEM stream,
1586                                          const char *index_type, 
1587                                          int complete_flag,
1588                                          const char *rank_type, 
1589                                          const char *xpath_use,
1590                                          NMEM rset_nmem,
1591                                          RSET *rset,
1592                                          struct rset_key_control *kc)
1593 {
1594     RSET *result_sets = 0;
1595     int num_result_sets = 0;
1596     int i;
1597     ZEBRA_RES res =
1598         search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1599                           stream, index_type, complete_flag,
1600                           rank_type, xpath_use,
1601                           rset_nmem,
1602                           &result_sets, &num_result_sets,
1603                           kc);
1604     if (res != ZEBRA_OK)
1605         return res;
1606     for (i = 0; i<num_result_sets; i++)
1607     {
1608         RSET first_set = 0;
1609         res = search_position(zh, zapt, attributeSet, 
1610                               index_type,
1611                               rset_nmem, &first_set,
1612                               kc);
1613         if (res != ZEBRA_OK)
1614         {
1615             for (i = 0; i<num_result_sets; i++)
1616                 rset_delete(result_sets[i]);
1617             return res;
1618         }
1619
1620         if (first_set)
1621         {
1622             RSET tmp_set[2];
1623
1624             tmp_set[0] = first_set;
1625             tmp_set[1] = result_sets[i];
1626             
1627             result_sets[i] = rset_create_prox(
1628                 rset_nmem, kc, kc->scope,
1629                 2, tmp_set,
1630                 1 /* ordered */, 0 /* exclusion */,
1631                 3 /* relation */, 1 /* distance */);
1632         }
1633     }
1634
1635
1636     if (num_result_sets == 0)
1637         *rset = rset_create_null(rset_nmem, kc, 0); 
1638     else if (num_result_sets == 1)
1639         *rset = result_sets[0];
1640     else
1641         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1642                                 num_result_sets, result_sets);
1643     if (!*rset)
1644         return ZEBRA_FAIL;
1645     return ZEBRA_OK;
1646 }
1647
1648 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1649                             const char **term_sub,
1650                             WRBUF term_dict,
1651                             const Odr_oid *attributeSet,
1652                             struct grep_info *grep_info,
1653                             int *max_pos,
1654                             zebra_map_t zm,
1655                             WRBUF display_term,
1656                             int *error_code)
1657 {
1658     AttrType relation;
1659     int relation_value;
1660     int term_value;
1661     int r;
1662     WRBUF term_num = wrbuf_alloc();
1663
1664     *error_code = 0;
1665     attr_init_APT(&relation, zapt, 2);
1666     relation_value = attr_find(&relation, NULL);
1667
1668     yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1669
1670     switch (relation_value)
1671     {
1672     case 1:
1673         yaz_log(log_level_rpn, "Relation <");
1674         if (!term_100(zm, term_sub, term_num, 1, display_term))
1675         { 
1676             wrbuf_destroy(term_num);
1677             return 0;
1678         }
1679         term_value = atoi(wrbuf_cstr(term_num));
1680         gen_regular_rel(term_dict, term_value-1, 1);
1681         break;
1682     case 2:
1683         yaz_log(log_level_rpn, "Relation <=");
1684         if (!term_100(zm, term_sub, term_num, 1, display_term))
1685         {
1686             wrbuf_destroy(term_num);
1687             return 0;
1688         }
1689         term_value = atoi(wrbuf_cstr(term_num));
1690         gen_regular_rel(term_dict, term_value, 1);
1691         break;
1692     case 4:
1693         yaz_log(log_level_rpn, "Relation >=");
1694         if (!term_100(zm, term_sub, term_num, 1, display_term))
1695         {
1696             wrbuf_destroy(term_num);
1697             return 0;
1698         }
1699         term_value = atoi(wrbuf_cstr(term_num));
1700         gen_regular_rel(term_dict, term_value, 0);
1701         break;
1702     case 5:
1703         yaz_log(log_level_rpn, "Relation >");
1704         if (!term_100(zm, term_sub, term_num, 1, display_term))
1705         {
1706             wrbuf_destroy(term_num);
1707             return 0;
1708         }
1709         term_value = atoi(wrbuf_cstr(term_num));
1710         gen_regular_rel(term_dict, term_value+1, 0);
1711         break;
1712     case -1:
1713     case 3:
1714         yaz_log(log_level_rpn, "Relation =");
1715         if (!term_100(zm, term_sub, term_num, 1, display_term))
1716         {
1717             wrbuf_destroy(term_num);
1718             return 0; 
1719         }
1720         term_value = atoi(wrbuf_cstr(term_num));
1721         wrbuf_printf(term_dict, "(0*%d)", term_value);
1722         break;
1723     case 103:
1724         /* term_tmp untouched.. */
1725         while (**term_sub != '\0')
1726             (*term_sub)++;
1727         break;
1728     default:
1729         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1730         wrbuf_destroy(term_num); 
1731         return 0;
1732     }
1733     r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 
1734                          0, grep_info, max_pos, 0, grep_handle);
1735
1736     if (r == 1)
1737         zebra_set_partial_result(zh);
1738     else if (r)
1739         yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1740     yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1741     wrbuf_destroy(term_num);
1742     return 1;
1743 }
1744
1745 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1746                               const char **term_sub, 
1747                               WRBUF term_dict,
1748                               const Odr_oid *attributeSet, NMEM stream,
1749                               struct grep_info *grep_info,
1750                               const char *index_type, int complete_flag,
1751                               WRBUF display_term,
1752                               const char *xpath_use,
1753                               struct ord_list **ol)
1754 {
1755     const char *termp;
1756     struct rpn_char_map_info rcmi;
1757     int max_pos;
1758     int relation_error = 0;
1759     int ord, ord_len, i;
1760     char ord_buf[32];
1761     zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1762     
1763     *ol = ord_list_create(stream);
1764
1765     rpn_char_map_prepare(zh->reg, zm, &rcmi);
1766
1767     termp = *term_sub;
1768     
1769     if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1770                           attributeSet, &ord) != ZEBRA_OK)
1771     {
1772         return ZEBRA_FAIL;
1773     }
1774     
1775     wrbuf_rewind(term_dict);
1776     
1777     *ol = ord_list_append(stream, *ol, ord);
1778     
1779     ord_len = key_SU_encode(ord, ord_buf);
1780     
1781     wrbuf_putc(term_dict, '(');
1782     for (i = 0; i < ord_len; i++)
1783     {
1784         wrbuf_putc(term_dict, 1);
1785         wrbuf_putc(term_dict, ord_buf[i]);
1786     }
1787     wrbuf_putc(term_dict, ')');
1788     
1789     if (!numeric_relation(zh, zapt, &termp, term_dict,
1790                           attributeSet, grep_info, &max_pos, zm,
1791                           display_term, &relation_error))
1792     {
1793         if (relation_error)
1794         {
1795             zebra_setError(zh, relation_error, 0);
1796             return ZEBRA_FAIL;
1797         }
1798         *term_sub = 0;
1799         return ZEBRA_OK;
1800     }
1801     *term_sub = termp;
1802     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1803     return ZEBRA_OK;
1804 }
1805
1806                                  
1807 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1808                                         Z_AttributesPlusTerm *zapt,
1809                                         const char *termz,
1810                                         const Odr_oid *attributeSet,
1811                                         NMEM stream,
1812                                         const char *index_type, 
1813                                         int complete_flag,
1814                                         const char *rank_type, 
1815                                         const char *xpath_use,
1816                                         NMEM rset_nmem,
1817                                         RSET *rset,
1818                                         struct rset_key_control *kc)
1819 {
1820     const char *termp = termz;
1821     RSET *result_sets = 0;
1822     int num_result_sets = 0;
1823     ZEBRA_RES res;
1824     struct grep_info grep_info;
1825     int alloc_sets = 0;
1826     zint hits_limit_value;
1827     const char *term_ref_id_str = 0;
1828
1829     zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
1830                           stream);
1831
1832     yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1833     if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1834         return ZEBRA_FAIL;
1835     while (1)
1836     { 
1837         struct ord_list *ol;
1838         WRBUF term_dict = wrbuf_alloc();
1839         WRBUF display_term = wrbuf_alloc();
1840         if (alloc_sets == num_result_sets)
1841         {
1842             int add = 10;
1843             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1844                                               sizeof(*rnew));
1845             if (alloc_sets)
1846                 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1847             alloc_sets = alloc_sets + add;
1848             result_sets = rnew;
1849         }
1850         yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1851         grep_info.isam_p_indx = 0;
1852         res = numeric_term(zh, zapt, &termp, term_dict,
1853                            attributeSet, stream, &grep_info,
1854                            index_type, complete_flag,
1855                            display_term, xpath_use, &ol);
1856         wrbuf_destroy(term_dict);
1857         if (res == ZEBRA_FAIL || termp == 0)
1858         {
1859             wrbuf_destroy(display_term);
1860             break;
1861         }
1862         yaz_log(YLOG_DEBUG, "term: %s", wrbuf_cstr(display_term));
1863         result_sets[num_result_sets] =
1864             rset_trunc(zh, grep_info.isam_p_buf,
1865                        grep_info.isam_p_indx, wrbuf_buf(display_term),
1866                        wrbuf_len(display_term), rank_type,
1867                        0 /* preserve position */,
1868                        zapt->term->which, rset_nmem, 
1869                        kc, kc->scope, ol, index_type,
1870                        hits_limit_value,
1871                        term_ref_id_str);
1872         wrbuf_destroy(display_term);
1873         if (!result_sets[num_result_sets])
1874             break;
1875         num_result_sets++;
1876         if (!*termp)
1877             break;
1878     }
1879     grep_info_delete(&grep_info);
1880
1881     if (res != ZEBRA_OK)
1882         return res;
1883     if (num_result_sets == 0)
1884         *rset = rset_create_null(rset_nmem, kc, 0);
1885     else if (num_result_sets == 1)
1886         *rset = result_sets[0];
1887     else
1888         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1889                                 num_result_sets, result_sets);
1890     if (!*rset)
1891         return ZEBRA_FAIL;
1892     return ZEBRA_OK;
1893 }
1894
1895 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1896                                       Z_AttributesPlusTerm *zapt,
1897                                       const char *termz,
1898                                       const Odr_oid *attributeSet,
1899                                       NMEM stream,
1900                                       const char *rank_type, NMEM rset_nmem,
1901                                       RSET *rset,
1902                                       struct rset_key_control *kc)
1903 {
1904     Record rec;
1905     zint sysno = atozint(termz);
1906     
1907     if (sysno <= 0)
1908         sysno = 0;
1909     rec = rec_get(zh->reg->records, sysno);
1910     if (!rec)
1911         sysno = 0;
1912
1913     rec_free(&rec);
1914
1915     if (sysno <= 0)
1916     {
1917         *rset = rset_create_null(rset_nmem, kc, 0);
1918     }
1919     else
1920     {
1921         RSFD rsfd;
1922         struct it_key key;
1923         *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1924                                  res_get(zh->res, "setTmpDir"), 0);
1925         rsfd = rset_open(*rset, RSETF_WRITE);
1926         
1927         key.mem[0] = sysno;
1928         key.mem[1] = 1;
1929         key.len = 2;
1930         rset_write(rsfd, &key);
1931         rset_close(rsfd);
1932     }
1933     return ZEBRA_OK;
1934 }
1935
1936 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1937                                const Odr_oid *attributeSet, NMEM stream,
1938                                Z_SortKeySpecList *sort_sequence,
1939                                const char *rank_type,
1940                                NMEM rset_nmem,
1941                                RSET *rset,
1942                                struct rset_key_control *kc)
1943 {
1944     int i;
1945     int sort_relation_value;
1946     AttrType sort_relation_type;
1947     Z_SortKeySpec *sks;
1948     Z_SortKey *sk;
1949     char termz[20];
1950     
1951     attr_init_APT(&sort_relation_type, zapt, 7);
1952     sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
1953
1954     if (!sort_sequence->specs)
1955     {
1956         sort_sequence->num_specs = 10;
1957         sort_sequence->specs = (Z_SortKeySpec **)
1958             nmem_malloc(stream, sort_sequence->num_specs *
1959                         sizeof(*sort_sequence->specs));
1960         for (i = 0; i<sort_sequence->num_specs; i++)
1961             sort_sequence->specs[i] = 0;
1962     }
1963     if (zapt->term->which != Z_Term_general)
1964         i = 0;
1965     else
1966         i = atoi_n((char *) zapt->term->u.general->buf,
1967                    zapt->term->u.general->len);
1968     if (i >= sort_sequence->num_specs)
1969         i = 0;
1970     sprintf(termz, "%d", i);
1971
1972     sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1973     sks->sortElement = (Z_SortElement *)
1974         nmem_malloc(stream, sizeof(*sks->sortElement));
1975     sks->sortElement->which = Z_SortElement_generic;
1976     sk = sks->sortElement->u.generic = (Z_SortKey *)
1977         nmem_malloc(stream, sizeof(*sk));
1978     sk->which = Z_SortKey_sortAttributes;
1979     sk->u.sortAttributes = (Z_SortAttributes *)
1980         nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
1981
1982     sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet);
1983     sk->u.sortAttributes->list = zapt->attributes;
1984
1985     sks->sortRelation = (Odr_int *)
1986         nmem_malloc(stream, sizeof(*sks->sortRelation));
1987     if (sort_relation_value == 1)
1988         *sks->sortRelation = Z_SortKeySpec_ascending;
1989     else if (sort_relation_value == 2)
1990         *sks->sortRelation = Z_SortKeySpec_descending;
1991     else 
1992         *sks->sortRelation = Z_SortKeySpec_ascending;
1993
1994     sks->caseSensitivity = (Odr_int *)
1995         nmem_malloc(stream, sizeof(*sks->caseSensitivity));
1996     *sks->caseSensitivity = 0;
1997
1998     sks->which = Z_SortKeySpec_null;
1999     sks->u.null = odr_nullval ();
2000     sort_sequence->specs[i] = sks;
2001     *rset = rset_create_null(rset_nmem, kc, 0);
2002     return ZEBRA_OK;
2003 }
2004
2005
2006 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2007                            const Odr_oid *attributeSet,
2008                            struct xpath_location_step *xpath, int max,
2009                            NMEM mem)
2010 {
2011     const Odr_oid *curAttributeSet = attributeSet;
2012     AttrType use;
2013     const char *use_string = 0;
2014     
2015     attr_init_APT(&use, zapt, 1);
2016     attr_find_ex(&use, &curAttributeSet, &use_string);
2017
2018     if (!use_string || *use_string != '/')
2019         return -1;
2020
2021     return zebra_parse_xpath_str(use_string, xpath, max, mem);
2022 }
2023  
2024                
2025
2026 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
2027                         const char *index_type, const char *term, 
2028                         const char *xpath_use,
2029                         NMEM rset_nmem,
2030                         struct rset_key_control *kc)
2031 {
2032     struct grep_info grep_info;
2033     int ord = zebraExplain_lookup_attr_str(zh->reg->zei, 
2034                                            zinfo_index_category_index,
2035                                            index_type, xpath_use);
2036     if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, "0") == ZEBRA_FAIL)
2037         return rset_create_null(rset_nmem, kc, 0);
2038     
2039     if (ord < 0)
2040         return rset_create_null(rset_nmem, kc, 0);
2041     else
2042     {
2043         int i, r, max_pos;
2044         char ord_buf[32];
2045         RSET rset;
2046         WRBUF term_dict = wrbuf_alloc();
2047         int ord_len = key_SU_encode(ord, ord_buf);
2048         int term_type = Z_Term_characterString;
2049         const char *flags = "void";
2050
2051         wrbuf_putc(term_dict, '(');
2052         for (i = 0; i<ord_len; i++)
2053         {
2054             wrbuf_putc(term_dict, 1);
2055             wrbuf_putc(term_dict, ord_buf[i]);
2056         }
2057         wrbuf_putc(term_dict, ')');
2058         wrbuf_puts(term_dict, term);
2059         
2060         grep_info.isam_p_indx = 0;
2061         r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 0,
2062                              &grep_info, &max_pos, 0, grep_handle);
2063         yaz_log(YLOG_DEBUG, "%s %d positions", term,
2064                 grep_info.isam_p_indx);
2065         rset = rset_trunc(zh, grep_info.isam_p_buf,
2066                           grep_info.isam_p_indx, term, strlen(term),
2067                           flags, 1, term_type, rset_nmem,
2068                           kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2069                           0 /* term_ref_id_str */);
2070         grep_info_delete(&grep_info);
2071         wrbuf_destroy(term_dict);
2072         return rset;
2073     }
2074 }
2075
2076 static
2077 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2078                            NMEM stream, const char *rank_type, RSET rset,
2079                            int xpath_len, struct xpath_location_step *xpath,
2080                            NMEM rset_nmem,
2081                            RSET *rset_out,
2082                            struct rset_key_control *kc)
2083 {
2084     int i;
2085     int always_matches = rset ? 0 : 1;
2086
2087     if (xpath_len < 0)
2088     {
2089         *rset_out = rset;
2090         return ZEBRA_OK;
2091     }
2092
2093     yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2094     for (i = 0; i<xpath_len; i++)
2095     {
2096         yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2097
2098     }
2099
2100     /*
2101     //a    ->    a/.*
2102     //a/b  ->    b/a/.*
2103     /a     ->    a/
2104     /a/b   ->    b/a/
2105
2106     /      ->    none
2107
2108     a[@attr = value]/b[@other = othervalue]
2109
2110     /e/@a val      range(e/,range(@a,freetext(w,1015,val),@a),e/)
2111     /a/b val       range(b/a/,freetext(w,1016,val),b/a/)
2112     /a/b/@c val    range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2113     /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2114     /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2115     /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
2116       
2117     */
2118
2119     dict_grep_cmap(zh->reg->dict, 0, 0);
2120     
2121     {
2122         int level = xpath_len;
2123         int first_path = 1;
2124         
2125         while (--level >= 0)
2126         {
2127             WRBUF xpath_rev = wrbuf_alloc();
2128             int i;
2129             RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
2130
2131             for (i = level; i >= 1; --i)
2132             {
2133                 const char *cp = xpath[i].part;
2134                 if (*cp)
2135                 {
2136                     for (; *cp; cp++)
2137                     {
2138                         if (*cp == '*')
2139                             wrbuf_puts(xpath_rev, "[^/]*");
2140                         else if (*cp == ' ')
2141                             wrbuf_puts(xpath_rev, "\001 ");
2142                         else
2143                             wrbuf_putc(xpath_rev, *cp);
2144
2145                         /* wrbuf_putc does not null-terminate , but
2146                            wrbuf_puts below ensures it does.. so xpath_rev
2147                            is OK iff length is > 0 */
2148                     }
2149                     wrbuf_puts(xpath_rev, "/");
2150                 }
2151                 else if (i == 1)  /* // case */
2152                     wrbuf_puts(xpath_rev, ".*");
2153             }
2154             if (xpath[level].predicate &&
2155                 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2156                 xpath[level].predicate->u.relation.name[0])
2157             {
2158                 WRBUF wbuf = wrbuf_alloc();
2159                 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2160                 if (xpath[level].predicate->u.relation.value)
2161                 {
2162                     const char *cp = xpath[level].predicate->u.relation.value;
2163                     wrbuf_putc(wbuf, '=');
2164                     
2165                     while (*cp)
2166                     {
2167                         if (strchr(REGEX_CHARS, *cp))
2168                             wrbuf_putc(wbuf, '\\');
2169                         wrbuf_putc(wbuf, *cp);
2170                         cp++;
2171                     }
2172                 }
2173                 rset_attr = xpath_trunc(
2174                     zh, stream, "0", wrbuf_cstr(wbuf), 
2175                     ZEBRA_XPATH_ATTR_NAME, 
2176                     rset_nmem, kc);
2177                 wrbuf_destroy(wbuf);
2178             } 
2179             else 
2180             {
2181                 if (!first_path)
2182                 {
2183                     wrbuf_destroy(xpath_rev);
2184                     continue;
2185                 }
2186             }
2187             yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level, 
2188                     wrbuf_cstr(xpath_rev));
2189             if (wrbuf_len(xpath_rev))
2190             {
2191                 rset_start_tag = xpath_trunc(zh, stream, "0", 
2192                                              wrbuf_cstr(xpath_rev),
2193                                              ZEBRA_XPATH_ELM_BEGIN, 
2194                                              rset_nmem, kc);
2195                 if (always_matches)
2196                     rset = rset_start_tag;
2197                 else
2198                 {
2199                     rset_end_tag = xpath_trunc(zh, stream, "0", 
2200                                                wrbuf_cstr(xpath_rev),
2201                                                ZEBRA_XPATH_ELM_END, 
2202                                                rset_nmem, kc);
2203                     
2204                     rset = rset_create_between(rset_nmem, kc, kc->scope,
2205                                                rset_start_tag, rset,
2206                                                rset_end_tag, rset_attr);
2207                 }
2208             }
2209             wrbuf_destroy(xpath_rev);
2210             first_path = 0;
2211         }
2212     }
2213     *rset_out = rset;
2214     return ZEBRA_OK;
2215 }
2216
2217 #define MAX_XPATH_STEPS 10
2218
2219 static ZEBRA_RES rpn_search_database(ZebraHandle zh, 
2220                                      Z_AttributesPlusTerm *zapt,
2221                                      const Odr_oid *attributeSet,
2222                                      zint hits_limit, NMEM stream,
2223                                      Z_SortKeySpecList *sort_sequence,
2224                                      NMEM rset_nmem,
2225                                      RSET *rset,
2226                                      struct rset_key_control *kc);
2227
2228 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2229                                 const Odr_oid *attributeSet,
2230                                 zint hits_limit, NMEM stream,
2231                                 Z_SortKeySpecList *sort_sequence,
2232                                 int num_bases, const char **basenames, 
2233                                 NMEM rset_nmem,
2234                                 RSET *rset,
2235                                 struct rset_key_control *kc)
2236 {
2237     RSET *rsets = nmem_malloc(stream, num_bases * sizeof(*rsets));
2238     ZEBRA_RES res = ZEBRA_OK;
2239     int i;
2240     for (i = 0; i < num_bases; i++)
2241     {
2242
2243         if (zebraExplain_curDatabase(zh->reg->zei, basenames[i]))
2244         {
2245             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2246                            basenames[i]);
2247             res = ZEBRA_FAIL;
2248             break;
2249         }
2250         res = rpn_search_database(zh, zapt, attributeSet, hits_limit, stream,
2251                                   sort_sequence,
2252                                   rset_nmem, rsets+i, kc);
2253         if (res != ZEBRA_OK)
2254             break;
2255     }
2256     if (res != ZEBRA_OK)
2257     {   /* must clean up the already created sets */
2258         while (--i >= 0)
2259             rset_delete(rsets[i]);
2260         *rset = 0;
2261     }
2262     else 
2263     {
2264         if (num_bases == 1)
2265             *rset = rsets[0];
2266         else if (num_bases == 0)
2267             *rset = rset_create_null(rset_nmem, kc, 0); 
2268         else
2269             *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* TERMID */,
2270                                    num_bases, rsets);
2271     }
2272     return res;
2273 }
2274
2275 static ZEBRA_RES rpn_search_database(ZebraHandle zh, 
2276                                      Z_AttributesPlusTerm *zapt,
2277                                      const Odr_oid *attributeSet,
2278                                      zint hits_limit, NMEM stream,
2279                                      Z_SortKeySpecList *sort_sequence,
2280                                      NMEM rset_nmem,
2281                                      RSET *rset,
2282                                      struct rset_key_control *kc)
2283 {
2284     ZEBRA_RES res = ZEBRA_OK;
2285     const char *index_type;
2286     char *search_type = NULL;
2287     char rank_type[128];
2288     int complete_flag;
2289     int sort_flag;
2290     char termz[IT_MAX_WORD+1];
2291     int xpath_len;
2292     const char *xpath_use = 0;
2293     struct xpath_location_step xpath[MAX_XPATH_STEPS];
2294
2295     if (!log_level_set)
2296     {
2297         log_level_rpn = yaz_log_module_level("rpn");
2298         log_level_set = 1;
2299     }
2300     zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type,
2301                     rank_type, &complete_flag, &sort_flag);
2302     
2303     yaz_log(YLOG_DEBUG, "index_type=%s", index_type);
2304     yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2305     yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2306     yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2307
2308     if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2309         return ZEBRA_FAIL;
2310
2311     if (sort_flag)
2312         return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2313                              rank_type, rset_nmem, rset, kc);
2314     /* consider if an X-Path query is used */
2315     xpath_len = rpn_check_xpath(zh, zapt, attributeSet, 
2316                                 xpath, MAX_XPATH_STEPS, stream);
2317     if (xpath_len >= 0)
2318     {
2319         if (xpath[xpath_len-1].part[0] == '@') 
2320             xpath_use = ZEBRA_XPATH_ATTR_CDATA;  /* last step is attribute  */
2321         else
2322             xpath_use = ZEBRA_XPATH_CDATA;  /* searching for cdata */        
2323
2324         if (1)
2325         {
2326             AttrType relation;
2327             int relation_value;
2328
2329             attr_init_APT(&relation, zapt, 2);
2330             relation_value = attr_find(&relation, NULL);
2331
2332             if (relation_value == 103) /* alwaysmatches */
2333             {
2334                 *rset = 0; /* signal no "term" set */
2335                 return rpn_search_xpath(zh, stream, rank_type, *rset, 
2336                                         xpath_len, xpath, rset_nmem, rset, kc);
2337             }
2338         }
2339     }
2340
2341     /* search using one of the various search type strategies
2342        termz is our UTF-8 search term
2343        attributeSet is top-level default attribute set 
2344        stream is ODR for search
2345        reg_id is the register type
2346        complete_flag is 1 for complete subfield, 0 for incomplete
2347        xpath_use is use-attribute to be used for X-Path search, 0 for none
2348     */
2349     if (!strcmp(search_type, "phrase"))
2350     {
2351         res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, hits_limit,
2352                                     stream,
2353                                     index_type, complete_flag, rank_type,
2354                                     xpath_use,
2355                                     rset_nmem,
2356                                     rset, kc);
2357     }
2358     else if (!strcmp(search_type, "and-list"))
2359     {
2360         res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, hits_limit,
2361                                       stream,
2362                                       index_type, complete_flag, rank_type,
2363                                       xpath_use,
2364                                       rset_nmem,
2365                                       rset, kc);
2366     }
2367     else if (!strcmp(search_type, "or-list"))
2368     {
2369         res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, hits_limit,
2370                                      stream,
2371                                      index_type, complete_flag, rank_type,
2372                                      xpath_use,
2373                                      rset_nmem,
2374                                      rset, kc);
2375     }
2376     else if (!strcmp(search_type, "local"))
2377     {
2378         res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2379                                    rank_type, rset_nmem, rset, kc);
2380     }
2381     else if (!strcmp(search_type, "numeric"))
2382     {
2383         res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2384                                      index_type, complete_flag, rank_type,
2385                                      xpath_use,
2386                                      rset_nmem,
2387                                      rset, kc);
2388     }
2389     else
2390     {
2391         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2392         res = ZEBRA_FAIL;
2393     }
2394     if (res != ZEBRA_OK)
2395         return res;
2396     if (!*rset)
2397         return ZEBRA_FAIL;
2398     return rpn_search_xpath(zh, stream, rank_type, *rset, 
2399                             xpath_len, xpath, rset_nmem, rset, kc);
2400 }
2401
2402 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2403                                       const Odr_oid *attributeSet,
2404                                       zint hits_limit,
2405                                       NMEM stream, NMEM rset_nmem,
2406                                       Z_SortKeySpecList *sort_sequence,
2407                                       int num_bases, const char **basenames,
2408                                       RSET **result_sets, int *num_result_sets,
2409                                       Z_Operator *parent_op,
2410                                       struct rset_key_control *kc);
2411
2412 ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs,
2413                                    zint *approx_limit)
2414 {
2415     ZEBRA_RES res = ZEBRA_OK;
2416     if (zs->which == Z_RPNStructure_complex)
2417     {
2418         if (res == ZEBRA_OK)
2419             res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
2420                                            approx_limit);
2421         if (res == ZEBRA_OK)
2422             res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
2423                                            approx_limit);
2424     }
2425     else if (zs->which == Z_RPNStructure_simple)
2426     {
2427         if (zs->u.simple->which == Z_Operand_APT)
2428         {
2429             Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
2430             AttrType global_hits_limit_attr;
2431             int l;
2432             
2433             attr_init_APT(&global_hits_limit_attr, zapt, 12);
2434             
2435             l = attr_find(&global_hits_limit_attr, NULL);
2436             if (l != -1)
2437                 *approx_limit = l;
2438         }
2439     }
2440     return res;
2441 }
2442
2443 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2444                          const Odr_oid *attributeSet, 
2445                          zint hits_limit,
2446                          NMEM stream, NMEM rset_nmem,
2447                          Z_SortKeySpecList *sort_sequence,
2448                          int num_bases, const char **basenames,
2449                          RSET *result_set)
2450 {
2451     RSET *result_sets = 0;
2452     int num_result_sets = 0;
2453     ZEBRA_RES res;
2454     struct rset_key_control *kc = zebra_key_control_create(zh);
2455
2456     res = rpn_search_structure(zh, zs, attributeSet, hits_limit,
2457                                stream, rset_nmem,
2458                                sort_sequence, 
2459                                num_bases, basenames,
2460                                &result_sets, &num_result_sets,
2461                                0 /* no parent op */,
2462                                kc);
2463     if (res != ZEBRA_OK)
2464     {
2465         int i;
2466         for (i = 0; i<num_result_sets; i++)
2467             rset_delete(result_sets[i]);
2468         *result_set = 0;
2469     }
2470     else
2471     {
2472         assert(num_result_sets == 1);
2473         assert(result_sets);
2474         assert(*result_sets);
2475         *result_set = *result_sets;
2476     }
2477     (*kc->dec)(kc);
2478     return res;
2479 }
2480
2481 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2482                                const Odr_oid *attributeSet, zint hits_limit,
2483                                NMEM stream, NMEM rset_nmem,
2484                                Z_SortKeySpecList *sort_sequence,
2485                                int num_bases, const char **basenames,
2486                                RSET **result_sets, int *num_result_sets,
2487                                Z_Operator *parent_op,
2488                                struct rset_key_control *kc)
2489 {
2490     *num_result_sets = 0;
2491     if (zs->which == Z_RPNStructure_complex)
2492     {
2493         ZEBRA_RES res;
2494         Z_Operator *zop = zs->u.complex->roperator;
2495         RSET *result_sets_l = 0;
2496         int num_result_sets_l = 0;
2497         RSET *result_sets_r = 0;
2498         int num_result_sets_r = 0;
2499
2500         res = rpn_search_structure(zh, zs->u.complex->s1,
2501                                    attributeSet, hits_limit, stream, rset_nmem,
2502                                    sort_sequence,
2503                                    num_bases, basenames,
2504                                    &result_sets_l, &num_result_sets_l,
2505                                    zop, kc);
2506         if (res != ZEBRA_OK)
2507         {
2508             int i;
2509             for (i = 0; i<num_result_sets_l; i++)
2510                 rset_delete(result_sets_l[i]);
2511             return res;
2512         }
2513         res = rpn_search_structure(zh, zs->u.complex->s2,
2514                                    attributeSet, hits_limit, stream, rset_nmem,
2515                                    sort_sequence,
2516                                    num_bases, basenames,
2517                                    &result_sets_r, &num_result_sets_r,
2518                                    zop, kc);
2519         if (res != ZEBRA_OK)
2520         {
2521             int i;
2522             for (i = 0; i<num_result_sets_l; i++)
2523                 rset_delete(result_sets_l[i]);
2524             for (i = 0; i<num_result_sets_r; i++)
2525                 rset_delete(result_sets_r[i]);
2526             return res;
2527         }
2528
2529         /* make a new list of result for all children */
2530         *num_result_sets = num_result_sets_l + num_result_sets_r;
2531         *result_sets = nmem_malloc(stream, *num_result_sets * 
2532                                    sizeof(**result_sets));
2533         memcpy(*result_sets, result_sets_l, 
2534                num_result_sets_l * sizeof(**result_sets));
2535         memcpy(*result_sets + num_result_sets_l, result_sets_r, 
2536                num_result_sets_r * sizeof(**result_sets));
2537
2538         if (!parent_op || parent_op->which != zop->which
2539             || (zop->which != Z_Operator_and &&
2540                 zop->which != Z_Operator_or))
2541         {
2542             /* parent node different from this one (or non-present) */
2543             /* we must combine result sets now */
2544             RSET rset;
2545             switch (zop->which)
2546             {
2547             case Z_Operator_and:
2548                 rset = rset_create_and(rset_nmem, kc,
2549                                        kc->scope,
2550                                        *num_result_sets, *result_sets);
2551                 break;
2552             case Z_Operator_or:
2553                 rset = rset_create_or(rset_nmem, kc,
2554                                       kc->scope, 0, /* termid */
2555                                       *num_result_sets, *result_sets);
2556                 break;
2557             case Z_Operator_and_not:
2558                 rset = rset_create_not(rset_nmem, kc,
2559                                        kc->scope,
2560                                        (*result_sets)[0],
2561                                        (*result_sets)[1]);
2562                 break;
2563             case Z_Operator_prox:
2564                 if (zop->u.prox->which != Z_ProximityOperator_known)
2565                 {
2566                     zebra_setError(zh, 
2567                                    YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2568                                    0);
2569                     return ZEBRA_FAIL;
2570                 }
2571                 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2572                 {
2573                     zebra_setError_zint(zh,
2574                                         YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2575                                         *zop->u.prox->u.known);
2576                     return ZEBRA_FAIL;
2577                 }
2578                 else
2579                 {
2580                     rset = rset_create_prox(rset_nmem, kc,
2581                                             kc->scope,
2582                                             *num_result_sets, *result_sets, 
2583                                             *zop->u.prox->ordered,
2584                                             (!zop->u.prox->exclusion ? 
2585                                              0 : *zop->u.prox->exclusion),
2586                                             *zop->u.prox->relationType,
2587                                             *zop->u.prox->distance );
2588                 }
2589                 break;
2590             default:
2591                 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
2592                 return ZEBRA_FAIL;
2593             }
2594             *num_result_sets = 1;
2595             *result_sets = nmem_malloc(stream, *num_result_sets * 
2596                                        sizeof(**result_sets));
2597             (*result_sets)[0] = rset;
2598         }
2599     }
2600     else if (zs->which == Z_RPNStructure_simple)
2601     {
2602         RSET rset;
2603         ZEBRA_RES res;
2604
2605         if (zs->u.simple->which == Z_Operand_APT)
2606         {
2607             yaz_log(YLOG_DEBUG, "rpn_search_APT");
2608             res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2609                                  attributeSet, hits_limit,
2610                                  stream, sort_sequence,
2611                                  num_bases, basenames, rset_nmem, &rset,
2612                                  kc);
2613             if (res != ZEBRA_OK)
2614                 return res;
2615         }
2616         else if (zs->u.simple->which == Z_Operand_resultSetId)
2617         {
2618             yaz_log(YLOG_DEBUG, "rpn_search_ref");
2619             rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2620             if (!rset)
2621             {
2622                 zebra_setError(zh, 
2623                                YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2624                                zs->u.simple->u.resultSetId);
2625                 return ZEBRA_FAIL;
2626             }
2627             rset_dup(rset);
2628         }
2629         else
2630         {
2631             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2632             return ZEBRA_FAIL;
2633         }
2634         *num_result_sets = 1;
2635         *result_sets = nmem_malloc(stream, *num_result_sets * 
2636                                    sizeof(**result_sets));
2637         (*result_sets)[0] = rset;
2638     }
2639     else
2640     {
2641         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2642         return ZEBRA_FAIL;
2643     }
2644     return ZEBRA_OK;
2645 }
2646
2647
2648
2649 /*
2650  * Local variables:
2651  * c-basic-offset: 4
2652  * c-file-style: "Stroustrup"
2653  * indent-tabs-mode: nil
2654  * End:
2655  * vim: shiftwidth=4 tabstop=8 expandtab
2656  */
2657