Added function zebra_create_rset_isam.
[idzebra-moved-to-github.git] / index / rpnsearch.c
1 /* $Id: rpnsearch.c,v 1.25 2007-12-03 13:04:04 adam Exp $
2    Copyright (C) 1995-2007
3    Index Data ApS
4
5 This file is part of the Zebra server.
6
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
20
21 */
22
23 #include <stdio.h>
24 #include <assert.h>
25 #ifdef WIN32
26 #include <io.h>
27 #endif
28 #if HAVE_UNISTD_H
29 #include <unistd.h>
30 #endif
31 #include <ctype.h>
32
33 #include <yaz/diagbib1.h>
34 #include "index.h"
35 #include <zebra_xpath.h>
36 #include <attrfind.h>
37 #include <charmap.h>
38 #include <rset.h>
39
/* Lazily initialised logging state: log_level_set is a "done" flag and
   log_level_rpn holds the level returned by yaz_log_module_level("rpn")
   (both are filled in on the first call to add_isam_p). */
static int log_level_set = 0;
static int log_level_rpn = 0;

/* NOTE(review): not referenced in the visible part of this file;
   presumably disables term-set handling further down -- confirm. */
#define TERMSET_DISABLE 1
44
45 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
46 {
47     struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
48     const char **out = zebra_maps_input(p->zm, from, len, 0);
49 #if 0
50     if (out && *out)
51     {
52         const char *outp = *out;
53         yaz_log(YLOG_LOG, "---");
54         while (*outp)
55         {
56             yaz_log(YLOG_LOG, "%02X", *outp);
57             outp++;
58         }
59     }
60 #endif
61     return out;
62 }
63
64 void rpn_char_map_prepare(struct zebra_register *reg, zebra_map_t zm,
65                           struct rpn_char_map_info *map_info)
66 {
67     map_info->zm = zm;
68     if (zebra_maps_is_icu(zm))
69         dict_grep_cmap(reg->dict, 0, 0);
70     else
71         dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
72 }
73
/* When defined, struct grep_info carries the extra term_no array. */
#define TERM_COUNT

/* State shared between a dictionary lookup and its match callback
   (add_isam_p): collects one ISAM_P per accepted dictionary entry. */
struct grep_info {
#ifdef TERM_COUNT
    int *term_no;          /* grown in step with isam_p_buf; contents are
                              not written in the visible code */
#endif
    ISAM_P *isam_p_buf;    /* collected ISAM positions */
    int isam_p_size;       /* allocated capacity of isam_p_buf (entries) */
    int isam_p_indx;       /* number of entries in use */
    int trunc_max;         /* add_isam_p refuses entries once
                              isam_p_indx >= trunc_max - 1 */
    ZebraHandle zh;        /* handle used for term untranslation/explain */
    const char *index_type; /* passed to zebra_term_untrans */
    ZebraSet termset;      /* if non-null, every matched term is added
                              to this set via resultSetAddTerm */
};
88
89 static int add_isam_p(const char *name, const char *info,
90                       struct grep_info *p)
91 {
92     if (!log_level_set)
93     {
94         log_level_rpn = yaz_log_module_level("rpn");
95         log_level_set = 1;
96     }
97     /* we may have to stop this madness.. NOTE: -1 so that if
98        truncmax == trunxlimit we do *not* generate result sets */
99     if (p->isam_p_indx >= p->trunc_max - 1)
100         return 1;
101
102     if (p->isam_p_indx == p->isam_p_size)
103     {
104         ISAM_P *new_isam_p_buf;
105 #ifdef TERM_COUNT        
106         int *new_term_no;        
107 #endif
108         p->isam_p_size = 2*p->isam_p_size + 100;
109         new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
110                                             p->isam_p_size);
111         if (p->isam_p_buf)
112         {
113             memcpy(new_isam_p_buf, p->isam_p_buf,
114                     p->isam_p_indx * sizeof(*p->isam_p_buf));
115             xfree(p->isam_p_buf);
116         }
117         p->isam_p_buf = new_isam_p_buf;
118
119 #ifdef TERM_COUNT
120         new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
121         if (p->term_no)
122         {
123             memcpy(new_term_no, p->isam_p_buf,
124                     p->isam_p_indx * sizeof(*p->term_no));
125             xfree(p->term_no);
126         }
127         p->term_no = new_term_no;
128 #endif
129     }
130     assert(*info == sizeof(*p->isam_p_buf));
131     memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
132
133     if (p->termset)
134     {
135         const char *db;
136         char term_tmp[IT_MAX_WORD];
137         int ord = 0;
138         const char *index_name;
139         int len = key_SU_decode(&ord, (const unsigned char *) name);
140         
141         zebra_term_untrans (p->zh, p->index_type, term_tmp, name+len);
142         yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
143         zebraExplain_lookup_ord(p->zh->reg->zei,
144                                 ord, 0 /* index_type */, &db, &index_name);
145         yaz_log(log_level_rpn, "grep:  db=%s index=%s", db, index_name);
146         
147         resultSetAddTerm(p->zh, p->termset, name[len], db,
148                          index_name, term_tmp);
149     }
150     (p->isam_p_indx)++;
151     return 0;
152 }
153
/* Callback adapter: p is the struct grep_info supplied as client
   data; the match is forwarded to add_isam_p unchanged. */
static int grep_handle(char *name, const char *info, void *p)
{
    struct grep_info *gi = (struct grep_info *) p;

    return add_isam_p(name, info, gi);
}
158
159 static int term_pre(zebra_map_t zm, const char **src,
160                     const char *ct1, const char *ct2, int first)
161 {
162     const char *s1, *s0 = *src;
163     const char **map;
164
165     /* skip white space */
166     while (*s0)
167     {
168         if (ct1 && strchr(ct1, *s0))
169             break;
170         if (ct2 && strchr(ct2, *s0))
171             break;
172         s1 = s0;
173         map = zebra_maps_input(zm, &s1, strlen(s1), first);
174         if (**map != *CHR_SPACE)
175             break;
176         s0 = s1;
177     }
178     *src = s0;
179     return *s0;
180 }
181
182
/* esc_str: format in_buf[0..in_size-1] as "HH:c  " items (hex value
 * and the character itself, '?' if outside 32..126) into out_buf.
 * out_size must exceed 20; when the output gets within 20 bytes of
 * out_size, ".." is appended and the remaining input is dropped.
 */
static void esc_str(char *out_buf, size_t out_size,
                    const char *in_buf, int in_size)
{
    size_t used = 0;   /* current length of the string in out_buf */
    int k = 0;

    assert(out_buf);
    assert(in_buf);
    assert(out_size > 20);
    out_buf[0] = '\0';
    while (k < in_size)
    {
        int c = in_buf[k++] & 0xff;
        int pc = (c >= 32 && c <= 126) ? c : '?';

        used += sprintf(out_buf + used, "%02X:%c  ", c, pc);
        if (used > out_size-20)
        {
            strcpy(out_buf + used, "..");
            break;
        }
    }
}
208
209 #define REGEX_CHARS " []()|.*+?!"
210
211 static void add_non_space(const char *start, const char *end,
212                           WRBUF term_dict,
213                           char *dst_term, int *dst_ptr,
214                           const char **map, int q_map_match)
215 {
216     size_t sz = end - start;
217     memcpy(dst_term + *dst_ptr, start, sz);
218     (*dst_ptr) += sz;
219     if (!q_map_match)
220     {
221         while (start < end)
222         {
223             if (strchr(REGEX_CHARS, *start))
224                 wrbuf_putc(term_dict, '\\');
225             wrbuf_putc(term_dict, *start);
226             start++;
227         }
228     }
229     else
230     {
231         char tmpbuf[80];
232         esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
233         
234         wrbuf_puts(term_dict, map[0]);
235     }
236 }
237
238
239 static int term_100_icu(zebra_map_t zm,
240                         const char **src, WRBUF term_dict, int space_split,
241                         char *dst_term)
242 {
243     int no = 0;
244     const char *res_buf = 0;
245     size_t res_len = 0;
246     int r = zebra_map_tokenize(zm, *src, strlen(*src),
247                                &res_buf, &res_len);
248
249     yaz_log(YLOG_LOG, "term_100_icu r=%d", r);
250     if (r)
251         strcat(dst_term, *src);
252     *src += strlen(*src);
253     while (r)
254     {
255         int i;
256         no++;
257         for (i = 0; i < res_len; i++)
258         {
259             if (strchr(REGEX_CHARS, res_buf[i]))
260                 wrbuf_putc(term_dict, '\\');
261             if (res_buf[i] < 32)
262                 wrbuf_putc(term_dict, 1);
263             wrbuf_putc(term_dict, res_buf[i]);
264         }
265         r = zebra_map_tokenize(zm, 0, 0, &res_buf, &res_len);
266     }
267     return no;
268 }
269
270 /* term_100: handle term, where trunc = none(no operators at all) */
271 static int term_100(zebra_map_t zm,
272                     const char **src, WRBUF term_dict, int space_split,
273                     char *dst_term)
274 {
275     const char *s0;
276     const char **map;
277     int i = 0;
278     int j = 0;
279
280     const char *space_start = 0;
281     const char *space_end = 0;
282
283     if (zebra_maps_is_icu(zm))
284         return term_100_icu(zm, src, term_dict, space_split, dst_term);
285
286     if (!term_pre(zm, src, NULL, NULL, !space_split))
287         return 0;
288     s0 = *src;
289     while (*s0)
290     {
291         const char *s1 = s0;
292         int q_map_match = 0;
293         map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
294         if (space_split)
295         {
296             if (**map == *CHR_SPACE)
297                 break;
298         }
299         else  /* complete subfield only. */
300         {
301             if (**map == *CHR_SPACE)
302             {   /* save space mapping for later  .. */
303                 space_start = s1;
304                 space_end = s0;
305                 continue;
306             }
307             else if (space_start)
308             {   /* reload last space */
309                 while (space_start < space_end)
310                 {
311                     if (strchr(REGEX_CHARS, *space_start))
312                         wrbuf_putc(term_dict, '\\');
313                     dst_term[j++] = *space_start;
314                     wrbuf_putc(term_dict, *space_start);
315                     space_start++;
316                                
317                 }
318                 /* and reset */
319                 space_start = space_end = 0;
320             }
321         }
322         i++;
323
324         add_non_space(s1, s0, term_dict, dst_term, &j,
325                       map, q_map_match);
326     }
327     dst_term[j] = '\0';
328     *src = s0;
329     return i;
330 }
331
332 /* term_101: handle term, where trunc = Process # */
333 static int term_101(zebra_map_t zm,
334                     const char **src, WRBUF term_dict, int space_split,
335                     char *dst_term)
336 {
337     const char *s0;
338     const char **map;
339     int i = 0;
340     int j = 0;
341
342     if (!term_pre(zm, src, "#", "#", !space_split))
343         return 0;
344     s0 = *src;
345     while (*s0)
346     {
347         if (*s0 == '#')
348         {
349             i++;
350             wrbuf_puts(term_dict, ".*");
351             dst_term[j++] = *s0++;
352         }
353         else
354         {
355             const char *s1 = s0;
356             int q_map_match = 0;
357             map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
358             if (space_split && **map == *CHR_SPACE)
359                 break;
360
361             i++;
362             add_non_space(s1, s0, term_dict, dst_term, &j,
363                           map, q_map_match);
364         }
365     }
366     dst_term[j++] = '\0';
367     *src = s0;
368     return i;
369 }
370
371 /* term_103: handle term, where trunc = re-2 (regular expressions) */
372 static int term_103(zebra_map_t zm, const char **src,
373                     WRBUF term_dict, int *errors, int space_split,
374                     char *dst_term)
375 {
376     int i = 0;
377     int j = 0;
378     const char *s0;
379     const char **map;
380
381     if (!term_pre(zm, src, "^\\()[].*+?|", "(", !space_split))
382         return 0;
383     s0 = *src;
384     if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
385         isdigit(((const unsigned char *)s0)[1]))
386     {
387         *errors = s0[1] - '0';
388         s0 += 3;
389         if (*errors > 3)
390             *errors = 3;
391     }
392     while (*s0)
393     {
394         if (strchr("^\\()[].*+?|-", *s0))
395         {
396             dst_term[j++] = *s0;
397             wrbuf_putc(term_dict, *s0);
398             s0++;
399             i++;
400         }
401         else
402         {
403             const char *s1 = s0;
404             int q_map_match = 0;
405             map = zebra_maps_search(zm, &s0, strlen(s0),  &q_map_match);
406             if (space_split && **map == *CHR_SPACE)
407                 break;
408
409             i++;
410             add_non_space(s1, s0, term_dict, dst_term, &j,
411                           map, q_map_match);
412         }
413     }
414     dst_term[j] = '\0';
415     *src = s0;
416     
417     return i;
418 }
419
420 /* term_103: handle term, where trunc = re-1 (regular expressions) */
421 static int term_102(zebra_map_t zm, const char **src,
422                     WRBUF term_dict, int space_split, char *dst_term)
423 {
424     return term_103(zm, src, term_dict, NULL, space_split, dst_term);
425 }
426
427
428 /* term_104: handle term, process # and ! */
429 static int term_104(zebra_map_t zm, const char **src, 
430                     WRBUF term_dict, int space_split, char *dst_term)
431 {
432     const char *s0;
433     const char **map;
434     int i = 0;
435     int j = 0;
436
437     if (!term_pre(zm, src, "?*#", "?*#", !space_split))
438         return 0;
439     s0 = *src;
440     while (*s0)
441     {
442         if (*s0 == '?')
443         {
444             i++;
445             dst_term[j++] = *s0++;
446             if (*s0 >= '0' && *s0 <= '9')
447             {
448                 int limit = 0;
449                 while (*s0 >= '0' && *s0 <= '9')
450                 {
451                     limit = limit * 10 + (*s0 - '0');
452                     dst_term[j++] = *s0++;
453                 }
454                 if (limit > 20)
455                     limit = 20;
456                 while (--limit >= 0)
457                 {
458                     wrbuf_puts(term_dict, ".?");
459                 }
460             }
461             else
462             {
463                 wrbuf_puts(term_dict, ".*");
464             }
465         }
466         else if (*s0 == '*')
467         {
468             i++;
469             wrbuf_puts(term_dict, ".*");
470             dst_term[j++] = *s0++;
471         }
472         else if (*s0 == '#')
473         {
474             i++;
475             wrbuf_puts(term_dict, ".");
476             dst_term[j++] = *s0++;
477         }
478         else
479         {
480             const char *s1 = s0;
481             int q_map_match = 0;
482             map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
483             if (space_split && **map == *CHR_SPACE)
484                 break;
485
486             i++;
487             add_non_space(s1, s0, term_dict, dst_term, &j,
488                           map, q_map_match);
489         }
490     }
491     dst_term[j++] = '\0';
492     *src = s0;
493     return i;
494 }
495
496 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
497 static int term_105(zebra_map_t zm, const char **src, 
498                     WRBUF term_dict, int space_split,
499                     char *dst_term, int right_truncate)
500 {
501     const char *s0;
502     const char **map;
503     int i = 0;
504     int j = 0;
505
506     if (!term_pre(zm, src, "*!", "*!", !space_split))
507         return 0;
508     s0 = *src;
509     while (*s0)
510     {
511         if (*s0 == '*')
512         {
513             i++;
514             wrbuf_puts(term_dict, ".*");
515             dst_term[j++] = *s0++;
516         }
517         else if (*s0 == '!')
518         {
519             i++;
520             wrbuf_putc(term_dict, '.');
521             dst_term[j++] = *s0++;
522         }
523         else
524         {
525             const char *s1 = s0;
526             int q_map_match = 0;
527             map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
528             if (space_split && **map == *CHR_SPACE)
529                 break;
530
531             i++;
532             add_non_space(s1, s0, term_dict, dst_term, &j,
533                           map, q_map_match);
534         }
535     }
536     if (right_truncate)
537         wrbuf_puts(term_dict, ".*");
538     dst_term[j++] = '\0';
539     *src = s0;
540     return i;
541 }
542
543
544 /* gen_regular_rel - generate regular expression from relation
545  *  val:     border value (inclusive)
546  *  islt:    1 if <=; 0 if >=.
547  */
548 static void gen_regular_rel(WRBUF term_dict, int val, int islt)
549 {
550     char dst_buf[20*5*20]; /* assuming enough for expansion */
551     char *dst = dst_buf;
552     int dst_p;
553     int w, d, i;
554     int pos = 0;
555     char numstr[20];
556
557     yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
558     if (val >= 0)
559     {
560         if (islt)
561             strcpy(dst, "(-[0-9]+|(");
562         else
563             strcpy(dst, "((");
564     } 
565     else
566     {
567         if (!islt)
568         {
569             strcpy(dst, "([0-9]+|-(");
570             islt = 1;
571         }
572         else
573         {
574             strcpy(dst, "(-(");
575             islt = 0;
576         }
577         val = -val;
578     }
579     dst_p = strlen(dst);
580     sprintf(numstr, "%d", val);
581     for (w = strlen(numstr); --w >= 0; pos++)
582     {
583         d = numstr[w];
584         if (pos > 0)
585         {
586             if (islt)
587             {
588                 if (d == '0')
589                     continue;
590                 d--;
591             } 
592             else
593             {
594                 if (d == '9')
595                     continue;
596                 d++;
597             }
598         }
599         
600         strcpy(dst + dst_p, numstr);
601         dst_p = strlen(dst) - pos - 1;
602
603         if (islt)
604         {
605             if (d != '0')
606             {
607                 dst[dst_p++] = '[';
608                 dst[dst_p++] = '0';
609                 dst[dst_p++] = '-';
610                 dst[dst_p++] = d;
611                 dst[dst_p++] = ']';
612             }
613             else
614                 dst[dst_p++] = d;
615         }
616         else
617         {
618             if (d != '9')
619             { 
620                 dst[dst_p++] = '[';
621                 dst[dst_p++] = d;
622                 dst[dst_p++] = '-';
623                 dst[dst_p++] = '9';
624                 dst[dst_p++] = ']';
625             }
626             else
627                 dst[dst_p++] = d;
628         }
629         for (i = 0; i<pos; i++)
630         {
631             dst[dst_p++] = '[';
632             dst[dst_p++] = '0';
633             dst[dst_p++] = '-';
634             dst[dst_p++] = '9';
635             dst[dst_p++] = ']';
636         }
637         dst[dst_p++] = '|';
638     }
639     dst[dst_p] = '\0';
640     if (islt)
641     {
642         /* match everything less than 10^(pos-1) */
643         strcat(dst, "0*");
644         for (i = 1; i<pos; i++)
645             strcat(dst, "[0-9]?");
646     }
647     else
648     {
649         /* match everything greater than 10^pos */
650         for (i = 0; i <= pos; i++)
651             strcat(dst, "[0-9]");
652         strcat(dst, "[0-9]*");
653     }
654     strcat(dst, "))");
655     wrbuf_puts(term_dict, dst);
656 }
657
658 void string_rel_add_char(WRBUF term_p, WRBUF wsrc, int *indx)
659 {
660     const char *src = wrbuf_cstr(wsrc);
661     if (src[*indx] == '\\')
662     {
663         wrbuf_putc(term_p, src[*indx]);
664         (*indx)++;
665     }
666     wrbuf_putc(term_p, src[*indx]);
667     (*indx)++;
668 }
669
670 /*
671  *   >  abc     ([b-].*|a[c-].*|ab[d-].*|abc.+)
672  *              ([^-a].*|a[^-b].*ab[^-c].*|abc.+)
673  *   >= abc     ([b-].*|a[c-].*|ab[c-].*)
674  *              ([^-a].*|a[^-b].*|ab[c-].*)
675  *   <  abc     ([-0].*|a[-a].*|ab[-b].*)
676  *              ([^a-].*|a[^b-].*|ab[^c-].*)
677  *   <= abc     ([-0].*|a[-a].*|ab[-b].*|abc)
678  *              ([^a-].*|a[^b-].*|ab[^c-].*|abc)
679  */
680 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
681                            const char **term_sub, WRBUF term_dict,
682                            const Odr_oid *attributeSet,
683                            zebra_map_t zm, int space_split, char *term_dst,
684                            int *error_code)
685 {
686     AttrType relation;
687     int relation_value;
688     int i;
689     WRBUF term_component = wrbuf_alloc();
690
691     attr_init_APT(&relation, zapt, 2);
692     relation_value = attr_find(&relation, NULL);
693
694     *error_code = 0;
695     yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
696     switch (relation_value)
697     {
698     case 1:
699         if (!term_100(zm, term_sub, term_component, space_split, term_dst))
700         {
701             wrbuf_destroy(term_component);
702             return 0;
703         }
704         yaz_log(log_level_rpn, "Relation <");
705         
706         wrbuf_putc(term_dict, '(');
707         for (i = 0; i < wrbuf_len(term_component); )
708         {
709             int j = 0;
710             
711             if (i)
712                 wrbuf_putc(term_dict, '|');
713             while (j < i)
714                 string_rel_add_char(term_dict, term_component, &j);
715
716             wrbuf_putc(term_dict, '[');
717
718             wrbuf_putc(term_dict, '^');
719             
720             wrbuf_putc(term_dict, 1);
721             wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
722             
723             string_rel_add_char(term_dict, term_component, &i);
724             wrbuf_putc(term_dict, '-');
725             
726             wrbuf_putc(term_dict, ']');
727             wrbuf_putc(term_dict, '.');
728             wrbuf_putc(term_dict, '*');
729         }
730         wrbuf_putc(term_dict, ')');
731         break;
732     case 2:
733         if (!term_100(zm, term_sub, term_component, space_split, term_dst))
734         {
735             wrbuf_destroy(term_component);
736             return 0;
737         }
738         yaz_log(log_level_rpn, "Relation <=");
739
740         wrbuf_putc(term_dict, '(');
741         for (i = 0; i < wrbuf_len(term_component); )
742         {
743             int j = 0;
744
745             while (j < i)
746                 string_rel_add_char(term_dict, term_component, &j);
747             wrbuf_putc(term_dict, '[');
748
749             wrbuf_putc(term_dict, '^');
750
751             wrbuf_putc(term_dict, 1);
752             wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
753
754             string_rel_add_char(term_dict, term_component, &i);
755             wrbuf_putc(term_dict, '-');
756
757             wrbuf_putc(term_dict, ']');
758             wrbuf_putc(term_dict, '.');
759             wrbuf_putc(term_dict, '*');
760
761             wrbuf_putc(term_dict, '|');
762         }
763         for (i = 0; i < wrbuf_len(term_component); )
764             string_rel_add_char(term_dict, term_component, &i);
765         wrbuf_putc(term_dict, ')');
766         break;
767     case 5:
768         if (!term_100(zm, term_sub, term_component, space_split, term_dst))
769         {
770             wrbuf_destroy(term_component);
771             return 0;
772         }
773         yaz_log(log_level_rpn, "Relation >");
774
775         wrbuf_putc(term_dict, '(');
776         for (i = 0; i < wrbuf_len(term_component); )
777         {
778             int j = 0;
779
780             while (j < i)
781                 string_rel_add_char(term_dict, term_component, &j);
782             wrbuf_putc(term_dict, '[');
783             
784             wrbuf_putc(term_dict, '^');
785             wrbuf_putc(term_dict, '-');
786             string_rel_add_char(term_dict, term_component, &i);
787
788             wrbuf_putc(term_dict, ']');
789             wrbuf_putc(term_dict, '.');
790             wrbuf_putc(term_dict, '*');
791
792             wrbuf_putc(term_dict, '|');
793         }
794         for (i = 0; i < wrbuf_len(term_component); )
795             string_rel_add_char(term_dict, term_component, &i);
796         wrbuf_putc(term_dict, '.');
797         wrbuf_putc(term_dict, '+');
798         wrbuf_putc(term_dict, ')');
799         break;
800     case 4:
801         if (!term_100(zm, term_sub, term_component, space_split, term_dst))
802         {
803             wrbuf_destroy(term_component);
804             return 0;
805         }
806         yaz_log(log_level_rpn, "Relation >=");
807
808         wrbuf_putc(term_dict, '(');
809         for (i = 0; i < wrbuf_len(term_component); )
810         {
811             int j = 0;
812
813             if (i)
814                 wrbuf_putc(term_dict, '|');
815             while (j < i)
816                 string_rel_add_char(term_dict, term_component, &j);
817             wrbuf_putc(term_dict, '[');
818
819             if (i < wrbuf_len(term_component)-1)
820             {
821                 wrbuf_putc(term_dict, '^');
822                 wrbuf_putc(term_dict, '-');
823                 string_rel_add_char(term_dict, term_component, &i);
824             }
825             else
826             {
827                 string_rel_add_char(term_dict, term_component, &i);
828                 wrbuf_putc(term_dict, '-');
829             }
830             wrbuf_putc(term_dict, ']');
831             wrbuf_putc(term_dict, '.');
832             wrbuf_putc(term_dict, '*');
833         }
834         wrbuf_putc(term_dict, ')');
835         break;
836     case 3:
837     case 102:
838     case -1:
839         if (!**term_sub)
840             return 1;
841         yaz_log(log_level_rpn, "Relation =");
842         if (!term_100(zm, term_sub, term_component, space_split, term_dst))
843         {
844             wrbuf_destroy(term_component);
845             return 0;
846         }
847         wrbuf_puts(term_dict, "(");
848         wrbuf_puts(term_dict, wrbuf_cstr(term_component));
849         wrbuf_puts(term_dict, ")");
850         break;
851     case 103:
852         yaz_log(log_level_rpn, "Relation always matches");
853         /* skip to end of term (we don't care what it is) */
854         while (**term_sub != '\0')
855             (*term_sub)++;
856         break;
857     default:
858         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
859         wrbuf_destroy(term_component);
860         return 0;
861     }
862     wrbuf_destroy(term_component);
863     return 1;
864 }
865
/* Forward declaration: string_term is called by term_trunc below but
   is defined after it. */
static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
                             const char **term_sub, 
                             WRBUF term_dict,
                             const Odr_oid *attributeSet, NMEM stream,
                             struct grep_info *grep_info,
                             const char *index_type, int complete_flag,
                             char *term_dst,
                             const char *xpath_use,
                             struct ord_list **ol);
875
876 ZEBRA_RES zebra_term_limits_APT(ZebraHandle zh,
877                                 Z_AttributesPlusTerm *zapt,
878                                 zint *hits_limit_value,
879                                 const char **term_ref_id_str,
880                                 NMEM nmem)
881 {
882     AttrType term_ref_id_attr;
883     AttrType hits_limit_attr;
884     int term_ref_id_int;
885  
886     attr_init_APT(&hits_limit_attr, zapt, 11);
887     *hits_limit_value  = attr_find(&hits_limit_attr, NULL);
888
889     attr_init_APT(&term_ref_id_attr, zapt, 10);
890     term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
891     if (term_ref_id_int >= 0)
892     {
893         char *res = nmem_malloc(nmem, 20);
894         sprintf(res, "%d", term_ref_id_int);
895         *term_ref_id_str = res;
896     }
897
898     /* no limit given ? */
899     if (*hits_limit_value == -1)
900     {
901         if (*term_ref_id_str)
902         {
903             /* use global if term_ref is present */
904             *hits_limit_value = zh->approx_limit;
905         }
906         else
907         {
908             /* no counting if term_ref is not present */
909             *hits_limit_value = 0;
910         }
911     }
912     else if (*hits_limit_value == 0)
913     {
914         /* 0 is the same as global limit */
915         *hits_limit_value = zh->approx_limit;
916     }
917     yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
918             *term_ref_id_str ? *term_ref_id_str : "none",
919             *hits_limit_value);
920     return ZEBRA_OK;
921 }
922
923 static ZEBRA_RES term_trunc(ZebraHandle zh,
924                             Z_AttributesPlusTerm *zapt,
925                             const char **term_sub, 
926                             const Odr_oid *attributeSet, NMEM stream,
927                             struct grep_info *grep_info,
928                             const char *index_type, int complete_flag,
929                             char *term_dst,
930                             const char *rank_type, 
931                             const char *xpath_use,
932                             NMEM rset_nmem,
933                             RSET *rset,
934                             struct rset_key_control *kc)
935 {
936     ZEBRA_RES res;
937     struct ord_list *ol;
938     zint hits_limit_value;
939     const char *term_ref_id_str = 0;
940     WRBUF term_dict = wrbuf_alloc();
941
942     *rset = 0;
943     zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
944                           stream);
945     grep_info->isam_p_indx = 0;
946     res = string_term(zh, zapt, term_sub, term_dict,
947                       attributeSet, stream, grep_info,
948                       index_type, complete_flag,
949                       term_dst, xpath_use, &ol);
950     wrbuf_destroy(term_dict);
951     if (res != ZEBRA_OK)
952         return res;
953     if (!*term_sub)  /* no more terms ? */
954         return res;
955     yaz_log(log_level_rpn, "term: %s", term_dst);
956     *rset = rset_trunc(zh, grep_info->isam_p_buf,
957                        grep_info->isam_p_indx, term_dst,
958                        strlen(term_dst), rank_type, 1 /* preserve pos */,
959                        zapt->term->which, rset_nmem,
960                        kc, kc->scope, ol, index_type, hits_limit_value,
961                        term_ref_id_str);
962     if (!*rset)
963         return ZEBRA_FAIL;
964     return ZEBRA_OK;
965 }
966
967 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
968                              const char **term_sub, 
969                              WRBUF term_dict,
970                              const Odr_oid *attributeSet, NMEM stream,
971                              struct grep_info *grep_info,
972                              const char *index_type, int complete_flag,
973                              char *term_dst,
974                              const char *xpath_use,
975                              struct ord_list **ol)
976 {
977     int r;
978     AttrType truncation;
979     int truncation_value;
980     const char *termp;
981     struct rpn_char_map_info rcmi;
982
983     int space_split = complete_flag ? 0 : 1;
984     int ord = -1;
985     int regex_range = 0;
986     int max_pos, prefix_len = 0;
987     int relation_error;
988     char ord_buf[32];
989     int ord_len, i;
990     zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
991
992     *ol = ord_list_create(stream);
993
994     rpn_char_map_prepare(zh->reg, zm, &rcmi);
995     attr_init_APT(&truncation, zapt, 5);
996     truncation_value = attr_find(&truncation, NULL);
997     yaz_log(log_level_rpn, "truncation value %d", truncation_value);
998
999     termp = *term_sub; /* start of term for each database */
1000     
1001     if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1002                           attributeSet, &ord) != ZEBRA_OK)
1003     {
1004         *term_sub = 0;
1005         return ZEBRA_FAIL;
1006     }
1007     
1008     wrbuf_rewind(term_dict); /* new dictionary regexp term */
1009     
1010     *ol = ord_list_append(stream, *ol, ord);
1011     ord_len = key_SU_encode(ord, ord_buf);
1012     
1013     wrbuf_putc(term_dict, '(');
1014     
1015     for (i = 0; i<ord_len; i++)
1016     {
1017         wrbuf_putc(term_dict, 1);  /* our internal regexp escape char */
1018         wrbuf_putc(term_dict, ord_buf[i]);
1019     }
1020     wrbuf_putc(term_dict, ')');
1021     
1022     prefix_len = wrbuf_len(term_dict);
1023     
1024     switch (truncation_value)
1025     {
1026     case -1:         /* not specified */
1027     case 100:        /* do not truncate */
1028         if (!string_relation(zh, zapt, &termp, term_dict,
1029                              attributeSet,
1030                              zm, space_split, term_dst,
1031                              &relation_error))
1032         {
1033             if (relation_error)
1034             {
1035                 zebra_setError(zh, relation_error, 0);
1036                 return ZEBRA_FAIL;
1037             }
1038             *term_sub = 0;
1039             return ZEBRA_OK;
1040         }
1041         break;
1042     case 1:          /* right truncation */
1043         wrbuf_putc(term_dict, '(');
1044         if (!term_100(zm, &termp, term_dict, space_split, term_dst))
1045         {
1046             *term_sub = 0;
1047             return ZEBRA_OK;
1048         }
1049         wrbuf_puts(term_dict, ".*)");
1050         break;
1051     case 2:          /* keft truncation */
1052         wrbuf_puts(term_dict, "(.*");
1053         if (!term_100(zm, &termp, term_dict, space_split, term_dst))
1054         {
1055             *term_sub = 0;
1056             return ZEBRA_OK;
1057         }
1058         wrbuf_putc(term_dict, ')');
1059         break;
1060     case 3:          /* left&right truncation */
1061         wrbuf_puts(term_dict, "(.*");
1062         if (!term_100(zm, &termp, term_dict, space_split, term_dst))
1063         {
1064             *term_sub = 0;
1065             return ZEBRA_OK;
1066         }
1067         wrbuf_puts(term_dict, ".*)");
1068         break;
1069     case 101:        /* process # in term */
1070         wrbuf_putc(term_dict, '(');
1071         if (!term_101(zm, &termp, term_dict, space_split, term_dst))
1072         {
1073             *term_sub = 0;
1074             return ZEBRA_OK;
1075         }
1076         wrbuf_puts(term_dict, ")");
1077         break;
1078     case 102:        /* Regexp-1 */
1079         wrbuf_putc(term_dict, '(');
1080         if (!term_102(zm, &termp, term_dict, space_split, term_dst))
1081         {
1082             *term_sub = 0;
1083             return ZEBRA_OK;
1084         }
1085         wrbuf_putc(term_dict, ')');
1086         break;
1087     case 103:       /* Regexp-2 */
1088         regex_range = 1;
1089         wrbuf_putc(term_dict, '(');
1090         if (!term_103(zm, &termp, term_dict, &regex_range,
1091                       space_split, term_dst))
1092         {
1093             *term_sub = 0;
1094             return ZEBRA_OK;
1095         }
1096         wrbuf_putc(term_dict, ')');
1097         break;
1098     case 104:        /* process # and ! in term */
1099         wrbuf_putc(term_dict, '(');
1100         if (!term_104(zm, &termp, term_dict, space_split, term_dst))
1101         {
1102             *term_sub = 0;
1103             return ZEBRA_OK;
1104         }
1105         wrbuf_putc(term_dict, ')');
1106         break;
1107     case 105:        /* process * and ! in term */
1108         wrbuf_putc(term_dict, '(');
1109         if (!term_105(zm, &termp, term_dict, space_split, term_dst, 1))
1110         {
1111             *term_sub = 0;
1112             return ZEBRA_OK;
1113         }
1114         wrbuf_putc(term_dict, ')');
1115         break;
1116     case 106:        /* process * and ! in term */
1117         wrbuf_putc(term_dict, '(');
1118         if (!term_105(zm, &termp, term_dict, space_split, term_dst, 0))
1119         {
1120             *term_sub = 0;
1121             return ZEBRA_OK;
1122         }
1123         wrbuf_putc(term_dict, ')');
1124         break;
1125     default:
1126         zebra_setError_zint(zh,
1127                             YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1128                             truncation_value);
1129         return ZEBRA_FAIL;
1130     }
1131     if (1)
1132     {
1133         char buf[1000];
1134         const char *input = wrbuf_cstr(term_dict) + prefix_len;
1135         esc_str(buf, sizeof(buf), input, strlen(input));
1136     }
1137     {
1138         WRBUF pr_wr = wrbuf_alloc();
1139
1140         wrbuf_write_escaped(pr_wr, wrbuf_buf(term_dict), wrbuf_len(term_dict));
1141         yaz_log(YLOG_LOG, "dict_lookup_grep: %s", wrbuf_cstr(pr_wr));
1142         wrbuf_destroy(pr_wr);
1143     }
1144     r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), regex_range,
1145                          grep_info, &max_pos, 
1146                          ord_len /* number of "exact" chars */,
1147                          grep_handle);
1148     if (r == 1)
1149         zebra_set_partial_result(zh);
1150     else if (r)
1151         yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1152     *term_sub = termp;
1153     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1154     return ZEBRA_OK;
1155 }
1156
1157
1158
1159 static void grep_info_delete(struct grep_info *grep_info)
1160 {
1161 #ifdef TERM_COUNT
1162     xfree(grep_info->term_no);
1163 #endif
1164     xfree(grep_info->isam_p_buf);
1165 }
1166
1167 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1168                                    Z_AttributesPlusTerm *zapt,
1169                                    struct grep_info *grep_info,
1170                                    const char *index_type)
1171 {
1172 #ifdef TERM_COUNT
1173     grep_info->term_no = 0;
1174 #endif
1175     grep_info->trunc_max = atoi(res_get_def(zh->res, "truncmax", "10000"));
1176     grep_info->isam_p_size = 0;
1177     grep_info->isam_p_buf = NULL;
1178     grep_info->zh = zh;
1179     grep_info->index_type = index_type;
1180     grep_info->termset = 0;
1181     if (zapt)
1182     {
1183         AttrType truncmax;
1184         int truncmax_value;
1185
1186         attr_init_APT(&truncmax, zapt, 13);
1187         truncmax_value = attr_find(&truncmax, NULL);
1188         if (truncmax_value != -1)
1189             grep_info->trunc_max = truncmax_value;
1190     }
1191     if (zapt)
1192     {
1193         AttrType termset;
1194         int termset_value_numeric;
1195         const char *termset_value_string;
1196
1197         attr_init_APT(&termset, zapt, 8);
1198         termset_value_numeric =
1199             attr_find_ex(&termset, NULL, &termset_value_string);
1200         if (termset_value_numeric != -1)
1201         {
1202 #if TERMSET_DISABLE
1203             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1204             return ZEBRA_FAIL;
1205 #else
1206             char resname[32];
1207             const char *termset_name = 0;
1208             if (termset_value_numeric != -2)
1209             {
1210                 
1211                 sprintf(resname, "%d", termset_value_numeric);
1212                 termset_name = resname;
1213             }
1214             else
1215             termset_name = termset_value_string;
1216             yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1217             grep_info->termset = resultSetAdd(zh, termset_name, 1);
1218             if (!grep_info->termset)
1219             {
1220                 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1221                 return ZEBRA_FAIL;
1222             }
1223 #endif
1224         }
1225     }
1226     return ZEBRA_OK;
1227 }
1228                                
1229 /**
1230   \brief Create result set(s) for list of terms
1231   \param zh Zebra Handle
1232   \param zapt Attributes Plust Term (RPN leaf)
1233   \param termz term as used in query but converted to UTF-8
1234   \param attributeSet default attribute set
1235   \param stream memory for result
1236   \param index_type register type ("w", "p",..)
1237   \param complete_flag whether it's phrases or not
1238   \param rank_type term flags for ranking
1239   \param xpath_use use attribute for X-Path (-1 for no X-path)
1240   \param rset_nmem memory for result sets
1241   \param result_sets output result set for each term in list (output)
1242   \param num_result_sets number of output result sets
1243   \param kc rset key control to be used for created result sets
1244 */
1245 static ZEBRA_RES term_list_trunc(ZebraHandle zh,
1246                                  Z_AttributesPlusTerm *zapt,
1247                                  const char *termz,
1248                                  const Odr_oid *attributeSet,
1249                                  NMEM stream,
1250                                  const char *index_type, int complete_flag,
1251                                  const char *rank_type,
1252                                  const char *xpath_use,
1253                                  NMEM rset_nmem,
1254                                  RSET **result_sets, int *num_result_sets,
1255                                  struct rset_key_control *kc)
1256 {
1257     char term_dst[IT_MAX_WORD+1];
1258     struct grep_info grep_info;
1259     const char *termp = termz;
1260     int alloc_sets = 0;
1261
1262     *num_result_sets = 0;
1263     *term_dst = 0;
1264     if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1265         return ZEBRA_FAIL;
1266     while(1)
1267     { 
1268         ZEBRA_RES res;
1269
1270         if (alloc_sets == *num_result_sets)
1271         {
1272             int add = 10;
1273             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1274                                               sizeof(*rnew));
1275             if (alloc_sets)
1276                 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1277             alloc_sets = alloc_sets + add;
1278             *result_sets = rnew;
1279         }
1280         res = term_trunc(zh, zapt, &termp, attributeSet,
1281                          stream, &grep_info,
1282                          index_type, complete_flag,
1283                          term_dst, rank_type,
1284                          xpath_use, rset_nmem,
1285                          &(*result_sets)[*num_result_sets],
1286                          kc);
1287         if (res != ZEBRA_OK)
1288         {
1289             int i;
1290             for (i = 0; i < *num_result_sets; i++)
1291                 rset_delete((*result_sets)[i]);
1292             grep_info_delete(&grep_info);
1293             return res;
1294         }
1295         if ((*result_sets)[*num_result_sets] == 0)
1296             break;
1297         (*num_result_sets)++;
1298
1299         if (!*termp)
1300             break;
1301     }
1302     grep_info_delete(&grep_info);
1303     return ZEBRA_OK;
1304 }
1305
1306 static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh,
1307                                          Z_AttributesPlusTerm *zapt,
1308                                          const Odr_oid *attributeSet,
1309                                          const char *index_type,
1310                                          NMEM rset_nmem,
1311                                          RSET *rset,
1312                                          struct rset_key_control *kc)
1313 {
1314     int position_value;
1315     AttrType position;
1316     int ord = -1;
1317     char ord_buf[32];
1318     char term_dict[100];
1319     int ord_len;
1320     char *val;
1321     ISAM_P isam_p;
1322     zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1323     
1324     attr_init_APT(&position, zapt, 3);
1325     position_value = attr_find(&position, NULL);
1326     switch(position_value)
1327     {
1328     case 3:
1329     case -1:
1330         return ZEBRA_OK;
1331     case 1:
1332     case 2:
1333         break;
1334     default:
1335         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1336                             position_value);
1337         return ZEBRA_FAIL;
1338     }
1339
1340
1341     if (!zebra_maps_is_first_in_field(zm))
1342     {
1343         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1344                             position_value);
1345         return ZEBRA_FAIL;
1346     }
1347
1348     if (zebra_apt_get_ord(zh, zapt, index_type, 0,
1349                           attributeSet, &ord) != ZEBRA_OK)
1350     {
1351         return ZEBRA_FAIL;
1352     }
1353     ord_len = key_SU_encode(ord, ord_buf);
1354     memcpy(term_dict, ord_buf, ord_len);
1355     strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1356     val = dict_lookup(zh->reg->dict, term_dict);
1357     if (val)
1358     {
1359         assert(*val == sizeof(ISAM_P));
1360         memcpy(&isam_p, val+1, sizeof(isam_p));
1361
1362         *rset = zebra_create_rset_isam(zh, rset_nmem, kc, kc->scope, 
1363                                        isam_p, 0);
1364     }
1365     return ZEBRA_OK;
1366 }
1367                                          
1368 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1369                                        Z_AttributesPlusTerm *zapt,
1370                                        const char *termz_org,
1371                                        const Odr_oid *attributeSet,
1372                                        NMEM stream,
1373                                        const char *index_type, int complete_flag,
1374                                        const char *rank_type,
1375                                        const char *xpath_use,
1376                                        NMEM rset_nmem,
1377                                        RSET *rset,
1378                                        struct rset_key_control *kc)
1379 {
1380     RSET *result_sets = 0;
1381     int num_result_sets = 0;
1382     ZEBRA_RES res =
1383         term_list_trunc(zh, zapt, termz_org, attributeSet,
1384                         stream, index_type, complete_flag,
1385                         rank_type, xpath_use,
1386                         rset_nmem,
1387                         &result_sets, &num_result_sets, kc);
1388
1389     if (res != ZEBRA_OK)
1390         return res;
1391
1392     if (num_result_sets > 0)
1393     {
1394         RSET first_set = 0;
1395         res = rpn_search_APT_position(zh, zapt, attributeSet, 
1396                                       index_type,
1397                                       rset_nmem, &first_set,
1398                                       kc);
1399         if (res != ZEBRA_OK)
1400             return res;
1401         if (first_set)
1402         {
1403             RSET *nsets = nmem_malloc(stream,
1404                                       sizeof(RSET) * (num_result_sets+1));
1405             nsets[0] = first_set;
1406             memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1407             result_sets = nsets;
1408             num_result_sets++;
1409         }
1410     }
1411     if (num_result_sets == 0)
1412         *rset = rset_create_null(rset_nmem, kc, 0); 
1413     else if (num_result_sets == 1)
1414         *rset = result_sets[0];
1415     else
1416         *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1417                                  num_result_sets, result_sets,
1418                                  1 /* ordered */, 0 /* exclusion */,
1419                                  3 /* relation */, 1 /* distance */);
1420     if (!*rset)
1421         return ZEBRA_FAIL;
1422     return ZEBRA_OK;
1423 }
1424
1425 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1426                                         Z_AttributesPlusTerm *zapt,
1427                                         const char *termz_org,
1428                                         const Odr_oid *attributeSet,
1429                                         NMEM stream,
1430                                         const char *index_type, 
1431                                         int complete_flag,
1432                                         const char *rank_type,
1433                                         const char *xpath_use,
1434                                         NMEM rset_nmem,
1435                                         RSET *rset,
1436                                         struct rset_key_control *kc)
1437 {
1438     RSET *result_sets = 0;
1439     int num_result_sets = 0;
1440     int i;
1441     ZEBRA_RES res =
1442         term_list_trunc(zh, zapt, termz_org, attributeSet,
1443                         stream, index_type, complete_flag,
1444                         rank_type, xpath_use,
1445                         rset_nmem,
1446                         &result_sets, &num_result_sets, kc);
1447     if (res != ZEBRA_OK)
1448         return res;
1449
1450     for (i = 0; i<num_result_sets; i++)
1451     {
1452         RSET first_set = 0;
1453         res = rpn_search_APT_position(zh, zapt, attributeSet, 
1454                                       index_type,
1455                                       rset_nmem, &first_set,
1456                                       kc);
1457         if (res != ZEBRA_OK)
1458         {
1459             for (i = 0; i<num_result_sets; i++)
1460                 rset_delete(result_sets[i]);
1461             return res;
1462         }
1463
1464         if (first_set)
1465         {
1466             RSET tmp_set[2];
1467
1468             tmp_set[0] = first_set;
1469             tmp_set[1] = result_sets[i];
1470             
1471             result_sets[i] = rset_create_prox(
1472                 rset_nmem, kc, kc->scope,
1473                 2, tmp_set,
1474                 1 /* ordered */, 0 /* exclusion */,
1475                 3 /* relation */, 1 /* distance */);
1476         }
1477     }
1478     if (num_result_sets == 0)
1479         *rset = rset_create_null(rset_nmem, kc, 0); 
1480     else if (num_result_sets == 1)
1481         *rset = result_sets[0];
1482     else
1483         *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1484                                num_result_sets, result_sets);
1485     if (!*rset)
1486         return ZEBRA_FAIL;
1487     return ZEBRA_OK;
1488 }
1489
1490 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1491                                          Z_AttributesPlusTerm *zapt,
1492                                          const char *termz_org,
1493                                          const Odr_oid *attributeSet,
1494                                          NMEM stream,
1495                                          const char *index_type, 
1496                                          int complete_flag,
1497                                          const char *rank_type, 
1498                                          const char *xpath_use,
1499                                          NMEM rset_nmem,
1500                                          RSET *rset,
1501                                          struct rset_key_control *kc)
1502 {
1503     RSET *result_sets = 0;
1504     int num_result_sets = 0;
1505     int i;
1506     ZEBRA_RES res =
1507         term_list_trunc(zh, zapt, termz_org, attributeSet,
1508                         stream, index_type, complete_flag,
1509                         rank_type, xpath_use,
1510                         rset_nmem,
1511                         &result_sets, &num_result_sets,
1512                         kc);
1513     if (res != ZEBRA_OK)
1514         return res;
1515     for (i = 0; i<num_result_sets; i++)
1516     {
1517         RSET first_set = 0;
1518         res = rpn_search_APT_position(zh, zapt, attributeSet, 
1519                                       index_type,
1520                                       rset_nmem, &first_set,
1521                                       kc);
1522         if (res != ZEBRA_OK)
1523         {
1524             for (i = 0; i<num_result_sets; i++)
1525                 rset_delete(result_sets[i]);
1526             return res;
1527         }
1528
1529         if (first_set)
1530         {
1531             RSET tmp_set[2];
1532
1533             tmp_set[0] = first_set;
1534             tmp_set[1] = result_sets[i];
1535             
1536             result_sets[i] = rset_create_prox(
1537                 rset_nmem, kc, kc->scope,
1538                 2, tmp_set,
1539                 1 /* ordered */, 0 /* exclusion */,
1540                 3 /* relation */, 1 /* distance */);
1541         }
1542     }
1543
1544
1545     if (num_result_sets == 0)
1546         *rset = rset_create_null(rset_nmem, kc, 0); 
1547     else if (num_result_sets == 1)
1548         *rset = result_sets[0];
1549     else
1550         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1551                                num_result_sets, result_sets);
1552     if (!*rset)
1553         return ZEBRA_FAIL;
1554     return ZEBRA_OK;
1555 }
1556
1557 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1558                             const char **term_sub,
1559                             WRBUF term_dict,
1560                             const Odr_oid *attributeSet,
1561                             struct grep_info *grep_info,
1562                             int *max_pos,
1563                             zebra_map_t zm,
1564                             char *term_dst,
1565                             int *error_code)
1566 {
1567     AttrType relation;
1568     int relation_value;
1569     int term_value;
1570     int r;
1571     WRBUF term_num = wrbuf_alloc();
1572
1573     *error_code = 0;
1574     attr_init_APT(&relation, zapt, 2);
1575     relation_value = attr_find(&relation, NULL);
1576
1577     yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1578
1579     switch (relation_value)
1580     {
1581     case 1:
1582         yaz_log(log_level_rpn, "Relation <");
1583         if (!term_100(zm, term_sub, term_num, 1, term_dst))
1584         { 
1585             wrbuf_destroy(term_num);
1586             return 0;
1587         }
1588         term_value = atoi(wrbuf_cstr(term_num));
1589         gen_regular_rel(term_dict, term_value-1, 1);
1590         break;
1591     case 2:
1592         yaz_log(log_level_rpn, "Relation <=");
1593         if (!term_100(zm, term_sub, term_num, 1, term_dst))
1594         {
1595             wrbuf_destroy(term_num);
1596             return 0;
1597         }
1598         term_value = atoi(wrbuf_cstr(term_num));
1599         gen_regular_rel(term_dict, term_value, 1);
1600         break;
1601     case 4:
1602         yaz_log(log_level_rpn, "Relation >=");
1603         if (!term_100(zm, term_sub, term_num, 1, term_dst))
1604         {
1605             wrbuf_destroy(term_num);
1606             return 0;
1607         }
1608         term_value = atoi(wrbuf_cstr(term_num));
1609         gen_regular_rel(term_dict, term_value, 0);
1610         break;
1611     case 5:
1612         yaz_log(log_level_rpn, "Relation >");
1613         if (!term_100(zm, term_sub, term_num, 1, term_dst))
1614         {
1615             wrbuf_destroy(term_num);
1616             return 0;
1617         }
1618         term_value = atoi(wrbuf_cstr(term_num));
1619         gen_regular_rel(term_dict, term_value+1, 0);
1620         break;
1621     case -1:
1622     case 3:
1623         yaz_log(log_level_rpn, "Relation =");
1624         if (!term_100(zm, term_sub, term_num, 1, term_dst))
1625         {
1626             wrbuf_destroy(term_num);
1627             return 0; 
1628         }
1629         term_value = atoi(wrbuf_cstr(term_num));
1630         wrbuf_printf(term_dict, "(0*%d)", term_value);
1631         break;
1632     case 103:
1633         /* term_tmp untouched.. */
1634         while (**term_sub != '\0')
1635             (*term_sub)++;
1636         break;
1637     default:
1638         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1639         wrbuf_destroy(term_num); 
1640         return 0;
1641     }
1642     r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 
1643                          0, grep_info, max_pos, 0, grep_handle);
1644
1645     if (r == 1)
1646         zebra_set_partial_result(zh);
1647     else if (r)
1648         yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1649     yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1650     wrbuf_destroy(term_num);
1651     return 1;
1652 }
1653
1654 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1655                               const char **term_sub, 
1656                               WRBUF term_dict,
1657                               const Odr_oid *attributeSet, NMEM stream,
1658                               struct grep_info *grep_info,
1659                               const char *index_type, int complete_flag,
1660                               char *term_dst, 
1661                               const char *xpath_use,
1662                               struct ord_list **ol)
1663 {
1664     const char *termp;
1665     struct rpn_char_map_info rcmi;
1666     int max_pos;
1667     int relation_error = 0;
1668     int ord, ord_len, i;
1669     char ord_buf[32];
1670     zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1671     
1672     *ol = ord_list_create(stream);
1673
1674     rpn_char_map_prepare(zh->reg, zm, &rcmi);
1675
1676     termp = *term_sub;
1677     
1678     if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1679                           attributeSet, &ord) != ZEBRA_OK)
1680     {
1681         return ZEBRA_FAIL;
1682     }
1683     
1684     wrbuf_rewind(term_dict);
1685     
1686     *ol = ord_list_append(stream, *ol, ord);
1687     
1688     ord_len = key_SU_encode(ord, ord_buf);
1689     
1690     wrbuf_putc(term_dict, '(');
1691     for (i = 0; i < ord_len; i++)
1692     {
1693         wrbuf_putc(term_dict, 1);
1694         wrbuf_putc(term_dict, ord_buf[i]);
1695     }
1696     wrbuf_putc(term_dict, ')');
1697     
1698     if (!numeric_relation(zh, zapt, &termp, term_dict,
1699                           attributeSet, grep_info, &max_pos, zm,
1700                           term_dst, &relation_error))
1701     {
1702         if (relation_error)
1703         {
1704             zebra_setError(zh, relation_error, 0);
1705             return ZEBRA_FAIL;
1706         }
1707         *term_sub = 0;
1708         return ZEBRA_OK;
1709     }
1710     *term_sub = termp;
1711     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1712     return ZEBRA_OK;
1713 }
1714
1715                                  
1716 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1717                                         Z_AttributesPlusTerm *zapt,
1718                                         const char *termz,
1719                                         const Odr_oid *attributeSet,
1720                                         NMEM stream,
1721                                         const char *index_type, 
1722                                         int complete_flag,
1723                                         const char *rank_type, 
1724                                         const char *xpath_use,
1725                                         NMEM rset_nmem,
1726                                         RSET *rset,
1727                                         struct rset_key_control *kc)
1728 {
1729     char term_dst[IT_MAX_WORD+1];
1730     const char *termp = termz;
1731     RSET *result_sets = 0;
1732     int num_result_sets = 0;
1733     ZEBRA_RES res;
1734     struct grep_info grep_info;
1735     int alloc_sets = 0;
1736     zint hits_limit_value;
1737     const char *term_ref_id_str = 0;
1738
1739     zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
1740                           stream);
1741
1742     yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1743     if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1744         return ZEBRA_FAIL;
1745     while (1)
1746     { 
1747         struct ord_list *ol;
1748         WRBUF term_dict = wrbuf_alloc();
1749         if (alloc_sets == num_result_sets)
1750         {
1751             int add = 10;
1752             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1753                                               sizeof(*rnew));
1754             if (alloc_sets)
1755                 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1756             alloc_sets = alloc_sets + add;
1757             result_sets = rnew;
1758         }
1759         yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1760         grep_info.isam_p_indx = 0;
1761         res = numeric_term(zh, zapt, &termp, term_dict,
1762                            attributeSet, stream, &grep_info,
1763                            index_type, complete_flag,
1764                            term_dst, xpath_use, &ol);
1765         wrbuf_destroy(term_dict);
1766         if (res == ZEBRA_FAIL || termp == 0)
1767             break;
1768         yaz_log(YLOG_DEBUG, "term: %s", term_dst);
1769         result_sets[num_result_sets] =
1770             rset_trunc(zh, grep_info.isam_p_buf,
1771                        grep_info.isam_p_indx, term_dst,
1772                        strlen(term_dst), rank_type,
1773                        0 /* preserve position */,
1774                        zapt->term->which, rset_nmem, 
1775                        kc, kc->scope, ol, index_type,
1776                        hits_limit_value,
1777                        term_ref_id_str);
1778         if (!result_sets[num_result_sets])
1779             break;
1780         num_result_sets++;
1781         if (!*termp)
1782             break;
1783     }
1784     grep_info_delete(&grep_info);
1785
1786     if (res != ZEBRA_OK)
1787         return res;
1788     if (num_result_sets == 0)
1789         *rset = rset_create_null(rset_nmem, kc, 0);
1790     else if (num_result_sets == 1)
1791         *rset = result_sets[0];
1792     else
1793         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1794                                 num_result_sets, result_sets);
1795     if (!*rset)
1796         return ZEBRA_FAIL;
1797     return ZEBRA_OK;
1798 }
1799
1800 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1801                                       Z_AttributesPlusTerm *zapt,
1802                                       const char *termz,
1803                                       const Odr_oid *attributeSet,
1804                                       NMEM stream,
1805                                       const char *rank_type, NMEM rset_nmem,
1806                                       RSET *rset,
1807                                       struct rset_key_control *kc)
1808 {
1809     Record rec;
1810     zint sysno = atozint(termz);
1811     
1812     if (sysno <= 0)
1813         sysno = 0;
1814     rec = rec_get(zh->reg->records, sysno);
1815     if (!rec)
1816         sysno = 0;
1817
1818     rec_free(&rec);
1819
1820     if (sysno <= 0)
1821     {
1822         *rset = rset_create_null(rset_nmem, kc, 0);
1823     }
1824     else
1825     {
1826         RSFD rsfd;
1827         struct it_key key;
1828         *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1829                                  res_get(zh->res, "setTmpDir"), 0);
1830         rsfd = rset_open(*rset, RSETF_WRITE);
1831         
1832         key.mem[0] = sysno;
1833         key.mem[1] = 1;
1834         key.len = 2;
1835         rset_write(rsfd, &key);
1836         rset_close(rsfd);
1837     }
1838     return ZEBRA_OK;
1839 }
1840
1841 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1842                                const Odr_oid *attributeSet, NMEM stream,
1843                                Z_SortKeySpecList *sort_sequence,
1844                                const char *rank_type,
1845                                NMEM rset_nmem,
1846                                RSET *rset,
1847                                struct rset_key_control *kc)
1848 {
1849     int i;
1850     int sort_relation_value;
1851     AttrType sort_relation_type;
1852     Z_SortKeySpec *sks;
1853     Z_SortKey *sk;
1854     char termz[20];
1855     
1856     attr_init_APT(&sort_relation_type, zapt, 7);
1857     sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
1858
1859     if (!sort_sequence->specs)
1860     {
1861         sort_sequence->num_specs = 10;
1862         sort_sequence->specs = (Z_SortKeySpec **)
1863             nmem_malloc(stream, sort_sequence->num_specs *
1864                          sizeof(*sort_sequence->specs));
1865         for (i = 0; i<sort_sequence->num_specs; i++)
1866             sort_sequence->specs[i] = 0;
1867     }
1868     if (zapt->term->which != Z_Term_general)
1869         i = 0;
1870     else
1871         i = atoi_n((char *) zapt->term->u.general->buf,
1872                     zapt->term->u.general->len);
1873     if (i >= sort_sequence->num_specs)
1874         i = 0;
1875     sprintf(termz, "%d", i);
1876
1877     sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1878     sks->sortElement = (Z_SortElement *)
1879         nmem_malloc(stream, sizeof(*sks->sortElement));
1880     sks->sortElement->which = Z_SortElement_generic;
1881     sk = sks->sortElement->u.generic = (Z_SortKey *)
1882         nmem_malloc(stream, sizeof(*sk));
1883     sk->which = Z_SortKey_sortAttributes;
1884     sk->u.sortAttributes = (Z_SortAttributes *)
1885         nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
1886
1887     sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet);
1888     sk->u.sortAttributes->list = zapt->attributes;
1889
1890     sks->sortRelation = (int *)
1891         nmem_malloc(stream, sizeof(*sks->sortRelation));
1892     if (sort_relation_value == 1)
1893         *sks->sortRelation = Z_SortKeySpec_ascending;
1894     else if (sort_relation_value == 2)
1895         *sks->sortRelation = Z_SortKeySpec_descending;
1896     else 
1897         *sks->sortRelation = Z_SortKeySpec_ascending;
1898
1899     sks->caseSensitivity = (int *)
1900         nmem_malloc(stream, sizeof(*sks->caseSensitivity));
1901     *sks->caseSensitivity = 0;
1902
1903     sks->which = Z_SortKeySpec_null;
1904     sks->u.null = odr_nullval ();
1905     sort_sequence->specs[i] = sks;
1906     *rset = rset_create_null(rset_nmem, kc, 0);
1907     return ZEBRA_OK;
1908 }
1909
1910
1911 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1912                            const Odr_oid *attributeSet,
1913                            struct xpath_location_step *xpath, int max,
1914                            NMEM mem)
1915 {
1916     const Odr_oid *curAttributeSet = attributeSet;
1917     AttrType use;
1918     const char *use_string = 0;
1919     
1920     attr_init_APT(&use, zapt, 1);
1921     attr_find_ex(&use, &curAttributeSet, &use_string);
1922
1923     if (!use_string || *use_string != '/')
1924         return -1;
1925
1926     return zebra_parse_xpath_str(use_string, xpath, max, mem);
1927 }
1928  
1929                
1930
1931 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
1932                         const char *index_type, const char *term, 
1933                         const char *xpath_use,
1934                         NMEM rset_nmem,
1935                         struct rset_key_control *kc)
1936 {
1937     struct grep_info grep_info;
1938     int ord = zebraExplain_lookup_attr_str(zh->reg->zei, 
1939                                            zinfo_index_category_index,
1940                                            index_type, xpath_use);
1941     if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, "0") == ZEBRA_FAIL)
1942         return rset_create_null(rset_nmem, kc, 0);
1943     
1944     if (ord < 0)
1945         return rset_create_null(rset_nmem, kc, 0);
1946     else
1947     {
1948         int i, r, max_pos;
1949         char ord_buf[32];
1950         RSET rset;
1951         WRBUF term_dict = wrbuf_alloc();
1952         int ord_len = key_SU_encode(ord, ord_buf);
1953         int term_type = Z_Term_characterString;
1954         const char *flags = "void";
1955
1956         wrbuf_putc(term_dict, '(');
1957         for (i = 0; i<ord_len; i++)
1958         {
1959             wrbuf_putc(term_dict, 1);
1960             wrbuf_putc(term_dict, ord_buf[i]);
1961         }
1962         wrbuf_putc(term_dict, ')');
1963         wrbuf_puts(term_dict, term);
1964         
1965         grep_info.isam_p_indx = 0;
1966         r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 0,
1967                              &grep_info, &max_pos, 0, grep_handle);
1968         yaz_log(YLOG_DEBUG, "%s %d positions", term,
1969                 grep_info.isam_p_indx);
1970         rset = rset_trunc(zh, grep_info.isam_p_buf,
1971                           grep_info.isam_p_indx, term, strlen(term),
1972                           flags, 1, term_type, rset_nmem,
1973                           kc, kc->scope, 0, index_type, 0 /* hits_limit */,
1974                           0 /* term_ref_id_str */);
1975         grep_info_delete(&grep_info);
1976         wrbuf_destroy(term_dict);
1977         return rset;
1978     }
1979 }
1980
1981 static
1982 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
1983                            NMEM stream, const char *rank_type, RSET rset,
1984                            int xpath_len, struct xpath_location_step *xpath,
1985                            NMEM rset_nmem,
1986                            RSET *rset_out,
1987                            struct rset_key_control *kc)
1988 {
1989     int i;
1990     int always_matches = rset ? 0 : 1;
1991
1992     if (xpath_len < 0)
1993     {
1994         *rset_out = rset;
1995         return ZEBRA_OK;
1996     }
1997
1998     yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
1999     for (i = 0; i<xpath_len; i++)
2000     {
2001         yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2002
2003     }
2004
2005     /*
2006       //a    ->    a/.*
2007       //a/b  ->    b/a/.*
2008       /a     ->    a/
2009       /a/b   ->    b/a/
2010
2011       /      ->    none
2012
2013    a[@attr = value]/b[@other = othervalue]
2014
2015  /e/@a val      range(e/,range(@a,freetext(w,1015,val),@a),e/)
2016  /a/b val       range(b/a/,freetext(w,1016,val),b/a/)
2017  /a/b/@c val    range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2018  /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2019  /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2020  /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
2021       
2022     */
2023
2024     dict_grep_cmap(zh->reg->dict, 0, 0);
2025     
2026     {
2027         int level = xpath_len;
2028         int first_path = 1;
2029         
2030         while (--level >= 0)
2031         {
2032             WRBUF xpath_rev = wrbuf_alloc();
2033             int i;
2034             RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
2035
2036             for (i = level; i >= 1; --i)
2037             {
2038                 const char *cp = xpath[i].part;
2039                 if (*cp)
2040                 {
2041                     for (; *cp; cp++)
2042                     {
2043                         if (*cp == '*')
2044                             wrbuf_puts(xpath_rev, "[^/]*");
2045                         else if (*cp == ' ')
2046                             wrbuf_puts(xpath_rev, "\001 ");
2047                         else
2048                             wrbuf_putc(xpath_rev, *cp);
2049
2050                         /* wrbuf_putc does not null-terminate , but
2051                            wrbuf_puts below ensures it does.. so xpath_rev
2052                            is OK iff length is > 0 */
2053                     }
2054                     wrbuf_puts(xpath_rev, "/");
2055                 }
2056                 else if (i == 1)  /* // case */
2057                     wrbuf_puts(xpath_rev, ".*");
2058             }
2059             if (xpath[level].predicate &&
2060                 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2061                 xpath[level].predicate->u.relation.name[0])
2062             {
2063                 WRBUF wbuf = wrbuf_alloc();
2064                 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2065                 if (xpath[level].predicate->u.relation.value)
2066                 {
2067                     const char *cp = xpath[level].predicate->u.relation.value;
2068                     wrbuf_putc(wbuf, '=');
2069                     
2070                     while (*cp)
2071                     {
2072                         if (strchr(REGEX_CHARS, *cp))
2073                             wrbuf_putc(wbuf, '\\');
2074                         wrbuf_putc(wbuf, *cp);
2075                         cp++;
2076                     }
2077                 }
2078                 rset_attr = xpath_trunc(
2079                     zh, stream, "0", wrbuf_cstr(wbuf), 
2080                     ZEBRA_XPATH_ATTR_NAME, 
2081                     rset_nmem, kc);
2082                 wrbuf_destroy(wbuf);
2083             } 
2084             else 
2085             {
2086                 if (!first_path)
2087                 {
2088                     wrbuf_destroy(xpath_rev);
2089                     continue;
2090                 }
2091             }
2092             yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level, 
2093                     wrbuf_cstr(xpath_rev));
2094             if (wrbuf_len(xpath_rev))
2095             {
2096                 rset_start_tag = xpath_trunc(zh, stream, "0", 
2097                                              wrbuf_cstr(xpath_rev),
2098                                              ZEBRA_XPATH_ELM_BEGIN, 
2099                                              rset_nmem, kc);
2100                 if (always_matches)
2101                     rset = rset_start_tag;
2102                 else
2103                 {
2104                     rset_end_tag = xpath_trunc(zh, stream, "0", 
2105                                                wrbuf_cstr(xpath_rev),
2106                                                ZEBRA_XPATH_ELM_END, 
2107                                                rset_nmem, kc);
2108                     
2109                     rset = rset_create_between(rset_nmem, kc, kc->scope,
2110                                                rset_start_tag, rset,
2111                                                rset_end_tag, rset_attr);
2112                 }
2113             }
2114             wrbuf_destroy(xpath_rev);
2115             first_path = 0;
2116         }
2117     }
2118     *rset_out = rset;
2119     return ZEBRA_OK;
2120 }
2121
2122 #define MAX_XPATH_STEPS 10
2123
2124 static ZEBRA_RES rpn_search_database(ZebraHandle zh, 
2125                                      Z_AttributesPlusTerm *zapt,
2126                                      const Odr_oid *attributeSet, NMEM stream,
2127                                      Z_SortKeySpecList *sort_sequence,
2128                                      NMEM rset_nmem,
2129                                      RSET *rset,
2130                                      struct rset_key_control *kc);
2131
2132 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2133                                 const Odr_oid *attributeSet, NMEM stream,
2134                                 Z_SortKeySpecList *sort_sequence,
2135                                 int num_bases, const char **basenames, 
2136                                 NMEM rset_nmem,
2137                                 RSET *rset,
2138                                 struct rset_key_control *kc)
2139 {
2140     RSET *rsets = nmem_malloc(stream, num_bases * sizeof(*rsets));
2141     ZEBRA_RES res = ZEBRA_OK;
2142     int i;
2143     for (i = 0; i < num_bases; i++)
2144     {
2145
2146         if (zebraExplain_curDatabase(zh->reg->zei, basenames[i]))
2147         {
2148             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2149                            basenames[i]);
2150             res = ZEBRA_FAIL;
2151             break;
2152         }
2153         res = rpn_search_database(zh, zapt, attributeSet, stream,
2154                                   sort_sequence,
2155                                   rset_nmem, rsets+i, kc);
2156         if (res != ZEBRA_OK)
2157             break;
2158     }
2159     if (res != ZEBRA_OK)
2160     {   /* must clean up the already created sets */
2161         while (--i >= 0)
2162             rset_delete(rsets[i]);
2163         *rset = 0;
2164     }
2165     else 
2166     {
2167         if (num_bases == 1)
2168             *rset = rsets[0];
2169         else if (num_bases == 0)
2170             *rset = rset_create_null(rset_nmem, kc, 0); 
2171         else
2172             *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* TERMID */,
2173                                    num_bases, rsets);
2174     }
2175     return res;
2176 }
2177
2178 static ZEBRA_RES rpn_search_database(ZebraHandle zh, 
2179                                      Z_AttributesPlusTerm *zapt,
2180                                      const Odr_oid *attributeSet, NMEM stream,
2181                                      Z_SortKeySpecList *sort_sequence,
2182                                      NMEM rset_nmem,
2183                                      RSET *rset,
2184                                      struct rset_key_control *kc)
2185 {
2186     ZEBRA_RES res = ZEBRA_OK;
2187     const char *index_type;
2188     char *search_type = NULL;
2189     char rank_type[128];
2190     int complete_flag;
2191     int sort_flag;
2192     char termz[IT_MAX_WORD+1];
2193     int xpath_len;
2194     const char *xpath_use = 0;
2195     struct xpath_location_step xpath[MAX_XPATH_STEPS];
2196
2197     if (!log_level_set)
2198     {
2199         log_level_rpn = yaz_log_module_level("rpn");
2200         log_level_set = 1;
2201     }
2202     zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type,
2203                     rank_type, &complete_flag, &sort_flag);
2204     
2205     yaz_log(YLOG_DEBUG, "index_type=%s", index_type);
2206     yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2207     yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2208     yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2209
2210     if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2211         return ZEBRA_FAIL;
2212
2213     if (sort_flag)
2214         return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2215                              rank_type, rset_nmem, rset, kc);
2216     /* consider if an X-Path query is used */
2217     xpath_len = rpn_check_xpath(zh, zapt, attributeSet, 
2218                                 xpath, MAX_XPATH_STEPS, stream);
2219     if (xpath_len >= 0)
2220     {
2221         if (xpath[xpath_len-1].part[0] == '@') 
2222             xpath_use = ZEBRA_XPATH_ATTR_CDATA;  /* last step is attribute  */
2223         else
2224             xpath_use = ZEBRA_XPATH_CDATA;  /* searching for cdata */        
2225
2226         if (1)
2227         {
2228             AttrType relation;
2229             int relation_value;
2230
2231             attr_init_APT(&relation, zapt, 2);
2232             relation_value = attr_find(&relation, NULL);
2233
2234             if (relation_value == 103) /* alwaysmatches */
2235             {
2236                 *rset = 0; /* signal no "term" set */
2237                 return rpn_search_xpath(zh, stream, rank_type, *rset, 
2238                                         xpath_len, xpath, rset_nmem, rset, kc);
2239             }
2240         }
2241     }
2242
2243     /* search using one of the various search type strategies
2244        termz is our UTF-8 search term
2245        attributeSet is top-level default attribute set 
2246        stream is ODR for search
2247        reg_id is the register type
2248        complete_flag is 1 for complete subfield, 0 for incomplete
2249        xpath_use is use-attribute to be used for X-Path search, 0 for none
2250     */
2251     if (!strcmp(search_type, "phrase"))
2252     {
2253         res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2254                                     index_type, complete_flag, rank_type,
2255                                     xpath_use,
2256                                     rset_nmem,
2257                                     rset, kc);
2258     }
2259     else if (!strcmp(search_type, "and-list"))
2260     {
2261         res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2262                                       index_type, complete_flag, rank_type,
2263                                       xpath_use,
2264                                       rset_nmem,
2265                                       rset, kc);
2266     }
2267     else if (!strcmp(search_type, "or-list"))
2268     {
2269         res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2270                                      index_type, complete_flag, rank_type,
2271                                      xpath_use,
2272                                      rset_nmem,
2273                                      rset, kc);
2274     }
2275     else if (!strcmp(search_type, "local"))
2276     {
2277         res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2278                                    rank_type, rset_nmem, rset, kc);
2279     }
2280     else if (!strcmp(search_type, "numeric"))
2281     {
2282         res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2283                                      index_type, complete_flag, rank_type,
2284                                      xpath_use,
2285                                      rset_nmem,
2286                                      rset, kc);
2287     }
2288     else
2289     {
2290         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2291         res = ZEBRA_FAIL;
2292     }
2293     if (res != ZEBRA_OK)
2294         return res;
2295     if (!*rset)
2296         return ZEBRA_FAIL;
2297     return rpn_search_xpath(zh, stream, rank_type, *rset, 
2298                             xpath_len, xpath, rset_nmem, rset, kc);
2299 }
2300
2301 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2302                                       const Odr_oid *attributeSet, 
2303                                       NMEM stream, NMEM rset_nmem,
2304                                       Z_SortKeySpecList *sort_sequence,
2305                                       int num_bases, const char **basenames,
2306                                       RSET **result_sets, int *num_result_sets,
2307                                       Z_Operator *parent_op,
2308                                       struct rset_key_control *kc);
2309
2310 ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs,
2311                                    zint *approx_limit)
2312 {
2313     ZEBRA_RES res = ZEBRA_OK;
2314     if (zs->which == Z_RPNStructure_complex)
2315     {
2316         if (res == ZEBRA_OK)
2317             res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
2318                                            approx_limit);
2319         if (res == ZEBRA_OK)
2320             res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
2321                                            approx_limit);
2322     }
2323     else if (zs->which == Z_RPNStructure_simple)
2324     {
2325         if (zs->u.simple->which == Z_Operand_APT)
2326         {
2327             Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
2328             AttrType global_hits_limit_attr;
2329             int l;
2330             
2331             attr_init_APT(&global_hits_limit_attr, zapt, 12);
2332             
2333             l = attr_find(&global_hits_limit_attr, NULL);
2334             if (l != -1)
2335                 *approx_limit = l;
2336         }
2337     }
2338     return res;
2339 }
2340
2341 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2342                          const Odr_oid *attributeSet, 
2343                          NMEM stream, NMEM rset_nmem,
2344                          Z_SortKeySpecList *sort_sequence,
2345                          int num_bases, const char **basenames,
2346                          RSET *result_set)
2347 {
2348     RSET *result_sets = 0;
2349     int num_result_sets = 0;
2350     ZEBRA_RES res;
2351     struct rset_key_control *kc = zebra_key_control_create(zh);
2352
2353     res = rpn_search_structure(zh, zs, attributeSet,
2354                                stream, rset_nmem,
2355                                sort_sequence, 
2356                                num_bases, basenames,
2357                                &result_sets, &num_result_sets,
2358                                0 /* no parent op */,
2359                                kc);
2360     if (res != ZEBRA_OK)
2361     {
2362         int i;
2363         for (i = 0; i<num_result_sets; i++)
2364             rset_delete(result_sets[i]);
2365         *result_set = 0;
2366     }
2367     else
2368     {
2369         assert(num_result_sets == 1);
2370         assert(result_sets);
2371         assert(*result_sets);
2372         *result_set = *result_sets;
2373     }
2374     (*kc->dec)(kc);
2375     return res;
2376 }
2377
2378 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2379                                const Odr_oid *attributeSet, 
2380                                NMEM stream, NMEM rset_nmem,
2381                                Z_SortKeySpecList *sort_sequence,
2382                                int num_bases, const char **basenames,
2383                                RSET **result_sets, int *num_result_sets,
2384                                Z_Operator *parent_op,
2385                                struct rset_key_control *kc)
2386 {
2387     *num_result_sets = 0;
2388     if (zs->which == Z_RPNStructure_complex)
2389     {
2390         ZEBRA_RES res;
2391         Z_Operator *zop = zs->u.complex->roperator;
2392         RSET *result_sets_l = 0;
2393         int num_result_sets_l = 0;
2394         RSET *result_sets_r = 0;
2395         int num_result_sets_r = 0;
2396
2397         res = rpn_search_structure(zh, zs->u.complex->s1,
2398                                    attributeSet, stream, rset_nmem,
2399                                    sort_sequence,
2400                                    num_bases, basenames,
2401                                    &result_sets_l, &num_result_sets_l,
2402                                    zop, kc);
2403         if (res != ZEBRA_OK)
2404         {
2405             int i;
2406             for (i = 0; i<num_result_sets_l; i++)
2407                 rset_delete(result_sets_l[i]);
2408             return res;
2409         }
2410         res = rpn_search_structure(zh, zs->u.complex->s2,
2411                                    attributeSet, stream, rset_nmem,
2412                                    sort_sequence,
2413                                    num_bases, basenames,
2414                                    &result_sets_r, &num_result_sets_r,
2415                                    zop, kc);
2416         if (res != ZEBRA_OK)
2417         {
2418             int i;
2419             for (i = 0; i<num_result_sets_l; i++)
2420                 rset_delete(result_sets_l[i]);
2421             for (i = 0; i<num_result_sets_r; i++)
2422                 rset_delete(result_sets_r[i]);
2423             return res;
2424         }
2425
2426         /* make a new list of result for all children */
2427         *num_result_sets = num_result_sets_l + num_result_sets_r;
2428         *result_sets = nmem_malloc(stream, *num_result_sets * 
2429                                    sizeof(**result_sets));
2430         memcpy(*result_sets, result_sets_l, 
2431                num_result_sets_l * sizeof(**result_sets));
2432         memcpy(*result_sets + num_result_sets_l, result_sets_r, 
2433                num_result_sets_r * sizeof(**result_sets));
2434
2435         if (!parent_op || parent_op->which != zop->which
2436             || (zop->which != Z_Operator_and &&
2437                 zop->which != Z_Operator_or))
2438         {
2439             /* parent node different from this one (or non-present) */
2440             /* we must combine result sets now */
2441             RSET rset;
2442             switch (zop->which)
2443             {
2444             case Z_Operator_and:
2445                 rset = rset_create_and(rset_nmem, kc,
2446                                        kc->scope,
2447                                        *num_result_sets, *result_sets);
2448                 break;
2449             case Z_Operator_or:
2450                 rset = rset_create_or(rset_nmem, kc,
2451                                       kc->scope, 0, /* termid */
2452                                       *num_result_sets, *result_sets);
2453                 break;
2454             case Z_Operator_and_not:
2455                 rset = rset_create_not(rset_nmem, kc,
2456                                        kc->scope,
2457                                        (*result_sets)[0],
2458                                        (*result_sets)[1]);
2459                 break;
2460             case Z_Operator_prox:
2461                 if (zop->u.prox->which != Z_ProximityOperator_known)
2462                 {
2463                     zebra_setError(zh, 
2464                                    YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2465                                    0);
2466                     return ZEBRA_FAIL;
2467                 }
2468                 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2469                 {
2470                     zebra_setError_zint(zh,
2471                                         YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2472                                         *zop->u.prox->u.known);
2473                     return ZEBRA_FAIL;
2474                 }
2475                 else
2476                 {
2477                     rset = rset_create_prox(rset_nmem, kc,
2478                                             kc->scope,
2479                                             *num_result_sets, *result_sets, 
2480                                             *zop->u.prox->ordered,
2481                                             (!zop->u.prox->exclusion ? 
2482                                              0 : *zop->u.prox->exclusion),
2483                                             *zop->u.prox->relationType,
2484                                             *zop->u.prox->distance );
2485                 }
2486                 break;
2487             default:
2488                 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
2489                 return ZEBRA_FAIL;
2490             }
2491             *num_result_sets = 1;
2492             *result_sets = nmem_malloc(stream, *num_result_sets * 
2493                                        sizeof(**result_sets));
2494             (*result_sets)[0] = rset;
2495         }
2496     }
2497     else if (zs->which == Z_RPNStructure_simple)
2498     {
2499         RSET rset;
2500         ZEBRA_RES res;
2501
2502         if (zs->u.simple->which == Z_Operand_APT)
2503         {
2504             yaz_log(YLOG_DEBUG, "rpn_search_APT");
2505             res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2506                                  attributeSet, stream, sort_sequence,
2507                                  num_bases, basenames, rset_nmem, &rset,
2508                                  kc);
2509             if (res != ZEBRA_OK)
2510                 return res;
2511         }
2512         else if (zs->u.simple->which == Z_Operand_resultSetId)
2513         {
2514             yaz_log(YLOG_DEBUG, "rpn_search_ref");
2515             rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2516             if (!rset)
2517             {
2518                 zebra_setError(zh, 
2519                                YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2520                                zs->u.simple->u.resultSetId);
2521                 return ZEBRA_FAIL;
2522             }
2523             rset_dup(rset);
2524         }
2525         else
2526         {
2527             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2528             return ZEBRA_FAIL;
2529         }
2530         *num_result_sets = 1;
2531         *result_sets = nmem_malloc(stream, *num_result_sets * 
2532                                    sizeof(**result_sets));
2533         (*result_sets)[0] = rset;
2534     }
2535     else
2536     {
2537         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2538         return ZEBRA_FAIL;
2539     }
2540     return ZEBRA_OK;
2541 }
2542
2543
2544
2545 /*
2546  * Local variables:
2547  * c-basic-offset: 4
2548  * indent-tabs-mode: nil
2549  * End:
2550  * vim: shiftwidth=4 tabstop=8 expandtab
2551  */
2552