Refactor database lookup to single function.
[idzebra-moved-to-github.git] / index / rpnsearch.c
1 /* $Id: rpnsearch.c,v 1.17 2007-10-29 20:07:04 adam Exp $
2    Copyright (C) 1995-2007
3    Index Data ApS
4
5 This file is part of the Zebra server.
6
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
20
21 */
22
23 #include <stdio.h>
24 #include <assert.h>
25 #ifdef WIN32
26 #include <io.h>
27 #endif
28 #if HAVE_UNISTD_H
29 #include <unistd.h>
30 #endif
31 #include <ctype.h>
32
33 #include <yaz/diagbib1.h>
34 #include "index.h"
35 #include <zebra_xpath.h>
36 #include <attrfind.h>
37 #include <charmap.h>
38 #include <rset.h>
39
/* Non-zero once log_level_rpn has been looked up; add_isam_p performs
   the lookup on its first call. */
static int log_level_set = 0;
/* yaz_log level of the "rpn" log module (0 until looked up). */
static int log_level_rpn = 0;

/* NOTE(review): not referenced in the visible part of this file;
   presumably gates term-set handling further down - confirm. */
#define TERMSET_DISABLE 1
44
45 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
46 {
47     struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
48     const char **out = zebra_maps_input(p->zm, p->reg_type, from, len, 0);
49 #if 0
50     if (out && *out)
51     {
52         const char *outp = *out;
53         yaz_log(YLOG_LOG, "---");
54         while (*outp)
55         {
56             yaz_log(YLOG_LOG, "%02X", *outp);
57             outp++;
58         }
59     }
60 #endif
61     return out;
62 }
63
64 void rpn_char_map_prepare(struct zebra_register *reg, int reg_type,
65                           struct rpn_char_map_info *map_info)
66 {
67     map_info->zm = reg->zebra_maps;
68     map_info->reg_type = reg_type;
69     dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
70 }
71
72 #define TERM_COUNT        
73        
/* State handed to the dictionary grep callback (grep_handle /
   add_isam_p), which appends one ISAM position per matching entry. */
struct grep_info {
#ifdef TERM_COUNT
    int *term_no;        /* reallocated in step with isam_p_buf */
#endif
    ISAM_P *isam_p_buf;  /* collected ISAM positions */
    int isam_p_size;     /* allocated capacity of isam_p_buf, in entries */
    int isam_p_indx;     /* number of entries currently in use */
    int trunc_max;       /* add_isam_p refuses entries at trunc_max - 1 */
    ZebraHandle zh;
    int reg_type;        /* register type passed to zebra_term_untrans */
    ZebraSet termset;    /* when non-null, every hit is also recorded
                            through resultSetAddTerm */
};
86
87 static int add_isam_p(const char *name, const char *info,
88                       struct grep_info *p)
89 {
90     if (!log_level_set)
91     {
92         log_level_rpn = yaz_log_module_level("rpn");
93         log_level_set = 1;
94     }
95     /* we may have to stop this madness.. NOTE: -1 so that if
96        truncmax == trunxlimit we do *not* generate result sets */
97     if (p->isam_p_indx >= p->trunc_max - 1)
98         return 1;
99
100     if (p->isam_p_indx == p->isam_p_size)
101     {
102         ISAM_P *new_isam_p_buf;
103 #ifdef TERM_COUNT        
104         int *new_term_no;        
105 #endif
106         p->isam_p_size = 2*p->isam_p_size + 100;
107         new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
108                                             p->isam_p_size);
109         if (p->isam_p_buf)
110         {
111             memcpy(new_isam_p_buf, p->isam_p_buf,
112                     p->isam_p_indx * sizeof(*p->isam_p_buf));
113             xfree(p->isam_p_buf);
114         }
115         p->isam_p_buf = new_isam_p_buf;
116
117 #ifdef TERM_COUNT
118         new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
119         if (p->term_no)
120         {
121             memcpy(new_term_no, p->isam_p_buf,
122                     p->isam_p_indx * sizeof(*p->term_no));
123             xfree(p->term_no);
124         }
125         p->term_no = new_term_no;
126 #endif
127     }
128     assert(*info == sizeof(*p->isam_p_buf));
129     memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
130
131     if (p->termset)
132     {
133         const char *db;
134         char term_tmp[IT_MAX_WORD];
135         int ord = 0;
136         const char *index_name;
137         int len = key_SU_decode(&ord, (const unsigned char *) name);
138         
139         zebra_term_untrans (p->zh, p->reg_type, term_tmp, name+len);
140         yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
141         zebraExplain_lookup_ord(p->zh->reg->zei,
142                                 ord, 0 /* index_type */, &db, &index_name);
143         yaz_log(log_level_rpn, "grep:  db=%s index=%s", db, index_name);
144         
145         resultSetAddTerm(p->zh, p->termset, name[len], db,
146                          index_name, term_tmp);
147     }
148     (p->isam_p_indx)++;
149     return 0;
150 }
151
/* Grep callback adapter: p is the struct grep_info passed as client
   data; the hit is forwarded to add_isam_p and its result returned. */
static int grep_handle(char *name, const char *info, void *p)
{
    struct grep_info *state = (struct grep_info *) p;

    return add_isam_p(name, info, state);
}
156
157 static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src,
158                     const char *ct1, const char *ct2, int first)
159 {
160     const char *s1, *s0 = *src;
161     const char **map;
162
163     /* skip white space */
164     while (*s0)
165     {
166         if (ct1 && strchr(ct1, *s0))
167             break;
168         if (ct2 && strchr(ct2, *s0))
169             break;
170         s1 = s0;
171         map = zebra_maps_input(zebra_maps, reg_type, &s1, strlen(s1), first);
172         if (**map != *CHR_SPACE)
173             break;
174         s0 = s1;
175     }
176     *src = s0;
177     return *s0;
178 }
179
180
/* esc_str: render in_size bytes of in_buf into out_buf for debugging.
 * Each byte becomes "HH:c  " (two hex digits, a colon, the character
 * itself or '?' when outside 32..126, two spaces).  Once the output is
 * longer than out_size - 20 characters, ".." is appended and the rest of
 * the input is dropped.  out_size must be greater than 20.
 */
static void esc_str(char *out_buf, size_t out_size,
                    const char *in_buf, int in_size)
{
    size_t used = 0;   /* current length of the string in out_buf */
    int pos;

    assert(out_buf);
    assert(in_buf);
    assert(out_size > 20);
    out_buf[0] = '\0';
    for (pos = 0; pos < in_size; pos++)
    {
        int byte = in_buf[pos] & 0xff;
        int shown = (byte >= 32 && byte <= 126) ? byte : '?';

        used += sprintf(out_buf + used, "%02X:%c  ", byte, shown);
        if (used > out_size - 20)
        {
            strcpy(out_buf + used, "..");
            break;
        }
    }
}
206
207 #define REGEX_CHARS " []()|.*+?!"
208
209 static void add_non_space(const char *start, const char *end,
210                           WRBUF term_dict,
211                           char *dst_term, int *dst_ptr,
212                           const char **map, int q_map_match)
213 {
214     size_t sz = end - start;
215     memcpy(dst_term + *dst_ptr, start, sz);
216     (*dst_ptr) += sz;
217     if (!q_map_match)
218     {
219         while (start < end)
220         {
221             if (strchr(REGEX_CHARS, *start))
222                 wrbuf_putc(term_dict, '\\');
223             wrbuf_putc(term_dict, *start);
224             start++;
225         }
226     }
227     else
228     {
229         char tmpbuf[80];
230         esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
231         
232         wrbuf_puts(term_dict, map[0]);
233     }
234 }
235
236 /* term_100: handle term, where trunc = none(no operators at all) */
237 static int term_100(ZebraMaps zebra_maps, const char *index_type,
238                     const char **src, WRBUF term_dict, int space_split,
239                     char *dst_term)
240 {
241     const char *s0;
242     const char **map;
243     int i = 0;
244     int j = 0;
245
246     const char *space_start = 0;
247     const char *space_end = 0;
248
249     if (!term_pre(zebra_maps, *index_type, src, NULL, NULL, !space_split))
250         return 0;
251     s0 = *src;
252     while (*s0)
253     {
254         const char *s1 = s0;
255         int q_map_match = 0;
256         map = zebra_maps_search(zebra_maps, *index_type, &s0, strlen(s0), 
257                                 &q_map_match);
258         if (space_split)
259         {
260             if (**map == *CHR_SPACE)
261                 break;
262         }
263         else  /* complete subfield only. */
264         {
265             if (**map == *CHR_SPACE)
266             {   /* save space mapping for later  .. */
267                 space_start = s1;
268                 space_end = s0;
269                 continue;
270             }
271             else if (space_start)
272             {   /* reload last space */
273                 while (space_start < space_end)
274                 {
275                     if (strchr(REGEX_CHARS, *space_start))
276                         wrbuf_putc(term_dict, '\\');
277                     dst_term[j++] = *space_start;
278                     wrbuf_putc(term_dict, *space_start);
279                     space_start++;
280                                
281                 }
282                 /* and reset */
283                 space_start = space_end = 0;
284             }
285         }
286         i++;
287
288         add_non_space(s1, s0, term_dict, dst_term, &j,
289                       map, q_map_match);
290     }
291     dst_term[j] = '\0';
292     *src = s0;
293     return i;
294 }
295
296 /* term_101: handle term, where trunc = Process # */
297 static int term_101(ZebraMaps zebra_maps, const char *index_type,
298                     const char **src, WRBUF term_dict, int space_split,
299                     char *dst_term)
300 {
301     const char *s0;
302     const char **map;
303     int i = 0;
304     int j = 0;
305
306     if (!term_pre(zebra_maps, *index_type, src, "#", "#", !space_split))
307         return 0;
308     s0 = *src;
309     while (*s0)
310     {
311         if (*s0 == '#')
312         {
313             i++;
314             wrbuf_puts(term_dict, ".*");
315             dst_term[j++] = *s0++;
316         }
317         else
318         {
319             const char *s1 = s0;
320             int q_map_match = 0;
321             map = zebra_maps_search(zebra_maps, *index_type, &s0, strlen(s0), 
322                                     &q_map_match);
323             if (space_split && **map == *CHR_SPACE)
324                 break;
325
326             i++;
327             add_non_space(s1, s0, term_dict, dst_term, &j,
328                           map, q_map_match);
329         }
330     }
331     dst_term[j++] = '\0';
332     *src = s0;
333     return i;
334 }
335
336 /* term_103: handle term, where trunc = re-2 (regular expressions) */
337 static int term_103(ZebraMaps zebra_maps, const char *index_type, 
338                     const char **src,
339                     WRBUF term_dict, int *errors, int space_split,
340                     char *dst_term)
341 {
342     int i = 0;
343     int j = 0;
344     const char *s0;
345     const char **map;
346
347     if (!term_pre(zebra_maps, *index_type, src, "^\\()[].*+?|", "(", !space_split))
348         return 0;
349     s0 = *src;
350     if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
351         isdigit(((const unsigned char *)s0)[1]))
352     {
353         *errors = s0[1] - '0';
354         s0 += 3;
355         if (*errors > 3)
356             *errors = 3;
357     }
358     while (*s0)
359     {
360         if (strchr("^\\()[].*+?|-", *s0))
361         {
362             dst_term[j++] = *s0;
363             wrbuf_putc(term_dict, *s0);
364             s0++;
365             i++;
366         }
367         else
368         {
369             const char *s1 = s0;
370             int q_map_match = 0;
371             map = zebra_maps_search(zebra_maps, *index_type, &s0, strlen(s0), 
372                                     &q_map_match);
373             if (space_split && **map == *CHR_SPACE)
374                 break;
375
376             i++;
377             add_non_space(s1, s0, term_dict, dst_term, &j,
378                           map, q_map_match);
379         }
380     }
381     dst_term[j] = '\0';
382     *src = s0;
383     
384     return i;
385 }
386
/* term_102: handle term, where trunc = re-1 (regular expressions).
   Delegates to term_103 with errors == NULL, so a leading "+D+"
   error-count prefix is not interpreted. */
static int term_102(ZebraMaps zebra_maps, const char *index_type, 
                    const char **src,
                    WRBUF term_dict, int space_split, char *dst_term)
{
    return term_103(zebra_maps, index_type, src, term_dict, NULL, space_split,
                    dst_term);
}
395
396
397 /* term_104: handle term, process # and ! */
398 static int term_104(ZebraMaps zebra_maps, const char *index_type,
399                     const char **src, WRBUF term_dict, int space_split,
400                     char *dst_term)
401 {
402     const char *s0;
403     const char **map;
404     int i = 0;
405     int j = 0;
406
407     if (!term_pre(zebra_maps, *index_type, src, "?*#", "?*#", !space_split))
408         return 0;
409     s0 = *src;
410     while (*s0)
411     {
412         if (*s0 == '?')
413         {
414             i++;
415             dst_term[j++] = *s0++;
416             if (*s0 >= '0' && *s0 <= '9')
417             {
418                 int limit = 0;
419                 while (*s0 >= '0' && *s0 <= '9')
420                 {
421                     limit = limit * 10 + (*s0 - '0');
422                     dst_term[j++] = *s0++;
423                 }
424                 if (limit > 20)
425                     limit = 20;
426                 while (--limit >= 0)
427                 {
428                     wrbuf_puts(term_dict, ".?");
429                 }
430             }
431             else
432             {
433                 wrbuf_puts(term_dict, ".*");
434             }
435         }
436         else if (*s0 == '*')
437         {
438             i++;
439             wrbuf_puts(term_dict, ".*");
440             dst_term[j++] = *s0++;
441         }
442         else if (*s0 == '#')
443         {
444             i++;
445             wrbuf_puts(term_dict, ".");
446             dst_term[j++] = *s0++;
447         }
448         else
449         {
450             const char *s1 = s0;
451             int q_map_match = 0;
452             map = zebra_maps_search(zebra_maps, *index_type, &s0, strlen(s0), 
453                                     &q_map_match);
454             if (space_split && **map == *CHR_SPACE)
455                 break;
456
457             i++;
458             add_non_space(s1, s0, term_dict, dst_term, &j,
459                           map, q_map_match);
460         }
461     }
462     dst_term[j++] = '\0';
463     *src = s0;
464     return i;
465 }
466
467 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
468 static int term_105(ZebraMaps zebra_maps, const char *index_type,
469                     const char **src, WRBUF term_dict, int space_split,
470                     char *dst_term, int right_truncate)
471 {
472     const char *s0;
473     const char **map;
474     int i = 0;
475     int j = 0;
476
477     if (!term_pre(zebra_maps, *index_type, src, "*!", "*!", !space_split))
478         return 0;
479     s0 = *src;
480     while (*s0)
481     {
482         if (*s0 == '*')
483         {
484             i++;
485             wrbuf_puts(term_dict, ".*");
486             dst_term[j++] = *s0++;
487         }
488         else if (*s0 == '!')
489         {
490             i++;
491             wrbuf_putc(term_dict, '.');
492             dst_term[j++] = *s0++;
493         }
494         else
495         {
496             const char *s1 = s0;
497             int q_map_match = 0;
498             map = zebra_maps_search(zebra_maps, *index_type, &s0, strlen(s0), 
499                                     &q_map_match);
500             if (space_split && **map == *CHR_SPACE)
501                 break;
502
503             i++;
504             add_non_space(s1, s0, term_dict, dst_term, &j,
505                           map, q_map_match);
506         }
507     }
508     if (right_truncate)
509         wrbuf_puts(term_dict, ".*");
510     dst_term[j++] = '\0';
511     *src = s0;
512     return i;
513 }
514
515
/* gen_regular_rel - generate regular expression from relation
 *  term_dict: buffer the generated expression is appended to
 *  val:       border value (inclusive)
 *  islt:      1 if <=; 0 if >=.
 *
 * The expression is a set of '|'-separated alternatives built in a
 * local buffer.  A negative val is handled by emitting a '-' prefix,
 * negating val and inverting the direction of the comparison.
 */
static void gen_regular_rel(WRBUF term_dict, int val, int islt)
{
    char dst_buf[20*5*20]; /* assuming enough for expansion */
    char *dst = dst_buf;
    int dst_p;
    int w, d, i;
    int pos = 0;
    char numstr[20];

    yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
    if (val >= 0)
    {
        if (islt)
            /* <= a non-negative border: any negative number qualifies */
            strcpy(dst, "(-[0-9]+|(");
        else
            strcpy(dst, "((");
    } 
    else
    {
        if (!islt)
        {
            /* >= a negative border: any non-negative number qualifies;
               the negative part is matched on magnitude with <= */
            strcpy(dst, "([0-9]+|-(");
            islt = 1;
        }
        else
        {
            /* <= a negative border: magnitude must be >= */
            strcpy(dst, "(-(");
            islt = 0;
        }
        val = -val;
    }
    dst_p = strlen(dst);
    sprintf(numstr, "%d", val);
    /* walk the digits from least to most significant; pos counts the
       digits to the right of the current one */
    for (w = strlen(numstr); --w >= 0; pos++)
    {
        d = numstr[w];
        if (pos > 0)
        {
            /* for all but the last digit the bound is made strict;
               a digit that cannot be stepped yields no alternative */
            if (islt)
            {
                if (d == '0')
                    continue;
                d--;
            } 
            else
            {
                if (d == '9')
                    continue;
                d++;
            }
        }
        
        /* write the whole number, then back up so that dst_p indexes
           digit w; everything from there on is overwritten below */
        strcpy(dst + dst_p, numstr);
        dst_p = strlen(dst) - pos - 1;

        if (islt)
        {
            if (d != '0')
            {
                dst[dst_p++] = '[';
                dst[dst_p++] = '0';
                dst[dst_p++] = '-';
                dst[dst_p++] = d;
                dst[dst_p++] = ']';
            }
            else
                dst[dst_p++] = d;
        }
        else
        {
            if (d != '9')
            { 
                dst[dst_p++] = '[';
                dst[dst_p++] = d;
                dst[dst_p++] = '-';
                dst[dst_p++] = '9';
                dst[dst_p++] = ']';
            }
            else
                dst[dst_p++] = d;
        }
        /* the remaining pos digits may be anything */
        for (i = 0; i<pos; i++)
        {
            dst[dst_p++] = '[';
            dst[dst_p++] = '0';
            dst[dst_p++] = '-';
            dst[dst_p++] = '9';
            dst[dst_p++] = ']';
        }
        dst[dst_p++] = '|';
    }
    dst[dst_p] = '\0';
    if (islt)
    {
        /* match everything less than 10^(pos-1) */
        strcat(dst, "0*");
        for (i = 1; i<pos; i++)
            strcat(dst, "[0-9]?");
    }
    else
    {
        /* match everything greater than 10^pos */
        for (i = 0; i <= pos; i++)
            strcat(dst, "[0-9]");
        strcat(dst, "[0-9]*");
    }
    strcat(dst, "))");
    wrbuf_puts(term_dict, dst);
}
629
630 void string_rel_add_char(WRBUF term_p, WRBUF wsrc, int *indx)
631 {
632     const char *src = wrbuf_cstr(wsrc);
633     if (src[*indx] == '\\')
634     {
635         wrbuf_putc(term_p, src[*indx]);
636         (*indx)++;
637     }
638     wrbuf_putc(term_p, src[*indx]);
639     (*indx)++;
640 }
641
642 /*
643  *   >  abc     ([b-].*|a[c-].*|ab[d-].*|abc.+)
644  *              ([^-a].*|a[^-b].*ab[^-c].*|abc.+)
645  *   >= abc     ([b-].*|a[c-].*|ab[c-].*)
646  *              ([^-a].*|a[^-b].*|ab[c-].*)
647  *   <  abc     ([-0].*|a[-a].*|ab[-b].*)
648  *              ([^a-].*|a[^b-].*|ab[^c-].*)
649  *   <= abc     ([-0].*|a[-a].*|ab[-b].*|abc)
650  *              ([^a-].*|a[^b-].*|ab[^c-].*|abc)
651  */
652 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
653                            const char **term_sub, WRBUF term_dict,
654                            const Odr_oid *attributeSet,
655                            const char *index_type, int space_split, char *term_dst,
656                            int *error_code)
657 {
658     AttrType relation;
659     int relation_value;
660     int i;
661     WRBUF term_component = wrbuf_alloc();
662
663     attr_init_APT(&relation, zapt, 2);
664     relation_value = attr_find(&relation, NULL);
665
666     *error_code = 0;
667     yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
668     switch (relation_value)
669     {
670     case 1:
671         if (!term_100(zh->reg->zebra_maps, index_type,
672                       term_sub, term_component,
673                       space_split, term_dst))
674         {
675             wrbuf_destroy(term_component);
676             return 0;
677         }
678         yaz_log(log_level_rpn, "Relation <");
679         
680         wrbuf_putc(term_dict, '(');
681         for (i = 0; i < wrbuf_len(term_component); )
682         {
683             int j = 0;
684             
685             if (i)
686                 wrbuf_putc(term_dict, '|');
687             while (j < i)
688                 string_rel_add_char(term_dict, term_component, &j);
689
690             wrbuf_putc(term_dict, '[');
691
692             wrbuf_putc(term_dict, '^');
693             
694             wrbuf_putc(term_dict, 1);
695             wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
696             
697             string_rel_add_char(term_dict, term_component, &i);
698             wrbuf_putc(term_dict, '-');
699             
700             wrbuf_putc(term_dict, ']');
701             wrbuf_putc(term_dict, '.');
702             wrbuf_putc(term_dict, '*');
703         }
704         wrbuf_putc(term_dict, ')');
705         break;
706     case 2:
707         if (!term_100(zh->reg->zebra_maps, index_type,
708                       term_sub, term_component,
709                       space_split, term_dst))
710         {
711             wrbuf_destroy(term_component);
712             return 0;
713         }
714         yaz_log(log_level_rpn, "Relation <=");
715
716         wrbuf_putc(term_dict, '(');
717         for (i = 0; i < wrbuf_len(term_component); )
718         {
719             int j = 0;
720
721             while (j < i)
722                 string_rel_add_char(term_dict, term_component, &j);
723             wrbuf_putc(term_dict, '[');
724
725             wrbuf_putc(term_dict, '^');
726
727             wrbuf_putc(term_dict, 1);
728             wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
729
730             string_rel_add_char(term_dict, term_component, &i);
731             wrbuf_putc(term_dict, '-');
732
733             wrbuf_putc(term_dict, ']');
734             wrbuf_putc(term_dict, '.');
735             wrbuf_putc(term_dict, '*');
736
737             wrbuf_putc(term_dict, '|');
738         }
739         for (i = 0; i < wrbuf_len(term_component); )
740             string_rel_add_char(term_dict, term_component, &i);
741         wrbuf_putc(term_dict, ')');
742         break;
743     case 5:
744         if (!term_100(zh->reg->zebra_maps, index_type,
745                       term_sub, term_component, space_split, term_dst))
746         {
747             wrbuf_destroy(term_component);
748             return 0;
749         }
750         yaz_log(log_level_rpn, "Relation >");
751
752         wrbuf_putc(term_dict, '(');
753         for (i = 0; i < wrbuf_len(term_component); )
754         {
755             int j = 0;
756
757             while (j < i)
758                 string_rel_add_char(term_dict, term_component, &j);
759             wrbuf_putc(term_dict, '[');
760             
761             wrbuf_putc(term_dict, '^');
762             wrbuf_putc(term_dict, '-');
763             string_rel_add_char(term_dict, term_component, &i);
764
765             wrbuf_putc(term_dict, ']');
766             wrbuf_putc(term_dict, '.');
767             wrbuf_putc(term_dict, '*');
768
769             wrbuf_putc(term_dict, '|');
770         }
771         for (i = 0; i < wrbuf_len(term_component); )
772             string_rel_add_char(term_dict, term_component, &i);
773         wrbuf_putc(term_dict, '.');
774         wrbuf_putc(term_dict, '+');
775         wrbuf_putc(term_dict, ')');
776         break;
777     case 4:
778         if (!term_100(zh->reg->zebra_maps, index_type, term_sub,
779                       term_component, space_split, term_dst))
780         {
781             wrbuf_destroy(term_component);
782             return 0;
783         }
784         yaz_log(log_level_rpn, "Relation >=");
785
786         wrbuf_putc(term_dict, '(');
787         for (i = 0; i < wrbuf_len(term_component); )
788         {
789             int j = 0;
790
791             if (i)
792                 wrbuf_putc(term_dict, '|');
793             while (j < i)
794                 string_rel_add_char(term_dict, term_component, &j);
795             wrbuf_putc(term_dict, '[');
796
797             if (i < wrbuf_len(term_component)-1)
798             {
799                 wrbuf_putc(term_dict, '^');
800                 wrbuf_putc(term_dict, '-');
801                 string_rel_add_char(term_dict, term_component, &i);
802             }
803             else
804             {
805                 string_rel_add_char(term_dict, term_component, &i);
806                 wrbuf_putc(term_dict, '-');
807             }
808             wrbuf_putc(term_dict, ']');
809             wrbuf_putc(term_dict, '.');
810             wrbuf_putc(term_dict, '*');
811         }
812         wrbuf_putc(term_dict, ')');
813         break;
814     case 3:
815     case 102:
816     case -1:
817         if (!**term_sub)
818             return 1;
819         yaz_log(log_level_rpn, "Relation =");
820         if (!term_100(zh->reg->zebra_maps, index_type, term_sub,
821                       term_component, space_split, term_dst))
822         {
823             wrbuf_destroy(term_component);
824             return 0;
825         }
826         wrbuf_puts(term_dict, "(");
827         wrbuf_puts(term_dict, wrbuf_cstr(term_component));
828         wrbuf_puts(term_dict, ")");
829         break;
830     case 103:
831         yaz_log(log_level_rpn, "Relation always matches");
832         /* skip to end of term (we don't care what it is) */
833         while (**term_sub != '\0')
834             (*term_sub)++;
835         break;
836     default:
837         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
838         wrbuf_destroy(term_component);
839         return 0;
840     }
841     wrbuf_destroy(term_component);
842     return 1;
843 }
844
/* Forward declaration: term_trunc() calls string_term(), which is
   defined further down in this file. */
static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
                             const char **term_sub, 
                             WRBUF term_dict,
                             const Odr_oid *attributeSet, NMEM stream,
                             struct grep_info *grep_info,
                             const char *index_type, int complete_flag,
                             char *term_dst,
                             const char *xpath_use,
                             struct ord_list **ol);
854
855 static ZEBRA_RES term_limits_APT(ZebraHandle zh,
856                                  Z_AttributesPlusTerm *zapt,
857                                  zint *hits_limit_value,
858                                  const char **term_ref_id_str,
859                                  NMEM nmem)
860 {
861     AttrType term_ref_id_attr;
862     AttrType hits_limit_attr;
863     int term_ref_id_int;
864  
865     attr_init_APT(&hits_limit_attr, zapt, 11);
866     *hits_limit_value  = attr_find(&hits_limit_attr, NULL);
867
868     attr_init_APT(&term_ref_id_attr, zapt, 10);
869     term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
870     if (term_ref_id_int >= 0)
871     {
872         char *res = nmem_malloc(nmem, 20);
873         sprintf(res, "%d", term_ref_id_int);
874         *term_ref_id_str = res;
875     }
876
877     /* no limit given ? */
878     if (*hits_limit_value == -1)
879     {
880         if (*term_ref_id_str)
881         {
882             /* use global if term_ref is present */
883             *hits_limit_value = zh->approx_limit;
884         }
885         else
886         {
887             /* no counting if term_ref is not present */
888             *hits_limit_value = 0;
889         }
890     }
891     else if (*hits_limit_value == 0)
892     {
893         /* 0 is the same as global limit */
894         *hits_limit_value = zh->approx_limit;
895     }
896     yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
897             *term_ref_id_str ? *term_ref_id_str : "none",
898             *hits_limit_value);
899     return ZEBRA_OK;
900 }
901
902 static ZEBRA_RES term_trunc(ZebraHandle zh,
903                             Z_AttributesPlusTerm *zapt,
904                             const char **term_sub, 
905                             const Odr_oid *attributeSet, NMEM stream,
906                             struct grep_info *grep_info,
907                             const char *index_type, int complete_flag,
908                             char *term_dst,
909                             const char *rank_type, 
910                             const char *xpath_use,
911                             NMEM rset_nmem,
912                             RSET *rset,
913                             struct rset_key_control *kc)
914 {
915     ZEBRA_RES res;
916     struct ord_list *ol;
917     zint hits_limit_value;
918     const char *term_ref_id_str = 0;
919     WRBUF term_dict = wrbuf_alloc();
920
921     *rset = 0;
922     term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
923     grep_info->isam_p_indx = 0;
924     res = string_term(zh, zapt, term_sub, term_dict,
925                       attributeSet, stream, grep_info,
926                       index_type, complete_flag,
927                       term_dst, xpath_use, &ol);
928     wrbuf_destroy(term_dict);
929     if (res != ZEBRA_OK)
930         return res;
931     if (!*term_sub)  /* no more terms ? */
932         return res;
933     yaz_log(log_level_rpn, "term: %s", term_dst);
934     *rset = rset_trunc(zh, grep_info->isam_p_buf,
935                        grep_info->isam_p_indx, term_dst,
936                        strlen(term_dst), rank_type, 1 /* preserve pos */,
937                        zapt->term->which, rset_nmem,
938                        kc, kc->scope, ol, index_type, hits_limit_value,
939                        term_ref_id_str);
940     if (!*rset)
941         return ZEBRA_FAIL;
942     return ZEBRA_OK;
943 }
944
945 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
946                              const char **term_sub, 
947                              WRBUF term_dict,
948                              const Odr_oid *attributeSet, NMEM stream,
949                              struct grep_info *grep_info,
950                              const char *index_type, int complete_flag,
951                              char *term_dst,
952                              const char *xpath_use,
953                              struct ord_list **ol)
954 {
955     int r;
956     AttrType truncation;
957     int truncation_value;
958     const char *termp;
959     struct rpn_char_map_info rcmi;
960
961     int space_split = complete_flag ? 0 : 1;
962     int ord = -1;
963     int regex_range = 0;
964     int max_pos, prefix_len = 0;
965     int relation_error;
966     char ord_buf[32];
967     int ord_len, i;
968     
969     *ol = ord_list_create(stream);
970
971     rpn_char_map_prepare(zh->reg, *index_type, &rcmi);
972     attr_init_APT(&truncation, zapt, 5);
973     truncation_value = attr_find(&truncation, NULL);
974     yaz_log(log_level_rpn, "truncation value %d", truncation_value);
975
976     termp = *term_sub; /* start of term for each database */
977     
978     if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
979                           attributeSet, &ord) != ZEBRA_OK)
980     {
981         *term_sub = 0;
982         return ZEBRA_FAIL;
983     }
984     
985     wrbuf_rewind(term_dict); /* new dictionary regexp term */
986     
987     *ol = ord_list_append(stream, *ol, ord);
988     ord_len = key_SU_encode(ord, ord_buf);
989     
990     wrbuf_putc(term_dict, '(');
991     
992     for (i = 0; i<ord_len; i++)
993     {
994         wrbuf_putc(term_dict, 1);  /* our internal regexp escape char */
995         wrbuf_putc(term_dict, ord_buf[i]);
996     }
997     wrbuf_putc(term_dict, ')');
998     
999     prefix_len = wrbuf_len(term_dict);
1000     
1001     switch (truncation_value)
1002     {
1003     case -1:         /* not specified */
1004     case 100:        /* do not truncate */
1005         if (!string_relation(zh, zapt, &termp, term_dict,
1006                              attributeSet,
1007                              index_type, space_split, term_dst,
1008                              &relation_error))
1009         {
1010             if (relation_error)
1011             {
1012                 zebra_setError(zh, relation_error, 0);
1013                 return ZEBRA_FAIL;
1014             }
1015             *term_sub = 0;
1016             return ZEBRA_OK;
1017         }
1018         break;
1019     case 1:          /* right truncation */
1020         wrbuf_putc(term_dict, '(');
1021         if (!term_100(zh->reg->zebra_maps, index_type,
1022                       &termp, term_dict, space_split, term_dst))
1023         {
1024             *term_sub = 0;
1025             return ZEBRA_OK;
1026         }
1027         wrbuf_puts(term_dict, ".*)");
1028         break;
1029     case 2:          /* keft truncation */
1030         wrbuf_puts(term_dict, "(.*");
1031         if (!term_100(zh->reg->zebra_maps, index_type,
1032                       &termp, term_dict, space_split, term_dst))
1033         {
1034             *term_sub = 0;
1035             return ZEBRA_OK;
1036         }
1037         wrbuf_putc(term_dict, ')');
1038         break;
1039     case 3:          /* left&right truncation */
1040         wrbuf_puts(term_dict, "(.*");
1041         if (!term_100(zh->reg->zebra_maps, index_type,
1042                       &termp, term_dict, space_split, term_dst))
1043         {
1044             *term_sub = 0;
1045             return ZEBRA_OK;
1046         }
1047         wrbuf_puts(term_dict, ".*)");
1048         break;
1049     case 101:        /* process # in term */
1050         wrbuf_putc(term_dict, '(');
1051         if (!term_101(zh->reg->zebra_maps, index_type,
1052                       &termp, term_dict, space_split, term_dst))
1053         {
1054             *term_sub = 0;
1055             return ZEBRA_OK;
1056         }
1057         wrbuf_puts(term_dict, ")");
1058         break;
1059     case 102:        /* Regexp-1 */
1060         wrbuf_putc(term_dict, '(');
1061         if (!term_102(zh->reg->zebra_maps, index_type,
1062                       &termp, term_dict, space_split, term_dst))
1063         {
1064             *term_sub = 0;
1065             return ZEBRA_OK;
1066         }
1067         wrbuf_putc(term_dict, ')');
1068         break;
1069     case 103:       /* Regexp-2 */
1070         regex_range = 1;
1071         wrbuf_putc(term_dict, '(');
1072         if (!term_103(zh->reg->zebra_maps, index_type,
1073                       &termp, term_dict, &regex_range,
1074                       space_split, term_dst))
1075         {
1076             *term_sub = 0;
1077             return ZEBRA_OK;
1078         }
1079         wrbuf_putc(term_dict, ')');
1080         break;
1081     case 104:        /* process # and ! in term */
1082         wrbuf_putc(term_dict, '(');
1083         if (!term_104(zh->reg->zebra_maps, index_type,
1084                       &termp, term_dict, space_split, term_dst))
1085         {
1086             *term_sub = 0;
1087             return ZEBRA_OK;
1088         }
1089         wrbuf_putc(term_dict, ')');
1090         break;
1091     case 105:        /* process * and ! in term */
1092         wrbuf_putc(term_dict, '(');
1093         if (!term_105(zh->reg->zebra_maps, index_type,
1094                       &termp, term_dict, space_split, term_dst, 1))
1095         {
1096             *term_sub = 0;
1097             return ZEBRA_OK;
1098         }
1099         wrbuf_putc(term_dict, ')');
1100         break;
1101     case 106:        /* process * and ! in term */
1102         wrbuf_putc(term_dict, '(');
1103         if (!term_105(zh->reg->zebra_maps, index_type,
1104                           &termp, term_dict, space_split, term_dst, 0))
1105         {
1106             *term_sub = 0;
1107             return ZEBRA_OK;
1108         }
1109         wrbuf_putc(term_dict, ')');
1110         break;
1111     default:
1112         zebra_setError_zint(zh,
1113                             YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1114                             truncation_value);
1115         return ZEBRA_FAIL;
1116     }
1117     if (1)
1118     {
1119         char buf[1000];
1120         const char *input = wrbuf_cstr(term_dict) + prefix_len;
1121         esc_str(buf, sizeof(buf), input, strlen(input));
1122     }
1123     yaz_log(log_level_rpn, "dict_lookup_grep: %s",
1124             wrbuf_cstr(term_dict) + prefix_len);
1125     r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), regex_range,
1126                          grep_info, &max_pos, 
1127                          ord_len /* number of "exact" chars */,
1128                          grep_handle);
1129     if (r == 1)
1130         zebra_set_partial_result(zh);
1131     else if (r)
1132         yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1133     *term_sub = termp;
1134     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1135     return ZEBRA_OK;
1136 }
1137
1138
1139
1140 static void grep_info_delete(struct grep_info *grep_info)
1141 {
1142 #ifdef TERM_COUNT
1143     xfree(grep_info->term_no);
1144 #endif
1145     xfree(grep_info->isam_p_buf);
1146 }
1147
1148 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1149                                    Z_AttributesPlusTerm *zapt,
1150                                    struct grep_info *grep_info,
1151                                    int reg_type)
1152 {
1153 #ifdef TERM_COUNT
1154     grep_info->term_no = 0;
1155 #endif
1156     grep_info->trunc_max = atoi(res_get_def(zh->res, "truncmax", "10000"));
1157     grep_info->isam_p_size = 0;
1158     grep_info->isam_p_buf = NULL;
1159     grep_info->zh = zh;
1160     grep_info->reg_type = reg_type;
1161     grep_info->termset = 0;
1162     if (zapt)
1163     {
1164         AttrType truncmax;
1165         int truncmax_value;
1166
1167         attr_init_APT(&truncmax, zapt, 13);
1168         truncmax_value = attr_find(&truncmax, NULL);
1169         if (truncmax_value != -1)
1170             grep_info->trunc_max = truncmax_value;
1171     }
1172     if (zapt)
1173     {
1174         AttrType termset;
1175         int termset_value_numeric;
1176         const char *termset_value_string;
1177
1178         attr_init_APT(&termset, zapt, 8);
1179         termset_value_numeric =
1180             attr_find_ex(&termset, NULL, &termset_value_string);
1181         if (termset_value_numeric != -1)
1182         {
1183 #if TERMSET_DISABLE
1184             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1185             return ZEBRA_FAIL;
1186 #else
1187             char resname[32];
1188             const char *termset_name = 0;
1189             if (termset_value_numeric != -2)
1190             {
1191                 
1192                 sprintf(resname, "%d", termset_value_numeric);
1193                 termset_name = resname;
1194             }
1195             else
1196             termset_name = termset_value_string;
1197             yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1198             grep_info->termset = resultSetAdd(zh, termset_name, 1);
1199             if (!grep_info->termset)
1200             {
1201                 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1202                 return ZEBRA_FAIL;
1203             }
1204 #endif
1205         }
1206     }
1207     return ZEBRA_OK;
1208 }
1209                                
1210 /**
1211   \brief Create result set(s) for list of terms
1212   \param zh Zebra Handle
1213   \param zapt Attributes Plust Term (RPN leaf)
1214   \param termz term as used in query but converted to UTF-8
1215   \param attributeSet default attribute set
1216   \param stream memory for result
1217   \param index_type register type ("w", "p",..)
1218   \param complete_flag whether it's phrases or not
1219   \param rank_type term flags for ranking
1220   \param xpath_use use attribute for X-Path (-1 for no X-path)
1221   \param rset_nmem memory for result sets
1222   \param result_sets output result set for each term in list (output)
1223   \param num_result_sets number of output result sets
1224   \param kc rset key control to be used for created result sets
1225 */
1226 static ZEBRA_RES term_list_trunc(ZebraHandle zh,
1227                                  Z_AttributesPlusTerm *zapt,
1228                                  const char *termz,
1229                                  const Odr_oid *attributeSet,
1230                                  NMEM stream,
1231                                  const char *index_type, int complete_flag,
1232                                  const char *rank_type,
1233                                  const char *xpath_use,
1234                                  NMEM rset_nmem,
1235                                  RSET **result_sets, int *num_result_sets,
1236                                  struct rset_key_control *kc)
1237 {
1238     char term_dst[IT_MAX_WORD+1];
1239     struct grep_info grep_info;
1240     const char *termp = termz;
1241     int alloc_sets = 0;
1242
1243     *num_result_sets = 0;
1244     *term_dst = 0;
1245     if (grep_info_prepare(zh, zapt, &grep_info, *index_type) == ZEBRA_FAIL)
1246         return ZEBRA_FAIL;
1247     while(1)
1248     { 
1249         ZEBRA_RES res;
1250
1251         if (alloc_sets == *num_result_sets)
1252         {
1253             int add = 10;
1254             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1255                                               sizeof(*rnew));
1256             if (alloc_sets)
1257                 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1258             alloc_sets = alloc_sets + add;
1259             *result_sets = rnew;
1260         }
1261         res = term_trunc(zh, zapt, &termp, attributeSet,
1262                          stream, &grep_info,
1263                          index_type, complete_flag,
1264                          term_dst, rank_type,
1265                          xpath_use, rset_nmem,
1266                          &(*result_sets)[*num_result_sets],
1267                          kc);
1268         if (res != ZEBRA_OK)
1269         {
1270             int i;
1271             for (i = 0; i < *num_result_sets; i++)
1272                 rset_delete((*result_sets)[i]);
1273             grep_info_delete(&grep_info);
1274             return res;
1275         }
1276         if ((*result_sets)[*num_result_sets] == 0)
1277             break;
1278         (*num_result_sets)++;
1279
1280         if (!*termp)
1281             break;
1282     }
1283     grep_info_delete(&grep_info);
1284     return ZEBRA_OK;
1285 }
1286
1287 static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh,
1288                                          Z_AttributesPlusTerm *zapt,
1289                                          const Odr_oid *attributeSet,
1290                                          const char *index_type,
1291                                          NMEM rset_nmem,
1292                                          RSET *rset,
1293                                          struct rset_key_control *kc)
1294 {
1295     int position_value;
1296     AttrType position;
1297     int ord = -1;
1298     char ord_buf[32];
1299     char term_dict[100];
1300     int ord_len;
1301     char *val;
1302     ISAM_P isam_p;
1303     
1304     attr_init_APT(&position, zapt, 3);
1305     position_value = attr_find(&position, NULL);
1306     switch(position_value)
1307     {
1308     case 3:
1309     case -1:
1310         return ZEBRA_OK;
1311     case 1:
1312     case 2:
1313         break;
1314     default:
1315         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1316                             position_value);
1317         return ZEBRA_FAIL;
1318     }
1319
1320     if (!zebra_maps_is_first_in_field(zh->reg->zebra_maps, *index_type))
1321     {
1322         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1323                             position_value);
1324         return ZEBRA_FAIL;
1325     }
1326
1327     if (!zh->reg->isamb && !zh->reg->isamc)
1328     {
1329         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1330                             position_value);
1331         return ZEBRA_FAIL;
1332     }
1333
1334     if (zebra_apt_get_ord(zh, zapt, index_type, 0,
1335                           attributeSet, &ord) != ZEBRA_OK)
1336     {
1337         return ZEBRA_FAIL;
1338     }
1339     ord_len = key_SU_encode(ord, ord_buf);
1340     memcpy(term_dict, ord_buf, ord_len);
1341     strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1342     val = dict_lookup(zh->reg->dict, term_dict);
1343     if (val)
1344     {
1345         assert(*val == sizeof(ISAM_P));
1346         memcpy(&isam_p, val+1, sizeof(isam_p));
1347         
1348         if (zh->reg->isamb)
1349             *rset = rsisamb_create(rset_nmem, kc, kc->scope,
1350                                    zh->reg->isamb, isam_p, 0);
1351         else if (zh->reg->isamc)
1352             *rset = rsisamc_create(rset_nmem, kc, kc->scope,
1353                                    zh->reg->isamc, isam_p, 0);
1354     }
1355     return ZEBRA_OK;
1356 }
1357                                          
1358 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1359                                        Z_AttributesPlusTerm *zapt,
1360                                        const char *termz_org,
1361                                        const Odr_oid *attributeSet,
1362                                        NMEM stream,
1363                                        const char *index_type, int complete_flag,
1364                                        const char *rank_type,
1365                                        const char *xpath_use,
1366                                        NMEM rset_nmem,
1367                                        RSET *rset,
1368                                        struct rset_key_control *kc)
1369 {
1370     RSET *result_sets = 0;
1371     int num_result_sets = 0;
1372     ZEBRA_RES res =
1373         term_list_trunc(zh, zapt, termz_org, attributeSet,
1374                         stream, index_type, complete_flag,
1375                         rank_type, xpath_use,
1376                         rset_nmem,
1377                         &result_sets, &num_result_sets, kc);
1378
1379     if (res != ZEBRA_OK)
1380         return res;
1381
1382     if (num_result_sets > 0)
1383     {
1384         RSET first_set = 0;
1385         res = rpn_search_APT_position(zh, zapt, attributeSet, 
1386                                       index_type,
1387                                       rset_nmem, &first_set,
1388                                       kc);
1389         if (res != ZEBRA_OK)
1390             return res;
1391         if (first_set)
1392         {
1393             RSET *nsets = nmem_malloc(stream,
1394                                       sizeof(RSET) * (num_result_sets+1));
1395             nsets[0] = first_set;
1396             memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1397             result_sets = nsets;
1398             num_result_sets++;
1399         }
1400     }
1401     if (num_result_sets == 0)
1402         *rset = rset_create_null(rset_nmem, kc, 0); 
1403     else if (num_result_sets == 1)
1404         *rset = result_sets[0];
1405     else
1406         *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1407                                  num_result_sets, result_sets,
1408                                  1 /* ordered */, 0 /* exclusion */,
1409                                  3 /* relation */, 1 /* distance */);
1410     if (!*rset)
1411         return ZEBRA_FAIL;
1412     return ZEBRA_OK;
1413 }
1414
1415 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1416                                         Z_AttributesPlusTerm *zapt,
1417                                         const char *termz_org,
1418                                         const Odr_oid *attributeSet,
1419                                         NMEM stream,
1420                                         const char *index_type, 
1421                                         int complete_flag,
1422                                         const char *rank_type,
1423                                         const char *xpath_use,
1424                                         NMEM rset_nmem,
1425                                         RSET *rset,
1426                                         struct rset_key_control *kc)
1427 {
1428     RSET *result_sets = 0;
1429     int num_result_sets = 0;
1430     int i;
1431     ZEBRA_RES res =
1432         term_list_trunc(zh, zapt, termz_org, attributeSet,
1433                         stream, index_type, complete_flag,
1434                         rank_type, xpath_use,
1435                         rset_nmem,
1436                         &result_sets, &num_result_sets, kc);
1437     if (res != ZEBRA_OK)
1438         return res;
1439
1440     for (i = 0; i<num_result_sets; i++)
1441     {
1442         RSET first_set = 0;
1443         res = rpn_search_APT_position(zh, zapt, attributeSet, 
1444                                       index_type,
1445                                       rset_nmem, &first_set,
1446                                       kc);
1447         if (res != ZEBRA_OK)
1448         {
1449             for (i = 0; i<num_result_sets; i++)
1450                 rset_delete(result_sets[i]);
1451             return res;
1452         }
1453
1454         if (first_set)
1455         {
1456             RSET tmp_set[2];
1457
1458             tmp_set[0] = first_set;
1459             tmp_set[1] = result_sets[i];
1460             
1461             result_sets[i] = rset_create_prox(
1462                 rset_nmem, kc, kc->scope,
1463                 2, tmp_set,
1464                 1 /* ordered */, 0 /* exclusion */,
1465                 3 /* relation */, 1 /* distance */);
1466         }
1467     }
1468     if (num_result_sets == 0)
1469         *rset = rset_create_null(rset_nmem, kc, 0); 
1470     else if (num_result_sets == 1)
1471         *rset = result_sets[0];
1472     else
1473         *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1474                                num_result_sets, result_sets);
1475     if (!*rset)
1476         return ZEBRA_FAIL;
1477     return ZEBRA_OK;
1478 }
1479
1480 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1481                                          Z_AttributesPlusTerm *zapt,
1482                                          const char *termz_org,
1483                                          const Odr_oid *attributeSet,
1484                                          NMEM stream,
1485                                          const char *index_type, 
1486                                          int complete_flag,
1487                                          const char *rank_type, 
1488                                          const char *xpath_use,
1489                                          NMEM rset_nmem,
1490                                          RSET *rset,
1491                                          struct rset_key_control *kc)
1492 {
1493     RSET *result_sets = 0;
1494     int num_result_sets = 0;
1495     int i;
1496     ZEBRA_RES res =
1497         term_list_trunc(zh, zapt, termz_org, attributeSet,
1498                         stream, index_type, complete_flag,
1499                         rank_type, xpath_use,
1500                         rset_nmem,
1501                         &result_sets, &num_result_sets,
1502                         kc);
1503     if (res != ZEBRA_OK)
1504         return res;
1505     for (i = 0; i<num_result_sets; i++)
1506     {
1507         RSET first_set = 0;
1508         res = rpn_search_APT_position(zh, zapt, attributeSet, 
1509                                       index_type,
1510                                       rset_nmem, &first_set,
1511                                       kc);
1512         if (res != ZEBRA_OK)
1513         {
1514             for (i = 0; i<num_result_sets; i++)
1515                 rset_delete(result_sets[i]);
1516             return res;
1517         }
1518
1519         if (first_set)
1520         {
1521             RSET tmp_set[2];
1522
1523             tmp_set[0] = first_set;
1524             tmp_set[1] = result_sets[i];
1525             
1526             result_sets[i] = rset_create_prox(
1527                 rset_nmem, kc, kc->scope,
1528                 2, tmp_set,
1529                 1 /* ordered */, 0 /* exclusion */,
1530                 3 /* relation */, 1 /* distance */);
1531         }
1532     }
1533
1534
1535     if (num_result_sets == 0)
1536         *rset = rset_create_null(rset_nmem, kc, 0); 
1537     else if (num_result_sets == 1)
1538         *rset = result_sets[0];
1539     else
1540         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1541                                num_result_sets, result_sets);
1542     if (!*rset)
1543         return ZEBRA_FAIL;
1544     return ZEBRA_OK;
1545 }
1546
1547 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1548                             const char **term_sub,
1549                             WRBUF term_dict,
1550                             const Odr_oid *attributeSet,
1551                             struct grep_info *grep_info,
1552                             int *max_pos,
1553                             const char *index_type,
1554                             char *term_dst,
1555                             int *error_code)
1556 {
1557     AttrType relation;
1558     int relation_value;
1559     int term_value;
1560     int r;
1561     WRBUF term_num = wrbuf_alloc();
1562
1563     *error_code = 0;
1564     attr_init_APT(&relation, zapt, 2);
1565     relation_value = attr_find(&relation, NULL);
1566
1567     yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1568
1569     switch (relation_value)
1570     {
1571     case 1:
1572         yaz_log(log_level_rpn, "Relation <");
1573         if (!term_100(zh->reg->zebra_maps, index_type, term_sub, term_num, 1,
1574                       term_dst))
1575         { 
1576             wrbuf_destroy(term_num);
1577             return 0;
1578         }
1579         term_value = atoi(wrbuf_cstr(term_num));
1580         gen_regular_rel(term_dict, term_value-1, 1);
1581         break;
1582     case 2:
1583         yaz_log(log_level_rpn, "Relation <=");
1584         if (!term_100(zh->reg->zebra_maps, index_type, term_sub, term_num, 1,
1585                       term_dst))
1586         {
1587             wrbuf_destroy(term_num);
1588             return 0;
1589         }
1590         term_value = atoi(wrbuf_cstr(term_num));
1591         gen_regular_rel(term_dict, term_value, 1);
1592         break;
1593     case 4:
1594         yaz_log(log_level_rpn, "Relation >=");
1595         if (!term_100(zh->reg->zebra_maps, index_type, term_sub, term_num, 1,
1596                       term_dst))
1597         {
1598             wrbuf_destroy(term_num);
1599             return 0;
1600         }
1601         term_value = atoi(wrbuf_cstr(term_num));
1602         gen_regular_rel(term_dict, term_value, 0);
1603         break;
1604     case 5:
1605         yaz_log(log_level_rpn, "Relation >");
1606         if (!term_100(zh->reg->zebra_maps, index_type, term_sub, term_num, 1,
1607                       term_dst))
1608         {
1609             wrbuf_destroy(term_num);
1610             return 0;
1611         }
1612         term_value = atoi(wrbuf_cstr(term_num));
1613         gen_regular_rel(term_dict, term_value+1, 0);
1614         break;
1615     case -1:
1616     case 3:
1617         yaz_log(log_level_rpn, "Relation =");
1618         if (!term_100(zh->reg->zebra_maps, index_type, term_sub, term_num, 1,
1619                       term_dst))
1620         {
1621             wrbuf_destroy(term_num);
1622             return 0; 
1623         }
1624         term_value = atoi(wrbuf_cstr(term_num));
1625         wrbuf_printf(term_dict, "(0*%d)", term_value);
1626         break;
1627     case 103:
1628         /* term_tmp untouched.. */
1629         while (**term_sub != '\0')
1630             (*term_sub)++;
1631         break;
1632     default:
1633         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1634         wrbuf_destroy(term_num); 
1635         return 0;
1636     }
1637     r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 
1638                          0, grep_info, max_pos, 0, grep_handle);
1639
1640     if (r == 1)
1641         zebra_set_partial_result(zh);
1642     else if (r)
1643         yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1644     yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1645     wrbuf_destroy(term_num);
1646     return 1;
1647 }
1648
1649 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1650                               const char **term_sub, 
1651                               WRBUF term_dict,
1652                               const Odr_oid *attributeSet, NMEM stream,
1653                               struct grep_info *grep_info,
1654                               const char *index_type, int complete_flag,
1655                               char *term_dst, 
1656                               const char *xpath_use,
1657                               struct ord_list **ol)
1658 {
1659     const char *termp;
1660     struct rpn_char_map_info rcmi;
1661     int max_pos;
1662     int relation_error = 0;
1663     int ord, ord_len, i;
1664     char ord_buf[32];
1665     
1666     *ol = ord_list_create(stream);
1667
1668     rpn_char_map_prepare(zh->reg, *index_type, &rcmi);
1669
1670     termp = *term_sub;
1671     
1672     if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1673                           attributeSet, &ord) != ZEBRA_OK)
1674     {
1675         return ZEBRA_FAIL;
1676     }
1677     
1678     wrbuf_rewind(term_dict);
1679     
1680     *ol = ord_list_append(stream, *ol, ord);
1681     
1682     ord_len = key_SU_encode(ord, ord_buf);
1683     
1684     wrbuf_putc(term_dict, '(');
1685     for (i = 0; i < ord_len; i++)
1686     {
1687         wrbuf_putc(term_dict, 1);
1688         wrbuf_putc(term_dict, ord_buf[i]);
1689     }
1690     wrbuf_putc(term_dict, ')');
1691     
1692     if (!numeric_relation(zh, zapt, &termp, term_dict,
1693                           attributeSet, grep_info, &max_pos, index_type,
1694                           term_dst, &relation_error))
1695     {
1696         if (relation_error)
1697         {
1698             zebra_setError(zh, relation_error, 0);
1699             return ZEBRA_FAIL;
1700         }
1701         *term_sub = 0;
1702         return ZEBRA_OK;
1703     }
1704     *term_sub = termp;
1705     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1706     return ZEBRA_OK;
1707 }
1708
1709                                  
1710 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1711                                         Z_AttributesPlusTerm *zapt,
1712                                         const char *termz,
1713                                         const Odr_oid *attributeSet,
1714                                         NMEM stream,
1715                                         const char *index_type, 
1716                                         int complete_flag,
1717                                         const char *rank_type, 
1718                                         const char *xpath_use,
1719                                         NMEM rset_nmem,
1720                                         RSET *rset,
1721                                         struct rset_key_control *kc)
1722 {
1723     char term_dst[IT_MAX_WORD+1];
1724     const char *termp = termz;
1725     RSET *result_sets = 0;
1726     int num_result_sets = 0;
1727     ZEBRA_RES res;
1728     struct grep_info grep_info;
1729     int alloc_sets = 0;
1730     zint hits_limit_value;
1731     const char *term_ref_id_str = 0;
1732
1733     term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
1734
1735     yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1736     if (grep_info_prepare(zh, zapt, &grep_info, *index_type) == ZEBRA_FAIL)
1737         return ZEBRA_FAIL;
1738     while (1)
1739     { 
1740         struct ord_list *ol;
1741         WRBUF term_dict = wrbuf_alloc();
1742         if (alloc_sets == num_result_sets)
1743         {
1744             int add = 10;
1745             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1746                                               sizeof(*rnew));
1747             if (alloc_sets)
1748                 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1749             alloc_sets = alloc_sets + add;
1750             result_sets = rnew;
1751         }
1752         yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1753         grep_info.isam_p_indx = 0;
1754         res = numeric_term(zh, zapt, &termp, term_dict,
1755                            attributeSet, stream, &grep_info,
1756                            index_type, complete_flag,
1757                            term_dst, xpath_use, &ol);
1758         wrbuf_destroy(term_dict);
1759         if (res == ZEBRA_FAIL || termp == 0)
1760             break;
1761         yaz_log(YLOG_DEBUG, "term: %s", term_dst);
1762         result_sets[num_result_sets] =
1763             rset_trunc(zh, grep_info.isam_p_buf,
1764                        grep_info.isam_p_indx, term_dst,
1765                        strlen(term_dst), rank_type,
1766                        0 /* preserve position */,
1767                        zapt->term->which, rset_nmem, 
1768                        kc, kc->scope, ol, index_type,
1769                        hits_limit_value,
1770                        term_ref_id_str);
1771         if (!result_sets[num_result_sets])
1772             break;
1773         num_result_sets++;
1774         if (!*termp)
1775             break;
1776     }
1777     grep_info_delete(&grep_info);
1778
1779     if (res != ZEBRA_OK)
1780         return res;
1781     if (num_result_sets == 0)
1782         *rset = rset_create_null(rset_nmem, kc, 0);
1783     else if (num_result_sets == 1)
1784         *rset = result_sets[0];
1785     else
1786         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1787                                 num_result_sets, result_sets);
1788     if (!*rset)
1789         return ZEBRA_FAIL;
1790     return ZEBRA_OK;
1791 }
1792
1793 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1794                                       Z_AttributesPlusTerm *zapt,
1795                                       const char *termz,
1796                                       const Odr_oid *attributeSet,
1797                                       NMEM stream,
1798                                       const char *rank_type, NMEM rset_nmem,
1799                                       RSET *rset,
1800                                       struct rset_key_control *kc)
1801 {
1802     Record rec;
1803     zint sysno = atozint(termz);
1804     
1805     if (sysno <= 0)
1806         sysno = 0;
1807     rec = rec_get(zh->reg->records, sysno);
1808     if (!rec)
1809         sysno = 0;
1810
1811     rec_free(&rec);
1812
1813     if (sysno <= 0)
1814     {
1815         *rset = rset_create_null(rset_nmem, kc, 0);
1816     }
1817     else
1818     {
1819         RSFD rsfd;
1820         struct it_key key;
1821         *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1822                                  res_get(zh->res, "setTmpDir"), 0);
1823         rsfd = rset_open(*rset, RSETF_WRITE);
1824         
1825         key.mem[0] = sysno;
1826         key.mem[1] = 1;
1827         key.len = 2;
1828         rset_write(rsfd, &key);
1829         rset_close(rsfd);
1830     }
1831     return ZEBRA_OK;
1832 }
1833
1834 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1835                                const Odr_oid *attributeSet, NMEM stream,
1836                                Z_SortKeySpecList *sort_sequence,
1837                                const char *rank_type,
1838                                NMEM rset_nmem,
1839                                RSET *rset,
1840                                struct rset_key_control *kc)
1841 {
1842     int i;
1843     int sort_relation_value;
1844     AttrType sort_relation_type;
1845     Z_SortKeySpec *sks;
1846     Z_SortKey *sk;
1847     char termz[20];
1848     
1849     attr_init_APT(&sort_relation_type, zapt, 7);
1850     sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
1851
1852     if (!sort_sequence->specs)
1853     {
1854         sort_sequence->num_specs = 10;
1855         sort_sequence->specs = (Z_SortKeySpec **)
1856             nmem_malloc(stream, sort_sequence->num_specs *
1857                          sizeof(*sort_sequence->specs));
1858         for (i = 0; i<sort_sequence->num_specs; i++)
1859             sort_sequence->specs[i] = 0;
1860     }
1861     if (zapt->term->which != Z_Term_general)
1862         i = 0;
1863     else
1864         i = atoi_n((char *) zapt->term->u.general->buf,
1865                     zapt->term->u.general->len);
1866     if (i >= sort_sequence->num_specs)
1867         i = 0;
1868     sprintf(termz, "%d", i);
1869
1870     sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1871     sks->sortElement = (Z_SortElement *)
1872         nmem_malloc(stream, sizeof(*sks->sortElement));
1873     sks->sortElement->which = Z_SortElement_generic;
1874     sk = sks->sortElement->u.generic = (Z_SortKey *)
1875         nmem_malloc(stream, sizeof(*sk));
1876     sk->which = Z_SortKey_sortAttributes;
1877     sk->u.sortAttributes = (Z_SortAttributes *)
1878         nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
1879
1880     sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet);
1881     sk->u.sortAttributes->list = zapt->attributes;
1882
1883     sks->sortRelation = (int *)
1884         nmem_malloc(stream, sizeof(*sks->sortRelation));
1885     if (sort_relation_value == 1)
1886         *sks->sortRelation = Z_SortKeySpec_ascending;
1887     else if (sort_relation_value == 2)
1888         *sks->sortRelation = Z_SortKeySpec_descending;
1889     else 
1890         *sks->sortRelation = Z_SortKeySpec_ascending;
1891
1892     sks->caseSensitivity = (int *)
1893         nmem_malloc(stream, sizeof(*sks->caseSensitivity));
1894     *sks->caseSensitivity = 0;
1895
1896     sks->which = Z_SortKeySpec_null;
1897     sks->u.null = odr_nullval ();
1898     sort_sequence->specs[i] = sks;
1899     *rset = rset_create_null(rset_nmem, kc, 0);
1900     return ZEBRA_OK;
1901 }
1902
1903
1904 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1905                            const Odr_oid *attributeSet,
1906                            struct xpath_location_step *xpath, int max,
1907                            NMEM mem)
1908 {
1909     const Odr_oid *curAttributeSet = attributeSet;
1910     AttrType use;
1911     const char *use_string = 0;
1912     
1913     attr_init_APT(&use, zapt, 1);
1914     attr_find_ex(&use, &curAttributeSet, &use_string);
1915
1916     if (!use_string || *use_string != '/')
1917         return -1;
1918
1919     return zebra_parse_xpath_str(use_string, xpath, max, mem);
1920 }
1921  
1922                
1923
1924 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
1925                         const char *index_type, const char *term, 
1926                         const char *xpath_use,
1927                         NMEM rset_nmem,
1928                         struct rset_key_control *kc)
1929 {
1930     struct grep_info grep_info;
1931     int ord = zebraExplain_lookup_attr_str(zh->reg->zei, 
1932                                            zinfo_index_category_index,
1933                                            index_type, xpath_use);
1934     if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, '0') == ZEBRA_FAIL)
1935         return rset_create_null(rset_nmem, kc, 0);
1936     
1937     if (ord < 0)
1938         return rset_create_null(rset_nmem, kc, 0);
1939     else
1940     {
1941         int i, r, max_pos;
1942         char ord_buf[32];
1943         RSET rset;
1944         WRBUF term_dict = wrbuf_alloc();
1945         int ord_len = key_SU_encode(ord, ord_buf);
1946         int term_type = Z_Term_characterString;
1947         const char *flags = "void";
1948
1949         wrbuf_putc(term_dict, '(');
1950         for (i = 0; i<ord_len; i++)
1951         {
1952             wrbuf_putc(term_dict, 1);
1953             wrbuf_putc(term_dict, ord_buf[i]);
1954         }
1955         wrbuf_putc(term_dict, ')');
1956         wrbuf_puts(term_dict, term);
1957         
1958         grep_info.isam_p_indx = 0;
1959         r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 0,
1960                              &grep_info, &max_pos, 0, grep_handle);
1961         yaz_log(YLOG_DEBUG, "%s %d positions", term,
1962                 grep_info.isam_p_indx);
1963         rset = rset_trunc(zh, grep_info.isam_p_buf,
1964                           grep_info.isam_p_indx, term, strlen(term),
1965                           flags, 1, term_type, rset_nmem,
1966                           kc, kc->scope, 0, index_type, 0 /* hits_limit */,
1967                           0 /* term_ref_id_str */);
1968         grep_info_delete(&grep_info);
1969         wrbuf_destroy(term_dict);
1970         return rset;
1971     }
1972 }
1973
1974 static
1975 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
1976                            NMEM stream, const char *rank_type, RSET rset,
1977                            int xpath_len, struct xpath_location_step *xpath,
1978                            NMEM rset_nmem,
1979                            RSET *rset_out,
1980                            struct rset_key_control *kc)
1981 {
1982     int i;
1983     int always_matches = rset ? 0 : 1;
1984
1985     if (xpath_len < 0)
1986     {
1987         *rset_out = rset;
1988         return ZEBRA_OK;
1989     }
1990
1991     yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
1992     for (i = 0; i<xpath_len; i++)
1993     {
1994         yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
1995
1996     }
1997
1998     /*
1999       //a    ->    a/.*
2000       //a/b  ->    b/a/.*
2001       /a     ->    a/
2002       /a/b   ->    b/a/
2003
2004       /      ->    none
2005
2006    a[@attr = value]/b[@other = othervalue]
2007
2008  /e/@a val      range(e/,range(@a,freetext(w,1015,val),@a),e/)
2009  /a/b val       range(b/a/,freetext(w,1016,val),b/a/)
2010  /a/b/@c val    range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2011  /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2012  /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2013  /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
2014       
2015     */
2016
2017     dict_grep_cmap(zh->reg->dict, 0, 0);
2018     
2019     {
2020         int level = xpath_len;
2021         int first_path = 1;
2022         
2023         while (--level >= 0)
2024         {
2025             WRBUF xpath_rev = wrbuf_alloc();
2026             int i;
2027             RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
2028
2029             for (i = level; i >= 1; --i)
2030             {
2031                 const char *cp = xpath[i].part;
2032                 if (*cp)
2033                 {
2034                     for (; *cp; cp++)
2035                     {
2036                         if (*cp == '*')
2037                             wrbuf_puts(xpath_rev, "[^/]*");
2038                         else if (*cp == ' ')
2039                             wrbuf_puts(xpath_rev, "\001 ");
2040                         else
2041                             wrbuf_putc(xpath_rev, *cp);
2042
2043                         /* wrbuf_putc does not null-terminate , but
2044                            wrbuf_puts below ensures it does.. so xpath_rev
2045                            is OK iff length is > 0 */
2046                     }
2047                     wrbuf_puts(xpath_rev, "/");
2048                 }
2049                 else if (i == 1)  /* // case */
2050                     wrbuf_puts(xpath_rev, ".*");
2051             }
2052             if (xpath[level].predicate &&
2053                 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2054                 xpath[level].predicate->u.relation.name[0])
2055             {
2056                 WRBUF wbuf = wrbuf_alloc();
2057                 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2058                 if (xpath[level].predicate->u.relation.value)
2059                 {
2060                     const char *cp = xpath[level].predicate->u.relation.value;
2061                     wrbuf_putc(wbuf, '=');
2062                     
2063                     while (*cp)
2064                     {
2065                         if (strchr(REGEX_CHARS, *cp))
2066                             wrbuf_putc(wbuf, '\\');
2067                         wrbuf_putc(wbuf, *cp);
2068                         cp++;
2069                     }
2070                 }
2071                 rset_attr = xpath_trunc(
2072                     zh, stream, "0", wrbuf_cstr(wbuf), 
2073                     ZEBRA_XPATH_ATTR_NAME, 
2074                     rset_nmem, kc);
2075                 wrbuf_destroy(wbuf);
2076             } 
2077             else 
2078             {
2079                 if (!first_path)
2080                 {
2081                     wrbuf_destroy(xpath_rev);
2082                     continue;
2083                 }
2084             }
2085             yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level, 
2086                     wrbuf_cstr(xpath_rev));
2087             if (wrbuf_len(xpath_rev))
2088             {
2089                 rset_start_tag = xpath_trunc(zh, stream, "0", 
2090                                              wrbuf_cstr(xpath_rev),
2091                                              ZEBRA_XPATH_ELM_BEGIN, 
2092                                              rset_nmem, kc);
2093                 if (always_matches)
2094                     rset = rset_start_tag;
2095                 else
2096                 {
2097                     rset_end_tag = xpath_trunc(zh, stream, "0", 
2098                                                wrbuf_cstr(xpath_rev),
2099                                                ZEBRA_XPATH_ELM_END, 
2100                                                rset_nmem, kc);
2101                     
2102                     rset = rset_create_between(rset_nmem, kc, kc->scope,
2103                                                rset_start_tag, rset,
2104                                                rset_end_tag, rset_attr);
2105                 }
2106             }
2107             wrbuf_destroy(xpath_rev);
2108             first_path = 0;
2109         }
2110     }
2111     *rset_out = rset;
2112     return ZEBRA_OK;
2113 }
2114
2115 #define MAX_XPATH_STEPS 10
2116
2117 static ZEBRA_RES rpn_search_database(ZebraHandle zh, 
2118                                      Z_AttributesPlusTerm *zapt,
2119                                      const Odr_oid *attributeSet, NMEM stream,
2120                                      Z_SortKeySpecList *sort_sequence,
2121                                      NMEM rset_nmem,
2122                                      RSET *rset,
2123                                      struct rset_key_control *kc);
2124
2125 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2126                                 const Odr_oid *attributeSet, NMEM stream,
2127                                 Z_SortKeySpecList *sort_sequence,
2128                                 int num_bases, char **basenames, 
2129                                 NMEM rset_nmem,
2130                                 RSET *rset,
2131                                 struct rset_key_control *kc)
2132 {
2133     RSET *rsets = nmem_malloc(stream, num_bases * sizeof(*rsets));
2134     ZEBRA_RES res = ZEBRA_OK;
2135     int i;
2136     for (i = 0; i < num_bases; i++)
2137     {
2138
2139         if (zebraExplain_curDatabase(zh->reg->zei, basenames[i]))
2140         {
2141             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2142                            basenames[i]);
2143             res = ZEBRA_FAIL;
2144             break;
2145         }
2146         res = rpn_search_database(zh, zapt, attributeSet, stream,
2147                                   sort_sequence,
2148                                   rset_nmem, rsets+i, kc);
2149         if (res != ZEBRA_OK)
2150             break;
2151     }
2152     if (res != ZEBRA_OK)
2153     {   /* must clean up the already created sets */
2154         while (--i >= 0)
2155             rset_delete(rsets[i]);
2156         *rset = 0;
2157     }
2158     else 
2159     {
2160         if (num_bases == 1)
2161             *rset = rsets[0];
2162         else if (num_bases == 0)
2163             *rset = rset_create_null(rset_nmem, kc, 0); 
2164         else
2165             *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* TERMID */,
2166                                    num_bases, rsets);
2167     }
2168     return res;
2169 }
2170
2171 static ZEBRA_RES rpn_search_database(ZebraHandle zh, 
2172                                      Z_AttributesPlusTerm *zapt,
2173                                      const Odr_oid *attributeSet, NMEM stream,
2174                                      Z_SortKeySpecList *sort_sequence,
2175                                      NMEM rset_nmem,
2176                                      RSET *rset,
2177                                      struct rset_key_control *kc)
2178 {
2179     ZEBRA_RES res = ZEBRA_OK;
2180     const char *index_type;
2181     char *search_type = NULL;
2182     char rank_type[128];
2183     int complete_flag;
2184     int sort_flag;
2185     char termz[IT_MAX_WORD+1];
2186     int xpath_len;
2187     const char *xpath_use = 0;
2188     struct xpath_location_step xpath[MAX_XPATH_STEPS];
2189
2190     if (!log_level_set)
2191     {
2192         log_level_rpn = yaz_log_module_level("rpn");
2193         log_level_set = 1;
2194     }
2195     zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type,
2196                     rank_type, &complete_flag, &sort_flag);
2197     
2198     yaz_log(YLOG_DEBUG, "index_type=%s", index_type);
2199     yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2200     yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2201     yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2202
2203     if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2204         return ZEBRA_FAIL;
2205
2206     if (sort_flag)
2207         return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2208                              rank_type, rset_nmem, rset, kc);
2209     /* consider if an X-Path query is used */
2210     xpath_len = rpn_check_xpath(zh, zapt, attributeSet, 
2211                                 xpath, MAX_XPATH_STEPS, stream);
2212     if (xpath_len >= 0)
2213     {
2214         if (xpath[xpath_len-1].part[0] == '@') 
2215             xpath_use = ZEBRA_XPATH_ATTR_CDATA;  /* last step is attribute  */
2216         else
2217             xpath_use = ZEBRA_XPATH_CDATA;  /* searching for cdata */        
2218
2219         if (1)
2220         {
2221             AttrType relation;
2222             int relation_value;
2223
2224             attr_init_APT(&relation, zapt, 2);
2225             relation_value = attr_find(&relation, NULL);
2226
2227             if (relation_value == 103) /* alwaysmatches */
2228             {
2229                 *rset = 0; /* signal no "term" set */
2230                 return rpn_search_xpath(zh, stream, rank_type, *rset, 
2231                                         xpath_len, xpath, rset_nmem, rset, kc);
2232             }
2233         }
2234     }
2235
2236     /* search using one of the various search type strategies
2237        termz is our UTF-8 search term
2238        attributeSet is top-level default attribute set 
2239        stream is ODR for search
2240        reg_id is the register type
2241        complete_flag is 1 for complete subfield, 0 for incomplete
2242        xpath_use is use-attribute to be used for X-Path search, 0 for none
2243     */
2244     if (!strcmp(search_type, "phrase"))
2245     {
2246         res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2247                                     index_type, complete_flag, rank_type,
2248                                     xpath_use,
2249                                     rset_nmem,
2250                                     rset, kc);
2251     }
2252     else if (!strcmp(search_type, "and-list"))
2253     {
2254         res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2255                                       index_type, complete_flag, rank_type,
2256                                       xpath_use,
2257                                       rset_nmem,
2258                                       rset, kc);
2259     }
2260     else if (!strcmp(search_type, "or-list"))
2261     {
2262         res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2263                                      index_type, complete_flag, rank_type,
2264                                      xpath_use,
2265                                      rset_nmem,
2266                                      rset, kc);
2267     }
2268     else if (!strcmp(search_type, "local"))
2269     {
2270         res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2271                                    rank_type, rset_nmem, rset, kc);
2272     }
2273     else if (!strcmp(search_type, "numeric"))
2274     {
2275         res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2276                                      index_type, complete_flag, rank_type,
2277                                      xpath_use,
2278                                      rset_nmem,
2279                                      rset, kc);
2280     }
2281     else
2282     {
2283         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2284         res = ZEBRA_FAIL;
2285     }
2286     if (res != ZEBRA_OK)
2287         return res;
2288     if (!*rset)
2289         return ZEBRA_FAIL;
2290     return rpn_search_xpath(zh, stream, rank_type, *rset, 
2291                             xpath_len, xpath, rset_nmem, rset, kc);
2292 }
2293
2294 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2295                                       const Odr_oid *attributeSet, 
2296                                       NMEM stream, NMEM rset_nmem,
2297                                       Z_SortKeySpecList *sort_sequence,
2298                                       int num_bases, char **basenames,
2299                                       RSET **result_sets, int *num_result_sets,
2300                                       Z_Operator *parent_op,
2301                                       struct rset_key_control *kc);
2302
2303 ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs,
2304                                    zint *approx_limit)
2305 {
2306     ZEBRA_RES res = ZEBRA_OK;
2307     if (zs->which == Z_RPNStructure_complex)
2308     {
2309         if (res == ZEBRA_OK)
2310             res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
2311                                            approx_limit);
2312         if (res == ZEBRA_OK)
2313             res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
2314                                            approx_limit);
2315     }
2316     else if (zs->which == Z_RPNStructure_simple)
2317     {
2318         if (zs->u.simple->which == Z_Operand_APT)
2319         {
2320             Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
2321             AttrType global_hits_limit_attr;
2322             int l;
2323             
2324             attr_init_APT(&global_hits_limit_attr, zapt, 12);
2325             
2326             l = attr_find(&global_hits_limit_attr, NULL);
2327             if (l != -1)
2328                 *approx_limit = l;
2329         }
2330     }
2331     return res;
2332 }
2333
2334 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2335                          const Odr_oid *attributeSet, 
2336                          NMEM stream, NMEM rset_nmem,
2337                          Z_SortKeySpecList *sort_sequence,
2338                          int num_bases, char **basenames,
2339                          RSET *result_set)
2340 {
2341     RSET *result_sets = 0;
2342     int num_result_sets = 0;
2343     ZEBRA_RES res;
2344     struct rset_key_control *kc = zebra_key_control_create(zh);
2345
2346     res = rpn_search_structure(zh, zs, attributeSet,
2347                                stream, rset_nmem,
2348                                sort_sequence, 
2349                                num_bases, basenames,
2350                                &result_sets, &num_result_sets,
2351                                0 /* no parent op */,
2352                                kc);
2353     if (res != ZEBRA_OK)
2354     {
2355         int i;
2356         for (i = 0; i<num_result_sets; i++)
2357             rset_delete(result_sets[i]);
2358         *result_set = 0;
2359     }
2360     else
2361     {
2362         assert(num_result_sets == 1);
2363         assert(result_sets);
2364         assert(*result_sets);
2365         *result_set = *result_sets;
2366     }
2367     (*kc->dec)(kc);
2368     return res;
2369 }
2370
2371 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2372                                const Odr_oid *attributeSet, 
2373                                NMEM stream, NMEM rset_nmem,
2374                                Z_SortKeySpecList *sort_sequence,
2375                                int num_bases, char **basenames,
2376                                RSET **result_sets, int *num_result_sets,
2377                                Z_Operator *parent_op,
2378                                struct rset_key_control *kc)
2379 {
2380     *num_result_sets = 0;
2381     if (zs->which == Z_RPNStructure_complex)
2382     {
2383         ZEBRA_RES res;
2384         Z_Operator *zop = zs->u.complex->roperator;
2385         RSET *result_sets_l = 0;
2386         int num_result_sets_l = 0;
2387         RSET *result_sets_r = 0;
2388         int num_result_sets_r = 0;
2389
2390         res = rpn_search_structure(zh, zs->u.complex->s1,
2391                                    attributeSet, stream, rset_nmem,
2392                                    sort_sequence,
2393                                    num_bases, basenames,
2394                                    &result_sets_l, &num_result_sets_l,
2395                                    zop, kc);
2396         if (res != ZEBRA_OK)
2397         {
2398             int i;
2399             for (i = 0; i<num_result_sets_l; i++)
2400                 rset_delete(result_sets_l[i]);
2401             return res;
2402         }
2403         res = rpn_search_structure(zh, zs->u.complex->s2,
2404                                    attributeSet, stream, rset_nmem,
2405                                    sort_sequence,
2406                                    num_bases, basenames,
2407                                    &result_sets_r, &num_result_sets_r,
2408                                    zop, kc);
2409         if (res != ZEBRA_OK)
2410         {
2411             int i;
2412             for (i = 0; i<num_result_sets_l; i++)
2413                 rset_delete(result_sets_l[i]);
2414             for (i = 0; i<num_result_sets_r; i++)
2415                 rset_delete(result_sets_r[i]);
2416             return res;
2417         }
2418
2419         /* make a new list of result for all children */
2420         *num_result_sets = num_result_sets_l + num_result_sets_r;
2421         *result_sets = nmem_malloc(stream, *num_result_sets * 
2422                                    sizeof(**result_sets));
2423         memcpy(*result_sets, result_sets_l, 
2424                num_result_sets_l * sizeof(**result_sets));
2425         memcpy(*result_sets + num_result_sets_l, result_sets_r, 
2426                num_result_sets_r * sizeof(**result_sets));
2427
2428         if (!parent_op || parent_op->which != zop->which
2429             || (zop->which != Z_Operator_and &&
2430                 zop->which != Z_Operator_or))
2431         {
2432             /* parent node different from this one (or non-present) */
2433             /* we must combine result sets now */
2434             RSET rset;
2435             switch (zop->which)
2436             {
2437             case Z_Operator_and:
2438                 rset = rset_create_and(rset_nmem, kc,
2439                                        kc->scope,
2440                                        *num_result_sets, *result_sets);
2441                 break;
2442             case Z_Operator_or:
2443                 rset = rset_create_or(rset_nmem, kc,
2444                                       kc->scope, 0, /* termid */
2445                                       *num_result_sets, *result_sets);
2446                 break;
2447             case Z_Operator_and_not:
2448                 rset = rset_create_not(rset_nmem, kc,
2449                                        kc->scope,
2450                                        (*result_sets)[0],
2451                                        (*result_sets)[1]);
2452                 break;
2453             case Z_Operator_prox:
2454                 if (zop->u.prox->which != Z_ProximityOperator_known)
2455                 {
2456                     zebra_setError(zh, 
2457                                    YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2458                                    0);
2459                     return ZEBRA_FAIL;
2460                 }
2461                 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2462                 {
2463                     zebra_setError_zint(zh,
2464                                         YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2465                                         *zop->u.prox->u.known);
2466                     return ZEBRA_FAIL;
2467                 }
2468                 else
2469                 {
2470                     rset = rset_create_prox(rset_nmem, kc,
2471                                             kc->scope,
2472                                             *num_result_sets, *result_sets, 
2473                                             *zop->u.prox->ordered,
2474                                             (!zop->u.prox->exclusion ? 
2475                                              0 : *zop->u.prox->exclusion),
2476                                             *zop->u.prox->relationType,
2477                                             *zop->u.prox->distance );
2478                 }
2479                 break;
2480             default:
2481                 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
2482                 return ZEBRA_FAIL;
2483             }
2484             *num_result_sets = 1;
2485             *result_sets = nmem_malloc(stream, *num_result_sets * 
2486                                        sizeof(**result_sets));
2487             (*result_sets)[0] = rset;
2488         }
2489     }
2490     else if (zs->which == Z_RPNStructure_simple)
2491     {
2492         RSET rset;
2493         ZEBRA_RES res;
2494
2495         if (zs->u.simple->which == Z_Operand_APT)
2496         {
2497             yaz_log(YLOG_DEBUG, "rpn_search_APT");
2498             res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2499                                  attributeSet, stream, sort_sequence,
2500                                  num_bases, basenames, rset_nmem, &rset,
2501                                  kc);
2502             if (res != ZEBRA_OK)
2503                 return res;
2504         }
2505         else if (zs->u.simple->which == Z_Operand_resultSetId)
2506         {
2507             yaz_log(YLOG_DEBUG, "rpn_search_ref");
2508             rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2509             if (!rset)
2510             {
2511                 zebra_setError(zh, 
2512                                YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2513                                zs->u.simple->u.resultSetId);
2514                 return ZEBRA_FAIL;
2515             }
2516             rset_dup(rset);
2517         }
2518         else
2519         {
2520             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2521             return ZEBRA_FAIL;
2522         }
2523         *num_result_sets = 1;
2524         *result_sets = nmem_malloc(stream, *num_result_sets * 
2525                                    sizeof(**result_sets));
2526         (*result_sets)[0] = rset;
2527     }
2528     else
2529     {
2530         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2531         return ZEBRA_FAIL;
2532     }
2533     return ZEBRA_OK;
2534 }
2535
2536
2537
2538 /*
2539  * Local variables:
2540  * c-basic-offset: 4
2541  * indent-tabs-mode: nil
2542  * End:
2543  * vim: shiftwidth=4 tabstop=8 expandtab
2544  */
2545