For data-1, do not chop text data in ISO2709 creation. The problem is
[idzebra-moved-to-github.git] / index / rpnsearch.c
1 /* $Id: rpnsearch.c,v 1.14 2007-05-14 14:05:21 adam Exp $
2    Copyright (C) 1995-2007
3    Index Data ApS
4
5 This file is part of the Zebra server.
6
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
20
21 */
22
23 #include <stdio.h>
24 #include <assert.h>
25 #ifdef WIN32
26 #include <io.h>
27 #endif
28 #if HAVE_UNISTD_H
29 #include <unistd.h>
30 #endif
31 #include <ctype.h>
32
33 #include <yaz/diagbib1.h>
34 #include "index.h"
35 #include <zebra_xpath.h>
36 #include <attrfind.h>
37 #include <charmap.h>
38 #include <rset.h>
39
40 static int log_level_set = 0;
41 static int log_level_rpn = 0;
42
43 #define TERMSET_DISABLE 1
44
45 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
46 {
47     struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
48     const char **out = zebra_maps_input(p->zm, p->reg_type, from, len, 0);
49 #if 0
50     if (out && *out)
51     {
52         const char *outp = *out;
53         yaz_log(YLOG_LOG, "---");
54         while (*outp)
55         {
56             yaz_log(YLOG_LOG, "%02X", *outp);
57             outp++;
58         }
59     }
60 #endif
61     return out;
62 }
63
64 void rpn_char_map_prepare(struct zebra_register *reg, int reg_type,
65                           struct rpn_char_map_info *map_info)
66 {
67     map_info->zm = reg->zebra_maps;
68     map_info->reg_type = reg_type;
69     dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
70 }
71
72 #define TERM_COUNT        
73        
74 struct grep_info {        
75 #ifdef TERM_COUNT        
76     int *term_no;        
77 #endif        
78     ISAM_P *isam_p_buf;
79     int isam_p_size;        
80     int isam_p_indx;
81     int trunc_max;
82     ZebraHandle zh;
83     int reg_type;
84     ZebraSet termset;
85 };        
86
87 static int add_isam_p(const char *name, const char *info,
88                       struct grep_info *p)
89 {
90     if (!log_level_set)
91     {
92         log_level_rpn = yaz_log_module_level("rpn");
93         log_level_set = 1;
94     }
95     /* we may have to stop this madness.. NOTE: -1 so that if
96        truncmax == trunxlimit we do *not* generate result sets */
97     if (p->isam_p_indx >= p->trunc_max - 1)
98         return 1;
99
100     if (p->isam_p_indx == p->isam_p_size)
101     {
102         ISAM_P *new_isam_p_buf;
103 #ifdef TERM_COUNT        
104         int *new_term_no;        
105 #endif
106         p->isam_p_size = 2*p->isam_p_size + 100;
107         new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
108                                             p->isam_p_size);
109         if (p->isam_p_buf)
110         {
111             memcpy(new_isam_p_buf, p->isam_p_buf,
112                     p->isam_p_indx * sizeof(*p->isam_p_buf));
113             xfree(p->isam_p_buf);
114         }
115         p->isam_p_buf = new_isam_p_buf;
116
117 #ifdef TERM_COUNT
118         new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
119         if (p->term_no)
120         {
121             memcpy(new_term_no, p->isam_p_buf,
122                     p->isam_p_indx * sizeof(*p->term_no));
123             xfree(p->term_no);
124         }
125         p->term_no = new_term_no;
126 #endif
127     }
128     assert(*info == sizeof(*p->isam_p_buf));
129     memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
130
131     if (p->termset)
132     {
133         const char *db;
134         char term_tmp[IT_MAX_WORD];
135         int ord = 0;
136         const char *index_name;
137         int len = key_SU_decode (&ord, (const unsigned char *) name);
138         
139         zebra_term_untrans  (p->zh, p->reg_type, term_tmp, name+len);
140         yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
141         zebraExplain_lookup_ord(p->zh->reg->zei,
142                                 ord, 0 /* index_type */, &db, &index_name);
143         yaz_log(log_level_rpn, "grep:  db=%s index=%s", db, index_name);
144         
145         resultSetAddTerm(p->zh, p->termset, name[len], db,
146                          index_name, term_tmp);
147     }
148     (p->isam_p_indx)++;
149     return 0;
150 }
151
/* Dictionary grep callback; p is the struct grep_info collecting matches.
 * Simply forwards to add_isam_p() and returns its result. */
static int grep_handle(char *name, const char *info, void *p)
{
    struct grep_info *state = p;

    return add_isam_p(name, info, state);
}
156
157 static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src,
158                     const char *ct1, const char *ct2, int first)
159 {
160     const char *s1, *s0 = *src;
161     const char **map;
162
163     /* skip white space */
164     while (*s0)
165     {
166         if (ct1 && strchr(ct1, *s0))
167             break;
168         if (ct2 && strchr(ct2, *s0))
169             break;
170         s1 = s0;
171         map = zebra_maps_input(zebra_maps, reg_type, &s1, strlen(s1), first);
172         if (**map != *CHR_SPACE)
173             break;
174         s0 = s1;
175     }
176     *src = s0;
177     return *s0;
178 }
179
180
/* esc_str: render a byte sequence as a printable dump.
 * Every input byte becomes "HH:c  " - two hex digits, a colon, the
 * character itself ('?' when outside 32..126) and two spaces.  As soon as
 * the output grows to within 20 bytes of out_size, ".." is appended and
 * the remaining input is dropped.
 *  out_buf, out_size:  destination and its capacity (must exceed 20)
 *  in_buf, in_size:    bytes to dump
 */
static void esc_str(char *out_buf, size_t out_size,
                    const char *in_buf, int in_size)
{
    size_t used = 0;   /* length of the string built so far */
    int idx;

    assert(out_buf);
    assert(in_buf);
    assert(out_size > 20);
    out_buf[0] = '\0';
    for (idx = 0; idx < in_size; idx++)
    {
        int byte = in_buf[idx] & 0xff;
        int shown = (byte < 32 || byte > 126) ? '?' : byte;

        /* used <= out_size-20 here, so the 6 characters always fit */
        used += sprintf(out_buf + used, "%02X:%c  ", byte, shown);
        if (used > out_size-20)
        {
            /* at least 14 bytes are still free: room for ".." and NUL */
            memcpy(out_buf + used, "..", 3);
            break;
        }
    }
}
206
207 #define REGEX_CHARS " []()|.*+?!"
208
209 static void add_non_space(const char *start, const char *end,
210                           WRBUF term_dict,
211                           char *dst_term, int *dst_ptr,
212                           const char **map, int q_map_match)
213 {
214     size_t sz = end - start;
215     memcpy(dst_term + *dst_ptr, start, sz);
216     (*dst_ptr) += sz;
217     if (!q_map_match)
218     {
219         while (start < end)
220         {
221             if (strchr(REGEX_CHARS, *start))
222                 wrbuf_putc(term_dict, '\\');
223             wrbuf_putc(term_dict, *start);
224             start++;
225         }
226     }
227     else
228     {
229         char tmpbuf[80];
230         esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
231         
232         wrbuf_puts(term_dict, map[0]);
233     }
234 }
235
236 /* term_100: handle term, where trunc = none(no operators at all) */
237 static int term_100(ZebraMaps zebra_maps, int reg_type,
238                     const char **src, WRBUF term_dict, int space_split,
239                     char *dst_term)
240 {
241     const char *s0;
242     const char **map;
243     int i = 0;
244     int j = 0;
245
246     const char *space_start = 0;
247     const char *space_end = 0;
248
249     if (!term_pre(zebra_maps, reg_type, src, NULL, NULL, !space_split))
250         return 0;
251     s0 = *src;
252     while (*s0)
253     {
254         const char *s1 = s0;
255         int q_map_match = 0;
256         map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
257                                 &q_map_match);
258         if (space_split)
259         {
260             if (**map == *CHR_SPACE)
261                 break;
262         }
263         else  /* complete subfield only. */
264         {
265             if (**map == *CHR_SPACE)
266             {   /* save space mapping for later  .. */
267                 space_start = s1;
268                 space_end = s0;
269                 continue;
270             }
271             else if (space_start)
272             {   /* reload last space */
273                 while (space_start < space_end)
274                 {
275                     if (strchr(REGEX_CHARS, *space_start))
276                         wrbuf_putc(term_dict, '\\');
277                     dst_term[j++] = *space_start;
278                     wrbuf_putc(term_dict, *space_start);
279                     space_start++;
280                                
281                 }
282                 /* and reset */
283                 space_start = space_end = 0;
284             }
285         }
286         i++;
287
288         add_non_space(s1, s0, term_dict, dst_term, &j,
289                       map, q_map_match);
290     }
291     dst_term[j] = '\0';
292     *src = s0;
293     return i;
294 }
295
296 /* term_101: handle term, where trunc = Process # */
297 static int term_101(ZebraMaps zebra_maps, int reg_type,
298                     const char **src, WRBUF term_dict, int space_split,
299                     char *dst_term)
300 {
301     const char *s0;
302     const char **map;
303     int i = 0;
304     int j = 0;
305
306     if (!term_pre(zebra_maps, reg_type, src, "#", "#", !space_split))
307         return 0;
308     s0 = *src;
309     while (*s0)
310     {
311         if (*s0 == '#')
312         {
313             i++;
314             wrbuf_puts(term_dict, ".*");
315             dst_term[j++] = *s0++;
316         }
317         else
318         {
319             const char *s1 = s0;
320             int q_map_match = 0;
321             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
322                                     &q_map_match);
323             if (space_split && **map == *CHR_SPACE)
324                 break;
325
326             i++;
327             add_non_space(s1, s0, term_dict, dst_term, &j,
328                           map, q_map_match);
329         }
330     }
331     dst_term[j++] = '\0';
332     *src = s0;
333     return i;
334 }
335
336 /* term_103: handle term, where trunc = re-2 (regular expressions) */
337 static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src,
338                     WRBUF term_dict, int *errors, int space_split,
339                     char *dst_term)
340 {
341     int i = 0;
342     int j = 0;
343     const char *s0;
344     const char **map;
345
346     if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split))
347         return 0;
348     s0 = *src;
349     if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
350         isdigit(((const unsigned char *)s0)[1]))
351     {
352         *errors = s0[1] - '0';
353         s0 += 3;
354         if (*errors > 3)
355             *errors = 3;
356     }
357     while (*s0)
358     {
359         if (strchr("^\\()[].*+?|-", *s0))
360         {
361             dst_term[j++] = *s0;
362             wrbuf_putc(term_dict, *s0);
363             s0++;
364             i++;
365         }
366         else
367         {
368             const char *s1 = s0;
369             int q_map_match = 0;
370             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
371                                     &q_map_match);
372             if (space_split && **map == *CHR_SPACE)
373                 break;
374
375             i++;
376             add_non_space(s1, s0, term_dict, dst_term, &j,
377                           map, q_map_match);
378         }
379     }
380     dst_term[j] = '\0';
381     *src = s0;
382     
383     return i;
384 }
385
386 /* term_103: handle term, where trunc = re-1 (regular expressions) */
387 static int term_102(ZebraMaps zebra_maps, int reg_type, const char **src,
388                     WRBUF term_dict, int space_split, char *dst_term)
389 {
390     return term_103(zebra_maps, reg_type, src, term_dict, NULL, space_split,
391                     dst_term);
392 }
393
394
395 /* term_104: handle term, process # and ! */
396 static int term_104(ZebraMaps zebra_maps, int reg_type,
397                     const char **src, WRBUF term_dict, int space_split,
398                     char *dst_term)
399 {
400     const char *s0;
401     const char **map;
402     int i = 0;
403     int j = 0;
404
405     if (!term_pre(zebra_maps, reg_type, src, "?*#", "?*#", !space_split))
406         return 0;
407     s0 = *src;
408     while (*s0)
409     {
410         if (*s0 == '?')
411         {
412             i++;
413             dst_term[j++] = *s0++;
414             if (*s0 >= '0' && *s0 <= '9')
415             {
416                 int limit = 0;
417                 while (*s0 >= '0' && *s0 <= '9')
418                 {
419                     limit = limit * 10 + (*s0 - '0');
420                     dst_term[j++] = *s0++;
421                 }
422                 if (limit > 20)
423                     limit = 20;
424                 while (--limit >= 0)
425                 {
426                     wrbuf_puts(term_dict, ".?");
427                 }
428             }
429             else
430             {
431                 wrbuf_puts(term_dict, ".*");
432             }
433         }
434         else if (*s0 == '*')
435         {
436             i++;
437             wrbuf_puts(term_dict, ".*");
438             dst_term[j++] = *s0++;
439         }
440         else if (*s0 == '#')
441         {
442             i++;
443             wrbuf_puts(term_dict, ".");
444             dst_term[j++] = *s0++;
445         }
446         else
447         {
448             const char *s1 = s0;
449             int q_map_match = 0;
450             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
451                                     &q_map_match);
452             if (space_split && **map == *CHR_SPACE)
453                 break;
454
455             i++;
456             add_non_space(s1, s0, term_dict, dst_term, &j,
457                           map, q_map_match);
458         }
459     }
460     dst_term[j++] = '\0';
461     *src = s0;
462     return i;
463 }
464
465 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
466 static int term_105(ZebraMaps zebra_maps, int reg_type,
467                     const char **src, WRBUF term_dict, int space_split,
468                     char *dst_term, int right_truncate)
469 {
470     const char *s0;
471     const char **map;
472     int i = 0;
473     int j = 0;
474
475     if (!term_pre(zebra_maps, reg_type, src, "*!", "*!", !space_split))
476         return 0;
477     s0 = *src;
478     while (*s0)
479     {
480         if (*s0 == '*')
481         {
482             i++;
483             wrbuf_puts(term_dict, ".*");
484             dst_term[j++] = *s0++;
485         }
486         else if (*s0 == '!')
487         {
488             i++;
489             wrbuf_putc(term_dict, '.');
490             dst_term[j++] = *s0++;
491         }
492         else
493         {
494             const char *s1 = s0;
495             int q_map_match = 0;
496             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
497                                     &q_map_match);
498             if (space_split && **map == *CHR_SPACE)
499                 break;
500
501             i++;
502             add_non_space(s1, s0, term_dict, dst_term, &j,
503                           map, q_map_match);
504         }
505     }
506     if (right_truncate)
507         wrbuf_puts(term_dict, ".*");
508     dst_term[j++] = '\0';
509     *src = s0;
510     return i;
511 }
512
513
514 /* gen_regular_rel - generate regular expression from relation
515  *  val:     border value (inclusive)
516  *  islt:    1 if <=; 0 if >=.
517  */
518 static void gen_regular_rel(WRBUF term_dict, int val, int islt)
519 {
520     char dst_buf[20*5*20]; /* assuming enough for expansion */
521     char *dst = dst_buf;
522     int dst_p;
523     int w, d, i;
524     int pos = 0;
525     char numstr[20];
526
527     yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
528     if (val >= 0)
529     {
530         if (islt)
531             strcpy(dst, "(-[0-9]+|(");
532         else
533             strcpy(dst, "((");
534     } 
535     else
536     {
537         if (!islt)
538         {
539             strcpy(dst, "([0-9]+|-(");
540             islt = 1;
541         }
542         else
543         {
544             strcpy(dst, "(-(");
545             islt = 0;
546         }
547         val = -val;
548     }
549     dst_p = strlen(dst);
550     sprintf(numstr, "%d", val);
551     for (w = strlen(numstr); --w >= 0; pos++)
552     {
553         d = numstr[w];
554         if (pos > 0)
555         {
556             if (islt)
557             {
558                 if (d == '0')
559                     continue;
560                 d--;
561             } 
562             else
563             {
564                 if (d == '9')
565                     continue;
566                 d++;
567             }
568         }
569         
570         strcpy(dst + dst_p, numstr);
571         dst_p = strlen(dst) - pos - 1;
572
573         if (islt)
574         {
575             if (d != '0')
576             {
577                 dst[dst_p++] = '[';
578                 dst[dst_p++] = '0';
579                 dst[dst_p++] = '-';
580                 dst[dst_p++] = d;
581                 dst[dst_p++] = ']';
582             }
583             else
584                 dst[dst_p++] = d;
585         }
586         else
587         {
588             if (d != '9')
589             { 
590                 dst[dst_p++] = '[';
591                 dst[dst_p++] = d;
592                 dst[dst_p++] = '-';
593                 dst[dst_p++] = '9';
594                 dst[dst_p++] = ']';
595             }
596             else
597                 dst[dst_p++] = d;
598         }
599         for (i = 0; i<pos; i++)
600         {
601             dst[dst_p++] = '[';
602             dst[dst_p++] = '0';
603             dst[dst_p++] = '-';
604             dst[dst_p++] = '9';
605             dst[dst_p++] = ']';
606         }
607         dst[dst_p++] = '|';
608     }
609     dst[dst_p] = '\0';
610     if (islt)
611     {
612         /* match everything less than 10^(pos-1) */
613         strcat(dst, "0*");
614         for (i = 1; i<pos; i++)
615             strcat(dst, "[0-9]?");
616     }
617     else
618     {
619         /* match everything greater than 10^pos */
620         for (i = 0; i <= pos; i++)
621             strcat(dst, "[0-9]");
622         strcat(dst, "[0-9]*");
623     }
624     strcat(dst, "))");
625     wrbuf_puts(term_dict, dst);
626 }
627
628 void string_rel_add_char(WRBUF term_p, WRBUF wsrc, int *indx)
629 {
630     const char *src = wrbuf_cstr(wsrc);
631     if (src[*indx] == '\\')
632     {
633         wrbuf_putc(term_p, src[*indx]);
634         (*indx)++;
635     }
636     wrbuf_putc(term_p, src[*indx]);
637     (*indx)++;
638 }
639
640 /*
641  *   >  abc     ([b-].*|a[c-].*|ab[d-].*|abc.+)
642  *              ([^-a].*|a[^-b].*ab[^-c].*|abc.+)
643  *   >= abc     ([b-].*|a[c-].*|ab[c-].*)
644  *              ([^-a].*|a[^-b].*|ab[c-].*)
645  *   <  abc     ([-0].*|a[-a].*|ab[-b].*)
646  *              ([^a-].*|a[^b-].*|ab[^c-].*)
647  *   <= abc     ([-0].*|a[-a].*|ab[-b].*|abc)
648  *              ([^a-].*|a[^b-].*|ab[^c-].*|abc)
649  */
650 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
651                            const char **term_sub, WRBUF term_dict,
652                            const Odr_oid *attributeSet,
653                            int reg_type, int space_split, char *term_dst,
654                            int *error_code)
655 {
656     AttrType relation;
657     int relation_value;
658     int i;
659     WRBUF term_component = wrbuf_alloc();
660
661     attr_init_APT(&relation, zapt, 2);
662     relation_value = attr_find(&relation, NULL);
663
664     *error_code = 0;
665     yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
666     switch (relation_value)
667     {
668     case 1:
669         if (!term_100(zh->reg->zebra_maps, reg_type,
670                       term_sub, term_component,
671                       space_split, term_dst))
672         {
673             wrbuf_destroy(term_component);
674             return 0;
675         }
676         yaz_log(log_level_rpn, "Relation <");
677         
678         wrbuf_putc(term_dict, '(');
679         for (i = 0; i < wrbuf_len(term_component); )
680         {
681             int j = 0;
682             
683             if (i)
684                 wrbuf_putc(term_dict, '|');
685             while (j < i)
686                 string_rel_add_char(term_dict, term_component, &j);
687
688             wrbuf_putc(term_dict, '[');
689
690             wrbuf_putc(term_dict, '^');
691             
692             wrbuf_putc(term_dict, 1);
693             wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
694             
695             string_rel_add_char(term_dict, term_component, &i);
696             wrbuf_putc(term_dict, '-');
697             
698             wrbuf_putc(term_dict, ']');
699             wrbuf_putc(term_dict, '.');
700             wrbuf_putc(term_dict, '*');
701         }
702         wrbuf_putc(term_dict, ')');
703         break;
704     case 2:
705         if (!term_100(zh->reg->zebra_maps, reg_type,
706                       term_sub, term_component,
707                       space_split, term_dst))
708         {
709             wrbuf_destroy(term_component);
710             return 0;
711         }
712         yaz_log(log_level_rpn, "Relation <=");
713
714         wrbuf_putc(term_dict, '(');
715         for (i = 0; i < wrbuf_len(term_component); )
716         {
717             int j = 0;
718
719             while (j < i)
720                 string_rel_add_char(term_dict, term_component, &j);
721             wrbuf_putc(term_dict, '[');
722
723             wrbuf_putc(term_dict, '^');
724
725             wrbuf_putc(term_dict, 1);
726             wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
727
728             string_rel_add_char(term_dict, term_component, &i);
729             wrbuf_putc(term_dict, '-');
730
731             wrbuf_putc(term_dict, ']');
732             wrbuf_putc(term_dict, '.');
733             wrbuf_putc(term_dict, '*');
734
735             wrbuf_putc(term_dict, '|');
736         }
737         for (i = 0; i < wrbuf_len(term_component); )
738             string_rel_add_char(term_dict, term_component, &i);
739         wrbuf_putc(term_dict, ')');
740         break;
741     case 5:
742         if (!term_100 (zh->reg->zebra_maps, reg_type,
743                        term_sub, term_component, space_split, term_dst))
744         {
745             wrbuf_destroy(term_component);
746             return 0;
747         }
748         yaz_log(log_level_rpn, "Relation >");
749
750         wrbuf_putc(term_dict, '(');
751         for (i = 0; i < wrbuf_len(term_component); )
752         {
753             int j = 0;
754
755             while (j < i)
756                 string_rel_add_char(term_dict, term_component, &j);
757             wrbuf_putc(term_dict, '[');
758             
759             wrbuf_putc(term_dict, '^');
760             wrbuf_putc(term_dict, '-');
761             string_rel_add_char(term_dict, term_component, &i);
762
763             wrbuf_putc(term_dict, ']');
764             wrbuf_putc(term_dict, '.');
765             wrbuf_putc(term_dict, '*');
766
767             wrbuf_putc(term_dict, '|');
768         }
769         for (i = 0; i < wrbuf_len(term_component); )
770             string_rel_add_char(term_dict, term_component, &i);
771         wrbuf_putc(term_dict, '.');
772         wrbuf_putc(term_dict, '+');
773         wrbuf_putc(term_dict, ')');
774         break;
775     case 4:
776         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
777                       term_component, space_split, term_dst))
778         {
779             wrbuf_destroy(term_component);
780             return 0;
781         }
782         yaz_log(log_level_rpn, "Relation >=");
783
784         wrbuf_putc(term_dict, '(');
785         for (i = 0; i < wrbuf_len(term_component); )
786         {
787             int j = 0;
788
789             if (i)
790                 wrbuf_putc(term_dict, '|');
791             while (j < i)
792                 string_rel_add_char(term_dict, term_component, &j);
793             wrbuf_putc(term_dict, '[');
794
795             if (i < wrbuf_len(term_component)-1)
796             {
797                 wrbuf_putc(term_dict, '^');
798                 wrbuf_putc(term_dict, '-');
799                 string_rel_add_char(term_dict, term_component, &i);
800             }
801             else
802             {
803                 string_rel_add_char(term_dict, term_component, &i);
804                 wrbuf_putc(term_dict, '-');
805             }
806             wrbuf_putc(term_dict, ']');
807             wrbuf_putc(term_dict, '.');
808             wrbuf_putc(term_dict, '*');
809         }
810         wrbuf_putc(term_dict, ')');
811         break;
812     case 3:
813     case 102:
814     case -1:
815         if (!**term_sub)
816             return 1;
817         yaz_log(log_level_rpn, "Relation =");
818         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
819                       term_component, space_split, term_dst))
820         {
821             wrbuf_destroy(term_component);
822             return 0;
823         }
824         wrbuf_puts(term_dict, "(");
825         wrbuf_puts(term_dict, wrbuf_cstr(term_component));
826         wrbuf_puts(term_dict, ")");
827         break;
828     case 103:
829         yaz_log(log_level_rpn, "Relation always matches");
830         /* skip to end of term (we don't care what it is) */
831         while (**term_sub != '\0')
832             (*term_sub)++;
833         break;
834     default:
835         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
836         wrbuf_destroy(term_component);
837         return 0;
838     }
839     wrbuf_destroy(term_component);
840     return 1;
841 }
842
843 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
844                              const char **term_sub, 
845                              WRBUF term_dict,
846                              const Odr_oid *attributeSet, NMEM stream,
847                              struct grep_info *grep_info,
848                              int reg_type, int complete_flag,
849                              int num_bases, char **basenames,
850                              char *term_dst,
851                              const char *xpath_use,
852                              struct ord_list **ol);
853
854 static ZEBRA_RES term_limits_APT(ZebraHandle zh,
855                                  Z_AttributesPlusTerm *zapt,
856                                  zint *hits_limit_value,
857                                  const char **term_ref_id_str,
858                                  NMEM nmem)
859 {
860     AttrType term_ref_id_attr;
861     AttrType hits_limit_attr;
862     int term_ref_id_int;
863  
864     attr_init_APT(&hits_limit_attr, zapt, 11);
865     *hits_limit_value  = attr_find(&hits_limit_attr, NULL);
866
867     attr_init_APT(&term_ref_id_attr, zapt, 10);
868     term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
869     if (term_ref_id_int >= 0)
870     {
871         char *res = nmem_malloc(nmem, 20);
872         sprintf(res, "%d", term_ref_id_int);
873         *term_ref_id_str = res;
874     }
875
876     /* no limit given ? */
877     if (*hits_limit_value == -1)
878     {
879         if (*term_ref_id_str)
880         {
881             /* use global if term_ref is present */
882             *hits_limit_value = zh->approx_limit;
883         }
884         else
885         {
886             /* no counting if term_ref is not present */
887             *hits_limit_value = 0;
888         }
889     }
890     else if (*hits_limit_value == 0)
891     {
892         /* 0 is the same as global limit */
893         *hits_limit_value = zh->approx_limit;
894     }
895     yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
896             *term_ref_id_str ? *term_ref_id_str : "none",
897             *hits_limit_value);
898     return ZEBRA_OK;
899 }
900
901 static ZEBRA_RES term_trunc(ZebraHandle zh,
902                             Z_AttributesPlusTerm *zapt,
903                             const char **term_sub, 
904                             const Odr_oid *attributeSet, NMEM stream,
905                             struct grep_info *grep_info,
906                             int reg_type, int complete_flag,
907                             int num_bases, char **basenames,
908                             char *term_dst,
909                             const char *rank_type, 
910                             const char *xpath_use,
911                             NMEM rset_nmem,
912                             RSET *rset,
913                             struct rset_key_control *kc)
914 {
915     ZEBRA_RES res;
916     struct ord_list *ol;
917     zint hits_limit_value;
918     const char *term_ref_id_str = 0;
919     WRBUF term_dict = wrbuf_alloc();
920
921     *rset = 0;
922     term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
923     grep_info->isam_p_indx = 0;
924     res = string_term(zh, zapt, term_sub, term_dict,
925                       attributeSet, stream, grep_info,
926                       reg_type, complete_flag, num_bases, basenames,
927                       term_dst, xpath_use, &ol);
928     wrbuf_destroy(term_dict);
929     if (res != ZEBRA_OK)
930         return res;
931     if (!*term_sub)  /* no more terms ? */
932         return res;
933     yaz_log(log_level_rpn, "term: %s", term_dst);
934     *rset = rset_trunc(zh, grep_info->isam_p_buf,
935                        grep_info->isam_p_indx, term_dst,
936                        strlen(term_dst), rank_type, 1 /* preserve pos */,
937                        zapt->term->which, rset_nmem,
938                        kc, kc->scope, ol, reg_type, hits_limit_value,
939                        term_ref_id_str);
940     if (!*rset)
941         return ZEBRA_FAIL;
942     return ZEBRA_OK;
943 }
944
945 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
946                              const char **term_sub, 
947                              WRBUF term_dict,
948                              const Odr_oid *attributeSet, NMEM stream,
949                              struct grep_info *grep_info,
950                              int reg_type, int complete_flag,
951                              int num_bases, char **basenames,
952                              char *term_dst,
953                              const char *xpath_use,
954                              struct ord_list **ol)
955 {
956     int r, base_no;
957     AttrType truncation;
958     int truncation_value;
959     const char *termp;
960     struct rpn_char_map_info rcmi;
961     int space_split = complete_flag ? 0 : 1;
962
963     int bases_ok = 0;     /* no of databases with OK attribute */
964
965     *ol = ord_list_create(stream);
966
967     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
968     attr_init_APT(&truncation, zapt, 5);
969     truncation_value = attr_find(&truncation, NULL);
970     yaz_log(log_level_rpn, "truncation value %d", truncation_value);
971
972     for (base_no = 0; base_no < num_bases; base_no++)
973     {
974         int ord = -1;
975         int regex_range = 0;
976         int max_pos, prefix_len = 0;
977         int relation_error;
978         char ord_buf[32];
979         int ord_len, i;
980
981         termp = *term_sub; /* start of term for each database */
982
983
984         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
985         {
986             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
987                            basenames[base_no]);
988             return ZEBRA_FAIL;
989         }
990         
991         if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use,
992                               attributeSet, &ord) != ZEBRA_OK)
993             continue;
994
995
996         wrbuf_rewind(term_dict); /* new dictionary regexp term */
997
998         bases_ok++;
999
1000         *ol = ord_list_append(stream, *ol, ord);
1001         ord_len = key_SU_encode (ord, ord_buf);
1002         
1003         wrbuf_putc(term_dict, '(');
1004
1005         for (i = 0; i<ord_len; i++)
1006         {
1007             wrbuf_putc(term_dict, 1);  /* our internal regexp escape char */
1008             wrbuf_putc(term_dict, ord_buf[i]);
1009         }
1010         wrbuf_putc(term_dict, ')');
1011
1012         prefix_len = wrbuf_len(term_dict);
1013
1014         switch (truncation_value)
1015         {
1016         case -1:         /* not specified */
1017         case 100:        /* do not truncate */
1018             if (!string_relation(zh, zapt, &termp, term_dict,
1019                                  attributeSet,
1020                                  reg_type, space_split, term_dst,
1021                                  &relation_error))
1022             {
1023                 if (relation_error)
1024                 {
1025                     zebra_setError(zh, relation_error, 0);
1026                     return ZEBRA_FAIL;
1027                 }
1028                 *term_sub = 0;
1029                 return ZEBRA_OK;
1030             }
1031             break;
1032         case 1:          /* right truncation */
1033             wrbuf_putc(term_dict, '(');
1034             if (!term_100(zh->reg->zebra_maps, reg_type,
1035                           &termp, term_dict, space_split, term_dst))
1036             {
1037                 *term_sub = 0;
1038                 return ZEBRA_OK;
1039             }
1040             wrbuf_puts(term_dict, ".*)");
1041             break;
1042         case 2:          /* keft truncation */
1043             wrbuf_puts(term_dict, "(.*");
1044             if (!term_100(zh->reg->zebra_maps, reg_type,
1045                           &termp, term_dict, space_split, term_dst))
1046             {
1047                 *term_sub = 0;
1048                 return ZEBRA_OK;
1049             }
1050             wrbuf_putc(term_dict, ')');
1051             break;
1052         case 3:          /* left&right truncation */
1053             wrbuf_puts(term_dict, "(.*");
1054             if (!term_100(zh->reg->zebra_maps, reg_type,
1055                           &termp, term_dict, space_split, term_dst))
1056             {
1057                 *term_sub = 0;
1058                 return ZEBRA_OK;
1059             }
1060             wrbuf_puts(term_dict, ".*)");
1061             break;
1062         case 101:        /* process # in term */
1063             wrbuf_putc(term_dict, '(');
1064             if (!term_101(zh->reg->zebra_maps, reg_type,
1065                           &termp, term_dict, space_split, term_dst))
1066             {
1067                 *term_sub = 0;
1068                 return ZEBRA_OK;
1069             }
1070             wrbuf_puts(term_dict, ")");
1071             break;
1072         case 102:        /* Regexp-1 */
1073             wrbuf_putc(term_dict, '(');
1074             if (!term_102(zh->reg->zebra_maps, reg_type,
1075                           &termp, term_dict, space_split, term_dst))
1076             {
1077                 *term_sub = 0;
1078                 return ZEBRA_OK;
1079             }
1080             wrbuf_putc(term_dict, ')');
1081             break;
1082         case 103:       /* Regexp-2 */
1083             regex_range = 1;
1084             wrbuf_putc(term_dict, '(');
1085             if (!term_103(zh->reg->zebra_maps, reg_type,
1086                           &termp, term_dict, &regex_range,
1087                           space_split, term_dst))
1088             {
1089                 *term_sub = 0;
1090                 return ZEBRA_OK;
1091             }
1092             wrbuf_putc(term_dict, ')');
1093             break;
1094         case 104:        /* process # and ! in term */
1095             wrbuf_putc(term_dict, '(');
1096             if (!term_104(zh->reg->zebra_maps, reg_type,
1097                           &termp, term_dict, space_split, term_dst))
1098             {
1099                 *term_sub = 0;
1100                 return ZEBRA_OK;
1101             }
1102             wrbuf_putc(term_dict, ')');
1103             break;
1104         case 105:        /* process * and ! in term */
1105             wrbuf_putc(term_dict, '(');
1106             if (!term_105(zh->reg->zebra_maps, reg_type,
1107                           &termp, term_dict, space_split, term_dst, 1))
1108             {
1109                 *term_sub = 0;
1110                 return ZEBRA_OK;
1111             }
1112             wrbuf_putc(term_dict, ')');
1113             break;
1114         case 106:        /* process * and ! in term */
1115             wrbuf_putc(term_dict, '(');
1116             if (!term_105(zh->reg->zebra_maps, reg_type,
1117                           &termp, term_dict, space_split, term_dst, 0))
1118             {
1119                 *term_sub = 0;
1120                 return ZEBRA_OK;
1121             }
1122             wrbuf_putc(term_dict, ')');
1123             break;
1124         default:
1125             zebra_setError_zint(zh,
1126                                 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1127                                 truncation_value);
1128             return ZEBRA_FAIL;
1129         }
1130         if (1)
1131         {
1132             char buf[1000];
1133             const char *input = wrbuf_cstr(term_dict) + prefix_len;
1134             esc_str(buf, sizeof(buf), input, strlen(input));
1135         }
1136         yaz_log(log_level_rpn, "dict_lookup_grep: %s",
1137                 wrbuf_cstr(term_dict) + prefix_len);
1138         r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), regex_range,
1139                              grep_info, &max_pos, 
1140                              ord_len /* number of "exact" chars */,
1141                              grep_handle);
1142         if (r == 1)
1143             zebra_set_partial_result(zh);
1144         else if (r)
1145             yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1146     }
1147     if (!bases_ok)
1148         return ZEBRA_FAIL;
1149     *term_sub = termp;
1150     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1151     return ZEBRA_OK;
1152 }
1153
1154
1155
1156 static void grep_info_delete(struct grep_info *grep_info)
1157 {
1158 #ifdef TERM_COUNT
1159     xfree(grep_info->term_no);
1160 #endif
1161     xfree(grep_info->isam_p_buf);
1162 }
1163
1164 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1165                                    Z_AttributesPlusTerm *zapt,
1166                                    struct grep_info *grep_info,
1167                                    int reg_type)
1168 {
1169 #ifdef TERM_COUNT
1170     grep_info->term_no = 0;
1171 #endif
1172     grep_info->trunc_max = atoi(res_get_def(zh->res, "truncmax", "10000"));
1173     grep_info->isam_p_size = 0;
1174     grep_info->isam_p_buf = NULL;
1175     grep_info->zh = zh;
1176     grep_info->reg_type = reg_type;
1177     grep_info->termset = 0;
1178     if (zapt)
1179     {
1180         AttrType truncmax;
1181         int truncmax_value;
1182
1183         attr_init_APT(&truncmax, zapt, 13);
1184         truncmax_value = attr_find(&truncmax, NULL);
1185         if (truncmax_value != -1)
1186             grep_info->trunc_max = truncmax_value;
1187     }
1188     if (zapt)
1189     {
1190         AttrType termset;
1191         int termset_value_numeric;
1192         const char *termset_value_string;
1193
1194         attr_init_APT(&termset, zapt, 8);
1195         termset_value_numeric =
1196             attr_find_ex(&termset, NULL, &termset_value_string);
1197         if (termset_value_numeric != -1)
1198         {
1199 #if TERMSET_DISABLE
1200             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1201             return ZEBRA_FAIL;
1202 #else
1203             char resname[32];
1204             const char *termset_name = 0;
1205             if (termset_value_numeric != -2)
1206             {
1207                 
1208                 sprintf(resname, "%d", termset_value_numeric);
1209                 termset_name = resname;
1210             }
1211             else
1212             termset_name = termset_value_string;
1213             yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1214             grep_info->termset = resultSetAdd(zh, termset_name, 1);
1215             if (!grep_info->termset)
1216             {
1217                 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1218                 return ZEBRA_FAIL;
1219             }
1220 #endif
1221         }
1222     }
1223     return ZEBRA_OK;
1224 }
1225                                
1226 /**
1227   \brief Create result set(s) for list of terms
1228   \param zh Zebra Handle
1229   \param zapt Attributes Plust Term (RPN leaf)
1230   \param termz term as used in query but converted to UTF-8
1231   \param attributeSet default attribute set
1232   \param stream memory for result
1233   \param reg_type register type ('w', 'p',..)
1234   \param complete_flag whether it's phrases or not
1235   \param rank_type term flags for ranking
1236   \param xpath_use use attribute for X-Path (-1 for no X-path)
1237   \param num_bases number of databases
1238   \param basenames array of databases
1239   \param rset_nmem memory for result sets
1240   \param result_sets output result set for each term in list (output)
1241   \param num_result_sets number of output result sets
1242   \param kc rset key control to be used for created result sets
1243 */
1244 static ZEBRA_RES term_list_trunc(ZebraHandle zh,
1245                                  Z_AttributesPlusTerm *zapt,
1246                                  const char *termz,
1247                                  const Odr_oid *attributeSet,
1248                                  NMEM stream,
1249                                  int reg_type, int complete_flag,
1250                                  const char *rank_type,
1251                                  const char *xpath_use,
1252                                  int num_bases, char **basenames, 
1253                                  NMEM rset_nmem,
1254                                  RSET **result_sets, int *num_result_sets,
1255                                  struct rset_key_control *kc)
1256 {
1257     char term_dst[IT_MAX_WORD+1];
1258     struct grep_info grep_info;
1259     const char *termp = termz;
1260     int alloc_sets = 0;
1261
1262     *num_result_sets = 0;
1263     *term_dst = 0;
1264     if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1265         return ZEBRA_FAIL;
1266     while(1)
1267     { 
1268         ZEBRA_RES res;
1269
1270         if (alloc_sets == *num_result_sets)
1271         {
1272             int add = 10;
1273             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1274                                               sizeof(*rnew));
1275             if (alloc_sets)
1276                 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1277             alloc_sets = alloc_sets + add;
1278             *result_sets = rnew;
1279         }
1280         res = term_trunc(zh, zapt, &termp, attributeSet,
1281                          stream, &grep_info,
1282                          reg_type, complete_flag,
1283                          num_bases, basenames,
1284                          term_dst, rank_type,
1285                          xpath_use, rset_nmem,
1286                          &(*result_sets)[*num_result_sets],
1287                          kc);
1288         if (res != ZEBRA_OK)
1289         {
1290             int i;
1291             for (i = 0; i < *num_result_sets; i++)
1292                 rset_delete((*result_sets)[i]);
1293             grep_info_delete (&grep_info);
1294             return res;
1295         }
1296         if ((*result_sets)[*num_result_sets] == 0)
1297             break;
1298         (*num_result_sets)++;
1299
1300         if (!*termp)
1301             break;
1302     }
1303     grep_info_delete(&grep_info);
1304     return ZEBRA_OK;
1305 }
1306
1307 static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh,
1308                                          Z_AttributesPlusTerm *zapt,
1309                                          const Odr_oid *attributeSet,
1310                                          int reg_type,
1311                                          int num_bases, char **basenames,
1312                                          NMEM rset_nmem,
1313                                          RSET *rset,
1314                                          struct rset_key_control *kc)
1315 {
1316     RSET *f_set;
1317     int base_no;
1318     int position_value;
1319     int num_sets = 0;
1320     AttrType position;
1321
1322     attr_init_APT(&position, zapt, 3);
1323     position_value = attr_find(&position, NULL);
1324     switch(position_value)
1325     {
1326     case 3:
1327     case -1:
1328         return ZEBRA_OK;
1329     case 1:
1330     case 2:
1331         break;
1332     default:
1333         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1334                             position_value);
1335         return ZEBRA_FAIL;
1336     }
1337
1338     if (!zebra_maps_is_first_in_field(zh->reg->zebra_maps, reg_type))
1339     {
1340         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1341                             position_value);
1342         return ZEBRA_FAIL;
1343     }
1344
1345     if (!zh->reg->isamb && !zh->reg->isamc)
1346     {
1347         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1348                             position_value);
1349         return ZEBRA_FAIL;
1350     }
1351     f_set = xmalloc(sizeof(RSET) * num_bases);
1352     for (base_no = 0; base_no < num_bases; base_no++)
1353     {
1354         int ord = -1;
1355         char ord_buf[32];
1356         char term_dict[100];
1357         int ord_len;
1358         char *val;
1359         ISAM_P isam_p;
1360
1361         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1362         {
1363             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1364                            basenames[base_no]);
1365             return ZEBRA_FAIL;
1366         }
1367         
1368         if (zebra_apt_get_ord(zh, zapt, reg_type, 0,
1369                               attributeSet, &ord) != ZEBRA_OK)
1370             continue;
1371
1372         ord_len = key_SU_encode (ord, ord_buf);
1373         memcpy(term_dict, ord_buf, ord_len);
1374         strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1375         val = dict_lookup(zh->reg->dict, term_dict);
1376         if (!val)
1377             continue;
1378         assert(*val == sizeof(ISAM_P));
1379         memcpy(&isam_p, val+1, sizeof(isam_p));
1380         
1381
1382         if (zh->reg->isamb)
1383             f_set[num_sets++] = rsisamb_create(rset_nmem, kc, kc->scope,
1384                                                zh->reg->isamb, isam_p, 0);
1385         else if (zh->reg->isamc)
1386             f_set[num_sets++] = rsisamc_create(rset_nmem, kc, kc->scope,
1387                                                zh->reg->isamc, isam_p, 0);
1388     }
1389     if (num_sets)
1390     {
1391         *rset = rset_create_or(rset_nmem, kc, kc->scope,
1392                                0 /* termid */, num_sets, f_set);
1393     }
1394     xfree(f_set);
1395     return ZEBRA_OK;
1396 }
1397                                          
1398 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1399                                        Z_AttributesPlusTerm *zapt,
1400                                        const char *termz_org,
1401                                        const Odr_oid *attributeSet,
1402                                        NMEM stream,
1403                                        int reg_type, int complete_flag,
1404                                        const char *rank_type,
1405                                        const char *xpath_use,
1406                                        int num_bases, char **basenames, 
1407                                        NMEM rset_nmem,
1408                                        RSET *rset,
1409                                        struct rset_key_control *kc)
1410 {
1411     RSET *result_sets = 0;
1412     int num_result_sets = 0;
1413     ZEBRA_RES res =
1414         term_list_trunc(zh, zapt, termz_org, attributeSet,
1415                         stream, reg_type, complete_flag,
1416                         rank_type, xpath_use,
1417                         num_bases, basenames,
1418                         rset_nmem,
1419                         &result_sets, &num_result_sets, kc);
1420
1421     if (res != ZEBRA_OK)
1422         return res;
1423
1424     if (num_result_sets > 0)
1425     {
1426         RSET first_set = 0;
1427         res = rpn_search_APT_position(zh, zapt, attributeSet, 
1428                                       reg_type,
1429                                       num_bases, basenames,
1430                                       rset_nmem, &first_set,
1431                                       kc);
1432         if (res != ZEBRA_OK)
1433             return res;
1434         if (first_set)
1435         {
1436             RSET *nsets = nmem_malloc(stream,
1437                                       sizeof(RSET) * (num_result_sets+1));
1438             nsets[0] = first_set;
1439             memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1440             result_sets = nsets;
1441             num_result_sets++;
1442         }
1443     }
1444     if (num_result_sets == 0)
1445         *rset = rset_create_null(rset_nmem, kc, 0); 
1446     else if (num_result_sets == 1)
1447         *rset = result_sets[0];
1448     else
1449         *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1450                                  num_result_sets, result_sets,
1451                                  1 /* ordered */, 0 /* exclusion */,
1452                                  3 /* relation */, 1 /* distance */);
1453     if (!*rset)
1454         return ZEBRA_FAIL;
1455     return ZEBRA_OK;
1456 }
1457
1458 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1459                                         Z_AttributesPlusTerm *zapt,
1460                                         const char *termz_org,
1461                                         const Odr_oid *attributeSet,
1462                                         NMEM stream,
1463                                         int reg_type, int complete_flag,
1464                                         const char *rank_type,
1465                                         const char *xpath_use,
1466                                         int num_bases, char **basenames,
1467                                         NMEM rset_nmem,
1468                                         RSET *rset,
1469                                         struct rset_key_control *kc)
1470 {
1471     RSET *result_sets = 0;
1472     int num_result_sets = 0;
1473     int i;
1474     ZEBRA_RES res =
1475         term_list_trunc(zh, zapt, termz_org, attributeSet,
1476                         stream, reg_type, complete_flag,
1477                         rank_type, xpath_use,
1478                         num_bases, basenames,
1479                         rset_nmem,
1480                         &result_sets, &num_result_sets, kc);
1481     if (res != ZEBRA_OK)
1482         return res;
1483
1484     for (i = 0; i<num_result_sets; i++)
1485     {
1486         RSET first_set = 0;
1487         res = rpn_search_APT_position(zh, zapt, attributeSet, 
1488                                       reg_type,
1489                                       num_bases, basenames,
1490                                       rset_nmem, &first_set,
1491                                       kc);
1492         if (res != ZEBRA_OK)
1493         {
1494             for (i = 0; i<num_result_sets; i++)
1495                 rset_delete(result_sets[i]);
1496             return res;
1497         }
1498
1499         if (first_set)
1500         {
1501             RSET tmp_set[2];
1502
1503             tmp_set[0] = first_set;
1504             tmp_set[1] = result_sets[i];
1505             
1506             result_sets[i] = rset_create_prox(
1507                 rset_nmem, kc, kc->scope,
1508                 2, tmp_set,
1509                 1 /* ordered */, 0 /* exclusion */,
1510                 3 /* relation */, 1 /* distance */);
1511         }
1512     }
1513     if (num_result_sets == 0)
1514         *rset = rset_create_null(rset_nmem, kc, 0); 
1515     else if (num_result_sets == 1)
1516         *rset = result_sets[0];
1517     else
1518         *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1519                                num_result_sets, result_sets);
1520     if (!*rset)
1521         return ZEBRA_FAIL;
1522     return ZEBRA_OK;
1523 }
1524
1525 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1526                                          Z_AttributesPlusTerm *zapt,
1527                                          const char *termz_org,
1528                                          const Odr_oid *attributeSet,
1529                                          NMEM stream,
1530                                          int reg_type, int complete_flag,
1531                                          const char *rank_type, 
1532                                          const char *xpath_use,
1533                                          int num_bases, char **basenames,
1534                                          NMEM rset_nmem,
1535                                          RSET *rset,
1536                                          struct rset_key_control *kc)
1537 {
1538     RSET *result_sets = 0;
1539     int num_result_sets = 0;
1540     int i;
1541     ZEBRA_RES res =
1542         term_list_trunc(zh, zapt, termz_org, attributeSet,
1543                         stream, reg_type, complete_flag,
1544                         rank_type, xpath_use,
1545                         num_bases, basenames,
1546                         rset_nmem,
1547                         &result_sets, &num_result_sets,
1548                         kc);
1549     if (res != ZEBRA_OK)
1550         return res;
1551     for (i = 0; i<num_result_sets; i++)
1552     {
1553         RSET first_set = 0;
1554         res = rpn_search_APT_position(zh, zapt, attributeSet, 
1555                                       reg_type,
1556                                       num_bases, basenames,
1557                                       rset_nmem, &first_set,
1558                                       kc);
1559         if (res != ZEBRA_OK)
1560         {
1561             for (i = 0; i<num_result_sets; i++)
1562                 rset_delete(result_sets[i]);
1563             return res;
1564         }
1565
1566         if (first_set)
1567         {
1568             RSET tmp_set[2];
1569
1570             tmp_set[0] = first_set;
1571             tmp_set[1] = result_sets[i];
1572             
1573             result_sets[i] = rset_create_prox(
1574                 rset_nmem, kc, kc->scope,
1575                 2, tmp_set,
1576                 1 /* ordered */, 0 /* exclusion */,
1577                 3 /* relation */, 1 /* distance */);
1578         }
1579     }
1580
1581
1582     if (num_result_sets == 0)
1583         *rset = rset_create_null(rset_nmem, kc, 0); 
1584     else if (num_result_sets == 1)
1585         *rset = result_sets[0];
1586     else
1587         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1588                                 num_result_sets, result_sets);
1589     if (!*rset)
1590         return ZEBRA_FAIL;
1591     return ZEBRA_OK;
1592 }
1593
1594 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1595                             const char **term_sub,
1596                             WRBUF term_dict,
1597                             const Odr_oid *attributeSet,
1598                             struct grep_info *grep_info,
1599                             int *max_pos,
1600                             int reg_type,
1601                             char *term_dst,
1602                             int *error_code)
1603 {
1604     AttrType relation;
1605     int relation_value;
1606     int term_value;
1607     int r;
1608     WRBUF term_num = wrbuf_alloc();
1609
1610     *error_code = 0;
1611     attr_init_APT(&relation, zapt, 2);
1612     relation_value = attr_find(&relation, NULL);
1613
1614     yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1615
1616     switch (relation_value)
1617     {
1618     case 1:
1619         yaz_log(log_level_rpn, "Relation <");
1620         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_num, 1,
1621                       term_dst))
1622         { 
1623             wrbuf_destroy(term_num);
1624             return 0;
1625         }
1626         term_value = atoi (wrbuf_cstr(term_num));
1627         gen_regular_rel(term_dict, term_value-1, 1);
1628         break;
1629     case 2:
1630         yaz_log(log_level_rpn, "Relation <=");
1631         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_num, 1,
1632                       term_dst))
1633         {
1634             wrbuf_destroy(term_num);
1635             return 0;
1636         }
1637         term_value = atoi (wrbuf_cstr(term_num));
1638         gen_regular_rel(term_dict, term_value, 1);
1639         break;
1640     case 4:
1641         yaz_log(log_level_rpn, "Relation >=");
1642         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_num, 1,
1643                       term_dst))
1644         {
1645             wrbuf_destroy(term_num);
1646             return 0;
1647         }
1648         term_value = atoi (wrbuf_cstr(term_num));
1649         gen_regular_rel(term_dict, term_value, 0);
1650         break;
1651     case 5:
1652         yaz_log(log_level_rpn, "Relation >");
1653         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_num, 1,
1654                       term_dst))
1655         {
1656             wrbuf_destroy(term_num);
1657             return 0;
1658         }
1659         term_value = atoi (wrbuf_cstr(term_num));
1660         gen_regular_rel(term_dict, term_value+1, 0);
1661         break;
1662     case -1:
1663     case 3:
1664         yaz_log(log_level_rpn, "Relation =");
1665         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_num, 1,
1666                       term_dst))
1667         {
1668             wrbuf_destroy(term_num);
1669             return 0; 
1670         }
1671         term_value = atoi (wrbuf_cstr(term_num));
1672         wrbuf_printf(term_dict, "(0*%d)", term_value);
1673         break;
1674     case 103:
1675         /* term_tmp untouched.. */
1676         while (**term_sub != '\0')
1677             (*term_sub)++;
1678         break;
1679     default:
1680         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1681         wrbuf_destroy(term_num); 
1682         return 0;
1683     }
1684     r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 
1685                          0, grep_info, max_pos, 0, grep_handle);
1686
1687     if (r == 1)
1688         zebra_set_partial_result(zh);
1689     else if (r)
1690         yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1691     yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1692     wrbuf_destroy(term_num);
1693     return 1;
1694 }
1695
1696 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1697                               const char **term_sub, 
1698                               WRBUF term_dict,
1699                               const Odr_oid *attributeSet, NMEM stream,
1700                               struct grep_info *grep_info,
1701                               int reg_type, int complete_flag,
1702                               int num_bases, char **basenames,
1703                               char *term_dst, 
1704                               const char *xpath_use,
1705                               struct ord_list **ol)
1706 {
1707     int base_no;
1708     const char *termp;
1709     struct rpn_char_map_info rcmi;
1710
1711     int bases_ok = 0;     /* no of databases with OK attribute */
1712
1713     *ol = ord_list_create(stream);
1714
1715     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1716
1717     for (base_no = 0; base_no < num_bases; base_no++)
1718     {
1719         int max_pos;
1720         int relation_error = 0;
1721         int ord, ord_len, i;
1722         char ord_buf[32];
1723
1724         termp = *term_sub;
1725
1726         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1727         {
1728             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1729                            basenames[base_no]);
1730             return ZEBRA_FAIL;
1731         }
1732
1733         if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use,
1734                               attributeSet, &ord) != ZEBRA_OK)
1735             continue;
1736         bases_ok++;
1737
1738         wrbuf_rewind(term_dict);
1739
1740         *ol = ord_list_append(stream, *ol, ord);
1741
1742         ord_len = key_SU_encode (ord, ord_buf);
1743
1744         wrbuf_putc(term_dict, '(');
1745         for (i = 0; i < ord_len; i++)
1746         {
1747             wrbuf_putc(term_dict, 1);
1748             wrbuf_putc(term_dict, ord_buf[i]);
1749         }
1750         wrbuf_putc(term_dict, ')');
1751
1752         if (!numeric_relation(zh, zapt, &termp, term_dict,
1753                               attributeSet, grep_info, &max_pos, reg_type,
1754                               term_dst, &relation_error))
1755         {
1756             if (relation_error)
1757             {
1758                 zebra_setError(zh, relation_error, 0);
1759                 return ZEBRA_FAIL;
1760             }
1761             *term_sub = 0;
1762             return ZEBRA_OK;
1763         }
1764     }
1765     if (!bases_ok)
1766         return ZEBRA_FAIL;
1767     *term_sub = termp;
1768     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1769     return ZEBRA_OK;
1770 }
1771
1772                                  
1773 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1774                                         Z_AttributesPlusTerm *zapt,
1775                                         const char *termz,
1776                                         const Odr_oid *attributeSet,
1777                                         NMEM stream,
1778                                         int reg_type, int complete_flag,
1779                                         const char *rank_type, 
1780                                         const char *xpath_use,
1781                                         int num_bases, char **basenames,
1782                                         NMEM rset_nmem,
1783                                         RSET *rset,
1784                                         struct rset_key_control *kc)
1785 {
1786     char term_dst[IT_MAX_WORD+1];
1787     const char *termp = termz;
1788     RSET *result_sets = 0;
1789     int num_result_sets = 0;
1790     ZEBRA_RES res;
1791     struct grep_info grep_info;
1792     int alloc_sets = 0;
1793     zint hits_limit_value;
1794     const char *term_ref_id_str = 0;
1795
1796     term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
1797
1798     yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1799     if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1800         return ZEBRA_FAIL;
1801     while (1)
1802     { 
1803         struct ord_list *ol;
1804         WRBUF term_dict = wrbuf_alloc();
1805         if (alloc_sets == num_result_sets)
1806         {
1807             int add = 10;
1808             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1809                                               sizeof(*rnew));
1810             if (alloc_sets)
1811                 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1812             alloc_sets = alloc_sets + add;
1813             result_sets = rnew;
1814         }
1815         yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1816         grep_info.isam_p_indx = 0;
1817         res = numeric_term(zh, zapt, &termp, term_dict,
1818                            attributeSet, stream, &grep_info,
1819                            reg_type, complete_flag, num_bases, basenames,
1820                            term_dst, xpath_use, &ol);
1821         wrbuf_destroy(term_dict);
1822         if (res == ZEBRA_FAIL || termp == 0)
1823             break;
1824         yaz_log(YLOG_DEBUG, "term: %s", term_dst);
1825         result_sets[num_result_sets] =
1826             rset_trunc(zh, grep_info.isam_p_buf,
1827                        grep_info.isam_p_indx, term_dst,
1828                        strlen(term_dst), rank_type,
1829                        0 /* preserve position */,
1830                        zapt->term->which, rset_nmem, 
1831                        kc, kc->scope, ol, reg_type,
1832                        hits_limit_value,
1833                        term_ref_id_str);
1834         if (!result_sets[num_result_sets])
1835             break;
1836         num_result_sets++;
1837         if (!*termp)
1838             break;
1839     }
1840     grep_info_delete(&grep_info);
1841
1842     if (res != ZEBRA_OK)
1843         return res;
1844     if (num_result_sets == 0)
1845         *rset = rset_create_null(rset_nmem, kc, 0);
1846     else if (num_result_sets == 1)
1847         *rset = result_sets[0];
1848     else
1849         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1850                                 num_result_sets, result_sets);
1851     if (!*rset)
1852         return ZEBRA_FAIL;
1853     return ZEBRA_OK;
1854 }
1855
1856 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1857                                       Z_AttributesPlusTerm *zapt,
1858                                       const char *termz,
1859                                       const Odr_oid *attributeSet,
1860                                       NMEM stream,
1861                                       const char *rank_type, NMEM rset_nmem,
1862                                       RSET *rset,
1863                                       struct rset_key_control *kc)
1864 {
1865     Record rec;
1866     zint sysno = atozint(termz);
1867     
1868     if (sysno <= 0)
1869         sysno = 0;
1870     rec = rec_get(zh->reg->records, sysno);
1871     if (!rec)
1872         sysno = 0;
1873
1874     rec_free(&rec);
1875
1876     if (sysno <= 0)
1877     {
1878         *rset = rset_create_null(rset_nmem, kc, 0);
1879     }
1880     else
1881     {
1882         RSFD rsfd;
1883         struct it_key key;
1884         *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1885                                  res_get(zh->res, "setTmpDir"), 0);
1886         rsfd = rset_open(*rset, RSETF_WRITE);
1887         
1888         key.mem[0] = sysno;
1889         key.mem[1] = 1;
1890         key.len = 2;
1891         rset_write(rsfd, &key);
1892         rset_close(rsfd);
1893     }
1894     return ZEBRA_OK;
1895 }
1896
1897 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1898                                const Odr_oid *attributeSet, NMEM stream,
1899                                Z_SortKeySpecList *sort_sequence,
1900                                const char *rank_type,
1901                                NMEM rset_nmem,
1902                                RSET *rset,
1903                                struct rset_key_control *kc)
1904 {
1905     int i;
1906     int sort_relation_value;
1907     AttrType sort_relation_type;
1908     Z_SortKeySpec *sks;
1909     Z_SortKey *sk;
1910     char termz[20];
1911     
1912     attr_init_APT(&sort_relation_type, zapt, 7);
1913     sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
1914
1915     if (!sort_sequence->specs)
1916     {
1917         sort_sequence->num_specs = 10;
1918         sort_sequence->specs = (Z_SortKeySpec **)
1919             nmem_malloc(stream, sort_sequence->num_specs *
1920                          sizeof(*sort_sequence->specs));
1921         for (i = 0; i<sort_sequence->num_specs; i++)
1922             sort_sequence->specs[i] = 0;
1923     }
1924     if (zapt->term->which != Z_Term_general)
1925         i = 0;
1926     else
1927         i = atoi_n ((char *) zapt->term->u.general->buf,
1928                     zapt->term->u.general->len);
1929     if (i >= sort_sequence->num_specs)
1930         i = 0;
1931     sprintf(termz, "%d", i);
1932
1933     sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1934     sks->sortElement = (Z_SortElement *)
1935         nmem_malloc(stream, sizeof(*sks->sortElement));
1936     sks->sortElement->which = Z_SortElement_generic;
1937     sk = sks->sortElement->u.generic = (Z_SortKey *)
1938         nmem_malloc(stream, sizeof(*sk));
1939     sk->which = Z_SortKey_sortAttributes;
1940     sk->u.sortAttributes = (Z_SortAttributes *)
1941         nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
1942
1943     sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet);
1944     sk->u.sortAttributes->list = zapt->attributes;
1945
1946     sks->sortRelation = (int *)
1947         nmem_malloc(stream, sizeof(*sks->sortRelation));
1948     if (sort_relation_value == 1)
1949         *sks->sortRelation = Z_SortKeySpec_ascending;
1950     else if (sort_relation_value == 2)
1951         *sks->sortRelation = Z_SortKeySpec_descending;
1952     else 
1953         *sks->sortRelation = Z_SortKeySpec_ascending;
1954
1955     sks->caseSensitivity = (int *)
1956         nmem_malloc(stream, sizeof(*sks->caseSensitivity));
1957     *sks->caseSensitivity = 0;
1958
1959     sks->which = Z_SortKeySpec_null;
1960     sks->u.null = odr_nullval ();
1961     sort_sequence->specs[i] = sks;
1962     *rset = rset_create_null(rset_nmem, kc, 0);
1963     return ZEBRA_OK;
1964 }
1965
1966
1967 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1968                            const Odr_oid *attributeSet,
1969                            struct xpath_location_step *xpath, int max,
1970                            NMEM mem)
1971 {
1972     const Odr_oid *curAttributeSet = attributeSet;
1973     AttrType use;
1974     const char *use_string = 0;
1975     
1976     attr_init_APT(&use, zapt, 1);
1977     attr_find_ex(&use, &curAttributeSet, &use_string);
1978
1979     if (!use_string || *use_string != '/')
1980         return -1;
1981
1982     return zebra_parse_xpath_str(use_string, xpath, max, mem);
1983 }
1984  
1985                
1986
1987 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
1988                         int reg_type, const char *term, 
1989                         const char *xpath_use,
1990                         NMEM rset_nmem,
1991                         struct rset_key_control *kc)
1992 {
1993     struct grep_info grep_info;
1994     int ord = zebraExplain_lookup_attr_str(zh->reg->zei, 
1995                                            zinfo_index_category_index,
1996                                            reg_type, xpath_use);
1997     if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, '0') == ZEBRA_FAIL)
1998         return rset_create_null(rset_nmem, kc, 0);
1999     
2000     if (ord < 0)
2001         return rset_create_null(rset_nmem, kc, 0);
2002     else
2003     {
2004         int i, r, max_pos;
2005         char ord_buf[32];
2006         RSET rset;
2007         WRBUF term_dict = wrbuf_alloc();
2008         int ord_len = key_SU_encode (ord, ord_buf);
2009         int term_type = Z_Term_characterString;
2010         const char *flags = "void";
2011
2012         wrbuf_putc(term_dict, '(');
2013         for (i = 0; i<ord_len; i++)
2014         {
2015             wrbuf_putc(term_dict, 1);
2016             wrbuf_putc(term_dict, ord_buf[i]);
2017         }
2018         wrbuf_putc(term_dict, ')');
2019         wrbuf_puts(term_dict, term);
2020         
2021         grep_info.isam_p_indx = 0;
2022         r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 0,
2023                              &grep_info, &max_pos, 0, grep_handle);
2024         yaz_log(YLOG_DEBUG, "%s %d positions", term,
2025                 grep_info.isam_p_indx);
2026         rset = rset_trunc(zh, grep_info.isam_p_buf,
2027                           grep_info.isam_p_indx, term, strlen(term),
2028                           flags, 1, term_type, rset_nmem,
2029                           kc, kc->scope, 0, reg_type, 0 /* hits_limit */,
2030                           0 /* term_ref_id_str */);
2031         grep_info_delete(&grep_info);
2032         wrbuf_destroy(term_dict);
2033         return rset;
2034     }
2035 }
2036
2037 static
2038 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2039                            int num_bases, char **basenames,
2040                            NMEM stream, const char *rank_type, RSET rset,
2041                            int xpath_len, struct xpath_location_step *xpath,
2042                            NMEM rset_nmem,
2043                            RSET *rset_out,
2044                            struct rset_key_control *kc)
2045 {
2046     int base_no;
2047     int i;
2048     int always_matches = rset ? 0 : 1;
2049
2050     if (xpath_len < 0)
2051     {
2052         *rset_out = rset;
2053         return ZEBRA_OK;
2054     }
2055
2056     yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2057     for (i = 0; i<xpath_len; i++)
2058     {
2059         yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2060
2061     }
2062
2063     /*
2064       //a    ->    a/.*
2065       //a/b  ->    b/a/.*
2066       /a     ->    a/
2067       /a/b   ->    b/a/
2068
2069       /      ->    none
2070
2071    a[@attr = value]/b[@other = othervalue]
2072
2073  /e/@a val      range(e/,range(@a,freetext(w,1015,val),@a),e/)
2074  /a/b val       range(b/a/,freetext(w,1016,val),b/a/)
2075  /a/b/@c val    range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2076  /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2077  /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2078  /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
2079       
2080     */
2081
2082     dict_grep_cmap (zh->reg->dict, 0, 0);
2083
2084     for (base_no = 0; base_no < num_bases; base_no++)
2085     {
2086         int level = xpath_len;
2087         int first_path = 1;
2088         
2089         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2090         {
2091             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2092                            basenames[base_no]);
2093             *rset_out = rset;
2094             return ZEBRA_FAIL;
2095         }
2096         while (--level >= 0)
2097         {
2098             WRBUF xpath_rev = wrbuf_alloc();
2099             int i;
2100             RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
2101
2102             for (i = level; i >= 1; --i)
2103             {
2104                 const char *cp = xpath[i].part;
2105                 if (*cp)
2106                 {
2107                     for (; *cp; cp++)
2108                     {
2109                         if (*cp == '*')
2110                             wrbuf_puts(xpath_rev, "[^/]*");
2111                         else if (*cp == ' ')
2112                             wrbuf_puts(xpath_rev, "\001 ");
2113                         else
2114                             wrbuf_putc(xpath_rev, *cp);
2115
2116                         /* wrbuf_putc does not null-terminate , but
2117                            wrbuf_puts below ensures it does.. so xpath_rev
2118                            is OK iff length is > 0 */
2119                     }
2120                     wrbuf_puts(xpath_rev, "/");
2121                 }
2122                 else if (i == 1)  /* // case */
2123                     wrbuf_puts(xpath_rev, ".*");
2124             }
2125             if (xpath[level].predicate &&
2126                 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2127                 xpath[level].predicate->u.relation.name[0])
2128             {
2129                 WRBUF wbuf = wrbuf_alloc();
2130                 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2131                 if (xpath[level].predicate->u.relation.value)
2132                 {
2133                     const char *cp = xpath[level].predicate->u.relation.value;
2134                     wrbuf_putc(wbuf, '=');
2135                     
2136                     while (*cp)
2137                     {
2138                         if (strchr(REGEX_CHARS, *cp))
2139                             wrbuf_putc(wbuf, '\\');
2140                         wrbuf_putc(wbuf, *cp);
2141                         cp++;
2142                     }
2143                 }
2144                 rset_attr = xpath_trunc(
2145                     zh, stream, '0', wrbuf_cstr(wbuf), ZEBRA_XPATH_ATTR_NAME, 
2146                     rset_nmem, kc);
2147                 wrbuf_destroy(wbuf);
2148             } 
2149             else 
2150             {
2151                 if (!first_path)
2152                 {
2153                     wrbuf_destroy(xpath_rev);
2154                     continue;
2155                 }
2156             }
2157             yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level, 
2158                     wrbuf_cstr(xpath_rev));
2159             if (wrbuf_len(xpath_rev))
2160             {
2161                 rset_start_tag = xpath_trunc(zh, stream, '0', 
2162                                              wrbuf_cstr(xpath_rev),
2163                                              ZEBRA_XPATH_ELM_BEGIN, 
2164                                              rset_nmem, kc);
2165                 if (always_matches)
2166                     rset = rset_start_tag;
2167                 else
2168                 {
2169                     rset_end_tag = xpath_trunc(zh, stream, '0', 
2170                                                wrbuf_cstr(xpath_rev),
2171                                                ZEBRA_XPATH_ELM_END, 
2172                                                rset_nmem, kc);
2173                     
2174                     rset = rset_create_between(rset_nmem, kc, kc->scope,
2175                                                rset_start_tag, rset,
2176                                                rset_end_tag, rset_attr);
2177                 }
2178             }
2179             wrbuf_destroy(xpath_rev);
2180             first_path = 0;
2181         }
2182     }
2183     *rset_out = rset;
2184     return ZEBRA_OK;
2185 }
2186
2187 #define MAX_XPATH_STEPS 10
2188
2189 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2190                                 const Odr_oid *attributeSet, NMEM stream,
2191                                 Z_SortKeySpecList *sort_sequence,
2192                                 int num_bases, char **basenames, 
2193                                 NMEM rset_nmem,
2194                                 RSET *rset,
2195                                 struct rset_key_control *kc)
2196 {
2197     ZEBRA_RES res = ZEBRA_OK;
2198     unsigned reg_id;
2199     char *search_type = NULL;
2200     char rank_type[128];
2201     int complete_flag;
2202     int sort_flag;
2203     char termz[IT_MAX_WORD+1];
2204     int xpath_len;
2205     const char *xpath_use = 0;
2206     struct xpath_location_step xpath[MAX_XPATH_STEPS];
2207
2208     if (!log_level_set)
2209     {
2210         log_level_rpn = yaz_log_module_level("rpn");
2211         log_level_set = 1;
2212     }
2213     zebra_maps_attr(zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2214                     rank_type, &complete_flag, &sort_flag);
2215     
2216     yaz_log(YLOG_DEBUG, "reg_id=%c", reg_id);
2217     yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2218     yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2219     yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2220
2221     if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2222         return ZEBRA_FAIL;
2223
2224     if (sort_flag)
2225         return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2226                              rank_type, rset_nmem, rset, kc);
2227     /* consider if an X-Path query is used */
2228     xpath_len = rpn_check_xpath(zh, zapt, attributeSet, 
2229                                 xpath, MAX_XPATH_STEPS, stream);
2230     if (xpath_len >= 0)
2231     {
2232         if (xpath[xpath_len-1].part[0] == '@') 
2233             xpath_use = ZEBRA_XPATH_ATTR_CDATA;  /* last step is attribute  */
2234         else
2235             xpath_use = ZEBRA_XPATH_CDATA;  /* searching for cdata */        
2236
2237         if (1)
2238         {
2239             AttrType relation;
2240             int relation_value;
2241
2242             attr_init_APT(&relation, zapt, 2);
2243             relation_value = attr_find(&relation, NULL);
2244
2245             if (relation_value == 103) /* alwaysmatches */
2246             {
2247                 *rset = 0; /* signal no "term" set */
2248                 return rpn_search_xpath(zh, num_bases, basenames,
2249                                         stream, rank_type, *rset, 
2250                                         xpath_len, xpath, rset_nmem, rset, kc);
2251             }
2252         }
2253     }
2254
2255     /* search using one of the various search type strategies
2256        termz is our UTF-8 search term
2257        attributeSet is top-level default attribute set 
2258        stream is ODR for search
2259        reg_id is the register type
2260        complete_flag is 1 for complete subfield, 0 for incomplete
2261        xpath_use is use-attribute to be used for X-Path search, 0 for none
2262     */
2263     if (!strcmp(search_type, "phrase"))
2264     {
2265         res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2266                                     reg_id, complete_flag, rank_type,
2267                                     xpath_use,
2268                                     num_bases, basenames, rset_nmem,
2269                                     rset, kc);
2270     }
2271     else if (!strcmp(search_type, "and-list"))
2272     {
2273         res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2274                                       reg_id, complete_flag, rank_type,
2275                                       xpath_use,
2276                                       num_bases, basenames, rset_nmem,
2277                                       rset, kc);
2278     }
2279     else if (!strcmp(search_type, "or-list"))
2280     {
2281         res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2282                                      reg_id, complete_flag, rank_type,
2283                                      xpath_use,
2284                                      num_bases, basenames, rset_nmem,
2285                                      rset, kc);
2286     }
2287     else if (!strcmp(search_type, "local"))
2288     {
2289         res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2290                                    rank_type, rset_nmem, rset, kc);
2291     }
2292     else if (!strcmp(search_type, "numeric"))
2293     {
2294         res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2295                                      reg_id, complete_flag, rank_type,
2296                                      xpath_use,
2297                                      num_bases, basenames, rset_nmem,
2298                                      rset, kc);
2299     }
2300     else
2301     {
2302         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2303         res = ZEBRA_FAIL;
2304     }
2305     if (res != ZEBRA_OK)
2306         return res;
2307     if (!*rset)
2308         return ZEBRA_FAIL;
2309     return rpn_search_xpath(zh, num_bases, basenames,
2310                             stream, rank_type, *rset, 
2311                             xpath_len, xpath, rset_nmem, rset, kc);
2312 }
2313
2314 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2315                                       const Odr_oid *attributeSet, 
2316                                       NMEM stream, NMEM rset_nmem,
2317                                       Z_SortKeySpecList *sort_sequence,
2318                                       int num_bases, char **basenames,
2319                                       RSET **result_sets, int *num_result_sets,
2320                                       Z_Operator *parent_op,
2321                                       struct rset_key_control *kc);
2322
2323 ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs,
2324                                    zint *approx_limit)
2325 {
2326     ZEBRA_RES res = ZEBRA_OK;
2327     if (zs->which == Z_RPNStructure_complex)
2328     {
2329         if (res == ZEBRA_OK)
2330             res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
2331                                            approx_limit);
2332         if (res == ZEBRA_OK)
2333             res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
2334                                            approx_limit);
2335     }
2336     else if (zs->which == Z_RPNStructure_simple)
2337     {
2338         if (zs->u.simple->which == Z_Operand_APT)
2339         {
2340             Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
2341             AttrType global_hits_limit_attr;
2342             int l;
2343             
2344             attr_init_APT(&global_hits_limit_attr, zapt, 12);
2345             
2346             l = attr_find(&global_hits_limit_attr, NULL);
2347             if (l != -1)
2348                 *approx_limit = l;
2349         }
2350     }
2351     return res;
2352 }
2353
2354 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2355                          const Odr_oid *attributeSet, 
2356                          NMEM stream, NMEM rset_nmem,
2357                          Z_SortKeySpecList *sort_sequence,
2358                          int num_bases, char **basenames,
2359                          RSET *result_set)
2360 {
2361     RSET *result_sets = 0;
2362     int num_result_sets = 0;
2363     ZEBRA_RES res;
2364     struct rset_key_control *kc = zebra_key_control_create(zh);
2365
2366     res = rpn_search_structure(zh, zs, attributeSet,
2367                                stream, rset_nmem,
2368                                sort_sequence, 
2369                                num_bases, basenames,
2370                                &result_sets, &num_result_sets,
2371                                0 /* no parent op */,
2372                                kc);
2373     if (res != ZEBRA_OK)
2374     {
2375         int i;
2376         for (i = 0; i<num_result_sets; i++)
2377             rset_delete(result_sets[i]);
2378         *result_set = 0;
2379     }
2380     else
2381     {
2382         assert(num_result_sets == 1);
2383         assert(result_sets);
2384         assert(*result_sets);
2385         *result_set = *result_sets;
2386     }
2387     (*kc->dec)(kc);
2388     return res;
2389 }
2390
2391 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2392                                const Odr_oid *attributeSet, 
2393                                NMEM stream, NMEM rset_nmem,
2394                                Z_SortKeySpecList *sort_sequence,
2395                                int num_bases, char **basenames,
2396                                RSET **result_sets, int *num_result_sets,
2397                                Z_Operator *parent_op,
2398                                struct rset_key_control *kc)
2399 {
2400     *num_result_sets = 0;
2401     if (zs->which == Z_RPNStructure_complex)
2402     {
2403         ZEBRA_RES res;
2404         Z_Operator *zop = zs->u.complex->roperator;
2405         RSET *result_sets_l = 0;
2406         int num_result_sets_l = 0;
2407         RSET *result_sets_r = 0;
2408         int num_result_sets_r = 0;
2409
2410         res = rpn_search_structure(zh, zs->u.complex->s1,
2411                                    attributeSet, stream, rset_nmem,
2412                                    sort_sequence,
2413                                    num_bases, basenames,
2414                                    &result_sets_l, &num_result_sets_l,
2415                                    zop, kc);
2416         if (res != ZEBRA_OK)
2417         {
2418             int i;
2419             for (i = 0; i<num_result_sets_l; i++)
2420                 rset_delete(result_sets_l[i]);
2421             return res;
2422         }
2423         res = rpn_search_structure(zh, zs->u.complex->s2,
2424                                    attributeSet, stream, rset_nmem,
2425                                    sort_sequence,
2426                                    num_bases, basenames,
2427                                    &result_sets_r, &num_result_sets_r,
2428                                    zop, kc);
2429         if (res != ZEBRA_OK)
2430         {
2431             int i;
2432             for (i = 0; i<num_result_sets_l; i++)
2433                 rset_delete(result_sets_l[i]);
2434             for (i = 0; i<num_result_sets_r; i++)
2435                 rset_delete(result_sets_r[i]);
2436             return res;
2437         }
2438
2439         /* make a new list of result for all children */
2440         *num_result_sets = num_result_sets_l + num_result_sets_r;
2441         *result_sets = nmem_malloc(stream, *num_result_sets * 
2442                                    sizeof(**result_sets));
2443         memcpy(*result_sets, result_sets_l, 
2444                num_result_sets_l * sizeof(**result_sets));
2445         memcpy(*result_sets + num_result_sets_l, result_sets_r, 
2446                num_result_sets_r * sizeof(**result_sets));
2447
2448         if (!parent_op || parent_op->which != zop->which
2449             || (zop->which != Z_Operator_and &&
2450                 zop->which != Z_Operator_or))
2451         {
2452             /* parent node different from this one (or non-present) */
2453             /* we must combine result sets now */
2454             RSET rset;
2455             switch (zop->which)
2456             {
2457             case Z_Operator_and:
2458                 rset = rset_create_and(rset_nmem, kc,
2459                                        kc->scope,
2460                                        *num_result_sets, *result_sets);
2461                 break;
2462             case Z_Operator_or:
2463                 rset = rset_create_or(rset_nmem, kc,
2464                                       kc->scope, 0, /* termid */
2465                                       *num_result_sets, *result_sets);
2466                 break;
2467             case Z_Operator_and_not:
2468                 rset = rset_create_not(rset_nmem, kc,
2469                                        kc->scope,
2470                                        (*result_sets)[0],
2471                                        (*result_sets)[1]);
2472                 break;
2473             case Z_Operator_prox:
2474                 if (zop->u.prox->which != Z_ProximityOperator_known)
2475                 {
2476                     zebra_setError(zh, 
2477                                    YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2478                                    0);
2479                     return ZEBRA_FAIL;
2480                 }
2481                 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2482                 {
2483                     zebra_setError_zint(zh,
2484                                         YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2485                                         *zop->u.prox->u.known);
2486                     return ZEBRA_FAIL;
2487                 }
2488                 else
2489                 {
2490                     rset = rset_create_prox(rset_nmem, kc,
2491                                             kc->scope,
2492                                             *num_result_sets, *result_sets, 
2493                                             *zop->u.prox->ordered,
2494                                             (!zop->u.prox->exclusion ? 
2495                                              0 : *zop->u.prox->exclusion),
2496                                             *zop->u.prox->relationType,
2497                                             *zop->u.prox->distance );
2498                 }
2499                 break;
2500             default:
2501                 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
2502                 return ZEBRA_FAIL;
2503             }
2504             *num_result_sets = 1;
2505             *result_sets = nmem_malloc(stream, *num_result_sets * 
2506                                        sizeof(**result_sets));
2507             (*result_sets)[0] = rset;
2508         }
2509     }
2510     else if (zs->which == Z_RPNStructure_simple)
2511     {
2512         RSET rset;
2513         ZEBRA_RES res;
2514
2515         if (zs->u.simple->which == Z_Operand_APT)
2516         {
2517             yaz_log(YLOG_DEBUG, "rpn_search_APT");
2518             res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2519                                  attributeSet, stream, sort_sequence,
2520                                  num_bases, basenames, rset_nmem, &rset,
2521                                  kc);
2522             if (res != ZEBRA_OK)
2523                 return res;
2524         }
2525         else if (zs->u.simple->which == Z_Operand_resultSetId)
2526         {
2527             yaz_log(YLOG_DEBUG, "rpn_search_ref");
2528             rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2529             if (!rset)
2530             {
2531                 zebra_setError(zh, 
2532                                YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2533                                zs->u.simple->u.resultSetId);
2534                 return ZEBRA_FAIL;
2535             }
2536             rset_dup(rset);
2537         }
2538         else
2539         {
2540             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2541             return ZEBRA_FAIL;
2542         }
2543         *num_result_sets = 1;
2544         *result_sets = nmem_malloc(stream, *num_result_sets * 
2545                                    sizeof(**result_sets));
2546         (*result_sets)[0] = rset;
2547     }
2548     else
2549     {
2550         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2551         return ZEBRA_FAIL;
2552     }
2553     return ZEBRA_OK;
2554 }
2555
2556
2557
2558 /*
2559  * Local variables:
2560  * c-basic-offset: 4
2561  * indent-tabs-mode: nil
2562  * End:
2563  * vim: shiftwidth=4 tabstop=8 expandtab
2564  */
2565