Rename source files
[idzebra-moved-to-github.git] / index / rpnsearch.c
1 /* $Id: rpnsearch.c,v 1.1 2006-09-21 08:56:52 adam Exp $
2    Copyright (C) 1995-2006
3    Index Data ApS
4
5 This file is part of the Zebra server.
6
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
20
21 */
22
23 #include <stdio.h>
24 #include <assert.h>
25 #ifdef WIN32
26 #include <io.h>
27 #endif
28 #if HAVE_UNISTD_H
29 #include <unistd.h>
30 #endif
31 #include <ctype.h>
32
33 #include <yaz/diagbib1.h>
34 #include "index.h"
35 #include <zebra_xpath.h>
36 #include <attrfind.h>
37 #include <charmap.h>
38 #include <rset.h>
39
40 static int log_level_set = 0;
41 static int log_level_rpn = 0;
42
43 #define TERMSET_DISABLE 1
44
45 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
46 {
47     struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
48     const char **out = zebra_maps_input(p->zm, p->reg_type, from, len, 0);
49 #if 0
50     if (out && *out)
51     {
52         const char *outp = *out;
53         yaz_log(YLOG_LOG, "---");
54         while (*outp)
55         {
56             yaz_log(YLOG_LOG, "%02X", *outp);
57             outp++;
58         }
59     }
60 #endif
61     return out;
62 }
63
64 void rpn_char_map_prepare(struct zebra_register *reg, int reg_type,
65                           struct rpn_char_map_info *map_info)
66 {
67     map_info->zm = reg->zebra_maps;
68     map_info->reg_type = reg_type;
69     dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
70 }
71
72 #define TERM_COUNT        
73        
74 struct grep_info {        
75 #ifdef TERM_COUNT        
76     int *term_no;        
77 #endif        
78     ISAM_P *isam_p_buf;
79     int isam_p_size;        
80     int isam_p_indx;
81     ZebraHandle zh;
82     int reg_type;
83     ZebraSet termset;
84 };        
85
86 static void add_isam_p(const char *name, const char *info,
87                        struct grep_info *p)
88 {
89     if (!log_level_set)
90     {
91         log_level_rpn = yaz_log_module_level("rpn");
92         log_level_set = 1;
93     }
94     if (p->isam_p_indx == p->isam_p_size)
95     {
96         ISAM_P *new_isam_p_buf;
97 #ifdef TERM_COUNT        
98         int *new_term_no;        
99 #endif
100         p->isam_p_size = 2*p->isam_p_size + 100;
101         new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
102                                             p->isam_p_size);
103         if (p->isam_p_buf)
104         {
105             memcpy(new_isam_p_buf, p->isam_p_buf,
106                     p->isam_p_indx * sizeof(*p->isam_p_buf));
107             xfree(p->isam_p_buf);
108         }
109         p->isam_p_buf = new_isam_p_buf;
110
111 #ifdef TERM_COUNT
112         new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
113         if (p->term_no)
114         {
115             memcpy(new_term_no, p->isam_p_buf,
116                     p->isam_p_indx * sizeof(*p->term_no));
117             xfree(p->term_no);
118         }
119         p->term_no = new_term_no;
120 #endif
121     }
122     assert(*info == sizeof(*p->isam_p_buf));
123     memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
124
125     if (p->termset)
126     {
127         const char *db;
128         char term_tmp[IT_MAX_WORD];
129         int ord = 0;
130         const char *index_name;
131         int len = key_SU_decode (&ord, (const unsigned char *) name);
132         
133         zebra_term_untrans  (p->zh, p->reg_type, term_tmp, name+len);
134         yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
135         zebraExplain_lookup_ord(p->zh->reg->zei,
136                                 ord, 0 /* index_type */, &db, &index_name);
137         yaz_log(log_level_rpn, "grep:  db=%s index=%s", db, index_name);
138         
139         resultSetAddTerm(p->zh, p->termset, name[len], db,
140                          index_name, term_tmp);
141     }
142     (p->isam_p_indx)++;
143 }
144
/* Dictionary grep callback: p is the struct grep_info to which the hit
 * (name, info) is added. Always returns 0. */
static int grep_handle(char *name, const char *info, void *p)
{
    struct grep_info *gi = p;

    add_isam_p(name, info, gi);
    return 0;
}
150
151 static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src,
152                     const char *ct1, const char *ct2, int first)
153 {
154     const char *s1, *s0 = *src;
155     const char **map;
156
157     /* skip white space */
158     while (*s0)
159     {
160         if (ct1 && strchr(ct1, *s0))
161             break;
162         if (ct2 && strchr(ct2, *s0))
163             break;
164         s1 = s0;
165         map = zebra_maps_input(zebra_maps, reg_type, &s1, strlen(s1), first);
166         if (**map != *CHR_SPACE)
167             break;
168         s0 = s1;
169     }
170     *src = s0;
171     return *s0;
172 }
173
174
/* esc_str: render in_size bytes of in_buf as a printable dump.
 * Each byte becomes "XX:c  " (two hex digits, a colon, the character
 * itself or '?' when outside 32..126, two blanks). out_size must be
 * greater than 20; once the output grows beyond out_size-20 characters
 * ".." is appended and the remaining input is dropped.
 */
static void esc_str(char *out_buf, size_t out_size,
                    const char *in_buf, int in_size)
{
    size_t used = 0;   /* characters written so far, excluding the NUL */
    int idx = 0;

    assert(out_buf);
    assert(in_buf);
    assert(out_size > 20);
    out_buf[0] = '\0';
    while (idx < in_size)
    {
        int byte = in_buf[idx++] & 0xff;
        int shown = (byte >= 32 && byte <= 126) ? byte : '?';

        used += sprintf(out_buf + used, "%02X:%c  ", byte, shown);
        if (used > out_size - 20)
        {
            strcpy(out_buf + used, "..");
            break;
        }
    }
}
200
201 #define REGEX_CHARS " []()|.*+?!"
202
203 /* term_100: handle term, where trunc = none(no operators at all) */
204 static int term_100(ZebraMaps zebra_maps, int reg_type,
205                     const char **src, char *dst, int space_split,
206                     char *dst_term)
207 {
208     const char *s0;
209     const char **map;
210     int i = 0;
211     int j = 0;
212
213     const char *space_start = 0;
214     const char *space_end = 0;
215
216     if (!term_pre(zebra_maps, reg_type, src, NULL, NULL, !space_split))
217         return 0;
218     s0 = *src;
219     while (*s0)
220     {
221         const char *s1 = s0;
222         int q_map_match = 0;
223         map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
224                                 &q_map_match);
225         if (space_split)
226         {
227             if (**map == *CHR_SPACE)
228                 break;
229         }
230         else  /* complete subfield only. */
231         {
232             if (**map == *CHR_SPACE)
233             {   /* save space mapping for later  .. */
234                 space_start = s1;
235                 space_end = s0;
236                 continue;
237             }
238             else if (space_start)
239             {   /* reload last space */
240                 while (space_start < space_end)
241                 {
242                     if (strchr(REGEX_CHARS, *space_start))
243                         dst[i++] = '\\';
244                     dst_term[j++] = *space_start;
245                     dst[i++] = *space_start++;
246                 }
247                 /* and reset */
248                 space_start = space_end = 0;
249             }
250         }
251         /* add non-space char */
252         memcpy(dst_term+j, s1, s0 - s1);
253         j += (s0 - s1);
254         if (!q_map_match)
255         {
256             while (s1 < s0)
257             {
258                 if (strchr(REGEX_CHARS, *s1))
259                     dst[i++] = '\\';
260                 dst[i++] = *s1++;
261             }
262         }
263         else
264         {
265             char tmpbuf[80];
266             esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
267             
268             strcpy(dst + i, map[0]);
269             i += strlen(map[0]);
270         }
271     }
272     dst[i] = '\0';
273     dst_term[j] = '\0';
274     *src = s0;
275     return i;
276 }
277
278 /* term_101: handle term, where trunc = Process # */
279 static int term_101(ZebraMaps zebra_maps, int reg_type,
280                     const char **src, char *dst, int space_split,
281                     char *dst_term)
282 {
283     const char *s0;
284     const char **map;
285     int i = 0;
286     int j = 0;
287
288     if (!term_pre(zebra_maps, reg_type, src, "#", "#", !space_split))
289         return 0;
290     s0 = *src;
291     while (*s0)
292     {
293         if (*s0 == '#')
294         {
295             dst[i++] = '.';
296             dst[i++] = '*';
297             dst_term[j++] = *s0++;
298         }
299         else
300         {
301             const char *s1 = s0;
302             int q_map_match = 0;
303             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
304                                     &q_map_match);
305             if (space_split && **map == *CHR_SPACE)
306                 break;
307
308             /* add non-space char */
309             memcpy(dst_term+j, s1, s0 - s1);
310             j += (s0 - s1);
311             if (!q_map_match)
312             {
313                 while (s1 < s0)
314                 {
315                     if (strchr(REGEX_CHARS, *s1))
316                         dst[i++] = '\\';
317                     dst[i++] = *s1++;
318                 }
319             }
320             else
321             {
322                 char tmpbuf[80];
323                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
324                 
325                 strcpy(dst + i, map[0]);
326                 i += strlen(map[0]);
327             }
328         }
329     }
330     dst[i] = '\0';
331     dst_term[j++] = '\0';
332     *src = s0;
333     return i;
334 }
335
336 /* term_103: handle term, where trunc = re-2 (regular expressions) */
337 static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src,
338                     char *dst, int *errors, int space_split,
339                     char *dst_term)
340 {
341     int i = 0;
342     int j = 0;
343     const char *s0;
344     const char **map;
345
346     if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split))
347         return 0;
348     s0 = *src;
349     if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
350         isdigit(((const unsigned char *)s0)[1]))
351     {
352         *errors = s0[1] - '0';
353         s0 += 3;
354         if (*errors > 3)
355             *errors = 3;
356     }
357     while (*s0)
358     {
359         if (strchr("^\\()[].*+?|-", *s0))
360         {
361             dst_term[j++] = *s0;
362             dst[i++] = *s0++;
363         }
364         else
365         {
366             const char *s1 = s0;
367             int q_map_match = 0;
368             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
369                                     &q_map_match);
370             if (space_split && **map == *CHR_SPACE)
371                 break;
372
373             /* add non-space char */
374             memcpy(dst_term+j, s1, s0 - s1);
375             j += (s0 - s1);
376             if (!q_map_match)
377             {
378                 while (s1 < s0)
379                 {
380                     if (strchr(REGEX_CHARS, *s1))
381                         dst[i++] = '\\';
382                     dst[i++] = *s1++;
383                 }
384             }
385             else
386             {
387                 char tmpbuf[80];
388                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
389                 
390                 strcpy(dst + i, map[0]);
391                 i += strlen(map[0]);
392             }
393         }
394     }
395     dst[i] = '\0';
396     dst_term[j] = '\0';
397     *src = s0;
398     
399     return i;
400 }
401
402 /* term_103: handle term, where trunc = re-1 (regular expressions) */
403 static int term_102(ZebraMaps zebra_maps, int reg_type, const char **src,
404                     char *dst, int space_split, char *dst_term)
405 {
406     return term_103(zebra_maps, reg_type, src, dst, NULL, space_split,
407                     dst_term);
408 }
409
410
411 /* term_104: handle term, where trunc = Process # and ! */
412 static int term_104(ZebraMaps zebra_maps, int reg_type,
413                     const char **src, char *dst, int space_split,
414                     char *dst_term)
415 {
416     const char *s0;
417     const char **map;
418     int i = 0;
419     int j = 0;
420
421     if (!term_pre(zebra_maps, reg_type, src, "?*#", "?*#", !space_split))
422         return 0;
423     s0 = *src;
424     while (*s0)
425     {
426         if (*s0 == '?')
427         {
428             dst_term[j++] = *s0++;
429             if (*s0 >= '0' && *s0 <= '9')
430             {
431                 int limit = 0;
432                 while (*s0 >= '0' && *s0 <= '9')
433                 {
434                     limit = limit * 10 + (*s0 - '0');
435                     dst_term[j++] = *s0++;
436                 }
437                 if (limit > 20)
438                     limit = 20;
439                 while (--limit >= 0)
440                 {
441                     dst[i++] = '.';
442                     dst[i++] = '?';
443                 }
444             }
445             else
446             {
447                 dst[i++] = '.';
448                 dst[i++] = '*';
449             }
450         }
451         else if (*s0 == '*')
452         {
453             dst[i++] = '.';
454             dst[i++] = '*';
455             dst_term[j++] = *s0++;
456         }
457         else if (*s0 == '#')
458         {
459             dst[i++] = '.';
460             dst_term[j++] = *s0++;
461         }
462         else
463         {
464             const char *s1 = s0;
465             int q_map_match = 0;
466             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
467                                     &q_map_match);
468             if (space_split && **map == *CHR_SPACE)
469                 break;
470
471             /* add non-space char */
472             memcpy(dst_term+j, s1, s0 - s1);
473             j += (s0 - s1);
474             if (!q_map_match)
475             {
476                 while (s1 < s0)
477                 {
478                     if (strchr(REGEX_CHARS, *s1))
479                         dst[i++] = '\\';
480                     dst[i++] = *s1++;
481                 }
482             }
483             else
484             {
485                 char tmpbuf[80];
486                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
487                 
488                 strcpy(dst + i, map[0]);
489                 i += strlen(map[0]);
490             }
491         }
492     }
493     dst[i] = '\0';
494     dst_term[j++] = '\0';
495     *src = s0;
496     return i;
497 }
498
499 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
500 static int term_105(ZebraMaps zebra_maps, int reg_type,
501                     const char **src, char *dst, int space_split,
502                     char *dst_term, int right_truncate)
503 {
504     const char *s0;
505     const char **map;
506     int i = 0;
507     int j = 0;
508
509     if (!term_pre(zebra_maps, reg_type, src, "*!", "*!", !space_split))
510         return 0;
511     s0 = *src;
512     while (*s0)
513     {
514         if (*s0 == '*')
515         {
516             dst[i++] = '.';
517             dst[i++] = '*';
518             dst_term[j++] = *s0++;
519         }
520         else if (*s0 == '!')
521         {
522             dst[i++] = '.';
523             dst_term[j++] = *s0++;
524         }
525         else
526         {
527             const char *s1 = s0;
528             int q_map_match = 0;
529             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
530                                     &q_map_match);
531             if (space_split && **map == *CHR_SPACE)
532                 break;
533
534             /* add non-space char */
535             memcpy(dst_term+j, s1, s0 - s1);
536             j += (s0 - s1);
537             if (!q_map_match)
538             {
539                 while (s1 < s0)
540                 {
541                     if (strchr(REGEX_CHARS, *s1))
542                         dst[i++] = '\\';
543                     dst[i++] = *s1++;
544                 }
545             }
546             else
547             {
548                 char tmpbuf[80];
549                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
550                 
551                 strcpy(dst + i, map[0]);
552                 i += strlen(map[0]);
553             }
554         }
555     }
556     if (right_truncate)
557     {
558         dst[i++] = '.';
559         dst[i++] = '*';
560     }
561     dst[i] = '\0';
562     
563     dst_term[j++] = '\0';
564     *src = s0;
565     return i;
566 }
567
568
569 /* gen_regular_rel - generate regular expression from relation
570  *  val:     border value (inclusive)
571  *  islt:    1 if <=; 0 if >=.
572  */
573 static void gen_regular_rel(char *dst, int val, int islt)
574 {
575     int dst_p;
576     int w, d, i;
577     int pos = 0;
578     char numstr[20];
579
580     yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
581     if (val >= 0)
582     {
583         if (islt)
584             strcpy(dst, "(-[0-9]+|(");
585         else
586             strcpy(dst, "((");
587     } 
588     else
589     {
590         if (!islt)
591         {
592             strcpy(dst, "([0-9]+|-(");
593             dst_p = strlen(dst);
594             islt = 1;
595         }
596         else
597         {
598             strcpy(dst, "(-(");
599             islt = 0;
600         }
601         val = -val;
602     }
603     dst_p = strlen(dst);
604     sprintf(numstr, "%d", val);
605     for (w = strlen(numstr); --w >= 0; pos++)
606     {
607         d = numstr[w];
608         if (pos > 0)
609         {
610             if (islt)
611             {
612                 if (d == '0')
613                     continue;
614                 d--;
615             } 
616             else
617             {
618                 if (d == '9')
619                     continue;
620                 d++;
621             }
622         }
623         
624         strcpy(dst + dst_p, numstr);
625         dst_p = strlen(dst) - pos - 1;
626
627         if (islt)
628         {
629             if (d != '0')
630             {
631                 dst[dst_p++] = '[';
632                 dst[dst_p++] = '0';
633                 dst[dst_p++] = '-';
634                 dst[dst_p++] = d;
635                 dst[dst_p++] = ']';
636             }
637             else
638                 dst[dst_p++] = d;
639         }
640         else
641         {
642             if (d != '9')
643             { 
644                 dst[dst_p++] = '[';
645                 dst[dst_p++] = d;
646                 dst[dst_p++] = '-';
647                 dst[dst_p++] = '9';
648                 dst[dst_p++] = ']';
649             }
650             else
651                 dst[dst_p++] = d;
652         }
653         for (i = 0; i<pos; i++)
654         {
655             dst[dst_p++] = '[';
656             dst[dst_p++] = '0';
657             dst[dst_p++] = '-';
658             dst[dst_p++] = '9';
659             dst[dst_p++] = ']';
660         }
661         dst[dst_p++] = '|';
662     }
663     dst[dst_p] = '\0';
664     if (islt)
665     {
666         /* match everything less than 10^(pos-1) */
667         strcat(dst, "0*");
668         for (i = 1; i<pos; i++)
669             strcat(dst, "[0-9]?");
670     }
671     else
672     {
673         /* match everything greater than 10^pos */
674         for (i = 0; i <= pos; i++)
675             strcat(dst, "[0-9]");
676         strcat(dst, "[0-9]*");
677     }
678     strcat(dst, "))");
679 }
680
/* Copy one (possibly backslash-escaped) character from src[*indx] to
 * **term_p. A backslash is copied together with the character that
 * follows it. Both *term_p and *indx are advanced past what was copied. */
void string_rel_add_char(char **term_p, const char *src, int *indx)
{
    char *out = *term_p;
    int at = *indx;

    if (src[at] == '\\')
        *out++ = src[at++];
    *out++ = src[at++];

    *term_p = out;
    *indx = at;
}
687
688 /*
689  *   >  abc     ([b-].*|a[c-].*|ab[d-].*|abc.+)
690  *              ([^-a].*|a[^-b].*ab[^-c].*|abc.+)
691  *   >= abc     ([b-].*|a[c-].*|ab[c-].*)
692  *              ([^-a].*|a[^-b].*|ab[c-].*)
693  *   <  abc     ([-0].*|a[-a].*|ab[-b].*)
694  *              ([^a-].*|a[^b-].*|ab[^c-].*)
695  *   <= abc     ([-0].*|a[-a].*|ab[-b].*|abc)
696  *              ([^a-].*|a[^b-].*|ab[^c-].*|abc)
697  */
698 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
699                            const char **term_sub, char *term_dict,
700                            oid_value attributeSet,
701                            int reg_type, int space_split, char *term_dst,
702                            int *error_code)
703 {
704     AttrType relation;
705     int relation_value;
706     int i;
707     char *term_tmp = term_dict + strlen(term_dict);
708     char term_component[2*IT_MAX_WORD+20];
709
710     attr_init_APT(&relation, zapt, 2);
711     relation_value = attr_find(&relation, NULL);
712
713     *error_code = 0;
714     yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
715     switch (relation_value)
716     {
717     case 1:
718         if (!term_100(zh->reg->zebra_maps, reg_type,
719                       term_sub, term_component,
720                       space_split, term_dst))
721             return 0;
722         yaz_log(log_level_rpn, "Relation <");
723         
724         *term_tmp++ = '(';
725         for (i = 0; term_component[i]; )
726         {
727             int j = 0;
728
729             if (i)
730                 *term_tmp++ = '|';
731             while (j < i)
732                 string_rel_add_char(&term_tmp, term_component, &j);
733
734             *term_tmp++ = '[';
735
736             *term_tmp++ = '^';
737             string_rel_add_char(&term_tmp, term_component, &i);
738             *term_tmp++ = '-';
739
740             *term_tmp++ = ']';
741             *term_tmp++ = '.';
742             *term_tmp++ = '*';
743
744             if ((term_tmp - term_dict) > IT_MAX_WORD)
745                 break;
746         }
747         *term_tmp++ = ')';
748         *term_tmp = '\0';
749         break;
750     case 2:
751         if (!term_100(zh->reg->zebra_maps, reg_type,
752                       term_sub, term_component,
753                       space_split, term_dst))
754             return 0;
755         yaz_log(log_level_rpn, "Relation <=");
756
757         *term_tmp++ = '(';
758         for (i = 0; term_component[i]; )
759         {
760             int j = 0;
761
762             while (j < i)
763                 string_rel_add_char(&term_tmp, term_component, &j);
764             *term_tmp++ = '[';
765
766             *term_tmp++ = '^';
767             string_rel_add_char(&term_tmp, term_component, &i);
768             *term_tmp++ = '-';
769
770             *term_tmp++ = ']';
771             *term_tmp++ = '.';
772             *term_tmp++ = '*';
773
774             *term_tmp++ = '|';
775
776             if ((term_tmp - term_dict) > IT_MAX_WORD)
777                 break;
778         }
779         for (i = 0; term_component[i]; )
780             string_rel_add_char(&term_tmp, term_component, &i);
781         *term_tmp++ = ')';
782         *term_tmp = '\0';
783         break;
784     case 5:
785         if (!term_100 (zh->reg->zebra_maps, reg_type,
786                        term_sub, term_component, space_split, term_dst))
787             return 0;
788         yaz_log(log_level_rpn, "Relation >");
789
790         *term_tmp++ = '(';
791         for (i = 0; term_component[i];)
792         {
793             int j = 0;
794
795             while (j < i)
796                 string_rel_add_char(&term_tmp, term_component, &j);
797             *term_tmp++ = '[';
798             
799             *term_tmp++ = '^';
800             *term_tmp++ = '-';
801             string_rel_add_char(&term_tmp, term_component, &i);
802
803             *term_tmp++ = ']';
804             *term_tmp++ = '.';
805             *term_tmp++ = '*';
806
807             *term_tmp++ = '|';
808
809             if ((term_tmp - term_dict) > IT_MAX_WORD)
810                 break;
811         }
812         for (i = 0; term_component[i];)
813             string_rel_add_char(&term_tmp, term_component, &i);
814         *term_tmp++ = '.';
815         *term_tmp++ = '+';
816         *term_tmp++ = ')';
817         *term_tmp = '\0';
818         break;
819     case 4:
820         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
821                       term_component, space_split, term_dst))
822             return 0;
823         yaz_log(log_level_rpn, "Relation >=");
824
825         *term_tmp++ = '(';
826         for (i = 0; term_component[i];)
827         {
828             int j = 0;
829
830             if (i)
831                 *term_tmp++ = '|';
832             while (j < i)
833                 string_rel_add_char(&term_tmp, term_component, &j);
834             *term_tmp++ = '[';
835
836             if (term_component[i+1])
837             {
838                 *term_tmp++ = '^';
839                 *term_tmp++ = '-';
840                 string_rel_add_char(&term_tmp, term_component, &i);
841             }
842             else
843             {
844                 string_rel_add_char(&term_tmp, term_component, &i);
845                 *term_tmp++ = '-';
846             }
847             *term_tmp++ = ']';
848             *term_tmp++ = '.';
849             *term_tmp++ = '*';
850
851             if ((term_tmp - term_dict) > IT_MAX_WORD)
852                 break;
853         }
854         *term_tmp++ = ')';
855         *term_tmp = '\0';
856         break;
857     case 3:
858     case 102:
859     case -1:
860         if (!**term_sub)
861             return 1;
862         yaz_log(log_level_rpn, "Relation =");
863         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
864                       term_component, space_split, term_dst))
865             return 0;
866         strcat(term_tmp, "(");
867         strcat(term_tmp, term_component);
868         strcat(term_tmp, ")");
869         break;
870     case 103:
871         yaz_log(log_level_rpn, "Relation always matches");
872         /* skip to end of term (we don't care what it is) */
873         while (**term_sub != '\0')
874             (*term_sub)++;
875         break;
876     default:
877         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
878         return 0;
879     }
880     return 1;
881 }
882
883 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
884                              const char **term_sub, 
885                              oid_value attributeSet, NMEM stream,
886                              struct grep_info *grep_info,
887                              int reg_type, int complete_flag,
888                              int num_bases, char **basenames,
889                              char *term_dst,
890                              const char *xpath_use,
891                              struct ord_list **ol);
892
893 static ZEBRA_RES term_limits_APT(ZebraHandle zh,
894                                  Z_AttributesPlusTerm *zapt,
895                                  zint *hits_limit_value,
896                                  const char **term_ref_id_str,
897                                  NMEM nmem)
898 {
899     AttrType term_ref_id_attr;
900     AttrType hits_limit_attr;
901     int term_ref_id_int;
902  
903     attr_init_APT(&hits_limit_attr, zapt, 11);
904     *hits_limit_value  = attr_find(&hits_limit_attr, NULL);
905
906     attr_init_APT(&term_ref_id_attr, zapt, 10);
907     term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
908     if (term_ref_id_int >= 0)
909     {
910         char *res = nmem_malloc(nmem, 20);
911         sprintf(res, "%d", term_ref_id_int);
912         *term_ref_id_str = res;
913     }
914
915     /* no limit given ? */
916     if (*hits_limit_value == -1)
917     {
918         if (*term_ref_id_str)
919         {
920             /* use global if term_ref is present */
921             *hits_limit_value = zh->approx_limit;
922         }
923         else
924         {
925             /* no counting if term_ref is not present */
926             *hits_limit_value = 0;
927         }
928     }
929     else if (*hits_limit_value == 0)
930     {
931         /* 0 is the same as global limit */
932         *hits_limit_value = zh->approx_limit;
933     }
934     yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
935             *term_ref_id_str ? *term_ref_id_str : "none",
936             *hits_limit_value);
937     return ZEBRA_OK;
938 }
939
940 static ZEBRA_RES term_trunc(ZebraHandle zh,
941                             Z_AttributesPlusTerm *zapt,
942                             const char **term_sub, 
943                             oid_value attributeSet, NMEM stream,
944                             struct grep_info *grep_info,
945                             int reg_type, int complete_flag,
946                             int num_bases, char **basenames,
947                             char *term_dst,
948                             const char *rank_type, 
949                             const char *xpath_use,
950                             NMEM rset_nmem,
951                             RSET *rset,
952                             struct rset_key_control *kc)
953 {
954     ZEBRA_RES res;
955     struct ord_list *ol;
956     zint hits_limit_value;
957     const char *term_ref_id_str = 0;
958     *rset = 0;
959
960     term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
961     grep_info->isam_p_indx = 0;
962     res = string_term(zh, zapt, term_sub, attributeSet, stream, grep_info,
963                       reg_type, complete_flag, num_bases, basenames,
964                       term_dst, xpath_use, &ol);
965     if (res != ZEBRA_OK)
966         return res;
967     if (!*term_sub)  /* no more terms ? */
968         return res;
969     yaz_log(log_level_rpn, "term: %s", term_dst);
970     *rset = rset_trunc(zh, grep_info->isam_p_buf,
971                        grep_info->isam_p_indx, term_dst,
972                        strlen(term_dst), rank_type, 1 /* preserve pos */,
973                        zapt->term->which, rset_nmem,
974                        kc, kc->scope, ol, reg_type, hits_limit_value,
975                        term_ref_id_str);
976     if (!*rset)
977         return ZEBRA_FAIL;
978     return ZEBRA_OK;
979 }
980
981 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
982                              const char **term_sub, 
983                              oid_value attributeSet, NMEM stream,
984                              struct grep_info *grep_info,
985                              int reg_type, int complete_flag,
986                              int num_bases, char **basenames,
987                              char *term_dst,
988                              const char *xpath_use,
989                              struct ord_list **ol)
990 {
991     char term_dict[2*IT_MAX_WORD+4000];
992     int j, r, base_no;
993     AttrType truncation;
994     int truncation_value;
995     const char *termp;
996     struct rpn_char_map_info rcmi;
997     int space_split = complete_flag ? 0 : 1;
998
999     int bases_ok = 0;     /* no of databases with OK attribute */
1000
1001     *ol = ord_list_create(stream);
1002
1003     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1004     attr_init_APT(&truncation, zapt, 5);
1005     truncation_value = attr_find(&truncation, NULL);
1006     yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1007
1008     for (base_no = 0; base_no < num_bases; base_no++)
1009     {
1010         int ord = -1;
1011         int regex_range = 0;
1012         int max_pos, prefix_len = 0;
1013         int relation_error;
1014         char ord_buf[32];
1015         int ord_len, i;
1016
1017         termp = *term_sub; /* start of term for each database */
1018
1019         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1020         {
1021             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1022                            basenames[base_no]);
1023             return ZEBRA_FAIL;
1024         }
1025         
1026         if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use,
1027                               attributeSet, &ord) != ZEBRA_OK)
1028             continue;
1029
1030         bases_ok++;
1031
1032         *ol = ord_list_append(stream, *ol, ord);
1033         ord_len = key_SU_encode (ord, ord_buf);
1034         
1035         term_dict[prefix_len++] = '(';
1036         for (i = 0; i<ord_len; i++)
1037         {
1038             term_dict[prefix_len++] = 1;  /* our internal regexp escape char */
1039             term_dict[prefix_len++] = ord_buf[i];
1040         }
1041         term_dict[prefix_len++] = ')';
1042         term_dict[prefix_len] = '\0';
1043         j = prefix_len;
1044         switch (truncation_value)
1045         {
1046         case -1:         /* not specified */
1047         case 100:        /* do not truncate */
1048             if (!string_relation(zh, zapt, &termp, term_dict,
1049                                  attributeSet,
1050                                  reg_type, space_split, term_dst,
1051                                  &relation_error))
1052             {
1053                 if (relation_error)
1054                 {
1055                     zebra_setError(zh, relation_error, 0);
1056                     return ZEBRA_FAIL;
1057                 }
1058                 *term_sub = 0;
1059                 return ZEBRA_OK;
1060             }
1061             break;
1062         case 1:          /* right truncation */
1063             term_dict[j++] = '(';
1064             if (!term_100(zh->reg->zebra_maps, reg_type,
1065                           &termp, term_dict + j, space_split, term_dst))
1066             {
1067                 *term_sub = 0;
1068                 return ZEBRA_OK;
1069             }
1070             strcat(term_dict, ".*)");
1071             break;
1072         case 2:          /* keft truncation */
1073             term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1074             if (!term_100(zh->reg->zebra_maps, reg_type,
1075                           &termp, term_dict + j, space_split, term_dst))
1076             {
1077                 *term_sub = 0;
1078                 return ZEBRA_OK;
1079             }
1080             strcat(term_dict, ")");
1081             break;
1082         case 3:          /* left&right truncation */
1083             term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1084             if (!term_100(zh->reg->zebra_maps, reg_type,
1085                           &termp, term_dict + j, space_split, term_dst))
1086             {
1087                 *term_sub = 0;
1088                 return ZEBRA_OK;
1089             }
1090             strcat(term_dict, ".*)");
1091             break;
1092         case 101:        /* process # in term */
1093             term_dict[j++] = '(';
1094             if (!term_101(zh->reg->zebra_maps, reg_type,
1095                           &termp, term_dict + j, space_split, term_dst))
1096             {
1097                 *term_sub = 0;
1098                 return ZEBRA_OK;
1099             }
1100             strcat(term_dict, ")");
1101             break;
1102         case 102:        /* Regexp-1 */
1103             term_dict[j++] = '(';
1104             if (!term_102(zh->reg->zebra_maps, reg_type,
1105                           &termp, term_dict + j, space_split, term_dst))
1106             {
1107                 *term_sub = 0;
1108                 return ZEBRA_OK;
1109             }
1110             strcat(term_dict, ")");
1111             break;
1112         case 103:       /* Regexp-2 */
1113             regex_range = 1;
1114             term_dict[j++] = '(';
1115             if (!term_103(zh->reg->zebra_maps, reg_type,
1116                           &termp, term_dict + j, &regex_range,
1117                           space_split, term_dst))
1118             {
1119                 *term_sub = 0;
1120                 return ZEBRA_OK;
1121             }
1122             strcat(term_dict, ")");
1123             break;
1124         case 104:        /* process # and ! in term */
1125             term_dict[j++] = '(';
1126             if (!term_104(zh->reg->zebra_maps, reg_type,
1127                           &termp, term_dict + j, space_split, term_dst))
1128             {
1129                 *term_sub = 0;
1130                 return ZEBRA_OK;
1131             }
1132             strcat(term_dict, ")");
1133             break;
1134         case 105:        /* process * and ! in term */
1135             term_dict[j++] = '(';
1136             if (!term_105(zh->reg->zebra_maps, reg_type,
1137                           &termp, term_dict + j, space_split, term_dst, 1))
1138             {
1139                 *term_sub = 0;
1140                 return ZEBRA_OK;
1141             }
1142             strcat(term_dict, ")");
1143             break;
1144         case 106:        /* process * and ! in term */
1145             term_dict[j++] = '(';
1146             if (!term_105(zh->reg->zebra_maps, reg_type,
1147                           &termp, term_dict + j, space_split, term_dst, 0))
1148             {
1149                 *term_sub = 0;
1150                 return ZEBRA_OK;
1151             }
1152             strcat(term_dict, ")");
1153             break;
1154         default:
1155             zebra_setError_zint(zh,
1156                                 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1157                                 truncation_value);
1158             return ZEBRA_FAIL;
1159         }
1160         if (1)
1161         {
1162             char buf[80];
1163             const char *input = term_dict + prefix_len;
1164             esc_str(buf, sizeof(buf), input, strlen(input));
1165         }
1166         yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_dict+prefix_len);
1167         r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
1168                              grep_info, &max_pos, 
1169                              ord_len /* number of "exact" chars */,
1170                              grep_handle);
1171         if (r)
1172             yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1173     }
1174     if (!bases_ok)
1175         return ZEBRA_FAIL;
1176     *term_sub = termp;
1177     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1178     return ZEBRA_OK;
1179 }
1180
1181
1182
1183 static void grep_info_delete(struct grep_info *grep_info)
1184 {
1185 #ifdef TERM_COUNT
1186     xfree(grep_info->term_no);
1187 #endif
1188     xfree(grep_info->isam_p_buf);
1189 }
1190
1191 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1192                                    Z_AttributesPlusTerm *zapt,
1193                                    struct grep_info *grep_info,
1194                                    int reg_type)
1195 {
1196     AttrType termset;
1197     int termset_value_numeric;
1198     const char *termset_value_string;
1199
1200 #ifdef TERM_COUNT
1201     grep_info->term_no = 0;
1202 #endif
1203     grep_info->isam_p_size = 0;
1204     grep_info->isam_p_buf = NULL;
1205     grep_info->zh = zh;
1206     grep_info->reg_type = reg_type;
1207     grep_info->termset = 0;
1208     if (!zapt)
1209         return ZEBRA_OK;
1210     attr_init_APT(&termset, zapt, 8);
1211     termset_value_numeric =
1212         attr_find_ex(&termset, NULL, &termset_value_string);
1213     if (termset_value_numeric != -1)
1214     {
1215 #if TERMSET_DISABLE
1216         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1217         return ZEBRA_FAIL;
1218 #else
1219         char resname[32];
1220         const char *termset_name = 0;
1221         if (termset_value_numeric != -2)
1222         {
1223     
1224             sprintf(resname, "%d", termset_value_numeric);
1225             termset_name = resname;
1226         }
1227         else
1228             termset_name = termset_value_string;
1229         yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1230         grep_info->termset = resultSetAdd(zh, termset_name, 1);
1231         if (!grep_info->termset)
1232         {
1233             zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1234             return ZEBRA_FAIL;
1235         }
1236 #endif
1237     }
1238     return ZEBRA_OK;
1239 }
1240                                
1241 /**
1242   \brief Create result set(s) for list of terms
1243   \param zh Zebra Handle
1244   \param zapt Attributes Plust Term (RPN leaf)
1245   \param termz term as used in query but converted to UTF-8
1246   \param attributeSet default attribute set
1247   \param stream memory for result
1248   \param reg_type register type ('w', 'p',..)
1249   \param complete_flag whether it's phrases or not
1250   \param rank_type term flags for ranking
1251   \param xpath_use use attribute for X-Path (-1 for no X-path)
1252   \param num_bases number of databases
1253   \param basenames array of databases
1254   \param rset_nmem memory for result sets
1255   \param result_sets output result set for each term in list (output)
1256   \param num_result_sets number of output result sets
1257   \param kc rset key control to be used for created result sets
1258 */
1259 static ZEBRA_RES term_list_trunc(ZebraHandle zh,
1260                                  Z_AttributesPlusTerm *zapt,
1261                                  const char *termz,
1262                                  oid_value attributeSet,
1263                                  NMEM stream,
1264                                  int reg_type, int complete_flag,
1265                                  const char *rank_type,
1266                                  const char *xpath_use,
1267                                  int num_bases, char **basenames, 
1268                                  NMEM rset_nmem,
1269                                  RSET **result_sets, int *num_result_sets,
1270                                  struct rset_key_control *kc)
1271 {
1272     char term_dst[IT_MAX_WORD+1];
1273     struct grep_info grep_info;
1274     const char *termp = termz;
1275     int alloc_sets = 0;
1276
1277     *num_result_sets = 0;
1278     *term_dst = 0;
1279     if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1280         return ZEBRA_FAIL;
1281     while(1)
1282     { 
1283         ZEBRA_RES res;
1284
1285         if (alloc_sets == *num_result_sets)
1286         {
1287             int add = 10;
1288             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1289                                               sizeof(*rnew));
1290             if (alloc_sets)
1291                 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1292             alloc_sets = alloc_sets + add;
1293             *result_sets = rnew;
1294         }
1295         res = term_trunc(zh, zapt, &termp, attributeSet,
1296                          stream, &grep_info,
1297                          reg_type, complete_flag,
1298                          num_bases, basenames,
1299                          term_dst, rank_type,
1300                          xpath_use, rset_nmem,
1301                          &(*result_sets)[*num_result_sets],
1302                          kc);
1303         if (res != ZEBRA_OK)
1304         {
1305             int i;
1306             for (i = 0; i < *num_result_sets; i++)
1307                 rset_delete((*result_sets)[i]);
1308             grep_info_delete (&grep_info);
1309             return res;
1310         }
1311         if ((*result_sets)[*num_result_sets] == 0)
1312             break;
1313         (*num_result_sets)++;
1314
1315         if (!*termp)
1316             break;
1317     }
1318     grep_info_delete(&grep_info);
1319     return ZEBRA_OK;
1320 }
1321
1322 static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh,
1323                                          Z_AttributesPlusTerm *zapt,
1324                                          oid_value attributeSet,
1325                                          int reg_type,
1326                                          int num_bases, char **basenames,
1327                                          NMEM rset_nmem,
1328                                          RSET *rset,
1329                                          struct rset_key_control *kc)
1330 {
1331     RSET *f_set;
1332     int base_no;
1333     int position_value;
1334     int num_sets = 0;
1335     AttrType position;
1336
1337     attr_init_APT(&position, zapt, 3);
1338     position_value = attr_find(&position, NULL);
1339     switch(position_value)
1340     {
1341     case 3:
1342     case -1:
1343         return ZEBRA_OK;
1344     case 1:
1345     case 2:
1346         break;
1347     default:
1348         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1349                             position_value);
1350         return ZEBRA_FAIL;
1351     }
1352
1353     if (!zebra_maps_is_first_in_field(zh->reg->zebra_maps, reg_type))
1354     {
1355         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1356                             position_value);
1357         return ZEBRA_FAIL;
1358     }
1359
1360     if (!zh->reg->isamb && !zh->reg->isamc)
1361     {
1362         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1363                             position_value);
1364         return ZEBRA_FAIL;
1365     }
1366     f_set = xmalloc(sizeof(RSET) * num_bases);
1367     for (base_no = 0; base_no < num_bases; base_no++)
1368     {
1369         int ord = -1;
1370         char ord_buf[32];
1371         char term_dict[100];
1372         int ord_len;
1373         char *val;
1374         ISAM_P isam_p;
1375
1376         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1377         {
1378             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1379                            basenames[base_no]);
1380             return ZEBRA_FAIL;
1381         }
1382         
1383         if (zebra_apt_get_ord(zh, zapt, reg_type, 0,
1384                               attributeSet, &ord) != ZEBRA_OK)
1385             continue;
1386
1387         ord_len = key_SU_encode (ord, ord_buf);
1388         memcpy(term_dict, ord_buf, ord_len);
1389         strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1390         val = dict_lookup(zh->reg->dict, term_dict);
1391         if (!val)
1392             continue;
1393         assert(*val == sizeof(ISAM_P));
1394         memcpy(&isam_p, val+1, sizeof(isam_p));
1395         
1396
1397         if (zh->reg->isamb)
1398             f_set[num_sets++] = rsisamb_create(rset_nmem, kc, kc->scope,
1399                                                zh->reg->isamb, isam_p, 0);
1400         else if (zh->reg->isamc)
1401             f_set[num_sets++] = rsisamc_create(rset_nmem, kc, kc->scope,
1402                                                zh->reg->isamc, isam_p, 0);
1403     }
1404     if (num_sets)
1405     {
1406         *rset = rset_create_or(rset_nmem, kc, kc->scope,
1407                                0 /* termid */, num_sets, f_set);
1408     }
1409     xfree(f_set);
1410     return ZEBRA_OK;
1411 }
1412                                          
1413 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1414                                        Z_AttributesPlusTerm *zapt,
1415                                        const char *termz_org,
1416                                        oid_value attributeSet,
1417                                        NMEM stream,
1418                                        int reg_type, int complete_flag,
1419                                        const char *rank_type,
1420                                        const char *xpath_use,
1421                                        int num_bases, char **basenames, 
1422                                        NMEM rset_nmem,
1423                                        RSET *rset,
1424                                        struct rset_key_control *kc)
1425 {
1426     RSET *result_sets = 0;
1427     int num_result_sets = 0;
1428     ZEBRA_RES res =
1429         term_list_trunc(zh, zapt, termz_org, attributeSet,
1430                         stream, reg_type, complete_flag,
1431                         rank_type, xpath_use,
1432                         num_bases, basenames,
1433                         rset_nmem,
1434                         &result_sets, &num_result_sets, kc);
1435
1436     if (res != ZEBRA_OK)
1437         return res;
1438
1439     if (num_result_sets > 0)
1440     {
1441         RSET first_set = 0;
1442         res = rpn_search_APT_position(zh, zapt, attributeSet, 
1443                                       reg_type,
1444                                       num_bases, basenames,
1445                                       rset_nmem, &first_set,
1446                                       kc);
1447         if (res != ZEBRA_OK)
1448             return res;
1449         if (first_set)
1450         {
1451             RSET *nsets = nmem_malloc(stream,
1452                                       sizeof(RSET) * (num_result_sets+1));
1453             nsets[0] = first_set;
1454             memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1455             result_sets = nsets;
1456             num_result_sets++;
1457         }
1458     }
1459     if (num_result_sets == 0)
1460         *rset = rset_create_null(rset_nmem, kc, 0); 
1461     else if (num_result_sets == 1)
1462         *rset = result_sets[0];
1463     else
1464         *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1465                                  num_result_sets, result_sets,
1466                                  1 /* ordered */, 0 /* exclusion */,
1467                                  3 /* relation */, 1 /* distance */);
1468     if (!*rset)
1469         return ZEBRA_FAIL;
1470     return ZEBRA_OK;
1471 }
1472
1473 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1474                                         Z_AttributesPlusTerm *zapt,
1475                                         const char *termz_org,
1476                                         oid_value attributeSet,
1477                                         NMEM stream,
1478                                         int reg_type, int complete_flag,
1479                                         const char *rank_type,
1480                                         const char *xpath_use,
1481                                         int num_bases, char **basenames,
1482                                         NMEM rset_nmem,
1483                                         RSET *rset,
1484                                         struct rset_key_control *kc)
1485 {
1486     RSET *result_sets = 0;
1487     int num_result_sets = 0;
1488     int i;
1489     ZEBRA_RES res =
1490         term_list_trunc(zh, zapt, termz_org, attributeSet,
1491                         stream, reg_type, complete_flag,
1492                         rank_type, xpath_use,
1493                         num_bases, basenames,
1494                         rset_nmem,
1495                         &result_sets, &num_result_sets, kc);
1496     if (res != ZEBRA_OK)
1497         return res;
1498
1499     for (i = 0; i<num_result_sets; i++)
1500     {
1501         RSET first_set = 0;
1502         res = rpn_search_APT_position(zh, zapt, attributeSet, 
1503                                       reg_type,
1504                                       num_bases, basenames,
1505                                       rset_nmem, &first_set,
1506                                       kc);
1507         if (res != ZEBRA_OK)
1508         {
1509             for (i = 0; i<num_result_sets; i++)
1510                 rset_delete(result_sets[i]);
1511             return res;
1512         }
1513
1514         if (first_set)
1515         {
1516             RSET tmp_set[2];
1517
1518             tmp_set[0] = first_set;
1519             tmp_set[1] = result_sets[i];
1520             
1521             result_sets[i] = rset_create_prox(
1522                 rset_nmem, kc, kc->scope,
1523                 2, tmp_set,
1524                 1 /* ordered */, 0 /* exclusion */,
1525                 3 /* relation */, 1 /* distance */);
1526         }
1527     }
1528     if (num_result_sets == 0)
1529         *rset = rset_create_null(rset_nmem, kc, 0); 
1530     else if (num_result_sets == 1)
1531         *rset = result_sets[0];
1532     else
1533         *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1534                                num_result_sets, result_sets);
1535     if (!*rset)
1536         return ZEBRA_FAIL;
1537     return ZEBRA_OK;
1538 }
1539
1540 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1541                                          Z_AttributesPlusTerm *zapt,
1542                                          const char *termz_org,
1543                                          oid_value attributeSet,
1544                                          NMEM stream,
1545                                          int reg_type, int complete_flag,
1546                                          const char *rank_type, 
1547                                          const char *xpath_use,
1548                                          int num_bases, char **basenames,
1549                                          NMEM rset_nmem,
1550                                          RSET *rset,
1551                                          struct rset_key_control *kc)
1552 {
1553     RSET *result_sets = 0;
1554     int num_result_sets = 0;
1555     int i;
1556     ZEBRA_RES res =
1557         term_list_trunc(zh, zapt, termz_org, attributeSet,
1558                         stream, reg_type, complete_flag,
1559                         rank_type, xpath_use,
1560                         num_bases, basenames,
1561                         rset_nmem,
1562                         &result_sets, &num_result_sets,
1563                         kc);
1564     if (res != ZEBRA_OK)
1565         return res;
1566     for (i = 0; i<num_result_sets; i++)
1567     {
1568         RSET first_set = 0;
1569         res = rpn_search_APT_position(zh, zapt, attributeSet, 
1570                                       reg_type,
1571                                       num_bases, basenames,
1572                                       rset_nmem, &first_set,
1573                                       kc);
1574         if (res != ZEBRA_OK)
1575         {
1576             for (i = 0; i<num_result_sets; i++)
1577                 rset_delete(result_sets[i]);
1578             return res;
1579         }
1580
1581         if (first_set)
1582         {
1583             RSET tmp_set[2];
1584
1585             tmp_set[0] = first_set;
1586             tmp_set[1] = result_sets[i];
1587             
1588             result_sets[i] = rset_create_prox(
1589                 rset_nmem, kc, kc->scope,
1590                 2, tmp_set,
1591                 1 /* ordered */, 0 /* exclusion */,
1592                 3 /* relation */, 1 /* distance */);
1593         }
1594     }
1595
1596
1597     if (num_result_sets == 0)
1598         *rset = rset_create_null(rset_nmem, kc, 0); 
1599     else if (num_result_sets == 1)
1600         *rset = result_sets[0];
1601     else
1602         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1603                                 num_result_sets, result_sets);
1604     if (!*rset)
1605         return ZEBRA_FAIL;
1606     return ZEBRA_OK;
1607 }
1608
1609 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1610                             const char **term_sub,
1611                             char *term_dict,
1612                             oid_value attributeSet,
1613                             struct grep_info *grep_info,
1614                             int *max_pos,
1615                             int reg_type,
1616                             char *term_dst,
1617                             int *error_code)
1618 {
1619     AttrType relation;
1620     int relation_value;
1621     int term_value;
1622     int r;
1623     char *term_tmp = term_dict + strlen(term_dict);
1624
1625     *error_code = 0;
1626     attr_init_APT(&relation, zapt, 2);
1627     relation_value = attr_find(&relation, NULL);
1628
1629     yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1630
1631     switch (relation_value)
1632     {
1633     case 1:
1634         yaz_log(log_level_rpn, "Relation <");
1635         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1636                       term_dst))
1637             return 0;
1638         term_value = atoi (term_tmp);
1639         gen_regular_rel(term_tmp, term_value-1, 1);
1640         break;
1641     case 2:
1642         yaz_log(log_level_rpn, "Relation <=");
1643         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1644                       term_dst))
1645             return 0;
1646         term_value = atoi (term_tmp);
1647         gen_regular_rel(term_tmp, term_value, 1);
1648         break;
1649     case 4:
1650         yaz_log(log_level_rpn, "Relation >=");
1651         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1652                       term_dst))
1653             return 0;
1654         term_value = atoi (term_tmp);
1655         gen_regular_rel(term_tmp, term_value, 0);
1656         break;
1657     case 5:
1658         yaz_log(log_level_rpn, "Relation >");
1659         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1660                       term_dst))
1661             return 0;
1662         term_value = atoi (term_tmp);
1663         gen_regular_rel(term_tmp, term_value+1, 0);
1664         break;
1665     case -1:
1666     case 3:
1667         yaz_log(log_level_rpn, "Relation =");
1668         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1669                       term_dst))
1670             return 0;
1671         term_value = atoi (term_tmp);
1672         sprintf(term_tmp, "(0*%d)", term_value);
1673         break;
1674     case 103:
1675         /* term_tmp untouched.. */
1676         while (**term_sub != '\0')
1677             (*term_sub)++;
1678         break;
1679     default:
1680         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1681         return 0;
1682     }
1683     yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_tmp);
1684     r = dict_lookup_grep(zh->reg->dict, term_dict, 0, grep_info, max_pos,
1685                           0, grep_handle);
1686     if (r)
1687         yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1688     yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1689     return 1;
1690 }
1691
1692 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1693                               const char **term_sub, 
1694                               oid_value attributeSet, NMEM stream,
1695                               struct grep_info *grep_info,
1696                               int reg_type, int complete_flag,
1697                               int num_bases, char **basenames,
1698                               char *term_dst, 
1699                               const char *xpath_use,
1700                               struct ord_list **ol)
1701 {
1702     char term_dict[2*IT_MAX_WORD+2];
1703     int base_no;
1704     const char *termp;
1705     struct rpn_char_map_info rcmi;
1706
1707     int bases_ok = 0;     /* no of databases with OK attribute */
1708
1709     *ol = ord_list_create(stream);
1710
1711     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1712
1713     for (base_no = 0; base_no < num_bases; base_no++)
1714     {
1715         int max_pos, prefix_len = 0;
1716         int relation_error = 0;
1717         int ord, ord_len, i;
1718         char ord_buf[32];
1719
1720         termp = *term_sub;
1721
1722         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1723         {
1724             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1725                            basenames[base_no]);
1726             return ZEBRA_FAIL;
1727         }
1728
1729         if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use,
1730                               attributeSet, &ord) != ZEBRA_OK)
1731             continue;
1732         bases_ok++;
1733
1734         *ol = ord_list_append(stream, *ol, ord);
1735
1736         ord_len = key_SU_encode (ord, ord_buf);
1737
1738         term_dict[prefix_len++] = '(';
1739         for (i = 0; i < ord_len; i++)
1740         {
1741             term_dict[prefix_len++] = 1;
1742             term_dict[prefix_len++] = ord_buf[i];
1743         }
1744         term_dict[prefix_len++] = ')';
1745         term_dict[prefix_len] = '\0';
1746
1747         if (!numeric_relation(zh, zapt, &termp, term_dict,
1748                               attributeSet, grep_info, &max_pos, reg_type,
1749                               term_dst, &relation_error))
1750         {
1751             if (relation_error)
1752             {
1753                 zebra_setError(zh, relation_error, 0);
1754                 return ZEBRA_FAIL;
1755             }
1756             *term_sub = 0;
1757             return ZEBRA_OK;
1758         }
1759     }
1760     if (!bases_ok)
1761         return ZEBRA_FAIL;
1762     *term_sub = termp;
1763     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1764     return ZEBRA_OK;
1765 }
1766
1767                                  
1768 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1769                                         Z_AttributesPlusTerm *zapt,
1770                                         const char *termz,
1771                                         oid_value attributeSet,
1772                                         NMEM stream,
1773                                         int reg_type, int complete_flag,
1774                                         const char *rank_type, 
1775                                         const char *xpath_use,
1776                                         int num_bases, char **basenames,
1777                                         NMEM rset_nmem,
1778                                         RSET *rset,
1779                                         struct rset_key_control *kc)
1780 {
1781     char term_dst[IT_MAX_WORD+1];
1782     const char *termp = termz;
1783     RSET *result_sets = 0;
1784     int num_result_sets = 0;
1785     ZEBRA_RES res;
1786     struct grep_info grep_info;
1787     int alloc_sets = 0;
1788     zint hits_limit_value;
1789     const char *term_ref_id_str = 0;
1790
1791     term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
1792
1793     yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1794     if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1795         return ZEBRA_FAIL;
1796     while (1)
1797     { 
1798         struct ord_list *ol;
1799         if (alloc_sets == num_result_sets)
1800         {
1801             int add = 10;
1802             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1803                                               sizeof(*rnew));
1804             if (alloc_sets)
1805                 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1806             alloc_sets = alloc_sets + add;
1807             result_sets = rnew;
1808         }
1809         yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1810         grep_info.isam_p_indx = 0;
1811         res = numeric_term(zh, zapt, &termp, attributeSet, stream, &grep_info,
1812                            reg_type, complete_flag, num_bases, basenames,
1813                            term_dst, xpath_use, &ol);
1814         if (res == ZEBRA_FAIL || termp == 0)
1815             break;
1816         yaz_log(YLOG_DEBUG, "term: %s", term_dst);
1817         result_sets[num_result_sets] =
1818             rset_trunc(zh, grep_info.isam_p_buf,
1819                        grep_info.isam_p_indx, term_dst,
1820                        strlen(term_dst), rank_type,
1821                        0 /* preserve position */,
1822                        zapt->term->which, rset_nmem, 
1823                        kc, kc->scope, ol, reg_type,
1824                        hits_limit_value,
1825                        term_ref_id_str);
1826         if (!result_sets[num_result_sets])
1827             break;
1828         num_result_sets++;
1829         if (!*termp)
1830             break;
1831     }
1832     grep_info_delete(&grep_info);
1833
1834     if (res != ZEBRA_OK)
1835         return res;
1836     if (num_result_sets == 0)
1837         *rset = rset_create_null(rset_nmem, kc, 0);
1838     else if (num_result_sets == 1)
1839         *rset = result_sets[0];
1840     else
1841         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1842                                 num_result_sets, result_sets);
1843     if (!*rset)
1844         return ZEBRA_FAIL;
1845     return ZEBRA_OK;
1846 }
1847
1848 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1849                                       Z_AttributesPlusTerm *zapt,
1850                                       const char *termz,
1851                                       oid_value attributeSet,
1852                                       NMEM stream,
1853                                       const char *rank_type, NMEM rset_nmem,
1854                                       RSET *rset,
1855                                       struct rset_key_control *kc)
1856 {
1857     RSFD rsfd;
1858     struct it_key key;
1859     int sys;
1860     *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1861                              res_get (zh->res, "setTmpDir"),0 );
1862     rsfd = rset_open(*rset, RSETF_WRITE);
1863     
1864     sys = atoi(termz);
1865     if (sys <= 0)
1866         sys = 1;
1867     key.mem[0] = sys;
1868     key.mem[1] = 1;
1869     key.len = 2;
1870     rset_write (rsfd, &key);
1871     rset_close (rsfd);
1872     return ZEBRA_OK;
1873 }
1874
1875 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1876                                oid_value attributeSet, NMEM stream,
1877                                Z_SortKeySpecList *sort_sequence,
1878                                const char *rank_type,
1879                                NMEM rset_nmem,
1880                                RSET *rset,
1881                                struct rset_key_control *kc)
1882 {
1883     int i;
1884     int sort_relation_value;
1885     AttrType sort_relation_type;
1886     Z_SortKeySpec *sks;
1887     Z_SortKey *sk;
1888     int oid[OID_SIZE];
1889     oident oe;
1890     char termz[20];
1891     
1892     attr_init_APT(&sort_relation_type, zapt, 7);
1893     sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
1894
1895     if (!sort_sequence->specs)
1896     {
1897         sort_sequence->num_specs = 10;
1898         sort_sequence->specs = (Z_SortKeySpec **)
1899             nmem_malloc(stream, sort_sequence->num_specs *
1900                          sizeof(*sort_sequence->specs));
1901         for (i = 0; i<sort_sequence->num_specs; i++)
1902             sort_sequence->specs[i] = 0;
1903     }
1904     if (zapt->term->which != Z_Term_general)
1905         i = 0;
1906     else
1907         i = atoi_n ((char *) zapt->term->u.general->buf,
1908                     zapt->term->u.general->len);
1909     if (i >= sort_sequence->num_specs)
1910         i = 0;
1911     sprintf(termz, "%d", i);
1912
1913     oe.proto = PROTO_Z3950;
1914     oe.oclass = CLASS_ATTSET;
1915     oe.value = attributeSet;
1916     if (!oid_ent_to_oid (&oe, oid))
1917         return ZEBRA_FAIL;
1918
1919     sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1920     sks->sortElement = (Z_SortElement *)
1921         nmem_malloc(stream, sizeof(*sks->sortElement));
1922     sks->sortElement->which = Z_SortElement_generic;
1923     sk = sks->sortElement->u.generic = (Z_SortKey *)
1924         nmem_malloc(stream, sizeof(*sk));
1925     sk->which = Z_SortKey_sortAttributes;
1926     sk->u.sortAttributes = (Z_SortAttributes *)
1927         nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
1928
1929     sk->u.sortAttributes->id = oid;
1930     sk->u.sortAttributes->list = zapt->attributes;
1931
1932     sks->sortRelation = (int *)
1933         nmem_malloc(stream, sizeof(*sks->sortRelation));
1934     if (sort_relation_value == 1)
1935         *sks->sortRelation = Z_SortKeySpec_ascending;
1936     else if (sort_relation_value == 2)
1937         *sks->sortRelation = Z_SortKeySpec_descending;
1938     else 
1939         *sks->sortRelation = Z_SortKeySpec_ascending;
1940
1941     sks->caseSensitivity = (int *)
1942         nmem_malloc(stream, sizeof(*sks->caseSensitivity));
1943     *sks->caseSensitivity = 0;
1944
1945     sks->which = Z_SortKeySpec_null;
1946     sks->u.null = odr_nullval ();
1947     sort_sequence->specs[i] = sks;
1948     *rset = rset_create_null(rset_nmem, kc, 0);
1949     return ZEBRA_OK;
1950 }
1951
1952
1953 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1954                            oid_value attributeSet,
1955                            struct xpath_location_step *xpath, int max,
1956                            NMEM mem)
1957 {
1958     oid_value curAttributeSet = attributeSet;
1959     AttrType use;
1960     const char *use_string = 0;
1961     
1962     attr_init_APT(&use, zapt, 1);
1963     attr_find_ex(&use, &curAttributeSet, &use_string);
1964
1965     if (!use_string || *use_string != '/')
1966         return -1;
1967
1968     return zebra_parse_xpath_str(use_string, xpath, max, mem);
1969 }
1970  
1971                
1972
1973 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
1974                         int reg_type, const char *term, 
1975                         const char *xpath_use,
1976                         NMEM rset_nmem,
1977                         struct rset_key_control *kc)
1978 {
1979     RSET rset;
1980     struct grep_info grep_info;
1981     char term_dict[2048];
1982     char ord_buf[32];
1983     int prefix_len = 0;
1984     int ord = zebraExplain_lookup_attr_str(zh->reg->zei, 
1985                                            zinfo_index_category_index,
1986                                            reg_type,
1987                                            xpath_use);
1988     int ord_len, i, r, max_pos;
1989     int term_type = Z_Term_characterString;
1990     const char *flags = "void";
1991
1992     if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, '0') == ZEBRA_FAIL)
1993         return rset_create_null(rset_nmem, kc, 0);
1994     
1995     if (ord < 0)
1996         return rset_create_null(rset_nmem, kc, 0);
1997     if (prefix_len)
1998         term_dict[prefix_len++] = '|';
1999     else
2000         term_dict[prefix_len++] = '(';
2001     
2002     ord_len = key_SU_encode (ord, ord_buf);
2003     for (i = 0; i<ord_len; i++)
2004     {
2005         term_dict[prefix_len++] = 1;
2006         term_dict[prefix_len++] = ord_buf[i];
2007     }
2008     term_dict[prefix_len++] = ')';
2009     strcpy(term_dict+prefix_len, term);
2010     
2011     grep_info.isam_p_indx = 0;
2012     r = dict_lookup_grep(zh->reg->dict, term_dict, 0,
2013                           &grep_info, &max_pos, 0, grep_handle);
2014     yaz_log(YLOG_DEBUG, "%s %d positions", term,
2015              grep_info.isam_p_indx);
2016     rset = rset_trunc(zh, grep_info.isam_p_buf,
2017                       grep_info.isam_p_indx, term, strlen(term),
2018                       flags, 1, term_type,rset_nmem,
2019                       kc, kc->scope, 0, reg_type, 0 /* hits_limit */,
2020                       0 /* term_ref_id_str */);
2021     grep_info_delete(&grep_info);
2022     return rset;
2023 }
2024
2025 static
2026 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2027                            int num_bases, char **basenames,
2028                            NMEM stream, const char *rank_type, RSET rset,
2029                            int xpath_len, struct xpath_location_step *xpath,
2030                            NMEM rset_nmem,
2031                            RSET *rset_out,
2032                            struct rset_key_control *kc)
2033 {
2034     int base_no;
2035     int i;
2036     int always_matches = rset ? 0 : 1;
2037
2038     if (xpath_len < 0)
2039     {
2040         *rset_out = rset;
2041         return ZEBRA_OK;
2042     }
2043
2044     yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2045     for (i = 0; i<xpath_len; i++)
2046     {
2047         yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2048
2049     }
2050
2051     /*
2052       //a    ->    a/.*
2053       //a/b  ->    b/a/.*
2054       /a     ->    a/
2055       /a/b   ->    b/a/
2056
2057       /      ->    none
2058
2059    a[@attr = value]/b[@other = othervalue]
2060
2061  /e/@a val      range(e/,range(@a,freetext(w,1015,val),@a),e/)
2062  /a/b val       range(b/a/,freetext(w,1016,val),b/a/)
2063  /a/b/@c val    range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2064  /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2065  /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2066  /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
2067       
2068     */
2069
2070     dict_grep_cmap (zh->reg->dict, 0, 0);
2071
2072     for (base_no = 0; base_no < num_bases; base_no++)
2073     {
2074         int level = xpath_len;
2075         int first_path = 1;
2076         
2077         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2078         {
2079             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2080                            basenames[base_no]);
2081             *rset_out = rset;
2082             return ZEBRA_FAIL;
2083         }
2084         while (--level >= 0)
2085         {
2086             WRBUF xpath_rev = wrbuf_alloc();
2087             int i;
2088             RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
2089
2090             for (i = level; i >= 1; --i)
2091             {
2092                 const char *cp = xpath[i].part;
2093                 if (*cp)
2094                 {
2095                     for (; *cp; cp++)
2096                     {
2097                         if (*cp == '*')
2098                             wrbuf_puts(xpath_rev, "[^/]*");
2099                         else if (*cp == ' ')
2100                             wrbuf_puts(xpath_rev, "\001 ");
2101                         else
2102                             wrbuf_putc(xpath_rev, *cp);
2103
2104                         /* wrbuf_putc does not null-terminate , but
2105                            wrbuf_puts below ensures it does.. so xpath_rev
2106                            is OK iff length is > 0 */
2107                     }
2108                     wrbuf_puts(xpath_rev, "/");
2109                 }
2110                 else if (i == 1)  /* // case */
2111                     wrbuf_puts(xpath_rev, ".*");
2112             }
2113             if (xpath[level].predicate &&
2114                 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2115                 xpath[level].predicate->u.relation.name[0])
2116             {
2117                 WRBUF wbuf = wrbuf_alloc();
2118                 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2119                 if (xpath[level].predicate->u.relation.value)
2120                 {
2121                     const char *cp = xpath[level].predicate->u.relation.value;
2122                     wrbuf_putc(wbuf, '=');
2123                     
2124                     while (*cp)
2125                     {
2126                         if (strchr(REGEX_CHARS, *cp))
2127                             wrbuf_putc(wbuf, '\\');
2128                         wrbuf_putc(wbuf, *cp);
2129                         cp++;
2130                     }
2131                 }
2132                 wrbuf_puts(wbuf, "");
2133                 rset_attr = xpath_trunc(
2134                     zh, stream, '0', wrbuf_buf(wbuf), ZEBRA_XPATH_ATTR_NAME, 
2135                     rset_nmem, kc);
2136                 wrbuf_free(wbuf, 1);
2137             } 
2138             else 
2139             {
2140                 if (!first_path)
2141                 {
2142                     wrbuf_free(xpath_rev, 1);
2143                     continue;
2144                 }
2145             }
2146             yaz_log(log_level_rpn, "xpath_rev (%d) = %.*s", level, 
2147                     wrbuf_len(xpath_rev), wrbuf_buf(xpath_rev));
2148             if (wrbuf_len(xpath_rev))
2149             {
2150                 rset_start_tag = xpath_trunc(zh, stream, '0', 
2151                                              wrbuf_buf(xpath_rev),
2152                                              ZEBRA_XPATH_ELM_BEGIN, 
2153                                              rset_nmem, kc);
2154                 if (always_matches)
2155                     rset = rset_start_tag;
2156                 else
2157                 {
2158                     rset_end_tag = xpath_trunc(zh, stream, '0', 
2159                                                wrbuf_buf(xpath_rev),
2160                                                ZEBRA_XPATH_ELM_END, 
2161                                                rset_nmem, kc);
2162                     
2163                     rset = rset_create_between(rset_nmem, kc, kc->scope,
2164                                                rset_start_tag, rset,
2165                                                rset_end_tag, rset_attr);
2166                 }
2167             }
2168             wrbuf_free(xpath_rev, 1);
2169             first_path = 0;
2170         }
2171     }
2172     *rset_out = rset;
2173     return ZEBRA_OK;
2174 }
2175
2176 #define MAX_XPATH_STEPS 10
2177
2178 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2179                                 oid_value attributeSet, NMEM stream,
2180                                 Z_SortKeySpecList *sort_sequence,
2181                                 int num_bases, char **basenames, 
2182                                 NMEM rset_nmem,
2183                                 RSET *rset,
2184                                 struct rset_key_control *kc)
2185 {
2186     ZEBRA_RES res = ZEBRA_OK;
2187     unsigned reg_id;
2188     char *search_type = NULL;
2189     char rank_type[128];
2190     int complete_flag;
2191     int sort_flag;
2192     char termz[IT_MAX_WORD+1];
2193     int xpath_len;
2194     const char *xpath_use = 0;
2195     struct xpath_location_step xpath[MAX_XPATH_STEPS];
2196
2197     if (!log_level_set)
2198     {
2199         log_level_rpn = yaz_log_module_level("rpn");
2200         log_level_set = 1;
2201     }
2202     zebra_maps_attr(zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2203                     rank_type, &complete_flag, &sort_flag);
2204     
2205     yaz_log(YLOG_DEBUG, "reg_id=%c", reg_id);
2206     yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2207     yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2208     yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2209
2210     if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2211         return ZEBRA_FAIL;
2212
2213     if (sort_flag)
2214         return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2215                              rank_type, rset_nmem, rset, kc);
2216     /* consider if an X-Path query is used */
2217     xpath_len = rpn_check_xpath(zh, zapt, attributeSet, 
2218                                 xpath, MAX_XPATH_STEPS, stream);
2219     if (xpath_len >= 0)
2220     {
2221         if (xpath[xpath_len-1].part[0] == '@') 
2222             xpath_use = ZEBRA_XPATH_ATTR_CDATA;  /* last step is attribute  */
2223         else
2224             xpath_use = ZEBRA_XPATH_CDATA;  /* searching for cdata */        
2225
2226         if (1)
2227         {
2228             AttrType relation;
2229             int relation_value;
2230
2231             attr_init_APT(&relation, zapt, 2);
2232             relation_value = attr_find(&relation, NULL);
2233
2234             if (relation_value == 103) /* alwaysmatches */
2235             {
2236                 *rset = 0; /* signal no "term" set */
2237                 return rpn_search_xpath(zh, num_bases, basenames,
2238                                         stream, rank_type, *rset, 
2239                                         xpath_len, xpath, rset_nmem, rset, kc);
2240             }
2241         }
2242     }
2243
2244     /* search using one of the various search type strategies
2245        termz is our UTF-8 search term
2246        attributeSet is top-level default attribute set 
2247        stream is ODR for search
2248        reg_id is the register type
2249        complete_flag is 1 for complete subfield, 0 for incomplete
2250        xpath_use is use-attribute to be used for X-Path search, 0 for none
2251     */
2252     if (!strcmp(search_type, "phrase"))
2253     {
2254         res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2255                                     reg_id, complete_flag, rank_type,
2256                                     xpath_use,
2257                                     num_bases, basenames, rset_nmem,
2258                                     rset, kc);
2259     }
2260     else if (!strcmp(search_type, "and-list"))
2261     {
2262         res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2263                                       reg_id, complete_flag, rank_type,
2264                                       xpath_use,
2265                                       num_bases, basenames, rset_nmem,
2266                                       rset, kc);
2267     }
2268     else if (!strcmp(search_type, "or-list"))
2269     {
2270         res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2271                                      reg_id, complete_flag, rank_type,
2272                                      xpath_use,
2273                                      num_bases, basenames, rset_nmem,
2274                                      rset, kc);
2275     }
2276     else if (!strcmp(search_type, "local"))
2277     {
2278         res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2279                                    rank_type, rset_nmem, rset, kc);
2280     }
2281     else if (!strcmp(search_type, "numeric"))
2282     {
2283         res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2284                                      reg_id, complete_flag, rank_type,
2285                                      xpath_use,
2286                                      num_bases, basenames, rset_nmem,
2287                                      rset, kc);
2288     }
2289     else
2290     {
2291         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2292         res = ZEBRA_FAIL;
2293     }
2294     if (res != ZEBRA_OK)
2295         return res;
2296     if (!*rset)
2297         return ZEBRA_FAIL;
2298     return rpn_search_xpath(zh, num_bases, basenames,
2299                             stream, rank_type, *rset, 
2300                             xpath_len, xpath, rset_nmem, rset, kc);
2301 }
2302
2303 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2304                                       oid_value attributeSet, 
2305                                       NMEM stream, NMEM rset_nmem,
2306                                       Z_SortKeySpecList *sort_sequence,
2307                                       int num_bases, char **basenames,
2308                                       RSET **result_sets, int *num_result_sets,
2309                                       Z_Operator *parent_op,
2310                                       struct rset_key_control *kc);
2311
2312 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2313                          oid_value attributeSet, 
2314                          NMEM stream, NMEM rset_nmem,
2315                          Z_SortKeySpecList *sort_sequence,
2316                          int num_bases, char **basenames,
2317                          RSET *result_set)
2318 {
2319     RSET *result_sets = 0;
2320     int num_result_sets = 0;
2321     ZEBRA_RES res;
2322     struct rset_key_control *kc = zebra_key_control_create(zh);
2323
2324     res = rpn_search_structure(zh, zs, attributeSet,
2325                                stream, rset_nmem,
2326                                sort_sequence, 
2327                                num_bases, basenames,
2328                                &result_sets, &num_result_sets,
2329                                0 /* no parent op */,
2330                                kc);
2331     if (res != ZEBRA_OK)
2332     {
2333         int i;
2334         for (i = 0; i<num_result_sets; i++)
2335             rset_delete(result_sets[i]);
2336         *result_set = 0;
2337     }
2338     else
2339     {
2340         assert(num_result_sets == 1);
2341         assert(result_sets);
2342         assert(*result_sets);
2343         *result_set = *result_sets;
2344     }
2345     (*kc->dec)(kc);
2346     return res;
2347 }
2348
2349 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2350                                oid_value attributeSet, 
2351                                NMEM stream, NMEM rset_nmem,
2352                                Z_SortKeySpecList *sort_sequence,
2353                                int num_bases, char **basenames,
2354                                RSET **result_sets, int *num_result_sets,
2355                                Z_Operator *parent_op,
2356                                struct rset_key_control *kc)
2357 {
2358     *num_result_sets = 0;
2359     if (zs->which == Z_RPNStructure_complex)
2360     {
2361         ZEBRA_RES res;
2362         Z_Operator *zop = zs->u.complex->roperator;
2363         RSET *result_sets_l = 0;
2364         int num_result_sets_l = 0;
2365         RSET *result_sets_r = 0;
2366         int num_result_sets_r = 0;
2367
2368         res = rpn_search_structure(zh, zs->u.complex->s1,
2369                                    attributeSet, stream, rset_nmem,
2370                                    sort_sequence,
2371                                    num_bases, basenames,
2372                                    &result_sets_l, &num_result_sets_l,
2373                                    zop, kc);
2374         if (res != ZEBRA_OK)
2375         {
2376             int i;
2377             for (i = 0; i<num_result_sets_l; i++)
2378                 rset_delete(result_sets_l[i]);
2379             return res;
2380         }
2381         res = rpn_search_structure(zh, zs->u.complex->s2,
2382                                    attributeSet, stream, rset_nmem,
2383                                    sort_sequence,
2384                                    num_bases, basenames,
2385                                    &result_sets_r, &num_result_sets_r,
2386                                    zop, kc);
2387         if (res != ZEBRA_OK)
2388         {
2389             int i;
2390             for (i = 0; i<num_result_sets_l; i++)
2391                 rset_delete(result_sets_l[i]);
2392             for (i = 0; i<num_result_sets_r; i++)
2393                 rset_delete(result_sets_r[i]);
2394             return res;
2395         }
2396
2397         /* make a new list of result for all children */
2398         *num_result_sets = num_result_sets_l + num_result_sets_r;
2399         *result_sets = nmem_malloc(stream, *num_result_sets * 
2400                                    sizeof(**result_sets));
2401         memcpy(*result_sets, result_sets_l, 
2402                num_result_sets_l * sizeof(**result_sets));
2403         memcpy(*result_sets + num_result_sets_l, result_sets_r, 
2404                num_result_sets_r * sizeof(**result_sets));
2405
2406         if (!parent_op || parent_op->which != zop->which
2407             || (zop->which != Z_Operator_and &&
2408                 zop->which != Z_Operator_or))
2409         {
2410             /* parent node different from this one (or non-present) */
2411             /* we must combine result sets now */
2412             RSET rset;
2413             switch (zop->which)
2414             {
2415             case Z_Operator_and:
2416                 rset = rset_create_and(rset_nmem, kc,
2417                                        kc->scope,
2418                                        *num_result_sets, *result_sets);
2419                 break;
2420             case Z_Operator_or:
2421                 rset = rset_create_or(rset_nmem, kc,
2422                                       kc->scope, 0, /* termid */
2423                                       *num_result_sets, *result_sets);
2424                 break;
2425             case Z_Operator_and_not:
2426                 rset = rset_create_not(rset_nmem, kc,
2427                                        kc->scope,
2428                                        (*result_sets)[0],
2429                                        (*result_sets)[1]);
2430                 break;
2431             case Z_Operator_prox:
2432                 if (zop->u.prox->which != Z_ProximityOperator_known)
2433                 {
2434                     zebra_setError(zh, 
2435                                    YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2436                                    0);
2437                     return ZEBRA_FAIL;
2438                 }
2439                 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2440                 {
2441                     zebra_setError_zint(zh,
2442                                         YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2443                                         *zop->u.prox->u.known);
2444                     return ZEBRA_FAIL;
2445                 }
2446                 else
2447                 {
2448                     rset = rset_create_prox(rset_nmem, kc,
2449                                             kc->scope,
2450                                             *num_result_sets, *result_sets, 
2451                                             *zop->u.prox->ordered,
2452                                             (!zop->u.prox->exclusion ? 
2453                                              0 : *zop->u.prox->exclusion),
2454                                             *zop->u.prox->relationType,
2455                                             *zop->u.prox->distance );
2456                 }
2457                 break;
2458             default:
2459                 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
2460                 return ZEBRA_FAIL;
2461             }
2462             *num_result_sets = 1;
2463             *result_sets = nmem_malloc(stream, *num_result_sets * 
2464                                        sizeof(**result_sets));
2465             (*result_sets)[0] = rset;
2466         }
2467     }
2468     else if (zs->which == Z_RPNStructure_simple)
2469     {
2470         RSET rset;
2471         ZEBRA_RES res;
2472
2473         if (zs->u.simple->which == Z_Operand_APT)
2474         {
2475             yaz_log(YLOG_DEBUG, "rpn_search_APT");
2476             res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2477                                  attributeSet, stream, sort_sequence,
2478                                  num_bases, basenames, rset_nmem, &rset,
2479                                  kc);
2480             if (res != ZEBRA_OK)
2481                 return res;
2482         }
2483         else if (zs->u.simple->which == Z_Operand_resultSetId)
2484         {
2485             yaz_log(YLOG_DEBUG, "rpn_search_ref");
2486             rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2487             if (!rset)
2488             {
2489                 zebra_setError(zh, 
2490                                YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2491                                zs->u.simple->u.resultSetId);
2492                 return ZEBRA_FAIL;
2493             }
2494             rset_dup(rset);
2495         }
2496         else
2497         {
2498             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2499             return ZEBRA_FAIL;
2500         }
2501         *num_result_sets = 1;
2502         *result_sets = nmem_malloc(stream, *num_result_sets * 
2503                                    sizeof(**result_sets));
2504         (*result_sets)[0] = rset;
2505     }
2506     else
2507     {
2508         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2509         return ZEBRA_FAIL;
2510     }
2511     return ZEBRA_OK;
2512 }
2513
2514
2515
2516 /*
2517  * Local variables:
2518  * c-basic-offset: 4
2519  * indent-tabs-mode: nil
2520  * End:
2521  * vim: shiftwidth=4 tabstop=8 expandtab
2522  */
2523