For searches, allow truncmax value to be controlled with attribute 13.
[idzebra-moved-to-github.git] / index / rpnsearch.c
1 /* $Id: rpnsearch.c,v 1.8 2007-01-17 12:59:38 adam Exp $
2    Copyright (C) 1995-2007
3    Index Data ApS
4
5 This file is part of the Zebra server.
6
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
20
21 */
22
23 #include <stdio.h>
24 #include <assert.h>
25 #ifdef WIN32
26 #include <io.h>
27 #endif
28 #if HAVE_UNISTD_H
29 #include <unistd.h>
30 #endif
31 #include <ctype.h>
32
33 #include <yaz/diagbib1.h>
34 #include "index.h"
35 #include <zebra_xpath.h>
36 #include <attrfind.h>
37 #include <charmap.h>
38 #include <rset.h>
39
/* Set to 1 once log_level_rpn has been looked up (see add_isam_p). */
40 static int log_level_set = 0;
/* yaz_log level for the "rpn" log module; valid once log_level_set is 1. */
41 static int log_level_rpn = 0;
42
/* NOTE(review): not referenced in the part of the file visible here. */
43 #define TERMSET_DISABLE 1
44
45 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
46 {
47     struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
48     const char **out = zebra_maps_input(p->zm, p->reg_type, from, len, 0);
49 #if 0
50     if (out && *out)
51     {
52         const char *outp = *out;
53         yaz_log(YLOG_LOG, "---");
54         while (*outp)
55         {
56             yaz_log(YLOG_LOG, "%02X", *outp);
57             outp++;
58         }
59     }
60 #endif
61     return out;
62 }
63
64 void rpn_char_map_prepare(struct zebra_register *reg, int reg_type,
65                           struct rpn_char_map_info *map_info)
66 {
67     map_info->zm = reg->zebra_maps;
68     map_info->reg_type = reg_type;
69     dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
70 }
71
/* Enables the term_no array in struct grep_info and its handling in
   add_isam_p(). */
72 #define TERM_COUNT        
73        
/* Accumulator handed to the dictionary grep callback (grep_handle /
   add_isam_p): collects one ISAM pointer per matching dictionary entry. */
74 struct grep_info {        
75 #ifdef TERM_COUNT        
76     int *term_no;        /* array sized like isam_p_buf (grown together with it) */
77 #endif        
78     ISAM_P *isam_p_buf;  /* collected ISAM pointers */
79     int isam_p_size;     /* allocated number of entries in isam_p_buf */
80     int isam_p_indx;     /* number of entries currently in use */
81     int trunc_max;       /* add_isam_p() refuses hits once isam_p_indx >= trunc_max - 1 */
82     ZebraHandle zh;
83     int reg_type;        /* register type passed to zebra_term_untrans() */
84     ZebraSet termset;    /* when non-null, every hit is also added via resultSetAddTerm() */
85 };        
86
87 static int add_isam_p(const char *name, const char *info,
88                       struct grep_info *p)
89 {
90     if (!log_level_set)
91     {
92         log_level_rpn = yaz_log_module_level("rpn");
93         log_level_set = 1;
94     }
95     /* we may have to stop this madness.. NOTE: -1 so that if
96        truncmax == trunxlimit we do *not* generate result sets */
97     if (p->isam_p_indx >= p->trunc_max - 1)
98         return 1;
99
100     if (p->isam_p_indx == p->isam_p_size)
101     {
102         ISAM_P *new_isam_p_buf;
103 #ifdef TERM_COUNT        
104         int *new_term_no;        
105 #endif
106         p->isam_p_size = 2*p->isam_p_size + 100;
107         new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
108                                             p->isam_p_size);
109         if (p->isam_p_buf)
110         {
111             memcpy(new_isam_p_buf, p->isam_p_buf,
112                     p->isam_p_indx * sizeof(*p->isam_p_buf));
113             xfree(p->isam_p_buf);
114         }
115         p->isam_p_buf = new_isam_p_buf;
116
117 #ifdef TERM_COUNT
118         new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
119         if (p->term_no)
120         {
121             memcpy(new_term_no, p->isam_p_buf,
122                     p->isam_p_indx * sizeof(*p->term_no));
123             xfree(p->term_no);
124         }
125         p->term_no = new_term_no;
126 #endif
127     }
128     assert(*info == sizeof(*p->isam_p_buf));
129     memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
130
131     if (p->termset)
132     {
133         const char *db;
134         char term_tmp[IT_MAX_WORD];
135         int ord = 0;
136         const char *index_name;
137         int len = key_SU_decode (&ord, (const unsigned char *) name);
138         
139         zebra_term_untrans  (p->zh, p->reg_type, term_tmp, name+len);
140         yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
141         zebraExplain_lookup_ord(p->zh->reg->zei,
142                                 ord, 0 /* index_type */, &db, &index_name);
143         yaz_log(log_level_rpn, "grep:  db=%s index=%s", db, index_name);
144         
145         resultSetAddTerm(p->zh, p->termset, name[len], db,
146                          index_name, term_tmp);
147     }
148     (p->isam_p_indx)++;
149     return 0;
150 }
151
/* Dictionary grep callback: p is the struct grep_info accumulator;
   the hit is handed on to add_isam_p() and its status returned. */
static int grep_handle(char *name, const char *info, void *p)
{
    struct grep_info *accum = p;

    return add_isam_p(name, info, accum);
}
156
157 static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src,
158                     const char *ct1, const char *ct2, int first)
159 {
160     const char *s1, *s0 = *src;
161     const char **map;
162
163     /* skip white space */
164     while (*s0)
165     {
166         if (ct1 && strchr(ct1, *s0))
167             break;
168         if (ct2 && strchr(ct2, *s0))
169             break;
170         s1 = s0;
171         map = zebra_maps_input(zebra_maps, reg_type, &s1, strlen(s1), first);
172         if (**map != *CHR_SPACE)
173             break;
174         s0 = s1;
175     }
176     *src = s0;
177     return *s0;
178 }
179
180
/* esc_str: write a printable dump of in_buf[0..in_size-1] into out_buf.
 * Every input byte becomes "XX:c  " (hex value, then the character itself
 * or '?' when it is outside 32..126).  out_size must exceed 20; once fewer
 * than 20 bytes of room remain the dump is cut short and ".." appended.
 */
static void esc_str(char *out_buf, size_t out_size,
                    const char *in_buf, int in_size)
{
    size_t used = 0;   /* current length of out_buf; avoids re-scanning it */
    int k;

    assert(out_buf);
    assert(in_buf);
    assert(out_size > 20);
    *out_buf = '\0';
    for (k = 0; k < in_size; k++)
    {
        int c = in_buf[k] & 0xff;
        int pc = (c < 32 || c > 126) ? '?' : c;
        /* at most out_size-20 bytes are in use here, so the 7 bytes
           written below (6 + NUL) always fit */
        int n = sprintf(out_buf + used, "%02X:%c  ", c, pc);

        if (n > 0)
            used += (size_t) n;
        if (used > out_size - 20)
        {
            strcpy(out_buf + used, "..");
            break;
        }
    }
}
206
207 #define REGEX_CHARS " []()|.*+?!"
208
209 /* term_100: handle term, where trunc = none(no operators at all) */
210 static int term_100(ZebraMaps zebra_maps, int reg_type,
211                     const char **src, char *dst, int space_split,
212                     char *dst_term)
213 {
214     const char *s0;
215     const char **map;
216     int i = 0;
217     int j = 0;
218
219     const char *space_start = 0;
220     const char *space_end = 0;
221
222     if (!term_pre(zebra_maps, reg_type, src, NULL, NULL, !space_split))
223         return 0;
224     s0 = *src;
225     while (*s0)
226     {
227         const char *s1 = s0;
228         int q_map_match = 0;
229         map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
230                                 &q_map_match);
231         if (space_split)
232         {
233             if (**map == *CHR_SPACE)
234                 break;
235         }
236         else  /* complete subfield only. */
237         {
238             if (**map == *CHR_SPACE)
239             {   /* save space mapping for later  .. */
240                 space_start = s1;
241                 space_end = s0;
242                 continue;
243             }
244             else if (space_start)
245             {   /* reload last space */
246                 while (space_start < space_end)
247                 {
248                     if (strchr(REGEX_CHARS, *space_start))
249                         dst[i++] = '\\';
250                     dst_term[j++] = *space_start;
251                     dst[i++] = *space_start++;
252                 }
253                 /* and reset */
254                 space_start = space_end = 0;
255             }
256         }
257         /* add non-space char */
258         memcpy(dst_term+j, s1, s0 - s1);
259         j += (s0 - s1);
260         if (!q_map_match)
261         {
262             while (s1 < s0)
263             {
264                 if (strchr(REGEX_CHARS, *s1))
265                     dst[i++] = '\\';
266                 dst[i++] = *s1++;
267             }
268         }
269         else
270         {
271             char tmpbuf[80];
272             esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
273             
274             strcpy(dst + i, map[0]);
275             i += strlen(map[0]);
276         }
277     }
278     dst[i] = '\0';
279     dst_term[j] = '\0';
280     *src = s0;
281     return i;
282 }
283
284 /* term_101: handle term, where trunc = Process # */
285 static int term_101(ZebraMaps zebra_maps, int reg_type,
286                     const char **src, char *dst, int space_split,
287                     char *dst_term)
288 {
289     const char *s0;
290     const char **map;
291     int i = 0;
292     int j = 0;
293
294     if (!term_pre(zebra_maps, reg_type, src, "#", "#", !space_split))
295         return 0;
296     s0 = *src;
297     while (*s0)
298     {
299         if (*s0 == '#')
300         {
301             dst[i++] = '.';
302             dst[i++] = '*';
303             dst_term[j++] = *s0++;
304         }
305         else
306         {
307             const char *s1 = s0;
308             int q_map_match = 0;
309             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
310                                     &q_map_match);
311             if (space_split && **map == *CHR_SPACE)
312                 break;
313
314             /* add non-space char */
315             memcpy(dst_term+j, s1, s0 - s1);
316             j += (s0 - s1);
317             if (!q_map_match)
318             {
319                 while (s1 < s0)
320                 {
321                     if (strchr(REGEX_CHARS, *s1))
322                         dst[i++] = '\\';
323                     dst[i++] = *s1++;
324                 }
325             }
326             else
327             {
328                 char tmpbuf[80];
329                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
330                 
331                 strcpy(dst + i, map[0]);
332                 i += strlen(map[0]);
333             }
334         }
335     }
336     dst[i] = '\0';
337     dst_term[j++] = '\0';
338     *src = s0;
339     return i;
340 }
341
342 /* term_103: handle term, where trunc = re-2 (regular expressions) */
343 static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src,
344                     char *dst, int *errors, int space_split,
345                     char *dst_term)
346 {
347     int i = 0;
348     int j = 0;
349     const char *s0;
350     const char **map;
351
352     if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split))
353         return 0;
354     s0 = *src;
355     if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
356         isdigit(((const unsigned char *)s0)[1]))
357     {
358         *errors = s0[1] - '0';
359         s0 += 3;
360         if (*errors > 3)
361             *errors = 3;
362     }
363     while (*s0)
364     {
365         if (strchr("^\\()[].*+?|-", *s0))
366         {
367             dst_term[j++] = *s0;
368             dst[i++] = *s0++;
369         }
370         else
371         {
372             const char *s1 = s0;
373             int q_map_match = 0;
374             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
375                                     &q_map_match);
376             if (space_split && **map == *CHR_SPACE)
377                 break;
378
379             /* add non-space char */
380             memcpy(dst_term+j, s1, s0 - s1);
381             j += (s0 - s1);
382             if (!q_map_match)
383             {
384                 while (s1 < s0)
385                 {
386                     if (strchr(REGEX_CHARS, *s1))
387                         dst[i++] = '\\';
388                     dst[i++] = *s1++;
389                 }
390             }
391             else
392             {
393                 char tmpbuf[80];
394                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
395                 
396                 strcpy(dst + i, map[0]);
397                 i += strlen(map[0]);
398             }
399         }
400     }
401     dst[i] = '\0';
402     dst_term[j] = '\0';
403     *src = s0;
404     
405     return i;
406 }
407
408 /* term_103: handle term, where trunc = re-1 (regular expressions) */
409 static int term_102(ZebraMaps zebra_maps, int reg_type, const char **src,
410                     char *dst, int space_split, char *dst_term)
411 {
412     return term_103(zebra_maps, reg_type, src, dst, NULL, space_split,
413                     dst_term);
414 }
415
416
417 /* term_104: handle term, where trunc = Process # and ! */
418 static int term_104(ZebraMaps zebra_maps, int reg_type,
419                     const char **src, char *dst, int space_split,
420                     char *dst_term)
421 {
422     const char *s0;
423     const char **map;
424     int i = 0;
425     int j = 0;
426
427     if (!term_pre(zebra_maps, reg_type, src, "?*#", "?*#", !space_split))
428         return 0;
429     s0 = *src;
430     while (*s0)
431     {
432         if (*s0 == '?')
433         {
434             dst_term[j++] = *s0++;
435             if (*s0 >= '0' && *s0 <= '9')
436             {
437                 int limit = 0;
438                 while (*s0 >= '0' && *s0 <= '9')
439                 {
440                     limit = limit * 10 + (*s0 - '0');
441                     dst_term[j++] = *s0++;
442                 }
443                 if (limit > 20)
444                     limit = 20;
445                 while (--limit >= 0)
446                 {
447                     dst[i++] = '.';
448                     dst[i++] = '?';
449                 }
450             }
451             else
452             {
453                 dst[i++] = '.';
454                 dst[i++] = '*';
455             }
456         }
457         else if (*s0 == '*')
458         {
459             dst[i++] = '.';
460             dst[i++] = '*';
461             dst_term[j++] = *s0++;
462         }
463         else if (*s0 == '#')
464         {
465             dst[i++] = '.';
466             dst_term[j++] = *s0++;
467         }
468         else
469         {
470             const char *s1 = s0;
471             int q_map_match = 0;
472             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
473                                     &q_map_match);
474             if (space_split && **map == *CHR_SPACE)
475                 break;
476
477             /* add non-space char */
478             memcpy(dst_term+j, s1, s0 - s1);
479             j += (s0 - s1);
480             if (!q_map_match)
481             {
482                 while (s1 < s0)
483                 {
484                     if (strchr(REGEX_CHARS, *s1))
485                         dst[i++] = '\\';
486                     dst[i++] = *s1++;
487                 }
488             }
489             else
490             {
491                 char tmpbuf[80];
492                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
493                 
494                 strcpy(dst + i, map[0]);
495                 i += strlen(map[0]);
496             }
497         }
498     }
499     dst[i] = '\0';
500     dst_term[j++] = '\0';
501     *src = s0;
502     return i;
503 }
504
505 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
506 static int term_105(ZebraMaps zebra_maps, int reg_type,
507                     const char **src, char *dst, int space_split,
508                     char *dst_term, int right_truncate)
509 {
510     const char *s0;
511     const char **map;
512     int i = 0;
513     int j = 0;
514
515     if (!term_pre(zebra_maps, reg_type, src, "*!", "*!", !space_split))
516         return 0;
517     s0 = *src;
518     while (*s0)
519     {
520         if (*s0 == '*')
521         {
522             dst[i++] = '.';
523             dst[i++] = '*';
524             dst_term[j++] = *s0++;
525         }
526         else if (*s0 == '!')
527         {
528             dst[i++] = '.';
529             dst_term[j++] = *s0++;
530         }
531         else
532         {
533             const char *s1 = s0;
534             int q_map_match = 0;
535             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
536                                     &q_map_match);
537             if (space_split && **map == *CHR_SPACE)
538                 break;
539
540             /* add non-space char */
541             memcpy(dst_term+j, s1, s0 - s1);
542             j += (s0 - s1);
543             if (!q_map_match)
544             {
545                 while (s1 < s0)
546                 {
547                     if (strchr(REGEX_CHARS, *s1))
548                         dst[i++] = '\\';
549                     dst[i++] = *s1++;
550                 }
551             }
552             else
553             {
554                 char tmpbuf[80];
555                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
556                 
557                 strcpy(dst + i, map[0]);
558                 i += strlen(map[0]);
559             }
560         }
561     }
562     if (right_truncate)
563     {
564         dst[i++] = '.';
565         dst[i++] = '*';
566     }
567     dst[i] = '\0';
568     
569     dst_term[j++] = '\0';
570     *src = s0;
571     return i;
572 }
573
574
575 /* gen_regular_rel - generate regular expression from relation
576  *  val:     border value (inclusive)
577  *  islt:    1 if <=; 0 if >=.
 *
 *  dst:     receives the NUL-terminated expression; no size is passed,
 *           so the caller must supply a buffer that is large enough.
 *
 *  The expression has the shape "(" [other-sign numbers "|"] [-] "(" alt
 *  "|" alt ... ")" ")".  One alternative is produced per digit of |val|,
 *  working from the least significant digit upwards: the more significant
 *  digits are kept, the current digit becomes a range, and the less
 *  significant digits become [0-9].  A final alternative covers numbers
 *  with fewer (<=) or more (>=) digits.  A negative val is handled by
 *  negating it and inverting the comparison.
 *  NOTE(review): val = -val overflows for INT_MIN.
578  */
579 static void gen_regular_rel(char *dst, int val, int islt)
580 {
581     int dst_p;
582     int w, d, i;
583     int pos = 0;
584     char numstr[20];
585
586     yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
587     if (val >= 0)
588     {
589         if (islt)
                /* <= non-negative: every negative number matches too */
590             strcpy(dst, "(-[0-9]+|(");
591         else
592             strcpy(dst, "((");
593     } 
594     else
595     {
596         if (!islt)
597         {
                /* >= negative: every non-negative number matches, plus
                   negatives whose magnitude is <= |val| */
598             strcpy(dst, "([0-9]+|-(");
                /* NOTE(review): redundant store; dst_p is assigned again
                   right after this if/else */
599             dst_p = strlen(dst);
600             islt = 1;
601         }
602         else
603         {
                /* <= negative: negatives whose magnitude is >= |val| */
604             strcpy(dst, "(-(");
605             islt = 0;
606         }
607         val = -val;
608     }
609     dst_p = strlen(dst);
610     sprintf(numstr, "%d", val);
        /* w walks the digits right to left; pos counts how many less
           significant digits lie to the right of digit w (pos is also
           incremented when an iteration is skipped with continue) */
611     for (w = strlen(numstr); --w >= 0; pos++)
612     {
613         d = numstr[w];
614         if (pos > 0)
615         {
                /* above the last digit the range must be strict (digit
                   moved by one); skip when no such digit exists */
616             if (islt)
617             {
618                 if (d == '0')
619                     continue;
620                 d--;
621             } 
622             else
623             {
624                 if (d == '9')
625                     continue;
626                 d++;
627             }
628         }
629         
            /* write the whole number, then step back to digit w and
               overwrite from there on */
630         strcpy(dst + dst_p, numstr);
631         dst_p = strlen(dst) - pos - 1;
632
633         if (islt)
634         {
635             if (d != '0')
636             {
637                 dst[dst_p++] = '[';
638                 dst[dst_p++] = '0';
639                 dst[dst_p++] = '-';
640                 dst[dst_p++] = d;
641                 dst[dst_p++] = ']';
642             }
643             else
644                 dst[dst_p++] = d;
645         }
646         else
647         {
648             if (d != '9')
649             { 
650                 dst[dst_p++] = '[';
651                 dst[dst_p++] = d;
652                 dst[dst_p++] = '-';
653                 dst[dst_p++] = '9';
654                 dst[dst_p++] = ']';
655             }
656             else
657                 dst[dst_p++] = d;
658         }
            /* any digit in each of the pos lower positions */
659         for (i = 0; i<pos; i++)
660         {
661             dst[dst_p++] = '[';
662             dst[dst_p++] = '0';
663             dst[dst_p++] = '-';
664             dst[dst_p++] = '9';
665             dst[dst_p++] = ']';
666         }
667         dst[dst_p++] = '|';
668     }
        /* terminate right after the last '|' written; this also drops any
           leftover bytes of the last numstr copy */
669     dst[dst_p] = '\0';
670     if (islt)
671     {
672         /* match everything less than 10^(pos-1) */
673         strcat(dst, "0*");
674         for (i = 1; i<pos; i++)
675             strcat(dst, "[0-9]?");
676     }
677     else
678     {
679         /* match everything greater than 10^pos */
680         for (i = 0; i <= pos; i++)
681             strcat(dst, "[0-9]");
682         strcat(dst, "[0-9]*");
683     }
684     strcat(dst, "))");
685 }
686
/* string_rel_add_char: copy one (possibly backslash-escaped) character
 * from src[*indx] to **term_p, advancing both *term_p and *indx.
 *
 * An escape ("\x") is copied as a two-byte unit.  The terminating NUL is
 * never copied or stepped over: a lone backslash at the very end of src
 * is copied as a single byte, so that callers looping on src[*indx] stop
 * at the terminator instead of running past it.
 */
void string_rel_add_char(char **term_p, const char *src, int *indx)
{
    if (src[*indx] == '\\' && src[*indx + 1] != '\0')
        *(*term_p)++ = src[(*indx)++];
    if (src[*indx] != '\0')
        *(*term_p)++ = src[(*indx)++];
}
693
694 /*
695  *   >  abc     ([b-].*|a[c-].*|ab[d-].*|abc.+)
696  *              ([^-a].*|a[^-b].*|ab[^-c].*|abc.+)
697  *   >= abc     ([b-].*|a[c-].*|ab[c-].*)
698  *              ([^-a].*|a[^-b].*|ab[c-].*)
699  *   <  abc     ([-0].*|a[-a].*|ab[-b].*)
700  *              ([^a-].*|a[^b-].*|ab[^c-].*)
701  *   <= abc     ([-0].*|a[-a].*|ab[-b].*|abc)
702  *              ([^a-].*|a[^b-].*|ab[^c-].*|abc)
703  */
/* string_relation: append to term_dict a dictionary regular expression
 * for the term at *term_sub, according to the relation attribute
 * (attribute type 2) of zapt.
 *
 *  term_sub:    in/out cursor into the query term; advanced by term_100()
 *               (or to the end of the term for relation 103)
 *  term_dict:   NUL-terminated buffer; the expression is appended to it
 *  reg_type, space_split: passed on to term_100()
 *  term_dst:    receives the term text produced by term_100()
 *  error_code:  set to 0, or to YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE when
 *               the relation value is not handled
 *
 * Returns 1 on success; 0 when term_100() found no term or the relation
 * is unsupported.
 * NOTE(review): attributeSet is not referenced in this function.
 */
704 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
705                            const char **term_sub, char *term_dict,
706                            oid_value attributeSet,
707                            int reg_type, int space_split, char *term_dst,
708                            int *error_code)
709 {
710     AttrType relation;
711     int relation_value;
712     int i;
        /* write position: the current end of term_dict */
713     char *term_tmp = term_dict + strlen(term_dict);
714     char term_component[2*IT_MAX_WORD+20];
715
716     attr_init_APT(&relation, zapt, 2);
717     relation_value = attr_find(&relation, NULL);
718
719     *error_code = 0;
720     yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
721     switch (relation_value)
722     {
723     case 1:
724         if (!term_100(zh->reg->zebra_maps, reg_type,
725                       term_sub, term_component,
726                       space_split, term_dst))
727             return 0;
728         yaz_log(log_level_rpn, "Relation <");
729         
            /* one alternative per character of the term: the characters
               before it copied as is, then a negated class, then ".*" */
730         *term_tmp++ = '(';
731         for (i = 0; term_component[i]; )
732         {
733             int j = 0;
734
735             if (i)
736                 *term_tmp++ = '|';
737             while (j < i)
738                 string_rel_add_char(&term_tmp, term_component, &j);
739
740             *term_tmp++ = '[';
741
742             *term_tmp++ = '^';
743
                /* byte 1 followed by FIRST_IN_FIELD_CHAR is also put in
                   the negated class (string_term uses 1 as the internal
                   regexp escape char) */
744             *term_tmp++ = 1;
745             *term_tmp++ = FIRST_IN_FIELD_CHAR;
746
747             string_rel_add_char(&term_tmp, term_component, &i);
748             *term_tmp++ = '-';
749
750             *term_tmp++ = ']';
751             *term_tmp++ = '.';
752             *term_tmp++ = '*';
753
                /* give up on further alternatives once the expression has
                   grown past IT_MAX_WORD (checked after writing) */
754             if ((term_tmp - term_dict) > IT_MAX_WORD)
755                 break;
756         }
757         *term_tmp++ = ')';
758         *term_tmp = '\0';
            /* NOTE(review): logged at YLOG_LOG, unlike the other cases */
759         yaz_log(YLOG_LOG, "term_dict=%s", term_dict);
760         break;
761     case 2:
762         if (!term_100(zh->reg->zebra_maps, reg_type,
763                       term_sub, term_component,
764                       space_split, term_dst))
765             return 0;
766         yaz_log(log_level_rpn, "Relation <=");
767
            /* same alternatives as for "<", each followed by '|', and
               finally the term itself */
768         *term_tmp++ = '(';
769         for (i = 0; term_component[i]; )
770         {
771             int j = 0;
772
773             while (j < i)
774                 string_rel_add_char(&term_tmp, term_component, &j);
775             *term_tmp++ = '[';
776
777             *term_tmp++ = '^';
778
779             *term_tmp++ = 1;
780             *term_tmp++ = FIRST_IN_FIELD_CHAR;
781
782             string_rel_add_char(&term_tmp, term_component, &i);
783             *term_tmp++ = '-';
784
785             *term_tmp++ = ']';
786             *term_tmp++ = '.';
787             *term_tmp++ = '*';
788
789             *term_tmp++ = '|';
790
791             if ((term_tmp - term_dict) > IT_MAX_WORD)
792                 break;
793         }
794         for (i = 0; term_component[i]; )
795             string_rel_add_char(&term_tmp, term_component, &i);
796         *term_tmp++ = ')';
797         *term_tmp = '\0';
798         break;
799     case 5:
800         if (!term_100 (zh->reg->zebra_maps, reg_type,
801                        term_sub, term_component, space_split, term_dst))
802             return 0;
803         yaz_log(log_level_rpn, "Relation >");
804
            /* per character: prefix + "[^-c].*|"; finally the term itself
               followed by ".+" */
805         *term_tmp++ = '(';
806         for (i = 0; term_component[i];)
807         {
808             int j = 0;
809
810             while (j < i)
811                 string_rel_add_char(&term_tmp, term_component, &j);
812             *term_tmp++ = '[';
813             
814             *term_tmp++ = '^';
815             *term_tmp++ = '-';
816             string_rel_add_char(&term_tmp, term_component, &i);
817
818             *term_tmp++ = ']';
819             *term_tmp++ = '.';
820             *term_tmp++ = '*';
821
822             *term_tmp++ = '|';
823
824             if ((term_tmp - term_dict) > IT_MAX_WORD)
825                 break;
826         }
827         for (i = 0; term_component[i];)
828             string_rel_add_char(&term_tmp, term_component, &i);
829         *term_tmp++ = '.';
830         *term_tmp++ = '+';
831         *term_tmp++ = ')';
832         *term_tmp = '\0';
833         break;
834     case 4:
835         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
836                       term_component, space_split, term_dst))
837             return 0;
838         yaz_log(log_level_rpn, "Relation >=");
839
840         *term_tmp++ = '(';
841         for (i = 0; term_component[i];)
842         {
843             int j = 0;
844
845             if (i)
846                 *term_tmp++ = '|';
847             while (j < i)
848                 string_rel_add_char(&term_tmp, term_component, &j);
849             *term_tmp++ = '[';
850
                /* when the byte after position i is non-zero the class is
                   "[^-c]", otherwise "[c-]" */
851             if (term_component[i+1])
852             {
853                 *term_tmp++ = '^';
854                 *term_tmp++ = '-';
855                 string_rel_add_char(&term_tmp, term_component, &i);
856             }
857             else
858             {
859                 string_rel_add_char(&term_tmp, term_component, &i);
860                 *term_tmp++ = '-';
861             }
862             *term_tmp++ = ']';
863             *term_tmp++ = '.';
864             *term_tmp++ = '*';
865
866             if ((term_tmp - term_dict) > IT_MAX_WORD)
867                 break;
868         }
869         *term_tmp++ = ')';
870         *term_tmp = '\0';
871         break;
872     case 3:
873     case 102:
874     case -1:
            /* nothing left of the term: succeed without adding anything */
875         if (!**term_sub)
876             return 1;
877         yaz_log(log_level_rpn, "Relation =");
878         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
879                       term_component, space_split, term_dst))
880             return 0;
            /* term_tmp points at the terminating NUL of term_dict, so
               strcat appends there */
881         strcat(term_tmp, "(");
882         strcat(term_tmp, term_component);
883         strcat(term_tmp, ")");
884         break;
885     case 103:
886         yaz_log(log_level_rpn, "Relation always matches");
887         /* skip to end of term (we don't care what it is) */
888         while (**term_sub != '\0')
889             (*term_sub)++;
890         break;
891     default:
892         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
893         return 0;
894     }
895     return 1;
896 }
897
/* Forward declaration: term_trunc() below calls string_term() before its
   definition appears. */
898 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
899                              const char **term_sub, 
900                              oid_value attributeSet, NMEM stream,
901                              struct grep_info *grep_info,
902                              int reg_type, int complete_flag,
903                              int num_bases, char **basenames,
904                              char *term_dst,
905                              const char *xpath_use,
906                              struct ord_list **ol);
907
908 static ZEBRA_RES term_limits_APT(ZebraHandle zh,
909                                  Z_AttributesPlusTerm *zapt,
910                                  zint *hits_limit_value,
911                                  const char **term_ref_id_str,
912                                  NMEM nmem)
913 {
914     AttrType term_ref_id_attr;
915     AttrType hits_limit_attr;
916     int term_ref_id_int;
917  
918     attr_init_APT(&hits_limit_attr, zapt, 11);
919     *hits_limit_value  = attr_find(&hits_limit_attr, NULL);
920
921     attr_init_APT(&term_ref_id_attr, zapt, 10);
922     term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
923     if (term_ref_id_int >= 0)
924     {
925         char *res = nmem_malloc(nmem, 20);
926         sprintf(res, "%d", term_ref_id_int);
927         *term_ref_id_str = res;
928     }
929
930     /* no limit given ? */
931     if (*hits_limit_value == -1)
932     {
933         if (*term_ref_id_str)
934         {
935             /* use global if term_ref is present */
936             *hits_limit_value = zh->approx_limit;
937         }
938         else
939         {
940             /* no counting if term_ref is not present */
941             *hits_limit_value = 0;
942         }
943     }
944     else if (*hits_limit_value == 0)
945     {
946         /* 0 is the same as global limit */
947         *hits_limit_value = zh->approx_limit;
948     }
949     yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
950             *term_ref_id_str ? *term_ref_id_str : "none",
951             *hits_limit_value);
952     return ZEBRA_OK;
953 }
954
955 static ZEBRA_RES term_trunc(ZebraHandle zh,
956                             Z_AttributesPlusTerm *zapt,
957                             const char **term_sub, 
958                             oid_value attributeSet, NMEM stream,
959                             struct grep_info *grep_info,
960                             int reg_type, int complete_flag,
961                             int num_bases, char **basenames,
962                             char *term_dst,
963                             const char *rank_type, 
964                             const char *xpath_use,
965                             NMEM rset_nmem,
966                             RSET *rset,
967                             struct rset_key_control *kc)
968 {
969     ZEBRA_RES res;
970     struct ord_list *ol;
971     zint hits_limit_value;
972     const char *term_ref_id_str = 0;
973     *rset = 0;
974
975     term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
976     grep_info->isam_p_indx = 0;
977     res = string_term(zh, zapt, term_sub, attributeSet, stream, grep_info,
978                       reg_type, complete_flag, num_bases, basenames,
979                       term_dst, xpath_use, &ol);
980     if (res != ZEBRA_OK)
981         return res;
982     if (!*term_sub)  /* no more terms ? */
983         return res;
984     yaz_log(log_level_rpn, "term: %s", term_dst);
985     *rset = rset_trunc(zh, grep_info->isam_p_buf,
986                        grep_info->isam_p_indx, term_dst,
987                        strlen(term_dst), rank_type, 1 /* preserve pos */,
988                        zapt->term->which, rset_nmem,
989                        kc, kc->scope, ol, reg_type, hits_limit_value,
990                        term_ref_id_str);
991     if (!*rset)
992         return ZEBRA_FAIL;
993     return ZEBRA_OK;
994 }
995
996 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
997                              const char **term_sub, 
998                              oid_value attributeSet, NMEM stream,
999                              struct grep_info *grep_info,
1000                              int reg_type, int complete_flag,
1001                              int num_bases, char **basenames,
1002                              char *term_dst,
1003                              const char *xpath_use,
1004                              struct ord_list **ol)
1005 {
1006     char term_dict[2*IT_MAX_WORD+4000];
1007     int j, r, base_no;
1008     AttrType truncation;
1009     int truncation_value;
1010     const char *termp;
1011     struct rpn_char_map_info rcmi;
1012     int space_split = complete_flag ? 0 : 1;
1013
1014     int bases_ok = 0;     /* no of databases with OK attribute */
1015
1016     *ol = ord_list_create(stream);
1017
1018     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1019     attr_init_APT(&truncation, zapt, 5);
1020     truncation_value = attr_find(&truncation, NULL);
1021     yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1022
1023     for (base_no = 0; base_no < num_bases; base_no++)
1024     {
1025         int ord = -1;
1026         int regex_range = 0;
1027         int max_pos, prefix_len = 0;
1028         int relation_error;
1029         char ord_buf[32];
1030         int ord_len, i;
1031
1032         termp = *term_sub; /* start of term for each database */
1033
1034         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1035         {
1036             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1037                            basenames[base_no]);
1038             return ZEBRA_FAIL;
1039         }
1040         
1041         if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use,
1042                               attributeSet, &ord) != ZEBRA_OK)
1043             continue;
1044
1045         bases_ok++;
1046
1047         *ol = ord_list_append(stream, *ol, ord);
1048         ord_len = key_SU_encode (ord, ord_buf);
1049         
1050         term_dict[prefix_len++] = '(';
1051         for (i = 0; i<ord_len; i++)
1052         {
1053             term_dict[prefix_len++] = 1;  /* our internal regexp escape char */
1054             term_dict[prefix_len++] = ord_buf[i];
1055         }
1056         term_dict[prefix_len++] = ')';
1057         term_dict[prefix_len] = '\0';
1058         j = prefix_len;
1059         switch (truncation_value)
1060         {
1061         case -1:         /* not specified */
1062         case 100:        /* do not truncate */
1063             if (!string_relation(zh, zapt, &termp, term_dict,
1064                                  attributeSet,
1065                                  reg_type, space_split, term_dst,
1066                                  &relation_error))
1067             {
1068                 if (relation_error)
1069                 {
1070                     zebra_setError(zh, relation_error, 0);
1071                     return ZEBRA_FAIL;
1072                 }
1073                 *term_sub = 0;
1074                 return ZEBRA_OK;
1075             }
1076             break;
1077         case 1:          /* right truncation */
1078             term_dict[j++] = '(';
1079             if (!term_100(zh->reg->zebra_maps, reg_type,
1080                           &termp, term_dict + j, space_split, term_dst))
1081             {
1082                 *term_sub = 0;
1083                 return ZEBRA_OK;
1084             }
1085             strcat(term_dict, ".*)");
1086             break;
1087         case 2:          /* keft truncation */
1088             term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1089             if (!term_100(zh->reg->zebra_maps, reg_type,
1090                           &termp, term_dict + j, space_split, term_dst))
1091             {
1092                 *term_sub = 0;
1093                 return ZEBRA_OK;
1094             }
1095             strcat(term_dict, ")");
1096             break;
1097         case 3:          /* left&right truncation */
1098             term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1099             if (!term_100(zh->reg->zebra_maps, reg_type,
1100                           &termp, term_dict + j, space_split, term_dst))
1101             {
1102                 *term_sub = 0;
1103                 return ZEBRA_OK;
1104             }
1105             strcat(term_dict, ".*)");
1106             break;
1107         case 101:        /* process # in term */
1108             term_dict[j++] = '(';
1109             if (!term_101(zh->reg->zebra_maps, reg_type,
1110                           &termp, term_dict + j, space_split, term_dst))
1111             {
1112                 *term_sub = 0;
1113                 return ZEBRA_OK;
1114             }
1115             strcat(term_dict, ")");
1116             break;
1117         case 102:        /* Regexp-1 */
1118             term_dict[j++] = '(';
1119             if (!term_102(zh->reg->zebra_maps, reg_type,
1120                           &termp, term_dict + j, space_split, term_dst))
1121             {
1122                 *term_sub = 0;
1123                 return ZEBRA_OK;
1124             }
1125             strcat(term_dict, ")");
1126             break;
1127         case 103:       /* Regexp-2 */
1128             regex_range = 1;
1129             term_dict[j++] = '(';
1130             if (!term_103(zh->reg->zebra_maps, reg_type,
1131                           &termp, term_dict + j, &regex_range,
1132                           space_split, term_dst))
1133             {
1134                 *term_sub = 0;
1135                 return ZEBRA_OK;
1136             }
1137             strcat(term_dict, ")");
1138             break;
1139         case 104:        /* process # and ! in term */
1140             term_dict[j++] = '(';
1141             if (!term_104(zh->reg->zebra_maps, reg_type,
1142                           &termp, term_dict + j, space_split, term_dst))
1143             {
1144                 *term_sub = 0;
1145                 return ZEBRA_OK;
1146             }
1147             strcat(term_dict, ")");
1148             break;
1149         case 105:        /* process * and ! in term */
1150             term_dict[j++] = '(';
1151             if (!term_105(zh->reg->zebra_maps, reg_type,
1152                           &termp, term_dict + j, space_split, term_dst, 1))
1153             {
1154                 *term_sub = 0;
1155                 return ZEBRA_OK;
1156             }
1157             strcat(term_dict, ")");
1158             break;
1159         case 106:        /* process * and ! in term */
1160             term_dict[j++] = '(';
1161             if (!term_105(zh->reg->zebra_maps, reg_type,
1162                           &termp, term_dict + j, space_split, term_dst, 0))
1163             {
1164                 *term_sub = 0;
1165                 return ZEBRA_OK;
1166             }
1167             strcat(term_dict, ")");
1168             break;
1169         default:
1170             zebra_setError_zint(zh,
1171                                 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1172                                 truncation_value);
1173             return ZEBRA_FAIL;
1174         }
1175         if (1)
1176         {
1177             char buf[80];
1178             const char *input = term_dict + prefix_len;
1179             esc_str(buf, sizeof(buf), input, strlen(input));
1180         }
1181         yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_dict+prefix_len);
1182         r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
1183                              grep_info, &max_pos, 
1184                              ord_len /* number of "exact" chars */,
1185                              grep_handle);
1186         if (r == 1)
1187             zebra_set_partial_result(zh);
1188         else if (r)
1189             yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1190     }
1191     if (!bases_ok)
1192         return ZEBRA_FAIL;
1193     *term_sub = termp;
1194     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1195     return ZEBRA_OK;
1196 }
1197
1198
1199
1200 static void grep_info_delete(struct grep_info *grep_info)
1201 {
1202 #ifdef TERM_COUNT
1203     xfree(grep_info->term_no);
1204 #endif
1205     xfree(grep_info->isam_p_buf);
1206 }
1207
1208 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1209                                    Z_AttributesPlusTerm *zapt,
1210                                    struct grep_info *grep_info,
1211                                    int reg_type)
1212 {
1213 #ifdef TERM_COUNT
1214     grep_info->term_no = 0;
1215 #endif
1216     grep_info->trunc_max = atoi(res_get_def(zh->res, "truncmax", "10000"));
1217     grep_info->isam_p_size = 0;
1218     grep_info->isam_p_buf = NULL;
1219     grep_info->zh = zh;
1220     grep_info->reg_type = reg_type;
1221     grep_info->termset = 0;
1222     if (zapt)
1223     {
1224         AttrType truncmax;
1225         int truncmax_value;
1226
1227         attr_init_APT(&truncmax, zapt, 13);
1228         truncmax_value = attr_find(&truncmax, NULL);
1229         if (truncmax_value != -1)
1230             grep_info->trunc_max = truncmax_value;
1231     }
1232     if (zapt)
1233     {
1234         AttrType termset;
1235         int termset_value_numeric;
1236         const char *termset_value_string;
1237
1238         attr_init_APT(&termset, zapt, 8);
1239         termset_value_numeric =
1240             attr_find_ex(&termset, NULL, &termset_value_string);
1241         if (termset_value_numeric != -1)
1242         {
1243 #if TERMSET_DISABLE
1244             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1245             return ZEBRA_FAIL;
1246 #else
1247             char resname[32];
1248             const char *termset_name = 0;
1249             if (termset_value_numeric != -2)
1250             {
1251                 
1252                 sprintf(resname, "%d", termset_value_numeric);
1253                 termset_name = resname;
1254             }
1255             else
1256             termset_name = termset_value_string;
1257             yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1258             grep_info->termset = resultSetAdd(zh, termset_name, 1);
1259             if (!grep_info->termset)
1260             {
1261                 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1262                 return ZEBRA_FAIL;
1263             }
1264 #endif
1265         }
1266     }
1267     return ZEBRA_OK;
1268 }
1269                                
1270 /**
1271   \brief Create result set(s) for list of terms
1272   \param zh Zebra Handle
1273   \param zapt Attributes Plust Term (RPN leaf)
1274   \param termz term as used in query but converted to UTF-8
1275   \param attributeSet default attribute set
1276   \param stream memory for result
1277   \param reg_type register type ('w', 'p',..)
1278   \param complete_flag whether it's phrases or not
1279   \param rank_type term flags for ranking
1280   \param xpath_use use attribute for X-Path (-1 for no X-path)
1281   \param num_bases number of databases
1282   \param basenames array of databases
1283   \param rset_nmem memory for result sets
1284   \param result_sets output result set for each term in list (output)
1285   \param num_result_sets number of output result sets
1286   \param kc rset key control to be used for created result sets
1287 */
1288 static ZEBRA_RES term_list_trunc(ZebraHandle zh,
1289                                  Z_AttributesPlusTerm *zapt,
1290                                  const char *termz,
1291                                  oid_value attributeSet,
1292                                  NMEM stream,
1293                                  int reg_type, int complete_flag,
1294                                  const char *rank_type,
1295                                  const char *xpath_use,
1296                                  int num_bases, char **basenames, 
1297                                  NMEM rset_nmem,
1298                                  RSET **result_sets, int *num_result_sets,
1299                                  struct rset_key_control *kc)
1300 {
1301     char term_dst[IT_MAX_WORD+1];
1302     struct grep_info grep_info;
1303     const char *termp = termz;
1304     int alloc_sets = 0;
1305
1306     *num_result_sets = 0;
1307     *term_dst = 0;
1308     if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1309         return ZEBRA_FAIL;
1310     while(1)
1311     { 
1312         ZEBRA_RES res;
1313
1314         if (alloc_sets == *num_result_sets)
1315         {
1316             int add = 10;
1317             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1318                                               sizeof(*rnew));
1319             if (alloc_sets)
1320                 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1321             alloc_sets = alloc_sets + add;
1322             *result_sets = rnew;
1323         }
1324         res = term_trunc(zh, zapt, &termp, attributeSet,
1325                          stream, &grep_info,
1326                          reg_type, complete_flag,
1327                          num_bases, basenames,
1328                          term_dst, rank_type,
1329                          xpath_use, rset_nmem,
1330                          &(*result_sets)[*num_result_sets],
1331                          kc);
1332         if (res != ZEBRA_OK)
1333         {
1334             int i;
1335             for (i = 0; i < *num_result_sets; i++)
1336                 rset_delete((*result_sets)[i]);
1337             grep_info_delete (&grep_info);
1338             return res;
1339         }
1340         if ((*result_sets)[*num_result_sets] == 0)
1341             break;
1342         (*num_result_sets)++;
1343
1344         if (!*termp)
1345             break;
1346     }
1347     grep_info_delete(&grep_info);
1348     return ZEBRA_OK;
1349 }
1350
1351 static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh,
1352                                          Z_AttributesPlusTerm *zapt,
1353                                          oid_value attributeSet,
1354                                          int reg_type,
1355                                          int num_bases, char **basenames,
1356                                          NMEM rset_nmem,
1357                                          RSET *rset,
1358                                          struct rset_key_control *kc)
1359 {
1360     RSET *f_set;
1361     int base_no;
1362     int position_value;
1363     int num_sets = 0;
1364     AttrType position;
1365
1366     attr_init_APT(&position, zapt, 3);
1367     position_value = attr_find(&position, NULL);
1368     switch(position_value)
1369     {
1370     case 3:
1371     case -1:
1372         return ZEBRA_OK;
1373     case 1:
1374     case 2:
1375         break;
1376     default:
1377         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1378                             position_value);
1379         return ZEBRA_FAIL;
1380     }
1381
1382     if (!zebra_maps_is_first_in_field(zh->reg->zebra_maps, reg_type))
1383     {
1384         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1385                             position_value);
1386         return ZEBRA_FAIL;
1387     }
1388
1389     if (!zh->reg->isamb && !zh->reg->isamc)
1390     {
1391         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1392                             position_value);
1393         return ZEBRA_FAIL;
1394     }
1395     f_set = xmalloc(sizeof(RSET) * num_bases);
1396     for (base_no = 0; base_no < num_bases; base_no++)
1397     {
1398         int ord = -1;
1399         char ord_buf[32];
1400         char term_dict[100];
1401         int ord_len;
1402         char *val;
1403         ISAM_P isam_p;
1404
1405         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1406         {
1407             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1408                            basenames[base_no]);
1409             return ZEBRA_FAIL;
1410         }
1411         
1412         if (zebra_apt_get_ord(zh, zapt, reg_type, 0,
1413                               attributeSet, &ord) != ZEBRA_OK)
1414             continue;
1415
1416         ord_len = key_SU_encode (ord, ord_buf);
1417         memcpy(term_dict, ord_buf, ord_len);
1418         strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1419         val = dict_lookup(zh->reg->dict, term_dict);
1420         if (!val)
1421             continue;
1422         assert(*val == sizeof(ISAM_P));
1423         memcpy(&isam_p, val+1, sizeof(isam_p));
1424         
1425
1426         if (zh->reg->isamb)
1427             f_set[num_sets++] = rsisamb_create(rset_nmem, kc, kc->scope,
1428                                                zh->reg->isamb, isam_p, 0);
1429         else if (zh->reg->isamc)
1430             f_set[num_sets++] = rsisamc_create(rset_nmem, kc, kc->scope,
1431                                                zh->reg->isamc, isam_p, 0);
1432     }
1433     if (num_sets)
1434     {
1435         *rset = rset_create_or(rset_nmem, kc, kc->scope,
1436                                0 /* termid */, num_sets, f_set);
1437     }
1438     xfree(f_set);
1439     return ZEBRA_OK;
1440 }
1441                                          
1442 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1443                                        Z_AttributesPlusTerm *zapt,
1444                                        const char *termz_org,
1445                                        oid_value attributeSet,
1446                                        NMEM stream,
1447                                        int reg_type, int complete_flag,
1448                                        const char *rank_type,
1449                                        const char *xpath_use,
1450                                        int num_bases, char **basenames, 
1451                                        NMEM rset_nmem,
1452                                        RSET *rset,
1453                                        struct rset_key_control *kc)
1454 {
1455     RSET *result_sets = 0;
1456     int num_result_sets = 0;
1457     ZEBRA_RES res =
1458         term_list_trunc(zh, zapt, termz_org, attributeSet,
1459                         stream, reg_type, complete_flag,
1460                         rank_type, xpath_use,
1461                         num_bases, basenames,
1462                         rset_nmem,
1463                         &result_sets, &num_result_sets, kc);
1464
1465     if (res != ZEBRA_OK)
1466         return res;
1467
1468     if (num_result_sets > 0)
1469     {
1470         RSET first_set = 0;
1471         res = rpn_search_APT_position(zh, zapt, attributeSet, 
1472                                       reg_type,
1473                                       num_bases, basenames,
1474                                       rset_nmem, &first_set,
1475                                       kc);
1476         if (res != ZEBRA_OK)
1477             return res;
1478         if (first_set)
1479         {
1480             RSET *nsets = nmem_malloc(stream,
1481                                       sizeof(RSET) * (num_result_sets+1));
1482             nsets[0] = first_set;
1483             memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1484             result_sets = nsets;
1485             num_result_sets++;
1486         }
1487     }
1488     if (num_result_sets == 0)
1489         *rset = rset_create_null(rset_nmem, kc, 0); 
1490     else if (num_result_sets == 1)
1491         *rset = result_sets[0];
1492     else
1493         *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1494                                  num_result_sets, result_sets,
1495                                  1 /* ordered */, 0 /* exclusion */,
1496                                  3 /* relation */, 1 /* distance */);
1497     if (!*rset)
1498         return ZEBRA_FAIL;
1499     return ZEBRA_OK;
1500 }
1501
1502 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1503                                         Z_AttributesPlusTerm *zapt,
1504                                         const char *termz_org,
1505                                         oid_value attributeSet,
1506                                         NMEM stream,
1507                                         int reg_type, int complete_flag,
1508                                         const char *rank_type,
1509                                         const char *xpath_use,
1510                                         int num_bases, char **basenames,
1511                                         NMEM rset_nmem,
1512                                         RSET *rset,
1513                                         struct rset_key_control *kc)
1514 {
1515     RSET *result_sets = 0;
1516     int num_result_sets = 0;
1517     int i;
1518     ZEBRA_RES res =
1519         term_list_trunc(zh, zapt, termz_org, attributeSet,
1520                         stream, reg_type, complete_flag,
1521                         rank_type, xpath_use,
1522                         num_bases, basenames,
1523                         rset_nmem,
1524                         &result_sets, &num_result_sets, kc);
1525     if (res != ZEBRA_OK)
1526         return res;
1527
1528     for (i = 0; i<num_result_sets; i++)
1529     {
1530         RSET first_set = 0;
1531         res = rpn_search_APT_position(zh, zapt, attributeSet, 
1532                                       reg_type,
1533                                       num_bases, basenames,
1534                                       rset_nmem, &first_set,
1535                                       kc);
1536         if (res != ZEBRA_OK)
1537         {
1538             for (i = 0; i<num_result_sets; i++)
1539                 rset_delete(result_sets[i]);
1540             return res;
1541         }
1542
1543         if (first_set)
1544         {
1545             RSET tmp_set[2];
1546
1547             tmp_set[0] = first_set;
1548             tmp_set[1] = result_sets[i];
1549             
1550             result_sets[i] = rset_create_prox(
1551                 rset_nmem, kc, kc->scope,
1552                 2, tmp_set,
1553                 1 /* ordered */, 0 /* exclusion */,
1554                 3 /* relation */, 1 /* distance */);
1555         }
1556     }
1557     if (num_result_sets == 0)
1558         *rset = rset_create_null(rset_nmem, kc, 0); 
1559     else if (num_result_sets == 1)
1560         *rset = result_sets[0];
1561     else
1562         *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1563                                num_result_sets, result_sets);
1564     if (!*rset)
1565         return ZEBRA_FAIL;
1566     return ZEBRA_OK;
1567 }
1568
1569 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1570                                          Z_AttributesPlusTerm *zapt,
1571                                          const char *termz_org,
1572                                          oid_value attributeSet,
1573                                          NMEM stream,
1574                                          int reg_type, int complete_flag,
1575                                          const char *rank_type, 
1576                                          const char *xpath_use,
1577                                          int num_bases, char **basenames,
1578                                          NMEM rset_nmem,
1579                                          RSET *rset,
1580                                          struct rset_key_control *kc)
1581 {
1582     RSET *result_sets = 0;
1583     int num_result_sets = 0;
1584     int i;
1585     ZEBRA_RES res =
1586         term_list_trunc(zh, zapt, termz_org, attributeSet,
1587                         stream, reg_type, complete_flag,
1588                         rank_type, xpath_use,
1589                         num_bases, basenames,
1590                         rset_nmem,
1591                         &result_sets, &num_result_sets,
1592                         kc);
1593     if (res != ZEBRA_OK)
1594         return res;
1595     for (i = 0; i<num_result_sets; i++)
1596     {
1597         RSET first_set = 0;
1598         res = rpn_search_APT_position(zh, zapt, attributeSet, 
1599                                       reg_type,
1600                                       num_bases, basenames,
1601                                       rset_nmem, &first_set,
1602                                       kc);
1603         if (res != ZEBRA_OK)
1604         {
1605             for (i = 0; i<num_result_sets; i++)
1606                 rset_delete(result_sets[i]);
1607             return res;
1608         }
1609
1610         if (first_set)
1611         {
1612             RSET tmp_set[2];
1613
1614             tmp_set[0] = first_set;
1615             tmp_set[1] = result_sets[i];
1616             
1617             result_sets[i] = rset_create_prox(
1618                 rset_nmem, kc, kc->scope,
1619                 2, tmp_set,
1620                 1 /* ordered */, 0 /* exclusion */,
1621                 3 /* relation */, 1 /* distance */);
1622         }
1623     }
1624
1625
1626     if (num_result_sets == 0)
1627         *rset = rset_create_null(rset_nmem, kc, 0); 
1628     else if (num_result_sets == 1)
1629         *rset = result_sets[0];
1630     else
1631         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1632                                 num_result_sets, result_sets);
1633     if (!*rset)
1634         return ZEBRA_FAIL;
1635     return ZEBRA_OK;
1636 }
1637
1638 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1639                             const char **term_sub,
1640                             char *term_dict,
1641                             oid_value attributeSet,
1642                             struct grep_info *grep_info,
1643                             int *max_pos,
1644                             int reg_type,
1645                             char *term_dst,
1646                             int *error_code)
1647 {
1648     AttrType relation;
1649     int relation_value;
1650     int term_value;
1651     int r;
1652     char *term_tmp = term_dict + strlen(term_dict);
1653
1654     *error_code = 0;
1655     attr_init_APT(&relation, zapt, 2);
1656     relation_value = attr_find(&relation, NULL);
1657
1658     yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1659
1660     switch (relation_value)
1661     {
1662     case 1:
1663         yaz_log(log_level_rpn, "Relation <");
1664         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1665                       term_dst))
1666             return 0;
1667         term_value = atoi (term_tmp);
1668         gen_regular_rel(term_tmp, term_value-1, 1);
1669         break;
1670     case 2:
1671         yaz_log(log_level_rpn, "Relation <=");
1672         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1673                       term_dst))
1674             return 0;
1675         term_value = atoi (term_tmp);
1676         gen_regular_rel(term_tmp, term_value, 1);
1677         break;
1678     case 4:
1679         yaz_log(log_level_rpn, "Relation >=");
1680         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1681                       term_dst))
1682             return 0;
1683         term_value = atoi (term_tmp);
1684         gen_regular_rel(term_tmp, term_value, 0);
1685         break;
1686     case 5:
1687         yaz_log(log_level_rpn, "Relation >");
1688         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1689                       term_dst))
1690             return 0;
1691         term_value = atoi (term_tmp);
1692         gen_regular_rel(term_tmp, term_value+1, 0);
1693         break;
1694     case -1:
1695     case 3:
1696         yaz_log(log_level_rpn, "Relation =");
1697         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1698                       term_dst))
1699             return 0;
1700         term_value = atoi (term_tmp);
1701         sprintf(term_tmp, "(0*%d)", term_value);
1702         break;
1703     case 103:
1704         /* term_tmp untouched.. */
1705         while (**term_sub != '\0')
1706             (*term_sub)++;
1707         break;
1708     default:
1709         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1710         return 0;
1711     }
1712     yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_tmp);
1713     r = dict_lookup_grep(zh->reg->dict, term_dict, 0, grep_info, max_pos,
1714                           0, grep_handle);
1715
1716     if (r == 1)
1717         zebra_set_partial_result(zh);
1718     else if (r)
1719         yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1720     yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1721     return 1;
1722 }
1723
1724 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1725                               const char **term_sub, 
1726                               oid_value attributeSet, NMEM stream,
1727                               struct grep_info *grep_info,
1728                               int reg_type, int complete_flag,
1729                               int num_bases, char **basenames,
1730                               char *term_dst, 
1731                               const char *xpath_use,
1732                               struct ord_list **ol)
1733 {
1734     char term_dict[2*IT_MAX_WORD+2];
1735     int base_no;
1736     const char *termp;
1737     struct rpn_char_map_info rcmi;
1738
1739     int bases_ok = 0;     /* no of databases with OK attribute */
1740
1741     *ol = ord_list_create(stream);
1742
1743     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1744
1745     for (base_no = 0; base_no < num_bases; base_no++)
1746     {
1747         int max_pos, prefix_len = 0;
1748         int relation_error = 0;
1749         int ord, ord_len, i;
1750         char ord_buf[32];
1751
1752         termp = *term_sub;
1753
1754         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1755         {
1756             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1757                            basenames[base_no]);
1758             return ZEBRA_FAIL;
1759         }
1760
1761         if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use,
1762                               attributeSet, &ord) != ZEBRA_OK)
1763             continue;
1764         bases_ok++;
1765
1766         *ol = ord_list_append(stream, *ol, ord);
1767
1768         ord_len = key_SU_encode (ord, ord_buf);
1769
1770         term_dict[prefix_len++] = '(';
1771         for (i = 0; i < ord_len; i++)
1772         {
1773             term_dict[prefix_len++] = 1;
1774             term_dict[prefix_len++] = ord_buf[i];
1775         }
1776         term_dict[prefix_len++] = ')';
1777         term_dict[prefix_len] = '\0';
1778
1779         if (!numeric_relation(zh, zapt, &termp, term_dict,
1780                               attributeSet, grep_info, &max_pos, reg_type,
1781                               term_dst, &relation_error))
1782         {
1783             if (relation_error)
1784             {
1785                 zebra_setError(zh, relation_error, 0);
1786                 return ZEBRA_FAIL;
1787             }
1788             *term_sub = 0;
1789             return ZEBRA_OK;
1790         }
1791     }
1792     if (!bases_ok)
1793         return ZEBRA_FAIL;
1794     *term_sub = termp;
1795     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1796     return ZEBRA_OK;
1797 }
1798
1799                                  
1800 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1801                                         Z_AttributesPlusTerm *zapt,
1802                                         const char *termz,
1803                                         oid_value attributeSet,
1804                                         NMEM stream,
1805                                         int reg_type, int complete_flag,
1806                                         const char *rank_type, 
1807                                         const char *xpath_use,
1808                                         int num_bases, char **basenames,
1809                                         NMEM rset_nmem,
1810                                         RSET *rset,
1811                                         struct rset_key_control *kc)
1812 {
1813     char term_dst[IT_MAX_WORD+1];
1814     const char *termp = termz;
1815     RSET *result_sets = 0;
1816     int num_result_sets = 0;
1817     ZEBRA_RES res;
1818     struct grep_info grep_info;
1819     int alloc_sets = 0;
1820     zint hits_limit_value;
1821     const char *term_ref_id_str = 0;
1822
1823     term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
1824
1825     yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1826     if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1827         return ZEBRA_FAIL;
1828     while (1)
1829     { 
1830         struct ord_list *ol;
1831         if (alloc_sets == num_result_sets)
1832         {
1833             int add = 10;
1834             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1835                                               sizeof(*rnew));
1836             if (alloc_sets)
1837                 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1838             alloc_sets = alloc_sets + add;
1839             result_sets = rnew;
1840         }
1841         yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1842         grep_info.isam_p_indx = 0;
1843         res = numeric_term(zh, zapt, &termp, attributeSet, stream, &grep_info,
1844                            reg_type, complete_flag, num_bases, basenames,
1845                            term_dst, xpath_use, &ol);
1846         if (res == ZEBRA_FAIL || termp == 0)
1847             break;
1848         yaz_log(YLOG_DEBUG, "term: %s", term_dst);
1849         result_sets[num_result_sets] =
1850             rset_trunc(zh, grep_info.isam_p_buf,
1851                        grep_info.isam_p_indx, term_dst,
1852                        strlen(term_dst), rank_type,
1853                        0 /* preserve position */,
1854                        zapt->term->which, rset_nmem, 
1855                        kc, kc->scope, ol, reg_type,
1856                        hits_limit_value,
1857                        term_ref_id_str);
1858         if (!result_sets[num_result_sets])
1859             break;
1860         num_result_sets++;
1861         if (!*termp)
1862             break;
1863     }
1864     grep_info_delete(&grep_info);
1865
1866     if (res != ZEBRA_OK)
1867         return res;
1868     if (num_result_sets == 0)
1869         *rset = rset_create_null(rset_nmem, kc, 0);
1870     else if (num_result_sets == 1)
1871         *rset = result_sets[0];
1872     else
1873         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1874                                 num_result_sets, result_sets);
1875     if (!*rset)
1876         return ZEBRA_FAIL;
1877     return ZEBRA_OK;
1878 }
1879
1880 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1881                                       Z_AttributesPlusTerm *zapt,
1882                                       const char *termz,
1883                                       oid_value attributeSet,
1884                                       NMEM stream,
1885                                       const char *rank_type, NMEM rset_nmem,
1886                                       RSET *rset,
1887                                       struct rset_key_control *kc)
1888 {
1889     RSFD rsfd;
1890     struct it_key key;
1891     int sys;
1892     *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1893                              res_get (zh->res, "setTmpDir"),0 );
1894     rsfd = rset_open(*rset, RSETF_WRITE);
1895     
1896     sys = atoi(termz);
1897     if (sys <= 0)
1898         sys = 1;
1899     key.mem[0] = sys;
1900     key.mem[1] = 1;
1901     key.len = 2;
1902     rset_write (rsfd, &key);
1903     rset_close (rsfd);
1904     return ZEBRA_OK;
1905 }
1906
1907 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1908                                oid_value attributeSet, NMEM stream,
1909                                Z_SortKeySpecList *sort_sequence,
1910                                const char *rank_type,
1911                                NMEM rset_nmem,
1912                                RSET *rset,
1913                                struct rset_key_control *kc)
1914 {
1915     int i;
1916     int sort_relation_value;
1917     AttrType sort_relation_type;
1918     Z_SortKeySpec *sks;
1919     Z_SortKey *sk;
1920     int oid[OID_SIZE];
1921     oident oe;
1922     char termz[20];
1923     
1924     attr_init_APT(&sort_relation_type, zapt, 7);
1925     sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
1926
1927     if (!sort_sequence->specs)
1928     {
1929         sort_sequence->num_specs = 10;
1930         sort_sequence->specs = (Z_SortKeySpec **)
1931             nmem_malloc(stream, sort_sequence->num_specs *
1932                          sizeof(*sort_sequence->specs));
1933         for (i = 0; i<sort_sequence->num_specs; i++)
1934             sort_sequence->specs[i] = 0;
1935     }
1936     if (zapt->term->which != Z_Term_general)
1937         i = 0;
1938     else
1939         i = atoi_n ((char *) zapt->term->u.general->buf,
1940                     zapt->term->u.general->len);
1941     if (i >= sort_sequence->num_specs)
1942         i = 0;
1943     sprintf(termz, "%d", i);
1944
1945     oe.proto = PROTO_Z3950;
1946     oe.oclass = CLASS_ATTSET;
1947     oe.value = attributeSet;
1948     if (!oid_ent_to_oid (&oe, oid))
1949         return ZEBRA_FAIL;
1950
1951     sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1952     sks->sortElement = (Z_SortElement *)
1953         nmem_malloc(stream, sizeof(*sks->sortElement));
1954     sks->sortElement->which = Z_SortElement_generic;
1955     sk = sks->sortElement->u.generic = (Z_SortKey *)
1956         nmem_malloc(stream, sizeof(*sk));
1957     sk->which = Z_SortKey_sortAttributes;
1958     sk->u.sortAttributes = (Z_SortAttributes *)
1959         nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
1960
1961     sk->u.sortAttributes->id = oid;
1962     sk->u.sortAttributes->list = zapt->attributes;
1963
1964     sks->sortRelation = (int *)
1965         nmem_malloc(stream, sizeof(*sks->sortRelation));
1966     if (sort_relation_value == 1)
1967         *sks->sortRelation = Z_SortKeySpec_ascending;
1968     else if (sort_relation_value == 2)
1969         *sks->sortRelation = Z_SortKeySpec_descending;
1970     else 
1971         *sks->sortRelation = Z_SortKeySpec_ascending;
1972
1973     sks->caseSensitivity = (int *)
1974         nmem_malloc(stream, sizeof(*sks->caseSensitivity));
1975     *sks->caseSensitivity = 0;
1976
1977     sks->which = Z_SortKeySpec_null;
1978     sks->u.null = odr_nullval ();
1979     sort_sequence->specs[i] = sks;
1980     *rset = rset_create_null(rset_nmem, kc, 0);
1981     return ZEBRA_OK;
1982 }
1983
1984
1985 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1986                            oid_value attributeSet,
1987                            struct xpath_location_step *xpath, int max,
1988                            NMEM mem)
1989 {
1990     oid_value curAttributeSet = attributeSet;
1991     AttrType use;
1992     const char *use_string = 0;
1993     
1994     attr_init_APT(&use, zapt, 1);
1995     attr_find_ex(&use, &curAttributeSet, &use_string);
1996
1997     if (!use_string || *use_string != '/')
1998         return -1;
1999
2000     return zebra_parse_xpath_str(use_string, xpath, max, mem);
2001 }
2002  
2003                
2004
2005 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
2006                         int reg_type, const char *term, 
2007                         const char *xpath_use,
2008                         NMEM rset_nmem,
2009                         struct rset_key_control *kc)
2010 {
2011     RSET rset;
2012     struct grep_info grep_info;
2013     char term_dict[2048];
2014     char ord_buf[32];
2015     int prefix_len = 0;
2016     int ord = zebraExplain_lookup_attr_str(zh->reg->zei, 
2017                                            zinfo_index_category_index,
2018                                            reg_type,
2019                                            xpath_use);
2020     int ord_len, i, r, max_pos;
2021     int term_type = Z_Term_characterString;
2022     const char *flags = "void";
2023
2024     if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, '0') == ZEBRA_FAIL)
2025         return rset_create_null(rset_nmem, kc, 0);
2026     
2027     if (ord < 0)
2028         return rset_create_null(rset_nmem, kc, 0);
2029     if (prefix_len)
2030         term_dict[prefix_len++] = '|';
2031     else
2032         term_dict[prefix_len++] = '(';
2033     
2034     ord_len = key_SU_encode (ord, ord_buf);
2035     for (i = 0; i<ord_len; i++)
2036     {
2037         term_dict[prefix_len++] = 1;
2038         term_dict[prefix_len++] = ord_buf[i];
2039     }
2040     term_dict[prefix_len++] = ')';
2041     strcpy(term_dict+prefix_len, term);
2042     
2043     grep_info.isam_p_indx = 0;
2044     r = dict_lookup_grep(zh->reg->dict, term_dict, 0,
2045                           &grep_info, &max_pos, 0, grep_handle);
2046     yaz_log(YLOG_DEBUG, "%s %d positions", term,
2047              grep_info.isam_p_indx);
2048     rset = rset_trunc(zh, grep_info.isam_p_buf,
2049                       grep_info.isam_p_indx, term, strlen(term),
2050                       flags, 1, term_type,rset_nmem,
2051                       kc, kc->scope, 0, reg_type, 0 /* hits_limit */,
2052                       0 /* term_ref_id_str */);
2053     grep_info_delete(&grep_info);
2054     return rset;
2055 }
2056
2057 static
2058 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2059                            int num_bases, char **basenames,
2060                            NMEM stream, const char *rank_type, RSET rset,
2061                            int xpath_len, struct xpath_location_step *xpath,
2062                            NMEM rset_nmem,
2063                            RSET *rset_out,
2064                            struct rset_key_control *kc)
2065 {
2066     int base_no;
2067     int i;
2068     int always_matches = rset ? 0 : 1;
2069
2070     if (xpath_len < 0)
2071     {
2072         *rset_out = rset;
2073         return ZEBRA_OK;
2074     }
2075
2076     yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2077     for (i = 0; i<xpath_len; i++)
2078     {
2079         yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2080
2081     }
2082
2083     /*
2084       //a    ->    a/.*
2085       //a/b  ->    b/a/.*
2086       /a     ->    a/
2087       /a/b   ->    b/a/
2088
2089       /      ->    none
2090
2091    a[@attr = value]/b[@other = othervalue]
2092
2093  /e/@a val      range(e/,range(@a,freetext(w,1015,val),@a),e/)
2094  /a/b val       range(b/a/,freetext(w,1016,val),b/a/)
2095  /a/b/@c val    range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2096  /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2097  /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2098  /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
2099       
2100     */
2101
2102     dict_grep_cmap (zh->reg->dict, 0, 0);
2103
2104     for (base_no = 0; base_no < num_bases; base_no++)
2105     {
2106         int level = xpath_len;
2107         int first_path = 1;
2108         
2109         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2110         {
2111             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2112                            basenames[base_no]);
2113             *rset_out = rset;
2114             return ZEBRA_FAIL;
2115         }
2116         while (--level >= 0)
2117         {
2118             WRBUF xpath_rev = wrbuf_alloc();
2119             int i;
2120             RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
2121
2122             for (i = level; i >= 1; --i)
2123             {
2124                 const char *cp = xpath[i].part;
2125                 if (*cp)
2126                 {
2127                     for (; *cp; cp++)
2128                     {
2129                         if (*cp == '*')
2130                             wrbuf_puts(xpath_rev, "[^/]*");
2131                         else if (*cp == ' ')
2132                             wrbuf_puts(xpath_rev, "\001 ");
2133                         else
2134                             wrbuf_putc(xpath_rev, *cp);
2135
2136                         /* wrbuf_putc does not null-terminate , but
2137                            wrbuf_puts below ensures it does.. so xpath_rev
2138                            is OK iff length is > 0 */
2139                     }
2140                     wrbuf_puts(xpath_rev, "/");
2141                 }
2142                 else if (i == 1)  /* // case */
2143                     wrbuf_puts(xpath_rev, ".*");
2144             }
2145             if (xpath[level].predicate &&
2146                 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2147                 xpath[level].predicate->u.relation.name[0])
2148             {
2149                 WRBUF wbuf = wrbuf_alloc();
2150                 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2151                 if (xpath[level].predicate->u.relation.value)
2152                 {
2153                     const char *cp = xpath[level].predicate->u.relation.value;
2154                     wrbuf_putc(wbuf, '=');
2155                     
2156                     while (*cp)
2157                     {
2158                         if (strchr(REGEX_CHARS, *cp))
2159                             wrbuf_putc(wbuf, '\\');
2160                         wrbuf_putc(wbuf, *cp);
2161                         cp++;
2162                     }
2163                 }
2164                 wrbuf_puts(wbuf, "");
2165                 rset_attr = xpath_trunc(
2166                     zh, stream, '0', wrbuf_buf(wbuf), ZEBRA_XPATH_ATTR_NAME, 
2167                     rset_nmem, kc);
2168                 wrbuf_free(wbuf, 1);
2169             } 
2170             else 
2171             {
2172                 if (!first_path)
2173                 {
2174                     wrbuf_free(xpath_rev, 1);
2175                     continue;
2176                 }
2177             }
2178             yaz_log(log_level_rpn, "xpath_rev (%d) = %.*s", level, 
2179                     wrbuf_len(xpath_rev), wrbuf_buf(xpath_rev));
2180             if (wrbuf_len(xpath_rev))
2181             {
2182                 rset_start_tag = xpath_trunc(zh, stream, '0', 
2183                                              wrbuf_buf(xpath_rev),
2184                                              ZEBRA_XPATH_ELM_BEGIN, 
2185                                              rset_nmem, kc);
2186                 if (always_matches)
2187                     rset = rset_start_tag;
2188                 else
2189                 {
2190                     rset_end_tag = xpath_trunc(zh, stream, '0', 
2191                                                wrbuf_buf(xpath_rev),
2192                                                ZEBRA_XPATH_ELM_END, 
2193                                                rset_nmem, kc);
2194                     
2195                     rset = rset_create_between(rset_nmem, kc, kc->scope,
2196                                                rset_start_tag, rset,
2197                                                rset_end_tag, rset_attr);
2198                 }
2199             }
2200             wrbuf_free(xpath_rev, 1);
2201             first_path = 0;
2202         }
2203     }
2204     *rset_out = rset;
2205     return ZEBRA_OK;
2206 }
2207
2208 #define MAX_XPATH_STEPS 10
2209
2210 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2211                                 oid_value attributeSet, NMEM stream,
2212                                 Z_SortKeySpecList *sort_sequence,
2213                                 int num_bases, char **basenames, 
2214                                 NMEM rset_nmem,
2215                                 RSET *rset,
2216                                 struct rset_key_control *kc)
2217 {
2218     ZEBRA_RES res = ZEBRA_OK;
2219     unsigned reg_id;
2220     char *search_type = NULL;
2221     char rank_type[128];
2222     int complete_flag;
2223     int sort_flag;
2224     char termz[IT_MAX_WORD+1];
2225     int xpath_len;
2226     const char *xpath_use = 0;
2227     struct xpath_location_step xpath[MAX_XPATH_STEPS];
2228
2229     if (!log_level_set)
2230     {
2231         log_level_rpn = yaz_log_module_level("rpn");
2232         log_level_set = 1;
2233     }
2234     zebra_maps_attr(zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2235                     rank_type, &complete_flag, &sort_flag);
2236     
2237     yaz_log(YLOG_DEBUG, "reg_id=%c", reg_id);
2238     yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2239     yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2240     yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2241
2242     if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2243         return ZEBRA_FAIL;
2244
2245     if (sort_flag)
2246         return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2247                              rank_type, rset_nmem, rset, kc);
2248     /* consider if an X-Path query is used */
2249     xpath_len = rpn_check_xpath(zh, zapt, attributeSet, 
2250                                 xpath, MAX_XPATH_STEPS, stream);
2251     if (xpath_len >= 0)
2252     {
2253         if (xpath[xpath_len-1].part[0] == '@') 
2254             xpath_use = ZEBRA_XPATH_ATTR_CDATA;  /* last step is attribute  */
2255         else
2256             xpath_use = ZEBRA_XPATH_CDATA;  /* searching for cdata */        
2257
2258         if (1)
2259         {
2260             AttrType relation;
2261             int relation_value;
2262
2263             attr_init_APT(&relation, zapt, 2);
2264             relation_value = attr_find(&relation, NULL);
2265
2266             if (relation_value == 103) /* alwaysmatches */
2267             {
2268                 *rset = 0; /* signal no "term" set */
2269                 return rpn_search_xpath(zh, num_bases, basenames,
2270                                         stream, rank_type, *rset, 
2271                                         xpath_len, xpath, rset_nmem, rset, kc);
2272             }
2273         }
2274     }
2275
2276     /* search using one of the various search type strategies
2277        termz is our UTF-8 search term
2278        attributeSet is top-level default attribute set 
2279        stream is ODR for search
2280        reg_id is the register type
2281        complete_flag is 1 for complete subfield, 0 for incomplete
2282        xpath_use is use-attribute to be used for X-Path search, 0 for none
2283     */
2284     if (!strcmp(search_type, "phrase"))
2285     {
2286         res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2287                                     reg_id, complete_flag, rank_type,
2288                                     xpath_use,
2289                                     num_bases, basenames, rset_nmem,
2290                                     rset, kc);
2291     }
2292     else if (!strcmp(search_type, "and-list"))
2293     {
2294         res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2295                                       reg_id, complete_flag, rank_type,
2296                                       xpath_use,
2297                                       num_bases, basenames, rset_nmem,
2298                                       rset, kc);
2299     }
2300     else if (!strcmp(search_type, "or-list"))
2301     {
2302         res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2303                                      reg_id, complete_flag, rank_type,
2304                                      xpath_use,
2305                                      num_bases, basenames, rset_nmem,
2306                                      rset, kc);
2307     }
2308     else if (!strcmp(search_type, "local"))
2309     {
2310         res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2311                                    rank_type, rset_nmem, rset, kc);
2312     }
2313     else if (!strcmp(search_type, "numeric"))
2314     {
2315         res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2316                                      reg_id, complete_flag, rank_type,
2317                                      xpath_use,
2318                                      num_bases, basenames, rset_nmem,
2319                                      rset, kc);
2320     }
2321     else
2322     {
2323         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2324         res = ZEBRA_FAIL;
2325     }
2326     if (res != ZEBRA_OK)
2327         return res;
2328     if (!*rset)
2329         return ZEBRA_FAIL;
2330     return rpn_search_xpath(zh, num_bases, basenames,
2331                             stream, rank_type, *rset, 
2332                             xpath_len, xpath, rset_nmem, rset, kc);
2333 }
2334
2335 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2336                                       oid_value attributeSet, 
2337                                       NMEM stream, NMEM rset_nmem,
2338                                       Z_SortKeySpecList *sort_sequence,
2339                                       int num_bases, char **basenames,
2340                                       RSET **result_sets, int *num_result_sets,
2341                                       Z_Operator *parent_op,
2342                                       struct rset_key_control *kc);
2343
2344 ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs,
2345                                    zint *approx_limit)
2346 {
2347     ZEBRA_RES res = ZEBRA_OK;
2348     if (zs->which == Z_RPNStructure_complex)
2349     {
2350         if (res == ZEBRA_OK)
2351             res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
2352                                            approx_limit);
2353         if (res == ZEBRA_OK)
2354             res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
2355                                            approx_limit);
2356     }
2357     else if (zs->which == Z_RPNStructure_simple)
2358     {
2359         if (zs->u.simple->which == Z_Operand_APT)
2360         {
2361             Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
2362             AttrType global_hits_limit_attr;
2363             int l;
2364             
2365             attr_init_APT(&global_hits_limit_attr, zapt, 12);
2366             
2367             l = attr_find(&global_hits_limit_attr, NULL);
2368             if (l != -1)
2369                 *approx_limit = l;
2370         }
2371     }
2372     return res;
2373 }
2374
2375 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2376                          oid_value attributeSet, 
2377                          NMEM stream, NMEM rset_nmem,
2378                          Z_SortKeySpecList *sort_sequence,
2379                          int num_bases, char **basenames,
2380                          RSET *result_set)
2381 {
2382     RSET *result_sets = 0;
2383     int num_result_sets = 0;
2384     ZEBRA_RES res;
2385     struct rset_key_control *kc = zebra_key_control_create(zh);
2386
2387     res = rpn_search_structure(zh, zs, attributeSet,
2388                                stream, rset_nmem,
2389                                sort_sequence, 
2390                                num_bases, basenames,
2391                                &result_sets, &num_result_sets,
2392                                0 /* no parent op */,
2393                                kc);
2394     if (res != ZEBRA_OK)
2395     {
2396         int i;
2397         for (i = 0; i<num_result_sets; i++)
2398             rset_delete(result_sets[i]);
2399         *result_set = 0;
2400     }
2401     else
2402     {
2403         assert(num_result_sets == 1);
2404         assert(result_sets);
2405         assert(*result_sets);
2406         *result_set = *result_sets;
2407     }
2408     (*kc->dec)(kc);
2409     return res;
2410 }
2411
2412 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2413                                oid_value attributeSet, 
2414                                NMEM stream, NMEM rset_nmem,
2415                                Z_SortKeySpecList *sort_sequence,
2416                                int num_bases, char **basenames,
2417                                RSET **result_sets, int *num_result_sets,
2418                                Z_Operator *parent_op,
2419                                struct rset_key_control *kc)
2420 {
2421     *num_result_sets = 0;
2422     if (zs->which == Z_RPNStructure_complex)
2423     {
2424         ZEBRA_RES res;
2425         Z_Operator *zop = zs->u.complex->roperator;
2426         RSET *result_sets_l = 0;
2427         int num_result_sets_l = 0;
2428         RSET *result_sets_r = 0;
2429         int num_result_sets_r = 0;
2430
2431         res = rpn_search_structure(zh, zs->u.complex->s1,
2432                                    attributeSet, stream, rset_nmem,
2433                                    sort_sequence,
2434                                    num_bases, basenames,
2435                                    &result_sets_l, &num_result_sets_l,
2436                                    zop, kc);
2437         if (res != ZEBRA_OK)
2438         {
2439             int i;
2440             for (i = 0; i<num_result_sets_l; i++)
2441                 rset_delete(result_sets_l[i]);
2442             return res;
2443         }
2444         res = rpn_search_structure(zh, zs->u.complex->s2,
2445                                    attributeSet, stream, rset_nmem,
2446                                    sort_sequence,
2447                                    num_bases, basenames,
2448                                    &result_sets_r, &num_result_sets_r,
2449                                    zop, kc);
2450         if (res != ZEBRA_OK)
2451         {
2452             int i;
2453             for (i = 0; i<num_result_sets_l; i++)
2454                 rset_delete(result_sets_l[i]);
2455             for (i = 0; i<num_result_sets_r; i++)
2456                 rset_delete(result_sets_r[i]);
2457             return res;
2458         }
2459
2460         /* make a new list of result for all children */
2461         *num_result_sets = num_result_sets_l + num_result_sets_r;
2462         *result_sets = nmem_malloc(stream, *num_result_sets * 
2463                                    sizeof(**result_sets));
2464         memcpy(*result_sets, result_sets_l, 
2465                num_result_sets_l * sizeof(**result_sets));
2466         memcpy(*result_sets + num_result_sets_l, result_sets_r, 
2467                num_result_sets_r * sizeof(**result_sets));
2468
2469         if (!parent_op || parent_op->which != zop->which
2470             || (zop->which != Z_Operator_and &&
2471                 zop->which != Z_Operator_or))
2472         {
2473             /* parent node different from this one (or non-present) */
2474             /* we must combine result sets now */
2475             RSET rset;
2476             switch (zop->which)
2477             {
2478             case Z_Operator_and:
2479                 rset = rset_create_and(rset_nmem, kc,
2480                                        kc->scope,
2481                                        *num_result_sets, *result_sets);
2482                 break;
2483             case Z_Operator_or:
2484                 rset = rset_create_or(rset_nmem, kc,
2485                                       kc->scope, 0, /* termid */
2486                                       *num_result_sets, *result_sets);
2487                 break;
2488             case Z_Operator_and_not:
2489                 rset = rset_create_not(rset_nmem, kc,
2490                                        kc->scope,
2491                                        (*result_sets)[0],
2492                                        (*result_sets)[1]);
2493                 break;
2494             case Z_Operator_prox:
2495                 if (zop->u.prox->which != Z_ProximityOperator_known)
2496                 {
2497                     zebra_setError(zh, 
2498                                    YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2499                                    0);
2500                     return ZEBRA_FAIL;
2501                 }
2502                 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2503                 {
2504                     zebra_setError_zint(zh,
2505                                         YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2506                                         *zop->u.prox->u.known);
2507                     return ZEBRA_FAIL;
2508                 }
2509                 else
2510                 {
2511                     rset = rset_create_prox(rset_nmem, kc,
2512                                             kc->scope,
2513                                             *num_result_sets, *result_sets, 
2514                                             *zop->u.prox->ordered,
2515                                             (!zop->u.prox->exclusion ? 
2516                                              0 : *zop->u.prox->exclusion),
2517                                             *zop->u.prox->relationType,
2518                                             *zop->u.prox->distance );
2519                 }
2520                 break;
2521             default:
2522                 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
2523                 return ZEBRA_FAIL;
2524             }
2525             *num_result_sets = 1;
2526             *result_sets = nmem_malloc(stream, *num_result_sets * 
2527                                        sizeof(**result_sets));
2528             (*result_sets)[0] = rset;
2529         }
2530     }
2531     else if (zs->which == Z_RPNStructure_simple)
2532     {
2533         RSET rset;
2534         ZEBRA_RES res;
2535
2536         if (zs->u.simple->which == Z_Operand_APT)
2537         {
2538             yaz_log(YLOG_DEBUG, "rpn_search_APT");
2539             res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2540                                  attributeSet, stream, sort_sequence,
2541                                  num_bases, basenames, rset_nmem, &rset,
2542                                  kc);
2543             if (res != ZEBRA_OK)
2544                 return res;
2545         }
2546         else if (zs->u.simple->which == Z_Operand_resultSetId)
2547         {
2548             yaz_log(YLOG_DEBUG, "rpn_search_ref");
2549             rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2550             if (!rset)
2551             {
2552                 zebra_setError(zh, 
2553                                YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2554                                zs->u.simple->u.resultSetId);
2555                 return ZEBRA_FAIL;
2556             }
2557             rset_dup(rset);
2558         }
2559         else
2560         {
2561             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2562             return ZEBRA_FAIL;
2563         }
2564         *num_result_sets = 1;
2565         *result_sets = nmem_malloc(stream, *num_result_sets * 
2566                                    sizeof(**result_sets));
2567         (*result_sets)[0] = rset;
2568     }
2569     else
2570     {
2571         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2572         return ZEBRA_FAIL;
2573     }
2574     return ZEBRA_OK;
2575 }
2576
2577
2578
2579 /*
2580  * Local variables:
2581  * c-basic-offset: 4
2582  * indent-tabs-mode: nil
2583  * End:
2584  * vim: shiftwidth=4 tabstop=8 expandtab
2585  */
2586