Indexing system change. Introduced new index category type
[idzebra-moved-to-github.git] / index / zrpn.c
1 /* $Id: zrpn.c,v 1.218 2006-06-22 15:07:20 adam Exp $
2    Copyright (C) 1995-2006
3    Index Data ApS
4
5 This file is part of the Zebra server.
6
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra.  If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA.
21 */
22
23 #include <stdio.h>
24 #include <assert.h>
25 #ifdef WIN32
26 #include <io.h>
27 #endif
28 #if HAVE_UNISTD_H
29 #include <unistd.h>
30 #endif
31 #include <ctype.h>
32
33 #include <yaz/diagbib1.h>
34 #include "index.h"
35 #include <zebra_xpath.h>
36 #include <attrfind.h>
37 #include <charmap.h>
38 #include <rset.h>
39
40 struct rpn_char_map_info
41 {
42     ZebraMaps zm;
43     int reg_type;
44 };
45
/* Logging state for this file.  log_level_rpn holds the level of the
   "rpn" log module; it is looked up on first use in add_isam_p, which
   then raises log_level_set so the lookup happens only once. */
static int log_level_rpn = 0;
static int log_level_set = 0;
48
49 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
50 {
51     struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
52     const char **out = zebra_maps_input(p->zm, p->reg_type, from, len, 0);
53 #if 0
54     if (out && *out)
55     {
56         const char *outp = *out;
57         yaz_log(YLOG_LOG, "---");
58         while (*outp)
59         {
60             yaz_log(YLOG_LOG, "%02X", *outp);
61             outp++;
62         }
63     }
64 #endif
65     return out;
66 }
67
68 static void rpn_char_map_prepare(struct zebra_register *reg, int reg_type,
69                                  struct rpn_char_map_info *map_info)
70 {
71     map_info->zm = reg->zebra_maps;
72     map_info->reg_type = reg_type;
73     dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
74 }
75
76 #define TERM_COUNT        
77        
78 struct grep_info {        
79 #ifdef TERM_COUNT        
80     int *term_no;        
81 #endif        
82     ISAM_P *isam_p_buf;
83     int isam_p_size;        
84     int isam_p_indx;
85     ZebraHandle zh;
86     int reg_type;
87     ZebraSet termset;
88 };        
89
90 void zebra_term_untrans(ZebraHandle zh, int reg_type,
91                         char *dst, const char *src)
92 {
93     int len = 0;
94     while (*src)
95     {
96         const char *cp = zebra_maps_output(zh->reg->zebra_maps,
97                                            reg_type, &src);
98         if (!cp)
99         {
100             if (len < IT_MAX_WORD-1)
101                 dst[len++] = *src;
102             src++;
103         }
104         else
105             while (*cp && len < IT_MAX_WORD-1)
106                 dst[len++] = *cp++;
107     }
108     dst[len] = '\0';
109 }
110
111 static void add_isam_p(const char *name, const char *info,
112                        struct grep_info *p)
113 {
114     if (!log_level_set)
115     {
116         log_level_rpn = yaz_log_module_level("rpn");
117         log_level_set = 1;
118     }
119     if (p->isam_p_indx == p->isam_p_size)
120     {
121         ISAM_P *new_isam_p_buf;
122 #ifdef TERM_COUNT        
123         int *new_term_no;        
124 #endif
125         p->isam_p_size = 2*p->isam_p_size + 100;
126         new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
127                                             p->isam_p_size);
128         if (p->isam_p_buf)
129         {
130             memcpy(new_isam_p_buf, p->isam_p_buf,
131                     p->isam_p_indx * sizeof(*p->isam_p_buf));
132             xfree(p->isam_p_buf);
133         }
134         p->isam_p_buf = new_isam_p_buf;
135
136 #ifdef TERM_COUNT
137         new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
138         if (p->term_no)
139         {
140             memcpy(new_term_no, p->isam_p_buf,
141                     p->isam_p_indx * sizeof(*p->term_no));
142             xfree(p->term_no);
143         }
144         p->term_no = new_term_no;
145 #endif
146     }
147     assert(*info == sizeof(*p->isam_p_buf));
148     memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
149
150 #if 1
151     if (p->termset)
152     {
153         const char *db;
154         char term_tmp[IT_MAX_WORD];
155         int ord = 0;
156         const char *index_name;
157         int len = key_SU_decode (&ord, (const unsigned char *) name);
158         
159         zebra_term_untrans  (p->zh, p->reg_type, term_tmp, name+len+1);
160         yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
161         zebraExplain_lookup_ord(p->zh->reg->zei,
162                                 ord, 0 /* index_type */, &db, &index_name);
163         yaz_log(log_level_rpn, "grep:  db=%s index=%s", db, index_name);
164         
165         resultSetAddTerm(p->zh, p->termset, name[len], db,
166                          index_name, term_tmp);
167     }
168 #endif
169     (p->isam_p_indx)++;
170 }
171
/* Dictionary grep callback: p is the struct grep_info accumulator; the
   reported entry is stored with add_isam_p.  Always returns 0. */
static int grep_handle(char *name, const char *info, void *p)
{
    struct grep_info *accum = (struct grep_info *) p;

    add_isam_p(name, info, accum);
    return 0;
}
177
178 static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src,
179                     const char *ct1, const char *ct2, int first)
180 {
181     const char *s1, *s0 = *src;
182     const char **map;
183
184     /* skip white space */
185     while (*s0)
186     {
187         if (ct1 && strchr(ct1, *s0))
188             break;
189         if (ct2 && strchr(ct2, *s0))
190             break;
191         s1 = s0;
192         map = zebra_maps_input(zebra_maps, reg_type, &s1, strlen(s1), first);
193         if (**map != *CHR_SPACE)
194             break;
195         s0 = s1;
196     }
197     *src = s0;
198     return *s0;
199 }
200
201
/* Render in_buf as a readable dump for debugging.
 * Every input byte becomes "HH:c  " (two hex digits, a colon, the
 * character itself or '?' when it is outside 32..126, two blanks).
 * Once the output grows beyond out_size-20 characters, ".." is appended
 * and the remaining input is ignored.
 *  out_buf:   destination, NUL-terminated on return
 *  out_size:  capacity of out_buf; must exceed 20
 *  in_buf:    bytes to dump
 *  in_size:   number of bytes in in_buf
 */
static void esc_str(char *out_buf, size_t out_size,
                    const char *in_buf, int in_size)
{
    size_t used = 0;    /* characters written so far */
    int idx;

    assert(out_buf);
    assert(in_buf);
    assert(out_size > 20);
    out_buf[0] = '\0';
    for (idx = 0; idx < in_size; idx++)
    {
        int byte = in_buf[idx] & 0xff;
        int shown = (byte >= 32 && byte <= 126) ? byte : '?';

        used += sprintf(out_buf + used, "%02X:%c  ", byte, shown);
        if (used > out_size-20)
        {
            strcpy(out_buf + used, "..");
            break;
        }
    }
}
227
228 #define REGEX_CHARS " []()|.*+?!"
229
230 /* term_100: handle term, where trunc = none(no operators at all) */
231 static int term_100(ZebraMaps zebra_maps, int reg_type,
232                     const char **src, char *dst, int space_split,
233                     char *dst_term)
234 {
235     const char *s0;
236     const char **map;
237     int i = 0;
238     int j = 0;
239
240     const char *space_start = 0;
241     const char *space_end = 0;
242
243     if (!term_pre(zebra_maps, reg_type, src, NULL, NULL, !space_split))
244         return 0;
245     s0 = *src;
246     while (*s0)
247     {
248         const char *s1 = s0;
249         int q_map_match = 0;
250         map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
251                                 &q_map_match);
252         if (space_split)
253         {
254             if (**map == *CHR_SPACE)
255                 break;
256         }
257         else  /* complete subfield only. */
258         {
259             if (**map == *CHR_SPACE)
260             {   /* save space mapping for later  .. */
261                 space_start = s1;
262                 space_end = s0;
263                 continue;
264             }
265             else if (space_start)
266             {   /* reload last space */
267                 while (space_start < space_end)
268                 {
269                     if (strchr(REGEX_CHARS, *space_start))
270                         dst[i++] = '\\';
271                     dst_term[j++] = *space_start;
272                     dst[i++] = *space_start++;
273                 }
274                 /* and reset */
275                 space_start = space_end = 0;
276             }
277         }
278         /* add non-space char */
279         memcpy(dst_term+j, s1, s0 - s1);
280         j += (s0 - s1);
281         if (!q_map_match)
282         {
283             while (s1 < s0)
284             {
285                 if (strchr(REGEX_CHARS, *s1))
286                     dst[i++] = '\\';
287                 dst[i++] = *s1++;
288             }
289         }
290         else
291         {
292             char tmpbuf[80];
293             esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
294             
295             strcpy(dst + i, map[0]);
296             i += strlen(map[0]);
297         }
298     }
299     dst[i] = '\0';
300     dst_term[j] = '\0';
301     *src = s0;
302     return i;
303 }
304
305 /* term_101: handle term, where trunc = Process # */
306 static int term_101(ZebraMaps zebra_maps, int reg_type,
307                     const char **src, char *dst, int space_split,
308                     char *dst_term)
309 {
310     const char *s0;
311     const char **map;
312     int i = 0;
313     int j = 0;
314
315     if (!term_pre(zebra_maps, reg_type, src, "#", "#", !space_split))
316         return 0;
317     s0 = *src;
318     while (*s0)
319     {
320         if (*s0 == '#')
321         {
322             dst[i++] = '.';
323             dst[i++] = '*';
324             dst_term[j++] = *s0++;
325         }
326         else
327         {
328             const char *s1 = s0;
329             int q_map_match = 0;
330             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
331                                     &q_map_match);
332             if (space_split && **map == *CHR_SPACE)
333                 break;
334
335             /* add non-space char */
336             memcpy(dst_term+j, s1, s0 - s1);
337             j += (s0 - s1);
338             if (!q_map_match)
339             {
340                 while (s1 < s0)
341                 {
342                     if (strchr(REGEX_CHARS, *s1))
343                         dst[i++] = '\\';
344                     dst[i++] = *s1++;
345                 }
346             }
347             else
348             {
349                 char tmpbuf[80];
350                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
351                 
352                 strcpy(dst + i, map[0]);
353                 i += strlen(map[0]);
354             }
355         }
356     }
357     dst[i] = '\0';
358     dst_term[j++] = '\0';
359     *src = s0;
360     return i;
361 }
362
363 /* term_103: handle term, where trunc = re-2 (regular expressions) */
364 static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src,
365                     char *dst, int *errors, int space_split,
366                     char *dst_term)
367 {
368     int i = 0;
369     int j = 0;
370     const char *s0;
371     const char **map;
372
373     if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split))
374         return 0;
375     s0 = *src;
376     if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
377         isdigit(((const unsigned char *)s0)[1]))
378     {
379         *errors = s0[1] - '0';
380         s0 += 3;
381         if (*errors > 3)
382             *errors = 3;
383     }
384     while (*s0)
385     {
386         if (strchr("^\\()[].*+?|-", *s0))
387         {
388             dst_term[j++] = *s0;
389             dst[i++] = *s0++;
390         }
391         else
392         {
393             const char *s1 = s0;
394             int q_map_match = 0;
395             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
396                                     &q_map_match);
397             if (space_split && **map == *CHR_SPACE)
398                 break;
399
400             /* add non-space char */
401             memcpy(dst_term+j, s1, s0 - s1);
402             j += (s0 - s1);
403             if (!q_map_match)
404             {
405                 while (s1 < s0)
406                 {
407                     if (strchr(REGEX_CHARS, *s1))
408                         dst[i++] = '\\';
409                     dst[i++] = *s1++;
410                 }
411             }
412             else
413             {
414                 char tmpbuf[80];
415                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
416                 
417                 strcpy(dst + i, map[0]);
418                 i += strlen(map[0]);
419             }
420         }
421     }
422     dst[i] = '\0';
423     dst_term[j] = '\0';
424     *src = s0;
425     
426     return i;
427 }
428
429 /* term_103: handle term, where trunc = re-1 (regular expressions) */
430 static int term_102(ZebraMaps zebra_maps, int reg_type, const char **src,
431                     char *dst, int space_split, char *dst_term)
432 {
433     return term_103(zebra_maps, reg_type, src, dst, NULL, space_split,
434                     dst_term);
435 }
436
437
438 /* term_104: handle term, where trunc = Process # and ! */
439 static int term_104(ZebraMaps zebra_maps, int reg_type,
440                     const char **src, char *dst, int space_split,
441                     char *dst_term)
442 {
443     const char *s0;
444     const char **map;
445     int i = 0;
446     int j = 0;
447
448     if (!term_pre(zebra_maps, reg_type, src, "?*#", "?*#", !space_split))
449         return 0;
450     s0 = *src;
451     while (*s0)
452     {
453         if (*s0 == '?')
454         {
455             dst_term[j++] = *s0++;
456             if (*s0 >= '0' && *s0 <= '9')
457             {
458                 int limit = 0;
459                 while (*s0 >= '0' && *s0 <= '9')
460                 {
461                     limit = limit * 10 + (*s0 - '0');
462                     dst_term[j++] = *s0++;
463                 }
464                 if (limit > 20)
465                     limit = 20;
466                 while (--limit >= 0)
467                 {
468                     dst[i++] = '.';
469                     dst[i++] = '?';
470                 }
471             }
472             else
473             {
474                 dst[i++] = '.';
475                 dst[i++] = '*';
476             }
477         }
478         else if (*s0 == '*')
479         {
480             dst[i++] = '.';
481             dst[i++] = '*';
482             dst_term[j++] = *s0++;
483         }
484         else if (*s0 == '#')
485         {
486             dst[i++] = '.';
487             dst_term[j++] = *s0++;
488         }
489         else
490         {
491             const char *s1 = s0;
492             int q_map_match = 0;
493             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
494                                     &q_map_match);
495             if (space_split && **map == *CHR_SPACE)
496                 break;
497
498             /* add non-space char */
499             memcpy(dst_term+j, s1, s0 - s1);
500             j += (s0 - s1);
501             if (!q_map_match)
502             {
503                 while (s1 < s0)
504                 {
505                     if (strchr(REGEX_CHARS, *s1))
506                         dst[i++] = '\\';
507                     dst[i++] = *s1++;
508                 }
509             }
510             else
511             {
512                 char tmpbuf[80];
513                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
514                 
515                 strcpy(dst + i, map[0]);
516                 i += strlen(map[0]);
517             }
518         }
519     }
520     dst[i] = '\0';
521     dst_term[j++] = '\0';
522     *src = s0;
523     return i;
524 }
525
526 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
527 static int term_105(ZebraMaps zebra_maps, int reg_type,
528                     const char **src, char *dst, int space_split,
529                     char *dst_term, int right_truncate)
530 {
531     const char *s0;
532     const char **map;
533     int i = 0;
534     int j = 0;
535
536     if (!term_pre(zebra_maps, reg_type, src, "*!", "*!", !space_split))
537         return 0;
538     s0 = *src;
539     while (*s0)
540     {
541         if (*s0 == '*')
542         {
543             dst[i++] = '.';
544             dst[i++] = '*';
545             dst_term[j++] = *s0++;
546         }
547         else if (*s0 == '!')
548         {
549             dst[i++] = '.';
550             dst_term[j++] = *s0++;
551         }
552         else
553         {
554             const char *s1 = s0;
555             int q_map_match = 0;
556             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
557                                     &q_map_match);
558             if (space_split && **map == *CHR_SPACE)
559                 break;
560
561             /* add non-space char */
562             memcpy(dst_term+j, s1, s0 - s1);
563             j += (s0 - s1);
564             if (!q_map_match)
565             {
566                 while (s1 < s0)
567                 {
568                     if (strchr(REGEX_CHARS, *s1))
569                         dst[i++] = '\\';
570                     dst[i++] = *s1++;
571                 }
572             }
573             else
574             {
575                 char tmpbuf[80];
576                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
577                 
578                 strcpy(dst + i, map[0]);
579                 i += strlen(map[0]);
580             }
581         }
582     }
583     if (right_truncate)
584     {
585         dst[i++] = '.';
586         dst[i++] = '*';
587     }
588     dst[i] = '\0';
589     
590     dst_term[j++] = '\0';
591     *src = s0;
592     return i;
593 }
594
595
596 /* gen_regular_rel - generate regular expression from relation
597  *  val:     border value (inclusive)
598  *  islt:    1 if <=; 0 if >=.
599  */
600 static void gen_regular_rel(char *dst, int val, int islt)
601 {
602     int dst_p;
603     int w, d, i;
604     int pos = 0;
605     char numstr[20];
606
607     yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
608     if (val >= 0)
609     {
610         if (islt)
611             strcpy(dst, "(-[0-9]+|(");
612         else
613             strcpy(dst, "((");
614     } 
615     else
616     {
617         if (!islt)
618         {
619             strcpy(dst, "([0-9]+|-(");
620             dst_p = strlen(dst);
621             islt = 1;
622         }
623         else
624         {
625             strcpy(dst, "(-(");
626             islt = 0;
627         }
628         val = -val;
629     }
630     dst_p = strlen(dst);
631     sprintf(numstr, "%d", val);
632     for (w = strlen(numstr); --w >= 0; pos++)
633     {
634         d = numstr[w];
635         if (pos > 0)
636         {
637             if (islt)
638             {
639                 if (d == '0')
640                     continue;
641                 d--;
642             } 
643             else
644             {
645                 if (d == '9')
646                     continue;
647                 d++;
648             }
649         }
650         
651         strcpy(dst + dst_p, numstr);
652         dst_p = strlen(dst) - pos - 1;
653
654         if (islt)
655         {
656             if (d != '0')
657             {
658                 dst[dst_p++] = '[';
659                 dst[dst_p++] = '0';
660                 dst[dst_p++] = '-';
661                 dst[dst_p++] = d;
662                 dst[dst_p++] = ']';
663             }
664             else
665                 dst[dst_p++] = d;
666         }
667         else
668         {
669             if (d != '9')
670             { 
671                 dst[dst_p++] = '[';
672                 dst[dst_p++] = d;
673                 dst[dst_p++] = '-';
674                 dst[dst_p++] = '9';
675                 dst[dst_p++] = ']';
676             }
677             else
678                 dst[dst_p++] = d;
679         }
680         for (i = 0; i<pos; i++)
681         {
682             dst[dst_p++] = '[';
683             dst[dst_p++] = '0';
684             dst[dst_p++] = '-';
685             dst[dst_p++] = '9';
686             dst[dst_p++] = ']';
687         }
688         dst[dst_p++] = '|';
689     }
690     dst[dst_p] = '\0';
691     if (islt)
692     {
693         /* match everything less than 10^(pos-1) */
694         strcat(dst, "0*");
695         for (i = 1; i<pos; i++)
696             strcat(dst, "[0-9]?");
697     }
698     else
699     {
700         /* match everything greater than 10^pos */
701         for (i = 0; i <= pos; i++)
702             strcat(dst, "[0-9]");
703         strcat(dst, "[0-9]*");
704     }
705     strcat(dst, "))");
706 }
707
/* Copy one (possibly backslash-escaped) character from src to *term_p.
 * If src[*indx] is a backslash, the backslash and the character after it
 * are copied; otherwise a single character is copied.  Both *term_p and
 * *indx are advanced past what was copied.
 */
void string_rel_add_char(char **term_p, const char *src, int *indx)
{
    char *out = *term_p;
    int pos = *indx;

    if (src[pos] == '\\')
        *out++ = src[pos++];
    *out++ = src[pos++];

    *term_p = out;
    *indx = pos;
}
714
715 /*
716  *   >  abc     ([b-].*|a[c-].*|ab[d-].*|abc.+)
717  *              ([^-a].*|a[^-b].*ab[^-c].*|abc.+)
718  *   >= abc     ([b-].*|a[c-].*|ab[c-].*)
719  *              ([^-a].*|a[^-b].*|ab[c-].*)
720  *   <  abc     ([-0].*|a[-a].*|ab[-b].*)
721  *              ([^a-].*|a[^b-].*|ab[^c-].*)
722  *   <= abc     ([-0].*|a[-a].*|ab[-b].*|abc)
723  *              ([^a-].*|a[^b-].*|ab[^c-].*|abc)
724  */
725 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
726                            const char **term_sub, char *term_dict,
727                            oid_value attributeSet,
728                            int reg_type, int space_split, char *term_dst,
729                            int *error_code)
730 {
731     AttrType relation;
732     int relation_value;
733     int i;
734     char *term_tmp = term_dict + strlen(term_dict);
735     char term_component[2*IT_MAX_WORD+20];
736
737     attr_init_APT(&relation, zapt, 2);
738     relation_value = attr_find(&relation, NULL);
739
740     *error_code = 0;
741     yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
742     switch (relation_value)
743     {
744     case 1:
745         if (!term_100(zh->reg->zebra_maps, reg_type,
746                       term_sub, term_component,
747                       space_split, term_dst))
748             return 0;
749         yaz_log(log_level_rpn, "Relation <");
750         
751         *term_tmp++ = '(';
752         for (i = 0; term_component[i]; )
753         {
754             int j = 0;
755
756             if (i)
757                 *term_tmp++ = '|';
758             while (j < i)
759                 string_rel_add_char(&term_tmp, term_component, &j);
760
761             *term_tmp++ = '[';
762
763             *term_tmp++ = '^';
764             string_rel_add_char(&term_tmp, term_component, &i);
765             *term_tmp++ = '-';
766
767             *term_tmp++ = ']';
768             *term_tmp++ = '.';
769             *term_tmp++ = '*';
770
771             if ((term_tmp - term_dict) > IT_MAX_WORD)
772                 break;
773         }
774         *term_tmp++ = ')';
775         *term_tmp = '\0';
776         break;
777     case 2:
778         if (!term_100(zh->reg->zebra_maps, reg_type,
779                       term_sub, term_component,
780                       space_split, term_dst))
781             return 0;
782         yaz_log(log_level_rpn, "Relation <=");
783
784         *term_tmp++ = '(';
785         for (i = 0; term_component[i]; )
786         {
787             int j = 0;
788
789             while (j < i)
790                 string_rel_add_char(&term_tmp, term_component, &j);
791             *term_tmp++ = '[';
792
793             *term_tmp++ = '^';
794             string_rel_add_char(&term_tmp, term_component, &i);
795             *term_tmp++ = '-';
796
797             *term_tmp++ = ']';
798             *term_tmp++ = '.';
799             *term_tmp++ = '*';
800
801             *term_tmp++ = '|';
802
803             if ((term_tmp - term_dict) > IT_MAX_WORD)
804                 break;
805         }
806         for (i = 0; term_component[i]; )
807             string_rel_add_char(&term_tmp, term_component, &i);
808         *term_tmp++ = ')';
809         *term_tmp = '\0';
810         break;
811     case 5:
812         if (!term_100 (zh->reg->zebra_maps, reg_type,
813                        term_sub, term_component, space_split, term_dst))
814             return 0;
815         yaz_log(log_level_rpn, "Relation >");
816
817         *term_tmp++ = '(';
818         for (i = 0; term_component[i];)
819         {
820             int j = 0;
821
822             while (j < i)
823                 string_rel_add_char(&term_tmp, term_component, &j);
824             *term_tmp++ = '[';
825             
826             *term_tmp++ = '^';
827             *term_tmp++ = '-';
828             string_rel_add_char(&term_tmp, term_component, &i);
829
830             *term_tmp++ = ']';
831             *term_tmp++ = '.';
832             *term_tmp++ = '*';
833
834             *term_tmp++ = '|';
835
836             if ((term_tmp - term_dict) > IT_MAX_WORD)
837                 break;
838         }
839         for (i = 0; term_component[i];)
840             string_rel_add_char(&term_tmp, term_component, &i);
841         *term_tmp++ = '.';
842         *term_tmp++ = '+';
843         *term_tmp++ = ')';
844         *term_tmp = '\0';
845         break;
846     case 4:
847         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
848                       term_component, space_split, term_dst))
849             return 0;
850         yaz_log(log_level_rpn, "Relation >=");
851
852         *term_tmp++ = '(';
853         for (i = 0; term_component[i];)
854         {
855             int j = 0;
856
857             if (i)
858                 *term_tmp++ = '|';
859             while (j < i)
860                 string_rel_add_char(&term_tmp, term_component, &j);
861             *term_tmp++ = '[';
862
863             if (term_component[i+1])
864             {
865                 *term_tmp++ = '^';
866                 *term_tmp++ = '-';
867                 string_rel_add_char(&term_tmp, term_component, &i);
868             }
869             else
870             {
871                 string_rel_add_char(&term_tmp, term_component, &i);
872                 *term_tmp++ = '-';
873             }
874             *term_tmp++ = ']';
875             *term_tmp++ = '.';
876             *term_tmp++ = '*';
877
878             if ((term_tmp - term_dict) > IT_MAX_WORD)
879                 break;
880         }
881         *term_tmp++ = ')';
882         *term_tmp = '\0';
883         break;
884     case 3:
885     case 102:
886     case 103:
887     case -1:
888         if (!**term_sub)
889             return 1;
890         yaz_log(log_level_rpn, "Relation =");
891         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
892                       term_component, space_split, term_dst))
893             return 0;
894         strcat(term_tmp, "(");
895         strcat(term_tmp, term_component);
896         strcat(term_tmp, ")");
897         break;
898     default:
899         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
900         return 0;
901     }
902     return 1;
903 }
904
905 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
906                              const char **term_sub, 
907                              oid_value attributeSet, NMEM stream,
908                              struct grep_info *grep_info,
909                              int reg_type, int complete_flag,
910                              int num_bases, char **basenames,
911                              char *term_dst,
912                              const char *xpath_use,
913                              struct ord_list **ol);
914
915 static ZEBRA_RES term_limits_APT(ZebraHandle zh,
916                                  Z_AttributesPlusTerm *zapt,
917                                  zint *hits_limit_value,
918                                  const char **term_ref_id_str,
919                                  NMEM nmem)
920 {
921     AttrType term_ref_id_attr;
922     AttrType hits_limit_attr;
923     int term_ref_id_int;
924  
925     attr_init_APT(&hits_limit_attr, zapt, 9);
926     *hits_limit_value  = attr_find(&hits_limit_attr, NULL);
927
928     attr_init_APT(&term_ref_id_attr, zapt, 10);
929     term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
930     if (term_ref_id_int >= 0)
931     {
932         char *res = nmem_malloc(nmem, 20);
933         sprintf(res, "%d", term_ref_id_int);
934         *term_ref_id_str = res;
935     }
936
937     /* no limit given ? */
938     if (*hits_limit_value == -1)
939     {
940         if (*term_ref_id_str)
941         {
942             /* use global if term_ref is present */
943             *hits_limit_value = zh->approx_limit;
944         }
945         else
946         {
947             /* no counting if term_ref is not present */
948             *hits_limit_value = 0;
949         }
950     }
951     else if (*hits_limit_value == 0)
952     {
953         /* 0 is the same as global limit */
954         *hits_limit_value = zh->approx_limit;
955     }
956     yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
957             *term_ref_id_str ? *term_ref_id_str : "none",
958             *hits_limit_value);
959     return ZEBRA_OK;
960 }
961
962 static ZEBRA_RES term_trunc(ZebraHandle zh,
963                             Z_AttributesPlusTerm *zapt,
964                             const char **term_sub, 
965                             oid_value attributeSet, NMEM stream,
966                             struct grep_info *grep_info,
967                             int reg_type, int complete_flag,
968                             int num_bases, char **basenames,
969                             char *term_dst,
970                             const char *rank_type, 
971                             const char *xpath_use,
972                             NMEM rset_nmem,
973                             RSET *rset,
974                             struct rset_key_control *kc)
975 {
976     ZEBRA_RES res;
977     struct ord_list *ol;
978     zint hits_limit_value;
979     const char *term_ref_id_str = 0;
980     *rset = 0;
981
982     term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
983                     stream);
984     grep_info->isam_p_indx = 0;
985     res = string_term(zh, zapt, term_sub, attributeSet, stream, grep_info,
986                       reg_type, complete_flag, num_bases, basenames,
987                       term_dst, xpath_use, &ol);
988     if (res != ZEBRA_OK)
989         return res;
990     if (!*term_sub)  /* no more terms ? */
991         return res;
992     yaz_log(log_level_rpn, "term: %s", term_dst);
993     *rset = rset_trunc(zh, grep_info->isam_p_buf,
994                        grep_info->isam_p_indx, term_dst,
995                        strlen(term_dst), rank_type, 1 /* preserve pos */,
996                        zapt->term->which, rset_nmem,
997                        kc, kc->scope, ol, reg_type, hits_limit_value,
998                        term_ref_id_str);
999     if (!*rset)
1000         return ZEBRA_FAIL;
1001     return ZEBRA_OK;
1002 }
1003
1004 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1005                              const char **term_sub, 
1006                              oid_value attributeSet, NMEM stream,
1007                              struct grep_info *grep_info,
1008                              int reg_type, int complete_flag,
1009                              int num_bases, char **basenames,
1010                              char *term_dst,
1011                              const char *xpath_use,
1012                              struct ord_list **ol)
1013 {
1014     char term_dict[2*IT_MAX_WORD+4000];
1015     int j, r, base_no;
1016     AttrType truncation;
1017     int truncation_value;
1018     oid_value curAttributeSet = attributeSet;
1019     const char *termp;
1020     struct rpn_char_map_info rcmi;
1021     int space_split = complete_flag ? 0 : 1;
1022
1023     int bases_ok = 0;     /* no of databases with OK attribute */
1024
1025     *ol = ord_list_create(stream);
1026
1027     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1028     attr_init_APT(&truncation, zapt, 5);
1029     truncation_value = attr_find(&truncation, NULL);
1030     yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1031
1032     for (base_no = 0; base_no < num_bases; base_no++)
1033     {
1034         int ord = -1;
1035         int attr_ok = 0;
1036         int regex_range = 0;
1037         int init_pos = 0;
1038         int max_pos, prefix_len = 0;
1039         int relation_error;
1040         char ord_buf[32];
1041         int ord_len, i;
1042
1043         termp = *term_sub;
1044
1045         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1046         {
1047             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1048                            basenames[base_no]);
1049             return ZEBRA_FAIL;
1050         }
1051         
1052         if (zebra_apt_get_ord(zh, zapt, zinfo_index_category_index,
1053                               reg_type, xpath_use, 
1054                               curAttributeSet, &ord) 
1055             != ZEBRA_OK)
1056         {
1057             break;
1058         }
1059         *ol = ord_list_append(stream, *ol, ord);
1060         
1061         if (prefix_len)
1062             term_dict[prefix_len++] = '|';
1063         else
1064             term_dict[prefix_len++] = '(';
1065         
1066         ord_len = key_SU_encode (ord, ord_buf);
1067         for (i = 0; i<ord_len; i++)
1068         {
1069             term_dict[prefix_len++] = 1;
1070                 term_dict[prefix_len++] = ord_buf[i];
1071         }
1072         if (ord_len > init_pos)
1073             init_pos = ord_len;
1074         
1075         bases_ok++;
1076         if (prefix_len)
1077             attr_ok = 1;
1078
1079         term_dict[prefix_len++] = ')';
1080         term_dict[prefix_len] = '\0';
1081         j = prefix_len;
1082         switch (truncation_value)
1083         {
1084         case -1:         /* not specified */
1085         case 100:        /* do not truncate */
1086             if (!string_relation (zh, zapt, &termp, term_dict,
1087                                   attributeSet,
1088                                   reg_type, space_split, term_dst,
1089                                   &relation_error))
1090             {
1091                 if (relation_error)
1092                 {
1093                     zebra_setError(zh, relation_error, 0);
1094                     return ZEBRA_FAIL;
1095                 }
1096                 *term_sub = 0;
1097                 return ZEBRA_OK;
1098             }
1099             break;
1100         case 1:          /* right truncation */
1101             term_dict[j++] = '(';
1102             if (!term_100(zh->reg->zebra_maps, reg_type,
1103                           &termp, term_dict + j, space_split, term_dst))
1104             {
1105                 *term_sub = 0;
1106                 return ZEBRA_OK;
1107             }
1108             strcat(term_dict, ".*)");
1109             break;
1110         case 2:          /* keft truncation */
1111             term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1112             if (!term_100(zh->reg->zebra_maps, reg_type,
1113                           &termp, term_dict + j, space_split, term_dst))
1114             {
1115                 *term_sub = 0;
1116                 return ZEBRA_OK;
1117             }
1118             strcat(term_dict, ")");
1119             break;
1120         case 3:          /* left&right truncation */
1121             term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1122             if (!term_100(zh->reg->zebra_maps, reg_type,
1123                           &termp, term_dict + j, space_split, term_dst))
1124             {
1125                 *term_sub = 0;
1126                 return ZEBRA_OK;
1127             }
1128             strcat(term_dict, ".*)");
1129             break;
1130         case 101:        /* process # in term */
1131             term_dict[j++] = '(';
1132             if (!term_101(zh->reg->zebra_maps, reg_type,
1133                           &termp, term_dict + j, space_split, term_dst))
1134             {
1135                 *term_sub = 0;
1136                 return ZEBRA_OK;
1137             }
1138             strcat(term_dict, ")");
1139             break;
1140         case 102:        /* Regexp-1 */
1141             term_dict[j++] = '(';
1142             if (!term_102(zh->reg->zebra_maps, reg_type,
1143                           &termp, term_dict + j, space_split, term_dst))
1144             {
1145                 *term_sub = 0;
1146                 return ZEBRA_OK;
1147             }
1148             strcat(term_dict, ")");
1149             break;
1150         case 103:       /* Regexp-2 */
1151             regex_range = 1;
1152             term_dict[j++] = '(';
1153             if (!term_103(zh->reg->zebra_maps, reg_type,
1154                           &termp, term_dict + j, &regex_range,
1155                           space_split, term_dst))
1156             {
1157                 *term_sub = 0;
1158                 return ZEBRA_OK;
1159             }
1160             strcat(term_dict, ")");
1161             break;
1162         case 104:        /* process # and ! in term */
1163             term_dict[j++] = '(';
1164             if (!term_104(zh->reg->zebra_maps, reg_type,
1165                           &termp, term_dict + j, space_split, term_dst))
1166             {
1167                 *term_sub = 0;
1168                 return ZEBRA_OK;
1169             }
1170             strcat(term_dict, ")");
1171             break;
1172         case 105:        /* process * and ! in term */
1173             term_dict[j++] = '(';
1174             if (!term_105(zh->reg->zebra_maps, reg_type,
1175                           &termp, term_dict + j, space_split, term_dst, 1))
1176             {
1177                 *term_sub = 0;
1178                 return ZEBRA_OK;
1179             }
1180             strcat(term_dict, ")");
1181             break;
1182         case 106:        /* process * and ! in term */
1183             term_dict[j++] = '(';
1184             if (!term_105(zh->reg->zebra_maps, reg_type,
1185                           &termp, term_dict + j, space_split, term_dst, 0))
1186             {
1187                 *term_sub = 0;
1188                 return ZEBRA_OK;
1189             }
1190             strcat(term_dict, ")");
1191             break;
1192         default:
1193             zebra_setError_zint(zh,
1194                                 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1195                                 truncation_value);
1196             return ZEBRA_FAIL;
1197         }
1198         if (attr_ok)
1199         {
1200             char buf[80];
1201             const char *input = term_dict + prefix_len;
1202             esc_str(buf, sizeof(buf), input, strlen(input));
1203         }
1204         if (attr_ok)
1205         {
1206             yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_dict+prefix_len);
1207             r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
1208                                  grep_info, &max_pos, init_pos,
1209                                  grep_handle);
1210             if (r)
1211                 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1212         }
1213     }
1214     if (!bases_ok)
1215         return ZEBRA_FAIL;
1216     *term_sub = termp;
1217     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1218     return ZEBRA_OK;
1219 }
1220
1221
1222 /* convert APT search term to UTF8 */
1223 static ZEBRA_RES zapt_term_to_utf8(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1224                                    char *termz)
1225 {
1226     size_t sizez;
1227     Z_Term *term = zapt->term;
1228
1229     switch (term->which)
1230     {
1231     case Z_Term_general:
1232         if (zh->iconv_to_utf8 != 0)
1233         {
1234             char *inbuf = (char *) term->u.general->buf;
1235             size_t inleft = term->u.general->len;
1236             char *outbuf = termz;
1237             size_t outleft = IT_MAX_WORD-1;
1238             size_t ret;
1239
1240             ret = yaz_iconv(zh->iconv_to_utf8, &inbuf, &inleft,
1241                         &outbuf, &outleft);
1242             if (ret == (size_t)(-1))
1243             {
1244                 ret = yaz_iconv(zh->iconv_to_utf8, 0, 0, 0, 0);
1245                 zebra_setError(
1246                     zh, 
1247                     YAZ_BIB1_QUERY_TERM_INCLUDES_CHARS_THAT_DO_NOT_TRANSLATE_INTO_,
1248                     0);
1249                 return ZEBRA_FAIL;
1250             }
1251             *outbuf = 0;
1252         }
1253         else
1254         {
1255             sizez = term->u.general->len;
1256             if (sizez > IT_MAX_WORD-1)
1257                 sizez = IT_MAX_WORD-1;
1258             memcpy (termz, term->u.general->buf, sizez);
1259             termz[sizez] = '\0';
1260         }
1261         break;
1262     case Z_Term_characterString:
1263         sizez = strlen(term->u.characterString);
1264         if (sizez > IT_MAX_WORD-1)
1265             sizez = IT_MAX_WORD-1;
1266         memcpy (termz, term->u.characterString, sizez);
1267         termz[sizez] = '\0';
1268         break;
1269     default:
1270         zebra_setError(zh, YAZ_BIB1_UNSUPP_CODED_VALUE_FOR_TERM, 0);
1271         return ZEBRA_FAIL;
1272     }
1273     return ZEBRA_OK;
1274 }
1275
1276 /* convert APT SCAN term to internal cmap */
1277 static ZEBRA_RES trans_scan_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1278                                  char *termz, int reg_type)
1279 {
1280     char termz0[IT_MAX_WORD];
1281
1282     if (zapt_term_to_utf8(zh, zapt, termz0) == ZEBRA_FAIL)
1283         return ZEBRA_FAIL;    /* error */
1284     else
1285     {
1286         const char **map;
1287         const char *cp = (const char *) termz0;
1288         const char *cp_end = cp + strlen(cp);
1289         const char *src;
1290         int i = 0;
1291         const char *space_map = NULL;
1292         int len;
1293             
1294         while ((len = (cp_end - cp)) > 0)
1295         {
1296             map = zebra_maps_input(zh->reg->zebra_maps, reg_type, &cp, len, 0);
1297             if (**map == *CHR_SPACE)
1298                 space_map = *map;
1299             else
1300             {
1301                 if (i && space_map)
1302                     for (src = space_map; *src; src++)
1303                         termz[i++] = *src;
1304                 space_map = NULL;
1305                 for (src = *map; *src; src++)
1306                     termz[i++] = *src;
1307             }
1308         }
1309         termz[i] = '\0';
1310     }
1311     return ZEBRA_OK;
1312 }
1313
1314 static void grep_info_delete(struct grep_info *grep_info)
1315 {
1316 #ifdef TERM_COUNT
1317     xfree(grep_info->term_no);
1318 #endif
1319     xfree(grep_info->isam_p_buf);
1320 }
1321
1322 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1323                                    Z_AttributesPlusTerm *zapt,
1324                                    struct grep_info *grep_info,
1325                                    int reg_type)
1326 {
1327     AttrType termset;
1328     int termset_value_numeric;
1329     const char *termset_value_string;
1330
1331 #ifdef TERM_COUNT
1332     grep_info->term_no = 0;
1333 #endif
1334     grep_info->isam_p_size = 0;
1335     grep_info->isam_p_buf = NULL;
1336     grep_info->zh = zh;
1337     grep_info->reg_type = reg_type;
1338     grep_info->termset = 0;
1339
1340     if (!zapt)
1341         return ZEBRA_OK;
1342     attr_init_APT(&termset, zapt, 8);
1343     termset_value_numeric =
1344         attr_find_ex(&termset, NULL, &termset_value_string);
1345     if (termset_value_numeric != -1)
1346     {
1347         char resname[32];
1348         const char *termset_name = 0;
1349         if (termset_value_numeric != -2)
1350         {
1351     
1352             sprintf(resname, "%d", termset_value_numeric);
1353             termset_name = resname;
1354         }
1355         else
1356             termset_name = termset_value_string;
1357         yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1358         grep_info->termset = resultSetAdd(zh, termset_name, 1);
1359         if (!grep_info->termset)
1360         {
1361             zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1362             return ZEBRA_FAIL;
1363         }
1364     }
1365     return ZEBRA_OK;
1366 }
1367                                
1368 /**
1369   \brief Create result set(s) for list of terms
1370   \param zh Zebra Handle
1371   \param termz term as used in query but converted to UTF-8
1372   \param attributeSet default attribute set
1373   \param stream memory for result
1374   \param reg_type register type ('w', 'p',..)
1375   \param complete_flag whether it's phrases or not
1376   \param rank_type term flags for ranking
1377   \param xpath_use use attribute for X-Path (-1 for no X-path)
1378   \param num_bases number of databases
1379   \param basenames array of databases
1380   \param rset_mem memory for result sets
1381   \param result_sets output result set for each term in list (output)
1382   \param number number of output result sets
1383   \param kc rset key control to be used for created result sets
1384 */
1385 static ZEBRA_RES term_list_trunc(ZebraHandle zh,
1386                                  Z_AttributesPlusTerm *zapt,
1387                                  const char *termz,
1388                                  oid_value attributeSet,
1389                                  NMEM stream,
1390                                  int reg_type, int complete_flag,
1391                                  const char *rank_type,
1392                                  const char *xpath_use,
1393                                  int num_bases, char **basenames, 
1394                                  NMEM rset_nmem,
1395                                  RSET **result_sets, int *num_result_sets,
1396                                  struct rset_key_control *kc)
1397 {
1398     char term_dst[IT_MAX_WORD+1];
1399     struct grep_info grep_info;
1400     const char *termp = termz;
1401     int alloc_sets = 0;
1402     int empty_term = *termz ? 0 : 1;
1403
1404     empty_term = 0;
1405     *num_result_sets = 0;
1406     *term_dst = 0;
1407     if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1408         return ZEBRA_FAIL;
1409     while(1)
1410     { 
1411         ZEBRA_RES res;
1412
1413         if (alloc_sets == *num_result_sets)
1414         {
1415             int add = 10;
1416             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1417                                               sizeof(*rnew));
1418             if (alloc_sets)
1419                 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1420             alloc_sets = alloc_sets + add;
1421             *result_sets = rnew;
1422         }
1423         res = term_trunc(zh, zapt, &termp, attributeSet,
1424                          stream, &grep_info,
1425                          reg_type, complete_flag,
1426                          num_bases, basenames,
1427                          term_dst, rank_type,
1428                          xpath_use, rset_nmem,
1429                          &(*result_sets)[*num_result_sets],
1430                          kc);
1431         if (res != ZEBRA_OK)
1432         {
1433             int i;
1434             for (i = 0; i < *num_result_sets; i++)
1435                 rset_delete((*result_sets)[i]);
1436             grep_info_delete (&grep_info);
1437             return res;
1438         }
1439         if ((*result_sets)[*num_result_sets] == 0)
1440             break;
1441         (*num_result_sets)++;
1442
1443         if (empty_term)
1444             break;
1445         if (!*termp)
1446             break;
1447     }
1448     grep_info_delete(&grep_info);
1449     return ZEBRA_OK;
1450 }
1451
1452
1453 static ZEBRA_RES always_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1454                              oid_value attributeSet, NMEM stream,
1455                              struct grep_info *grep_info,
1456                              int reg_type, int complete_flag,
1457                              int num_bases, char **basenames,
1458                              char *term_dst,
1459                              const char *xpath_use,
1460                              struct ord_list **ol)
1461 {
1462     char term_dict[2*IT_MAX_WORD+4000];
1463     int r, base_no;
1464     struct rpn_char_map_info rcmi;
1465
1466     int bases_ok = 0;     /* no of databases with OK attribute */
1467
1468     *ol = ord_list_create(stream);
1469
1470     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1471
1472     for (base_no = 0; base_no < num_bases; base_no++)
1473     {
1474         int ord = -1;
1475         int regex_range = 0;
1476         int init_pos = 0;
1477         int max_pos, prefix_len = 0;
1478         char ord_buf[32];
1479         int ord_len, i;
1480
1481         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1482         {
1483             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1484                            basenames[base_no]);
1485             return ZEBRA_FAIL;
1486         }
1487         
1488         if (zebra_apt_get_ord(zh, zapt, zinfo_index_category_alwaysmatches,
1489                               reg_type, xpath_use, 
1490                               attributeSet, &ord) != ZEBRA_OK)
1491             return ZEBRA_FAIL;
1492         yaz_log(YLOG_LOG, "Got ordinal value: %d", ord);
1493         *ol = ord_list_append(stream, *ol, ord);
1494         
1495         if (prefix_len)
1496             term_dict[prefix_len++] = '|';
1497         else
1498             term_dict[prefix_len++] = '(';
1499         
1500         ord_len = key_SU_encode (ord, ord_buf);
1501         for (i = 0; i<ord_len; i++)
1502         {
1503             term_dict[prefix_len++] = 1;
1504             term_dict[prefix_len++] = ord_buf[i];
1505         }
1506         if (ord_len > init_pos)
1507             init_pos = ord_len;
1508         
1509         bases_ok++;
1510
1511         term_dict[prefix_len++] = ')';
1512         term_dict[prefix_len] = '\0';
1513         
1514         r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
1515                              grep_info, &max_pos, init_pos,
1516                              grep_handle);
1517     }
1518     if (!bases_ok)
1519         return ZEBRA_FAIL;
1520     yaz_log(YLOG_LOG, "always_term: %d positions", grep_info->isam_p_indx);
1521     return ZEBRA_OK;
1522 }
1523
1524 static ZEBRA_RES rpn_search_APT_alwaysmatches(ZebraHandle zh,
1525                                               Z_AttributesPlusTerm *zapt,
1526                                               const char *termz_org,
1527                                               oid_value attributeSet,
1528                                               NMEM stream,
1529                                               int reg_type, int complete_flag,
1530                                               const char *rank_type,
1531                                               const char *xpath_use,
1532                                               int num_bases, char **basenames, 
1533                                               NMEM rset_nmem,
1534                                               RSET *rset,
1535                                               struct rset_key_control *kc)
1536 {
1537     char term_dst[IT_MAX_WORD+1];
1538     struct grep_info grep_info;
1539     zint hits_limit_value;
1540     const char *term_ref_id_str = 0;
1541     ZEBRA_RES res;
1542     struct ord_list *ol;
1543
1544     term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
1545                     stream);
1546     if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1547         return ZEBRA_FAIL;
1548
1549     grep_info.isam_p_indx = 0;
1550
1551     res = always_term(zh, zapt, attributeSet, stream, &grep_info,
1552                       reg_type, complete_flag, num_bases, basenames,
1553                       term_dst, xpath_use, &ol);
1554     if (res == ZEBRA_OK)
1555     {
1556         *rset = rset_trunc(zh, grep_info.isam_p_buf,
1557                            grep_info.isam_p_indx, term_dst, strlen(term_dst),
1558                            rank_type, 1 /* preserve pos */,
1559                            zapt->term->which, rset_nmem,
1560                            kc, kc->scope, ol, reg_type, hits_limit_value,
1561                            term_ref_id_str);
1562         if (!*rset)
1563             res = ZEBRA_FAIL;
1564     }
1565     grep_info_delete (&grep_info);
1566     return res;
1567 }
1568
1569 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1570                                        Z_AttributesPlusTerm *zapt,
1571                                        const char *termz_org,
1572                                        oid_value attributeSet,
1573                                        NMEM stream,
1574                                        int reg_type, int complete_flag,
1575                                        const char *rank_type,
1576                                        const char *xpath_use,
1577                                        int num_bases, char **basenames, 
1578                                        NMEM rset_nmem,
1579                                        RSET *rset,
1580                                        struct rset_key_control *kc)
1581 {
1582     RSET *result_sets = 0;
1583     int num_result_sets = 0;
1584     ZEBRA_RES res =
1585         term_list_trunc(zh, zapt, termz_org, attributeSet,
1586                         stream, reg_type, complete_flag,
1587                         rank_type, xpath_use,
1588                         num_bases, basenames,
1589                         rset_nmem,
1590                         &result_sets, &num_result_sets, kc);
1591     if (res != ZEBRA_OK)
1592         return res;
1593     if (num_result_sets == 0)
1594         *rset = rset_create_null(rset_nmem, kc, 0); 
1595     else if (num_result_sets == 1)
1596         *rset = result_sets[0];
1597     else
1598         *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1599                                  num_result_sets, result_sets,
1600                                  1 /* ordered */, 0 /* exclusion */,
1601                                  3 /* relation */, 1 /* distance */);
1602     if (!*rset)
1603         return ZEBRA_FAIL;
1604     return ZEBRA_OK;
1605 }
1606
1607 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1608                                         Z_AttributesPlusTerm *zapt,
1609                                         const char *termz_org,
1610                                         oid_value attributeSet,
1611                                         NMEM stream,
1612                                         int reg_type, int complete_flag,
1613                                         const char *rank_type,
1614                                         const char *xpath_use,
1615                                         int num_bases, char **basenames,
1616                                         NMEM rset_nmem,
1617                                         RSET *rset,
1618                                         struct rset_key_control *kc)
1619 {
1620     RSET *result_sets = 0;
1621     int num_result_sets = 0;
1622     ZEBRA_RES res =
1623         term_list_trunc(zh, zapt, termz_org, attributeSet,
1624                         stream, reg_type, complete_flag,
1625                         rank_type, xpath_use,
1626                         num_bases, basenames,
1627                         rset_nmem,
1628                         &result_sets, &num_result_sets, kc);
1629     if (res != ZEBRA_OK)
1630         return res;
1631     if (num_result_sets == 0)
1632         *rset = rset_create_null(rset_nmem, kc, 0); 
1633     else if (num_result_sets == 1)
1634         *rset = result_sets[0];
1635     else
1636         *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1637                                num_result_sets, result_sets);
1638     if (!*rset)
1639         return ZEBRA_FAIL;
1640     return ZEBRA_OK;
1641 }
1642
1643 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1644                                          Z_AttributesPlusTerm *zapt,
1645                                          const char *termz_org,
1646                                          oid_value attributeSet,
1647                                          NMEM stream,
1648                                          int reg_type, int complete_flag,
1649                                          const char *rank_type, 
1650                                          const char *xpath_use,
1651                                          int num_bases, char **basenames,
1652                                          NMEM rset_nmem,
1653                                          RSET *rset,
1654                                          struct rset_key_control *kc)
1655 {
1656     RSET *result_sets = 0;
1657     int num_result_sets = 0;
1658     ZEBRA_RES res =
1659         term_list_trunc(zh, zapt, termz_org, attributeSet,
1660                         stream, reg_type, complete_flag,
1661                         rank_type, xpath_use,
1662                         num_bases, basenames,
1663                         rset_nmem,
1664                         &result_sets, &num_result_sets,
1665                         kc);
1666     if (res != ZEBRA_OK)
1667         return res;
1668     if (num_result_sets == 0)
1669         *rset = rset_create_null(rset_nmem, kc, 0); 
1670     else if (num_result_sets == 1)
1671         *rset = result_sets[0];
1672     else
1673         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1674                                 num_result_sets, result_sets);
1675     if (!*rset)
1676         return ZEBRA_FAIL;
1677     return ZEBRA_OK;
1678 }
1679
1680 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1681                             const char **term_sub,
1682                             char *term_dict,
1683                             oid_value attributeSet,
1684                             struct grep_info *grep_info,
1685                             int *max_pos,
1686                             int reg_type,
1687                             char *term_dst,
1688                             int *error_code)
1689 {
1690     AttrType relation;
1691     int relation_value;
1692     int term_value;
1693     int r;
1694     char *term_tmp = term_dict + strlen(term_dict);
1695
1696     *error_code = 0;
1697     attr_init_APT(&relation, zapt, 2);
1698     relation_value = attr_find(&relation, NULL);
1699
1700     yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1701
1702     if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1703                   term_dst))
1704         return 0;
1705     term_value = atoi (term_tmp);
1706     switch (relation_value)
1707     {
1708     case 1:
1709         yaz_log(log_level_rpn, "Relation <");
1710         gen_regular_rel(term_tmp, term_value-1, 1);
1711         break;
1712     case 2:
1713         yaz_log(log_level_rpn, "Relation <=");
1714         gen_regular_rel(term_tmp, term_value, 1);
1715         break;
1716     case 4:
1717         yaz_log(log_level_rpn, "Relation >=");
1718         gen_regular_rel(term_tmp, term_value, 0);
1719         break;
1720     case 5:
1721         yaz_log(log_level_rpn, "Relation >");
1722         gen_regular_rel(term_tmp, term_value+1, 0);
1723         break;
1724     case -1:
1725     case 3:
1726         yaz_log(log_level_rpn, "Relation =");
1727         sprintf(term_tmp, "(0*%d)", term_value);
1728         break;
1729     default:
1730         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1731         return 0;
1732     }
1733     yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_tmp);
1734     r = dict_lookup_grep(zh->reg->dict, term_dict, 0, grep_info, max_pos,
1735                           0, grep_handle);
1736     if (r)
1737         yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1738     yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1739     return 1;
1740 }
1741
1742 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1743                               const char **term_sub, 
1744                               oid_value attributeSet,
1745                               struct grep_info *grep_info,
1746                               int reg_type, int complete_flag,
1747                               int num_bases, char **basenames,
1748                               char *term_dst, 
1749                               const char *xpath_use,
1750                               NMEM stream)
1751 {
1752     char term_dict[2*IT_MAX_WORD+2];
1753     int base_no;
1754     oid_value curAttributeSet = attributeSet;
1755     const char *termp;
1756     struct rpn_char_map_info rcmi;
1757
1758     int bases_ok = 0;     /* no of databases with OK attribute */
1759
1760     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1761
1762     for (base_no = 0; base_no < num_bases; base_no++)
1763     {
1764         int max_pos, prefix_len = 0;
1765         int relation_error = 0;
1766         int ord, ord_len, i;
1767         char ord_buf[32];
1768
1769         termp = *term_sub;
1770
1771         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1772         {
1773             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1774                            basenames[base_no]);
1775             return ZEBRA_FAIL;
1776         }
1777
1778         if (zebra_apt_get_ord(zh, zapt, zinfo_index_category_index,
1779                               reg_type, xpath_use, curAttributeSet, &ord) 
1780             != ZEBRA_OK)
1781         {
1782             break;
1783         }
1784
1785         if (prefix_len)
1786             term_dict[prefix_len++] = '|';
1787         else
1788             term_dict[prefix_len++] = '(';
1789         
1790         ord_len = key_SU_encode (ord, ord_buf);
1791         for (i = 0; i < ord_len; i++)
1792         {
1793             term_dict[prefix_len++] = 1;
1794                 term_dict[prefix_len++] = ord_buf[i];
1795         }
1796         bases_ok++;
1797         term_dict[prefix_len++] = ')';
1798         term_dict[prefix_len] = '\0';
1799         if (!numeric_relation(zh, zapt, &termp, term_dict,
1800                               attributeSet, grep_info, &max_pos, reg_type,
1801                               term_dst, &relation_error))
1802         {
1803             if (relation_error)
1804             {
1805                 zebra_setError(zh, relation_error, 0);
1806                 return ZEBRA_FAIL;
1807             }
1808             *term_sub = 0;
1809             return ZEBRA_OK;
1810         }
1811     }
1812     if (!bases_ok)
1813         return ZEBRA_FAIL;
1814     *term_sub = termp;
1815     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1816     return ZEBRA_OK;
1817 }
1818
1819                                  
1820 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1821                                         Z_AttributesPlusTerm *zapt,
1822                                         const char *termz,
1823                                         oid_value attributeSet,
1824                                         NMEM stream,
1825                                         int reg_type, int complete_flag,
1826                                         const char *rank_type, 
1827                                         const char *xpath_use,
1828                                         int num_bases, char **basenames,
1829                                         NMEM rset_nmem,
1830                                         RSET *rset,
1831                                         struct rset_key_control *kc)
1832 {
1833     char term_dst[IT_MAX_WORD+1];
1834     const char *termp = termz;
1835     RSET *result_sets = 0;
1836     int num_result_sets = 0;
1837     ZEBRA_RES res;
1838     struct grep_info grep_info;
1839     int alloc_sets = 0;
1840     zint hits_limit_value;
1841     const char *term_ref_id_str = 0;
1842
1843     term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
1844
1845     yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1846     if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1847         return ZEBRA_FAIL;
1848     while (1)
1849     { 
1850         if (alloc_sets == num_result_sets)
1851         {
1852             int add = 10;
1853             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1854                                               sizeof(*rnew));
1855             if (alloc_sets)
1856                 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1857             alloc_sets = alloc_sets + add;
1858             result_sets = rnew;
1859         }
1860         yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1861         grep_info.isam_p_indx = 0;
1862         res = numeric_term(zh, zapt, &termp, attributeSet, &grep_info,
1863                            reg_type, complete_flag, num_bases, basenames,
1864                            term_dst, xpath_use,
1865                            stream);
1866         if (res == ZEBRA_FAIL || termp == 0)
1867             break;
1868         yaz_log(YLOG_DEBUG, "term: %s", term_dst);
1869         result_sets[num_result_sets] =
1870             rset_trunc(zh, grep_info.isam_p_buf,
1871                        grep_info.isam_p_indx, term_dst,
1872                        strlen(term_dst), rank_type,
1873                        0 /* preserve position */,
1874                        zapt->term->which, rset_nmem, 
1875                        kc, kc->scope, 0, reg_type,
1876                        hits_limit_value,
1877                        term_ref_id_str);
1878         if (!result_sets[num_result_sets])
1879             break;
1880         num_result_sets++;
1881     }
1882     grep_info_delete(&grep_info);
1883     if (termp)
1884     {
1885         int i;
1886         for (i = 0; i<num_result_sets; i++)
1887             rset_delete(result_sets[i]);
1888         return ZEBRA_FAIL;
1889     }
1890     if (num_result_sets == 0)
1891         *rset = rset_create_null(rset_nmem, kc, 0);
1892     if (num_result_sets == 1)
1893         *rset = result_sets[0];
1894     else
1895         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1896                                 num_result_sets, result_sets);
1897     if (!*rset)
1898         return ZEBRA_FAIL;
1899     return ZEBRA_OK;
1900 }
1901
1902 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1903                                       Z_AttributesPlusTerm *zapt,
1904                                       const char *termz,
1905                                       oid_value attributeSet,
1906                                       NMEM stream,
1907                                       const char *rank_type, NMEM rset_nmem,
1908                                       RSET *rset,
1909                                       struct rset_key_control *kc)
1910 {
1911     RSFD rsfd;
1912     struct it_key key;
1913     int sys;
1914     *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1915                              res_get (zh->res, "setTmpDir"),0 );
1916     rsfd = rset_open(*rset, RSETF_WRITE);
1917     
1918     sys = atoi(termz);
1919     if (sys <= 0)
1920         sys = 1;
1921     key.mem[0] = sys;
1922     key.mem[1] = 1;
1923     key.len = 2;
1924     rset_write (rsfd, &key);
1925     rset_close (rsfd);
1926     return ZEBRA_OK;
1927 }
1928
1929 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1930                                oid_value attributeSet, NMEM stream,
1931                                Z_SortKeySpecList *sort_sequence,
1932                                const char *rank_type,
1933                                NMEM rset_nmem,
1934                                RSET *rset,
1935                                struct rset_key_control *kc)
1936 {
1937     int i;
1938     int sort_relation_value;
1939     AttrType sort_relation_type;
1940     Z_SortKeySpec *sks;
1941     Z_SortKey *sk;
1942     int oid[OID_SIZE];
1943     oident oe;
1944     char termz[20];
1945     
1946     attr_init_APT(&sort_relation_type, zapt, 7);
1947     sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
1948
1949     if (!sort_sequence->specs)
1950     {
1951         sort_sequence->num_specs = 10;
1952         sort_sequence->specs = (Z_SortKeySpec **)
1953             nmem_malloc(stream, sort_sequence->num_specs *
1954                          sizeof(*sort_sequence->specs));
1955         for (i = 0; i<sort_sequence->num_specs; i++)
1956             sort_sequence->specs[i] = 0;
1957     }
1958     if (zapt->term->which != Z_Term_general)
1959         i = 0;
1960     else
1961         i = atoi_n ((char *) zapt->term->u.general->buf,
1962                     zapt->term->u.general->len);
1963     if (i >= sort_sequence->num_specs)
1964         i = 0;
1965     sprintf(termz, "%d", i);
1966
1967     oe.proto = PROTO_Z3950;
1968     oe.oclass = CLASS_ATTSET;
1969     oe.value = attributeSet;
1970     if (!oid_ent_to_oid (&oe, oid))
1971         return ZEBRA_FAIL;
1972
1973     sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1974     sks->sortElement = (Z_SortElement *)
1975         nmem_malloc(stream, sizeof(*sks->sortElement));
1976     sks->sortElement->which = Z_SortElement_generic;
1977     sk = sks->sortElement->u.generic = (Z_SortKey *)
1978         nmem_malloc(stream, sizeof(*sk));
1979     sk->which = Z_SortKey_sortAttributes;
1980     sk->u.sortAttributes = (Z_SortAttributes *)
1981         nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
1982
1983     sk->u.sortAttributes->id = oid;
1984     sk->u.sortAttributes->list = zapt->attributes;
1985
1986     sks->sortRelation = (int *)
1987         nmem_malloc(stream, sizeof(*sks->sortRelation));
1988     if (sort_relation_value == 1)
1989         *sks->sortRelation = Z_SortKeySpec_ascending;
1990     else if (sort_relation_value == 2)
1991         *sks->sortRelation = Z_SortKeySpec_descending;
1992     else 
1993         *sks->sortRelation = Z_SortKeySpec_ascending;
1994
1995     sks->caseSensitivity = (int *)
1996         nmem_malloc(stream, sizeof(*sks->caseSensitivity));
1997     *sks->caseSensitivity = 0;
1998
1999     sks->which = Z_SortKeySpec_null;
2000     sks->u.null = odr_nullval ();
2001     sort_sequence->specs[i] = sks;
2002     *rset = rset_create_null(rset_nmem, kc, 0);
2003     return ZEBRA_OK;
2004 }
2005
2006
2007 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2008                            oid_value attributeSet,
2009                            struct xpath_location_step *xpath, int max,
2010                            NMEM mem)
2011 {
2012     oid_value curAttributeSet = attributeSet;
2013     AttrType use;
2014     const char *use_string = 0;
2015     
2016     attr_init_APT(&use, zapt, 1);
2017     attr_find_ex(&use, &curAttributeSet, &use_string);
2018
2019     if (!use_string || *use_string != '/')
2020         return -1;
2021
2022     return zebra_parse_xpath_str(use_string, xpath, max, mem);
2023 }
2024  
2025                
2026
2027 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
2028                         int reg_type, const char *term, 
2029                         const char *xpath_use,
2030                         NMEM rset_nmem,
2031                         struct rset_key_control *kc)
2032 {
2033     RSET rset;
2034     struct grep_info grep_info;
2035     char term_dict[2048];
2036     char ord_buf[32];
2037     int prefix_len = 0;
2038     int ord = zebraExplain_lookup_attr_str(zh->reg->zei, 
2039                                            zinfo_index_category_index,
2040                                            reg_type,
2041                                            xpath_use);
2042     int ord_len, i, r, max_pos;
2043     int term_type = Z_Term_characterString;
2044     const char *flags = "void";
2045
2046     if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, '0') == ZEBRA_FAIL)
2047         return rset_create_null(rset_nmem, kc, 0);
2048     
2049     if (ord < 0)
2050         return rset_create_null(rset_nmem, kc, 0);
2051     if (prefix_len)
2052         term_dict[prefix_len++] = '|';
2053     else
2054         term_dict[prefix_len++] = '(';
2055     
2056     ord_len = key_SU_encode (ord, ord_buf);
2057     for (i = 0; i<ord_len; i++)
2058     {
2059         term_dict[prefix_len++] = 1;
2060         term_dict[prefix_len++] = ord_buf[i];
2061     }
2062     term_dict[prefix_len++] = ')';
2063     strcpy(term_dict+prefix_len, term);
2064     
2065     grep_info.isam_p_indx = 0;
2066     r = dict_lookup_grep(zh->reg->dict, term_dict, 0,
2067                           &grep_info, &max_pos, 0, grep_handle);
2068     yaz_log(YLOG_DEBUG, "%s %d positions", term,
2069              grep_info.isam_p_indx);
2070     rset = rset_trunc(zh, grep_info.isam_p_buf,
2071                       grep_info.isam_p_indx, term, strlen(term),
2072                       flags, 1, term_type,rset_nmem,
2073                       kc, kc->scope, 0, reg_type, 0 /* hits_limit */,
2074                       0 /* term_ref_id_str */);
2075     grep_info_delete(&grep_info);
2076     return rset;
2077 }
2078
2079 static
2080 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2081                            int num_bases, char **basenames,
2082                            NMEM stream, const char *rank_type, RSET rset,
2083                            int xpath_len, struct xpath_location_step *xpath,
2084                            NMEM rset_nmem,
2085                            RSET *rset_out,
2086                            struct rset_key_control *kc)
2087 {
2088     int base_no;
2089     int i;
2090     int always_matches = rset ? 0 : 1;
2091
2092     if (xpath_len < 0)
2093     {
2094         *rset_out = rset;
2095         return ZEBRA_OK;
2096     }
2097
2098     yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2099     for (i = 0; i<xpath_len; i++)
2100     {
2101         yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2102
2103     }
2104
2105     /*
2106       //a    ->    a/.*
2107       //a/b  ->    b/a/.*
2108       /a     ->    a/
2109       /a/b   ->    b/a/
2110
2111       /      ->    none
2112
2113    a[@attr = value]/b[@other = othervalue]
2114
2115  /e/@a val      range(e/,range(@a,freetext(w,1015,val),@a),e/)
2116  /a/b val       range(b/a/,freetext(w,1016,val),b/a/)
2117  /a/b/@c val    range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2118  /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2119  /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2120  /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
2121       
2122     */
2123
2124     dict_grep_cmap (zh->reg->dict, 0, 0);
2125
2126     for (base_no = 0; base_no < num_bases; base_no++)
2127     {
2128         int level = xpath_len;
2129         int first_path = 1;
2130         
2131         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2132         {
2133             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2134                            basenames[base_no]);
2135             *rset_out = rset;
2136             return ZEBRA_FAIL;
2137         }
2138         while (--level >= 0)
2139         {
2140             char xpath_rev[128];
2141             int i, len;
2142             RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
2143
2144             *xpath_rev = 0;
2145             len = 0;
2146             for (i = level; i >= 1; --i)
2147             {
2148                 const char *cp = xpath[i].part;
2149                 if (*cp)
2150                 {
2151                     for (;*cp; cp++)
2152                         if (*cp == '*')
2153                         {
2154                             memcpy (xpath_rev + len, "[^/]*", 5);
2155                             len += 5;
2156                         }
2157                         else if (*cp == ' ')
2158                         {
2159
2160                             xpath_rev[len++] = 1;
2161                             xpath_rev[len++] = ' ';
2162                         }
2163
2164                         else
2165                             xpath_rev[len++] = *cp;
2166                     xpath_rev[len++] = '/';
2167                 }
2168                 else if (i == 1)  /* // case */
2169                 {
2170                     xpath_rev[len++] = '.';
2171                     xpath_rev[len++] = '*';
2172                 }
2173             }
2174             xpath_rev[len] = 0;
2175
2176             if (xpath[level].predicate &&
2177                 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2178                 xpath[level].predicate->u.relation.name[0])
2179             {
2180                 WRBUF wbuf = wrbuf_alloc();
2181                 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2182                 if (xpath[level].predicate->u.relation.value)
2183                 {
2184                     const char *cp = xpath[level].predicate->u.relation.value;
2185                     wrbuf_putc(wbuf, '=');
2186                     
2187                     while (*cp)
2188                     {
2189                         if (strchr(REGEX_CHARS, *cp))
2190                             wrbuf_putc(wbuf, '\\');
2191                         wrbuf_putc(wbuf, *cp);
2192                         cp++;
2193                     }
2194                 }
2195                 wrbuf_puts(wbuf, "");
2196                 rset_attr = xpath_trunc(
2197                     zh, stream, '0', wrbuf_buf(wbuf), ZEBRA_XPATH_ATTR_NAME, 
2198                     rset_nmem, kc);
2199                 wrbuf_free(wbuf, 1);
2200             } 
2201             else 
2202             {
2203                 if (!first_path)
2204                     continue;
2205             }
2206             yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level, xpath_rev);
2207             if (strlen(xpath_rev))
2208             {
2209                 rset_start_tag = xpath_trunc(zh, stream, '0', 
2210                                              xpath_rev, 
2211                                              ZEBRA_XPATH_ELM_BEGIN, 
2212                                              rset_nmem, kc);
2213                 if (always_matches)
2214                     rset = rset_start_tag;
2215                 else
2216                 {
2217                     rset_end_tag = xpath_trunc(zh, stream, '0', 
2218                                                xpath_rev, 
2219                                                ZEBRA_XPATH_ELM_END, 
2220                                                rset_nmem, kc);
2221                     
2222                     rset = rset_create_between(rset_nmem, kc, kc->scope,
2223                                                rset_start_tag, rset,
2224                                                rset_end_tag, rset_attr);
2225                 }
2226             }
2227             first_path = 0;
2228         }
2229     }
2230     *rset_out = rset;
2231     return ZEBRA_OK;
2232 }
2233
2234 #define MAX_XPATH_STEPS 10
2235
2236 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2237                                 oid_value attributeSet, NMEM stream,
2238                                 Z_SortKeySpecList *sort_sequence,
2239                                 int num_bases, char **basenames, 
2240                                 NMEM rset_nmem,
2241                                 RSET *rset,
2242                                 struct rset_key_control *kc)
2243 {
2244     ZEBRA_RES res = ZEBRA_OK;
2245     unsigned reg_id;
2246     char *search_type = NULL;
2247     char rank_type[128];
2248     int complete_flag;
2249     int sort_flag;
2250     char termz[IT_MAX_WORD+1];
2251     int xpath_len;
2252     const char *xpath_use = 0;
2253     struct xpath_location_step xpath[MAX_XPATH_STEPS];
2254
2255     if (!log_level_set)
2256     {
2257         log_level_rpn = yaz_log_module_level("rpn");
2258         log_level_set = 1;
2259     }
2260     zebra_maps_attr(zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2261                     rank_type, &complete_flag, &sort_flag);
2262     
2263     yaz_log(YLOG_DEBUG, "reg_id=%c", reg_id);
2264     yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2265     yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2266     yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2267
2268     if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2269         return ZEBRA_FAIL;
2270
2271     if (sort_flag)
2272         return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2273                              rank_type, rset_nmem, rset, kc);
2274     /* consider if an X-Path query is used */
2275     xpath_len = rpn_check_xpath(zh, zapt, attributeSet, 
2276                                 xpath, MAX_XPATH_STEPS, stream);
2277     if (xpath_len >= 0)
2278     {
2279         if (xpath[xpath_len-1].part[0] == '@') 
2280             xpath_use = ZEBRA_XPATH_ATTR_CDATA;  /* last step is attribute  */
2281         else
2282             xpath_use = ZEBRA_XPATH_CDATA;  /* searching for cdata */        
2283     }
2284
2285     /* search using one of the various search type strategies
2286        termz is our UTF-8 search term
2287        attributeSet is top-level default attribute set 
2288        stream is ODR for search
2289        reg_id is the register type
2290        complete_flag is 1 for complete subfield, 0 for incomplete
2291        xpath_use is use-attribute to be used for X-Path search, 0 for none
2292     */
2293     if (!strcmp(search_type, "phrase"))
2294     {
2295         res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2296                                     reg_id, complete_flag, rank_type,
2297                                     xpath_use,
2298                                     num_bases, basenames, rset_nmem,
2299                                     rset, kc);
2300     }
2301     else if (!strcmp(search_type, "and-list"))
2302     {
2303         res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2304                                       reg_id, complete_flag, rank_type,
2305                                       xpath_use,
2306                                       num_bases, basenames, rset_nmem,
2307                                       rset, kc);
2308     }
2309     else if (!strcmp(search_type, "or-list"))
2310     {
2311         res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2312                                      reg_id, complete_flag, rank_type,
2313                                      xpath_use,
2314                                      num_bases, basenames, rset_nmem,
2315                                      rset, kc);
2316     }
2317     else if (!strcmp(search_type, "local"))
2318     {
2319         res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2320                                    rank_type, rset_nmem, rset, kc);
2321     }
2322     else if (!strcmp(search_type, "numeric"))
2323     {
2324         res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2325                                      reg_id, complete_flag, rank_type,
2326                                      xpath_use,
2327                                      num_bases, basenames, rset_nmem,
2328                                      rset, kc);
2329     }
2330     else if (!strcmp(search_type, "always"))
2331     {
2332         if (xpath_len >= 0) /* alwaysmatches and X-Path ? */
2333         {
2334             *rset = 0; /* signal no "term" set */
2335             return rpn_search_xpath(zh, num_bases, basenames,
2336                             stream, rank_type, *rset, 
2337                             xpath_len, xpath, rset_nmem, rset, kc);
2338         }
2339         else
2340         {
2341             res = rpn_search_APT_alwaysmatches(zh, zapt, termz,
2342                                                attributeSet, stream,
2343                                                reg_id, complete_flag,
2344                                                rank_type,
2345                                                xpath_use,
2346                                                num_bases, basenames, rset_nmem,
2347                                                rset, kc);
2348         }
2349     }
2350     else
2351     {
2352         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2353         res = ZEBRA_FAIL;
2354     }
2355     if (res != ZEBRA_OK)
2356         return res;
2357     if (!*rset)
2358         return ZEBRA_FAIL;
2359     return rpn_search_xpath(zh, num_bases, basenames,
2360                             stream, rank_type, *rset, 
2361                             xpath_len, xpath, rset_nmem, rset, kc);
2362 }
2363
2364 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2365                                       oid_value attributeSet, 
2366                                       NMEM stream, NMEM rset_nmem,
2367                                       Z_SortKeySpecList *sort_sequence,
2368                                       int num_bases, char **basenames,
2369                                       RSET **result_sets, int *num_result_sets,
2370                                       Z_Operator *parent_op,
2371                                       struct rset_key_control *kc);
2372
2373 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2374                          oid_value attributeSet, 
2375                          NMEM stream, NMEM rset_nmem,
2376                          Z_SortKeySpecList *sort_sequence,
2377                          int num_bases, char **basenames,
2378                          RSET *result_set)
2379 {
2380     RSET *result_sets = 0;
2381     int num_result_sets = 0;
2382     ZEBRA_RES res;
2383     struct rset_key_control *kc = zebra_key_control_create(zh);
2384
2385     res = rpn_search_structure(zh, zs, attributeSet,
2386                                stream, rset_nmem,
2387                                sort_sequence, 
2388                                num_bases, basenames,
2389                                &result_sets, &num_result_sets,
2390                                0 /* no parent op */,
2391                                kc);
2392     if (res != ZEBRA_OK)
2393     {
2394         int i;
2395         for (i = 0; i<num_result_sets; i++)
2396             rset_delete(result_sets[i]);
2397         *result_set = 0;
2398     }
2399     else
2400     {
2401         assert(num_result_sets == 1);
2402         assert(result_sets);
2403         assert(*result_sets);
2404         *result_set = *result_sets;
2405     }
2406     (*kc->dec)(kc);
2407     return res;
2408 }
2409
2410 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2411                                oid_value attributeSet, 
2412                                NMEM stream, NMEM rset_nmem,
2413                                Z_SortKeySpecList *sort_sequence,
2414                                int num_bases, char **basenames,
2415                                RSET **result_sets, int *num_result_sets,
2416                                Z_Operator *parent_op,
2417                                struct rset_key_control *kc)
2418 {
2419     *num_result_sets = 0;
2420     if (zs->which == Z_RPNStructure_complex)
2421     {
2422         ZEBRA_RES res;
2423         Z_Operator *zop = zs->u.complex->roperator;
2424         RSET *result_sets_l = 0;
2425         int num_result_sets_l = 0;
2426         RSET *result_sets_r = 0;
2427         int num_result_sets_r = 0;
2428
2429         res = rpn_search_structure(zh, zs->u.complex->s1,
2430                                    attributeSet, stream, rset_nmem,
2431                                    sort_sequence,
2432                                    num_bases, basenames,
2433                                    &result_sets_l, &num_result_sets_l,
2434                                    zop, kc);
2435         if (res != ZEBRA_OK)
2436         {
2437             int i;
2438             for (i = 0; i<num_result_sets_l; i++)
2439                 rset_delete(result_sets_l[i]);
2440             return res;
2441         }
2442         res = rpn_search_structure(zh, zs->u.complex->s2,
2443                                    attributeSet, stream, rset_nmem,
2444                                    sort_sequence,
2445                                    num_bases, basenames,
2446                                    &result_sets_r, &num_result_sets_r,
2447                                    zop, kc);
2448         if (res != ZEBRA_OK)
2449         {
2450             int i;
2451             for (i = 0; i<num_result_sets_l; i++)
2452                 rset_delete(result_sets_l[i]);
2453             for (i = 0; i<num_result_sets_r; i++)
2454                 rset_delete(result_sets_r[i]);
2455             return res;
2456         }
2457
2458         /* make a new list of result for all children */
2459         *num_result_sets = num_result_sets_l + num_result_sets_r;
2460         *result_sets = nmem_malloc(stream, *num_result_sets * 
2461                                    sizeof(**result_sets));
2462         memcpy(*result_sets, result_sets_l, 
2463                num_result_sets_l * sizeof(**result_sets));
2464         memcpy(*result_sets + num_result_sets_l, result_sets_r, 
2465                num_result_sets_r * sizeof(**result_sets));
2466
2467         if (!parent_op || parent_op->which != zop->which
2468             || (zop->which != Z_Operator_and &&
2469                 zop->which != Z_Operator_or))
2470         {
2471             /* parent node different from this one (or non-present) */
2472             /* we must combine result sets now */
2473             RSET rset;
2474             switch (zop->which)
2475             {
2476             case Z_Operator_and:
2477                 rset = rset_create_and(rset_nmem, kc,
2478                                        kc->scope,
2479                                        *num_result_sets, *result_sets);
2480                 break;
2481             case Z_Operator_or:
2482                 rset = rset_create_or(rset_nmem, kc,
2483                                       kc->scope, 0, /* termid */
2484                                       *num_result_sets, *result_sets);
2485                 break;
2486             case Z_Operator_and_not:
2487                 rset = rset_create_not(rset_nmem, kc,
2488                                        kc->scope,
2489                                        (*result_sets)[0],
2490                                        (*result_sets)[1]);
2491                 break;
2492             case Z_Operator_prox:
2493                 if (zop->u.prox->which != Z_ProximityOperator_known)
2494                 {
2495                     zebra_setError(zh, 
2496                                    YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2497                                    0);
2498                     return ZEBRA_FAIL;
2499                 }
2500                 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2501                 {
2502                     zebra_setError_zint(zh,
2503                                         YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2504                                         *zop->u.prox->u.known);
2505                     return ZEBRA_FAIL;
2506                 }
2507                 else
2508                 {
2509                     rset = rset_create_prox(rset_nmem, kc,
2510                                             kc->scope,
2511                                             *num_result_sets, *result_sets, 
2512                                             *zop->u.prox->ordered,
2513                                             (!zop->u.prox->exclusion ? 
2514                                              0 : *zop->u.prox->exclusion),
2515                                             *zop->u.prox->relationType,
2516                                             *zop->u.prox->distance );
2517                 }
2518                 break;
2519             default:
2520                 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
2521                 return ZEBRA_FAIL;
2522             }
2523             *num_result_sets = 1;
2524             *result_sets = nmem_malloc(stream, *num_result_sets * 
2525                                        sizeof(**result_sets));
2526             (*result_sets)[0] = rset;
2527         }
2528     }
2529     else if (zs->which == Z_RPNStructure_simple)
2530     {
2531         RSET rset;
2532         ZEBRA_RES res;
2533
2534         if (zs->u.simple->which == Z_Operand_APT)
2535         {
2536             yaz_log(YLOG_DEBUG, "rpn_search_APT");
2537             res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2538                                  attributeSet, stream, sort_sequence,
2539                                  num_bases, basenames, rset_nmem, &rset,
2540                                  kc);
2541             if (res != ZEBRA_OK)
2542                 return res;
2543         }
2544         else if (zs->u.simple->which == Z_Operand_resultSetId)
2545         {
2546             yaz_log(YLOG_DEBUG, "rpn_search_ref");
2547             rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2548             if (!rset)
2549             {
2550                 zebra_setError(zh, 
2551                                YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2552                                zs->u.simple->u.resultSetId);
2553                 return ZEBRA_FAIL;
2554             }
2555             rset_dup(rset);
2556         }
2557         else
2558         {
2559             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2560             return ZEBRA_FAIL;
2561         }
2562         *num_result_sets = 1;
2563         *result_sets = nmem_malloc(stream, *num_result_sets * 
2564                                    sizeof(**result_sets));
2565         (*result_sets)[0] = rset;
2566     }
2567     else
2568     {
2569         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2570         return ZEBRA_FAIL;
2571     }
2572     return ZEBRA_OK;
2573 }
2574
2575 struct scan_info_entry {
2576     char *term;
2577     ISAM_P isam_p;
2578 };
2579
2580 struct scan_info {
2581     struct scan_info_entry *list;
2582     ODR odr;
2583     int before, after;
2584     char prefix[20];
2585 };
2586
2587 static int scan_handle (char *name, const char *info, int pos, void *client)
2588 {
2589     int len_prefix, idx;
2590     struct scan_info *scan_info = (struct scan_info *) client;
2591
2592     len_prefix = strlen(scan_info->prefix);
2593     if (memcmp (name, scan_info->prefix, len_prefix))
2594         return 1;
2595     if (pos > 0)
2596         idx = scan_info->after - pos + scan_info->before;
2597     else
2598         idx = - pos - 1;
2599
2600     if (idx < 0)
2601         return 0;
2602     scan_info->list[idx].term = (char *)
2603         odr_malloc(scan_info->odr, strlen(name + len_prefix)+1);
2604     strcpy(scan_info->list[idx].term, name + len_prefix);
2605     assert (*info == sizeof(ISAM_P));
2606     memcpy (&scan_info->list[idx].isam_p, info+1, sizeof(ISAM_P));
2607     return 0;
2608 }
2609
2610 void zebra_term_untrans_iconv(ZebraHandle zh, NMEM stream, int reg_type,
2611                               char **dst, const char *src)
2612 {
2613     char term_src[IT_MAX_WORD];
2614     char term_dst[IT_MAX_WORD];
2615     
2616     zebra_term_untrans (zh, reg_type, term_src, src);
2617
2618     if (zh->iconv_from_utf8 != 0)
2619     {
2620         int len;
2621         char *inbuf = term_src;
2622         size_t inleft = strlen(term_src);
2623         char *outbuf = term_dst;
2624         size_t outleft = sizeof(term_dst)-1;
2625         size_t ret;
2626         
2627         ret = yaz_iconv (zh->iconv_from_utf8, &inbuf, &inleft,
2628                          &outbuf, &outleft);
2629         if (ret == (size_t)(-1))
2630             len = 0;
2631         else
2632             len = outbuf - term_dst;
2633         *dst = nmem_malloc(stream, len + 1);
2634         if (len > 0)
2635             memcpy (*dst, term_dst, len);
2636         (*dst)[len] = '\0';
2637     }
2638     else
2639         *dst = nmem_strdup(stream, term_src);
2640 }
2641
2642 static void count_set(ZebraHandle zh, RSET rset, zint *count)
2643 {
2644     zint psysno = 0;
2645     struct it_key key;
2646     RSFD rfd;
2647
2648     yaz_log(YLOG_DEBUG, "count_set");
2649
2650     rset->hits_limit = zh->approx_limit;
2651
2652     *count = 0;
2653     rfd = rset_open(rset, RSETF_READ);
2654     while (rset_read(rfd, &key,0 /* never mind terms */))
2655     {
2656         if (key.mem[0] != psysno)
2657         {
2658             psysno = key.mem[0];
2659             if (rfd->counted_items >= rset->hits_limit)
2660                 break;
2661         }
2662     }
2663     rset_close (rfd);
2664     *count = rset->hits_count;
2665 }
2666
2667 ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
2668                    oid_value attributeset,
2669                    int num_bases, char **basenames,
2670                    int *position, int *num_entries, ZebraScanEntry **list,
2671                    int *is_partial, RSET limit_set, int return_zero)
2672 {
2673     int i;
2674     int pos = *position;
2675     int num = *num_entries;
2676     int before;
2677     int after;
2678     int base_no;
2679     char termz[IT_MAX_WORD+20];
2680     struct scan_info *scan_info_array;
2681     ZebraScanEntry *glist;
2682     int ords[32], ord_no = 0;
2683     int ptr[32];
2684
2685     int bases_ok = 0;     /* no of databases with OK attribute */
2686     int errCode = 0;      /* err code (if any is not OK) */
2687     char *errString = 0;  /* addinfo */
2688
2689     unsigned index_type;
2690     char *search_type = NULL;
2691     char rank_type[128];
2692     int complete_flag;
2693     int sort_flag;
2694     NMEM rset_nmem = NULL; 
2695     struct rset_key_control *kc = 0;
2696
2697     *list = 0;
2698     *is_partial = 0;
2699
2700     if (attributeset == VAL_NONE)
2701         attributeset = VAL_BIB1;
2702
2703     if (!limit_set)
2704     {
2705         AttrType termset;
2706         int termset_value_numeric;
2707         const char *termset_value_string;
2708         attr_init_APT(&termset, zapt, 8);
2709         termset_value_numeric =
2710             attr_find_ex(&termset, NULL, &termset_value_string);
2711         if (termset_value_numeric != -1)
2712         {
2713             char resname[32];
2714             const char *termset_name = 0;
2715             
2716             if (termset_value_numeric != -2)
2717             {
2718                 
2719                 sprintf(resname, "%d", termset_value_numeric);
2720                 termset_name = resname;
2721             }
2722             else
2723                 termset_name = termset_value_string;
2724             
2725             limit_set = resultSetRef (zh, termset_name);
2726         }
2727     }
2728         
2729     yaz_log(YLOG_DEBUG, "position = %d, num = %d set=%d",
2730             pos, num, attributeset);
2731         
2732     if (zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type,
2733                         rank_type, &complete_flag, &sort_flag))
2734     {
2735         *num_entries = 0;
2736         zebra_setError(zh, YAZ_BIB1_UNSUPP_ATTRIBUTE_TYPE, 0);
2737         return ZEBRA_FAIL;
2738     }
2739     for (base_no = 0; base_no < num_bases && ord_no < 32; base_no++)
2740     {
2741         int ord;
2742
2743         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2744         {
2745             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2746                            basenames[base_no]);
2747             *num_entries = 0;
2748             return ZEBRA_FAIL;
2749         }
2750
2751         if (zebra_apt_get_ord(zh, zapt, zinfo_index_category_index,
2752                               index_type, 0, attributeset, &ord) 
2753             != ZEBRA_OK)
2754         {
2755             break;
2756         }
2757         ords[ord_no++] = ord;
2758     }
2759     if (!bases_ok && errCode)
2760     {
2761         zebra_setError(zh, errCode, errString);
2762         *num_entries = 0;
2763         return ZEBRA_FAIL;
2764     }
2765     if (ord_no == 0)
2766     {
2767         *num_entries = 0;
2768         return ZEBRA_OK;
2769     }
2770     /* prepare dictionary scanning */
2771     if (num < 1)
2772     {
2773         *num_entries = 0;
2774         return ZEBRA_OK;
2775     }
2776     before = pos-1;
2777     if (before < 0)
2778         before = 0;
2779     after = 1+num-pos;
2780     if (after < 0)
2781         after = 0;
2782     yaz_log(YLOG_DEBUG, "rpn_scan pos=%d num=%d before=%d "
2783             "after=%d before+after=%d",
2784             pos, num, before, after, before+after);
2785     scan_info_array = (struct scan_info *)
2786         odr_malloc(stream, ord_no * sizeof(*scan_info_array));
2787     for (i = 0; i < ord_no; i++)
2788     {
2789         int j, prefix_len = 0;
2790         int before_tmp = before, after_tmp = after;
2791         struct scan_info *scan_info = scan_info_array + i;
2792         struct rpn_char_map_info rcmi;
2793
2794         rpn_char_map_prepare (zh->reg, index_type, &rcmi);
2795
2796         scan_info->before = before;
2797         scan_info->after = after;
2798         scan_info->odr = stream;
2799
2800         scan_info->list = (struct scan_info_entry *)
2801             odr_malloc(stream, (before+after) * sizeof(*scan_info->list));
2802         for (j = 0; j<before+after; j++)
2803             scan_info->list[j].term = NULL;
2804
2805         prefix_len += key_SU_encode (ords[i], termz + prefix_len);
2806         termz[prefix_len] = 0;
2807         strcpy(scan_info->prefix, termz);
2808
2809         if (trans_scan_term(zh, zapt, termz+prefix_len, index_type) == 
2810             ZEBRA_FAIL)
2811             return ZEBRA_FAIL;
2812         
2813         dict_scan(zh->reg->dict, termz, &before_tmp, &after_tmp,
2814                   scan_info, scan_handle);
2815     }
2816     glist = (ZebraScanEntry *)
2817         odr_malloc(stream, (before+after)*sizeof(*glist));
2818
2819     rset_nmem = nmem_create();
2820     kc = zebra_key_control_create(zh);
2821
2822     /* consider terms after main term */
2823     for (i = 0; i < ord_no; i++)
2824         ptr[i] = before;
2825     
2826     *is_partial = 0;
2827     for (i = 0; i<after; i++)
2828     {
2829         int j, j0 = -1;
2830         const char *mterm = NULL;
2831         const char *tst;
2832         RSET rset = 0;
2833         int lo = i + pos-1; /* offset in result list */
2834
2835         /* find: j0 is the first of the minimal values */
2836         for (j = 0; j < ord_no; j++)
2837         {
2838             if (ptr[j] < before+after && ptr[j] >= 0 &&
2839                 (tst = scan_info_array[j].list[ptr[j]].term) &&
2840                 (!mterm || strcmp (tst, mterm) < 0))
2841             {
2842                 j0 = j;
2843                 mterm = tst;
2844             }
2845         }
2846         if (j0 == -1)
2847             break;  /* no value found, stop */
2848
2849         /* get result set for first one , but only if it's within bounds */
2850         if (lo >= 0)
2851         {
2852             /* get result set for first term */
2853             zebra_term_untrans_iconv(zh, stream->mem, index_type,
2854                                      &glist[lo].term, mterm);
2855             rset = rset_trunc(zh, &scan_info_array[j0].list[ptr[j0]].isam_p, 1,
2856                               glist[lo].term, strlen(glist[lo].term),
2857                               NULL, 0, zapt->term->which, rset_nmem, 
2858                               kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2859                               0 /* term_ref_id_str */);
2860         }
2861         ptr[j0]++; /* move index for this set .. */
2862         /* get result set for remaining scan terms */
2863         for (j = j0+1; j<ord_no; j++)
2864         {
2865             if (ptr[j] < before+after && ptr[j] >= 0 &&
2866                 (tst = scan_info_array[j].list[ptr[j]].term) &&
2867                 !strcmp (tst, mterm))
2868             {
2869                 if (lo >= 0)
2870                 {
2871                     RSET rsets[2];
2872                     
2873                     rsets[0] = rset;
2874                     rsets[1] =
2875                         rset_trunc(
2876                             zh, &scan_info_array[j].list[ptr[j]].isam_p, 1,
2877                             glist[lo].term,
2878                             strlen(glist[lo].term), NULL, 0,
2879                             zapt->term->which,rset_nmem,
2880                             kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2881                             0 /* term_ref_id_str */ );
2882                     rset = rset_create_or(rset_nmem, kc,
2883                                           kc->scope, 0 /* termid */,
2884                                           2, rsets);
2885                 }
2886                 ptr[j]++;
2887             }
2888         }
2889         if (lo >= 0)
2890         {
2891             zint count;
2892             /* merge with limit_set if given */
2893             if (limit_set)
2894             {
2895                 RSET rsets[2];
2896                 rsets[0] = rset;
2897                 rsets[1] = rset_dup(limit_set);
2898                 
2899                 rset = rset_create_and(rset_nmem, kc, kc->scope, 2, rsets);
2900             }
2901             /* count it */
2902             count_set(zh, rset, &count);
2903             glist[lo].occurrences = count;
2904             rset_delete(rset);
2905         }
2906     }
2907     if (i < after)
2908     {
2909         *num_entries -= (after-i);
2910         *is_partial = 1;
2911         if (*num_entries < 0)
2912         {
2913             (*kc->dec)(kc);
2914             nmem_destroy(rset_nmem);
2915             *num_entries = 0;
2916             return ZEBRA_OK;
2917         }
2918     }
2919     /* consider terms before main term */
2920     for (i = 0; i<ord_no; i++)
2921         ptr[i] = 0;
2922     
2923     for (i = 0; i<before; i++)
2924     {
2925         int j, j0 = -1;
2926         const char *mterm = NULL;
2927         const char *tst;
2928         RSET rset;
2929         int lo = before-1-i; /* offset in result list */
2930         zint count;
2931         
2932         for (j = 0; j <ord_no; j++)
2933         {
2934             if (ptr[j] < before && ptr[j] >= 0 &&
2935                 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
2936                 (!mterm || strcmp (tst, mterm) > 0))
2937             {
2938                 j0 = j;
2939                     mterm = tst;
2940             }
2941         }
2942         if (j0 == -1)
2943             break;
2944         
2945         zebra_term_untrans_iconv(zh, stream->mem, index_type,
2946                                  &glist[lo].term, mterm);
2947         
2948         rset = rset_trunc
2949             (zh, &scan_info_array[j0].list[before-1-ptr[j0]].isam_p, 1,
2950              glist[lo].term, strlen(glist[lo].term),
2951              NULL, 0, zapt->term->which, rset_nmem,
2952              kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2953              0 /* term_ref_id_str */);
2954         
2955         ptr[j0]++;
2956         
2957         for (j = j0+1; j<ord_no; j++)
2958         {
2959             if (ptr[j] < before && ptr[j] >= 0 &&
2960                 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
2961                 !strcmp (tst, mterm))
2962             {
2963                 RSET rsets[2];
2964                 
2965                 rsets[0] = rset;
2966                 rsets[1] = rset_trunc(
2967                     zh,
2968                     &scan_info_array[j].list[before-1-ptr[j]].isam_p, 1,
2969                     glist[lo].term,
2970                     strlen(glist[lo].term), NULL, 0,
2971                     zapt->term->which, rset_nmem,
2972                     kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2973                     0 /* term_ref_id_str */);
2974                 rset = rset_create_or(rset_nmem, kc,
2975                                       kc->scope, 0 /* termid */, 2, rsets);
2976                 
2977                 ptr[j]++;
2978             }
2979         }
2980         if (limit_set)
2981         {
2982             RSET rsets[2];
2983             rsets[0] = rset;
2984             rsets[1] = rset_dup(limit_set);
2985             
2986             rset = rset_create_and(rset_nmem, kc, kc->scope, 2, rsets);
2987         }
2988         count_set(zh, rset, &count);
2989         glist[lo].occurrences = count;
2990         rset_delete (rset);
2991     }
2992     (*kc->dec)(kc);
2993     nmem_destroy(rset_nmem);
2994     i = before-i;
2995     if (i)
2996     {
2997         *is_partial = 1;
2998         *position -= i;
2999         *num_entries -= i;
3000         if (*num_entries <= 0)
3001         {
3002             *num_entries = 0;
3003             return ZEBRA_OK;
3004         }
3005     }
3006     
3007     *list = glist + i;               /* list is set to first 'real' entry */
3008     
3009     yaz_log(YLOG_DEBUG, "position = %d, num_entries = %d",
3010             *position, *num_entries);
3011     return ZEBRA_OK;
3012 }
3013
3014 /*
3015  * Local variables:
3016  * c-basic-offset: 4
3017  * indent-tabs-mode: nil
3018  * End:
3019  * vim: shiftwidth=4 tabstop=8 expandtab
3020  */
3021