Using wrbuf for internal X-Path string. Fixed uninitialized var
[idzebra-moved-to-github.git] / index / zrpn.c
1 /* $Id: zrpn.c,v 1.219 2006-06-22 15:44:44 adam Exp $
2    Copyright (C) 1995-2006
3    Index Data ApS
4
5 This file is part of the Zebra server.
6
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra.  If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA.
21 */
22
23 #include <stdio.h>
24 #include <assert.h>
25 #ifdef WIN32
26 #include <io.h>
27 #endif
28 #if HAVE_UNISTD_H
29 #include <unistd.h>
30 #endif
31 #include <ctype.h>
32
33 #include <yaz/diagbib1.h>
34 #include "index.h"
35 #include <zebra_xpath.h>
36 #include <attrfind.h>
37 #include <charmap.h>
38 #include <rset.h>
39
40 struct rpn_char_map_info
41 {
42     ZebraMaps zm;
43     int reg_type;
44 };
45
/* Log level of the "rpn" log module; fetched lazily with
   yaz_log_module_level() the first time add_isam_p() runs. */
static int log_level_rpn = 0;
/* Non-zero once log_level_rpn has been fetched. */
static int log_level_set = 0;
48
49 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
50 {
51     struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
52     const char **out = zebra_maps_input(p->zm, p->reg_type, from, len, 0);
53 #if 0
54     if (out && *out)
55     {
56         const char *outp = *out;
57         yaz_log(YLOG_LOG, "---");
58         while (*outp)
59         {
60             yaz_log(YLOG_LOG, "%02X", *outp);
61             outp++;
62         }
63     }
64 #endif
65     return out;
66 }
67
68 static void rpn_char_map_prepare(struct zebra_register *reg, int reg_type,
69                                  struct rpn_char_map_info *map_info)
70 {
71     map_info->zm = reg->zebra_maps;
72     map_info->reg_type = reg_type;
73     dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
74 }
75
76 #define TERM_COUNT        
77        
78 struct grep_info {        
79 #ifdef TERM_COUNT        
80     int *term_no;        
81 #endif        
82     ISAM_P *isam_p_buf;
83     int isam_p_size;        
84     int isam_p_indx;
85     ZebraHandle zh;
86     int reg_type;
87     ZebraSet termset;
88 };        
89
90 void zebra_term_untrans(ZebraHandle zh, int reg_type,
91                         char *dst, const char *src)
92 {
93     int len = 0;
94     while (*src)
95     {
96         const char *cp = zebra_maps_output(zh->reg->zebra_maps,
97                                            reg_type, &src);
98         if (!cp)
99         {
100             if (len < IT_MAX_WORD-1)
101                 dst[len++] = *src;
102             src++;
103         }
104         else
105             while (*cp && len < IT_MAX_WORD-1)
106                 dst[len++] = *cp++;
107     }
108     dst[len] = '\0';
109 }
110
111 static void add_isam_p(const char *name, const char *info,
112                        struct grep_info *p)
113 {
114     if (!log_level_set)
115     {
116         log_level_rpn = yaz_log_module_level("rpn");
117         log_level_set = 1;
118     }
119     if (p->isam_p_indx == p->isam_p_size)
120     {
121         ISAM_P *new_isam_p_buf;
122 #ifdef TERM_COUNT        
123         int *new_term_no;        
124 #endif
125         p->isam_p_size = 2*p->isam_p_size + 100;
126         new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
127                                             p->isam_p_size);
128         if (p->isam_p_buf)
129         {
130             memcpy(new_isam_p_buf, p->isam_p_buf,
131                     p->isam_p_indx * sizeof(*p->isam_p_buf));
132             xfree(p->isam_p_buf);
133         }
134         p->isam_p_buf = new_isam_p_buf;
135
136 #ifdef TERM_COUNT
137         new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
138         if (p->term_no)
139         {
140             memcpy(new_term_no, p->isam_p_buf,
141                     p->isam_p_indx * sizeof(*p->term_no));
142             xfree(p->term_no);
143         }
144         p->term_no = new_term_no;
145 #endif
146     }
147     assert(*info == sizeof(*p->isam_p_buf));
148     memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
149
150 #if 1
151     if (p->termset)
152     {
153         const char *db;
154         char term_tmp[IT_MAX_WORD];
155         int ord = 0;
156         const char *index_name;
157         int len = key_SU_decode (&ord, (const unsigned char *) name);
158         
159         zebra_term_untrans  (p->zh, p->reg_type, term_tmp, name+len+1);
160         yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
161         zebraExplain_lookup_ord(p->zh->reg->zei,
162                                 ord, 0 /* index_type */, &db, &index_name);
163         yaz_log(log_level_rpn, "grep:  db=%s index=%s", db, index_name);
164         
165         resultSetAddTerm(p->zh, p->termset, name[len], db,
166                          index_name, term_tmp);
167     }
168 #endif
169     (p->isam_p_indx)++;
170 }
171
/* Callback adapter: p is the struct grep_info to which the hit
   (name, info) is appended via add_isam_p().  Always returns 0. */
static int grep_handle(char *name, const char *info, void *p)
{
    struct grep_info *gi = (struct grep_info *) p;

    add_isam_p(name, info, gi);
    return 0;
}
177
178 static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src,
179                     const char *ct1, const char *ct2, int first)
180 {
181     const char *s1, *s0 = *src;
182     const char **map;
183
184     /* skip white space */
185     while (*s0)
186     {
187         if (ct1 && strchr(ct1, *s0))
188             break;
189         if (ct2 && strchr(ct2, *s0))
190             break;
191         s1 = s0;
192         map = zebra_maps_input(zebra_maps, reg_type, &s1, strlen(s1), first);
193         if (**map != *CHR_SPACE)
194             break;
195         s0 = s1;
196     }
197     *src = s0;
198     return *s0;
199 }
200
201
/* Render in_buf as a printable dump in out_buf: each byte becomes
 * "HH:c  " (hex value, colon, the character itself or '?' when it is
 * outside 32..126, two spaces).
 *  out_size:  capacity of out_buf, must exceed 20
 * Once fewer than 20 bytes of head-room remain, ".." is appended and the
 * rest of the input is dropped.
 */
static void esc_str(char *out_buf, size_t out_size,
                    const char *in_buf, int in_size)
{
    size_t used = 0;    /* current length of the string in out_buf */
    int idx;

    assert(out_buf);
    assert(in_buf);
    assert(out_size > 20);
    out_buf[0] = '\0';
    for (idx = 0; idx < in_size; idx++)
    {
        int byte = in_buf[idx] & 0xff;
        int shown = (byte >= 32 && byte <= 126) ? byte : '?';

        used += sprintf(out_buf + used, "%02X:%c  ", byte, shown);
        if (used > out_size - 20)
        {
            strcpy(out_buf + used, "..");
            break;
        }
    }
}
227
/* Characters that the term_1xx functions prefix with a backslash when
   copying unmapped input into the dictionary search expression. */
#define REGEX_CHARS " []()|.*+?!"
229
230 /* term_100: handle term, where trunc = none(no operators at all) */
231 static int term_100(ZebraMaps zebra_maps, int reg_type,
232                     const char **src, char *dst, int space_split,
233                     char *dst_term)
234 {
235     const char *s0;
236     const char **map;
237     int i = 0;
238     int j = 0;
239
240     const char *space_start = 0;
241     const char *space_end = 0;
242
243     if (!term_pre(zebra_maps, reg_type, src, NULL, NULL, !space_split))
244         return 0;
245     s0 = *src;
246     while (*s0)
247     {
248         const char *s1 = s0;
249         int q_map_match = 0;
250         map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
251                                 &q_map_match);
252         if (space_split)
253         {
254             if (**map == *CHR_SPACE)
255                 break;
256         }
257         else  /* complete subfield only. */
258         {
259             if (**map == *CHR_SPACE)
260             {   /* save space mapping for later  .. */
261                 space_start = s1;
262                 space_end = s0;
263                 continue;
264             }
265             else if (space_start)
266             {   /* reload last space */
267                 while (space_start < space_end)
268                 {
269                     if (strchr(REGEX_CHARS, *space_start))
270                         dst[i++] = '\\';
271                     dst_term[j++] = *space_start;
272                     dst[i++] = *space_start++;
273                 }
274                 /* and reset */
275                 space_start = space_end = 0;
276             }
277         }
278         /* add non-space char */
279         memcpy(dst_term+j, s1, s0 - s1);
280         j += (s0 - s1);
281         if (!q_map_match)
282         {
283             while (s1 < s0)
284             {
285                 if (strchr(REGEX_CHARS, *s1))
286                     dst[i++] = '\\';
287                 dst[i++] = *s1++;
288             }
289         }
290         else
291         {
292             char tmpbuf[80];
293             esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
294             
295             strcpy(dst + i, map[0]);
296             i += strlen(map[0]);
297         }
298     }
299     dst[i] = '\0';
300     dst_term[j] = '\0';
301     *src = s0;
302     return i;
303 }
304
305 /* term_101: handle term, where trunc = Process # */
306 static int term_101(ZebraMaps zebra_maps, int reg_type,
307                     const char **src, char *dst, int space_split,
308                     char *dst_term)
309 {
310     const char *s0;
311     const char **map;
312     int i = 0;
313     int j = 0;
314
315     if (!term_pre(zebra_maps, reg_type, src, "#", "#", !space_split))
316         return 0;
317     s0 = *src;
318     while (*s0)
319     {
320         if (*s0 == '#')
321         {
322             dst[i++] = '.';
323             dst[i++] = '*';
324             dst_term[j++] = *s0++;
325         }
326         else
327         {
328             const char *s1 = s0;
329             int q_map_match = 0;
330             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
331                                     &q_map_match);
332             if (space_split && **map == *CHR_SPACE)
333                 break;
334
335             /* add non-space char */
336             memcpy(dst_term+j, s1, s0 - s1);
337             j += (s0 - s1);
338             if (!q_map_match)
339             {
340                 while (s1 < s0)
341                 {
342                     if (strchr(REGEX_CHARS, *s1))
343                         dst[i++] = '\\';
344                     dst[i++] = *s1++;
345                 }
346             }
347             else
348             {
349                 char tmpbuf[80];
350                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
351                 
352                 strcpy(dst + i, map[0]);
353                 i += strlen(map[0]);
354             }
355         }
356     }
357     dst[i] = '\0';
358     dst_term[j++] = '\0';
359     *src = s0;
360     return i;
361 }
362
363 /* term_103: handle term, where trunc = re-2 (regular expressions) */
364 static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src,
365                     char *dst, int *errors, int space_split,
366                     char *dst_term)
367 {
368     int i = 0;
369     int j = 0;
370     const char *s0;
371     const char **map;
372
373     if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split))
374         return 0;
375     s0 = *src;
376     if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
377         isdigit(((const unsigned char *)s0)[1]))
378     {
379         *errors = s0[1] - '0';
380         s0 += 3;
381         if (*errors > 3)
382             *errors = 3;
383     }
384     while (*s0)
385     {
386         if (strchr("^\\()[].*+?|-", *s0))
387         {
388             dst_term[j++] = *s0;
389             dst[i++] = *s0++;
390         }
391         else
392         {
393             const char *s1 = s0;
394             int q_map_match = 0;
395             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
396                                     &q_map_match);
397             if (space_split && **map == *CHR_SPACE)
398                 break;
399
400             /* add non-space char */
401             memcpy(dst_term+j, s1, s0 - s1);
402             j += (s0 - s1);
403             if (!q_map_match)
404             {
405                 while (s1 < s0)
406                 {
407                     if (strchr(REGEX_CHARS, *s1))
408                         dst[i++] = '\\';
409                     dst[i++] = *s1++;
410                 }
411             }
412             else
413             {
414                 char tmpbuf[80];
415                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
416                 
417                 strcpy(dst + i, map[0]);
418                 i += strlen(map[0]);
419             }
420         }
421     }
422     dst[i] = '\0';
423     dst_term[j] = '\0';
424     *src = s0;
425     
426     return i;
427 }
428
429 /* term_103: handle term, where trunc = re-1 (regular expressions) */
430 static int term_102(ZebraMaps zebra_maps, int reg_type, const char **src,
431                     char *dst, int space_split, char *dst_term)
432 {
433     return term_103(zebra_maps, reg_type, src, dst, NULL, space_split,
434                     dst_term);
435 }
436
437
438 /* term_104: handle term, where trunc = Process # and ! */
439 static int term_104(ZebraMaps zebra_maps, int reg_type,
440                     const char **src, char *dst, int space_split,
441                     char *dst_term)
442 {
443     const char *s0;
444     const char **map;
445     int i = 0;
446     int j = 0;
447
448     if (!term_pre(zebra_maps, reg_type, src, "?*#", "?*#", !space_split))
449         return 0;
450     s0 = *src;
451     while (*s0)
452     {
453         if (*s0 == '?')
454         {
455             dst_term[j++] = *s0++;
456             if (*s0 >= '0' && *s0 <= '9')
457             {
458                 int limit = 0;
459                 while (*s0 >= '0' && *s0 <= '9')
460                 {
461                     limit = limit * 10 + (*s0 - '0');
462                     dst_term[j++] = *s0++;
463                 }
464                 if (limit > 20)
465                     limit = 20;
466                 while (--limit >= 0)
467                 {
468                     dst[i++] = '.';
469                     dst[i++] = '?';
470                 }
471             }
472             else
473             {
474                 dst[i++] = '.';
475                 dst[i++] = '*';
476             }
477         }
478         else if (*s0 == '*')
479         {
480             dst[i++] = '.';
481             dst[i++] = '*';
482             dst_term[j++] = *s0++;
483         }
484         else if (*s0 == '#')
485         {
486             dst[i++] = '.';
487             dst_term[j++] = *s0++;
488         }
489         else
490         {
491             const char *s1 = s0;
492             int q_map_match = 0;
493             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
494                                     &q_map_match);
495             if (space_split && **map == *CHR_SPACE)
496                 break;
497
498             /* add non-space char */
499             memcpy(dst_term+j, s1, s0 - s1);
500             j += (s0 - s1);
501             if (!q_map_match)
502             {
503                 while (s1 < s0)
504                 {
505                     if (strchr(REGEX_CHARS, *s1))
506                         dst[i++] = '\\';
507                     dst[i++] = *s1++;
508                 }
509             }
510             else
511             {
512                 char tmpbuf[80];
513                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
514                 
515                 strcpy(dst + i, map[0]);
516                 i += strlen(map[0]);
517             }
518         }
519     }
520     dst[i] = '\0';
521     dst_term[j++] = '\0';
522     *src = s0;
523     return i;
524 }
525
526 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
527 static int term_105(ZebraMaps zebra_maps, int reg_type,
528                     const char **src, char *dst, int space_split,
529                     char *dst_term, int right_truncate)
530 {
531     const char *s0;
532     const char **map;
533     int i = 0;
534     int j = 0;
535
536     if (!term_pre(zebra_maps, reg_type, src, "*!", "*!", !space_split))
537         return 0;
538     s0 = *src;
539     while (*s0)
540     {
541         if (*s0 == '*')
542         {
543             dst[i++] = '.';
544             dst[i++] = '*';
545             dst_term[j++] = *s0++;
546         }
547         else if (*s0 == '!')
548         {
549             dst[i++] = '.';
550             dst_term[j++] = *s0++;
551         }
552         else
553         {
554             const char *s1 = s0;
555             int q_map_match = 0;
556             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
557                                     &q_map_match);
558             if (space_split && **map == *CHR_SPACE)
559                 break;
560
561             /* add non-space char */
562             memcpy(dst_term+j, s1, s0 - s1);
563             j += (s0 - s1);
564             if (!q_map_match)
565             {
566                 while (s1 < s0)
567                 {
568                     if (strchr(REGEX_CHARS, *s1))
569                         dst[i++] = '\\';
570                     dst[i++] = *s1++;
571                 }
572             }
573             else
574             {
575                 char tmpbuf[80];
576                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
577                 
578                 strcpy(dst + i, map[0]);
579                 i += strlen(map[0]);
580             }
581         }
582     }
583     if (right_truncate)
584     {
585         dst[i++] = '.';
586         dst[i++] = '*';
587     }
588     dst[i] = '\0';
589     
590     dst_term[j++] = '\0';
591     *src = s0;
592     return i;
593 }
594
595
596 /* gen_regular_rel - generate regular expression from relation
597  *  val:     border value (inclusive)
598  *  islt:    1 if <=; 0 if >=.
599  */
600 static void gen_regular_rel(char *dst, int val, int islt)
601 {
602     int dst_p;
603     int w, d, i;
604     int pos = 0;
605     char numstr[20];
606
607     yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
608     if (val >= 0)
609     {
610         if (islt)
611             strcpy(dst, "(-[0-9]+|(");
612         else
613             strcpy(dst, "((");
614     } 
615     else
616     {
617         if (!islt)
618         {
619             strcpy(dst, "([0-9]+|-(");
620             dst_p = strlen(dst);
621             islt = 1;
622         }
623         else
624         {
625             strcpy(dst, "(-(");
626             islt = 0;
627         }
628         val = -val;
629     }
630     dst_p = strlen(dst);
631     sprintf(numstr, "%d", val);
632     for (w = strlen(numstr); --w >= 0; pos++)
633     {
634         d = numstr[w];
635         if (pos > 0)
636         {
637             if (islt)
638             {
639                 if (d == '0')
640                     continue;
641                 d--;
642             } 
643             else
644             {
645                 if (d == '9')
646                     continue;
647                 d++;
648             }
649         }
650         
651         strcpy(dst + dst_p, numstr);
652         dst_p = strlen(dst) - pos - 1;
653
654         if (islt)
655         {
656             if (d != '0')
657             {
658                 dst[dst_p++] = '[';
659                 dst[dst_p++] = '0';
660                 dst[dst_p++] = '-';
661                 dst[dst_p++] = d;
662                 dst[dst_p++] = ']';
663             }
664             else
665                 dst[dst_p++] = d;
666         }
667         else
668         {
669             if (d != '9')
670             { 
671                 dst[dst_p++] = '[';
672                 dst[dst_p++] = d;
673                 dst[dst_p++] = '-';
674                 dst[dst_p++] = '9';
675                 dst[dst_p++] = ']';
676             }
677             else
678                 dst[dst_p++] = d;
679         }
680         for (i = 0; i<pos; i++)
681         {
682             dst[dst_p++] = '[';
683             dst[dst_p++] = '0';
684             dst[dst_p++] = '-';
685             dst[dst_p++] = '9';
686             dst[dst_p++] = ']';
687         }
688         dst[dst_p++] = '|';
689     }
690     dst[dst_p] = '\0';
691     if (islt)
692     {
693         /* match everything less than 10^(pos-1) */
694         strcat(dst, "0*");
695         for (i = 1; i<pos; i++)
696             strcat(dst, "[0-9]?");
697     }
698     else
699     {
700         /* match everything greater than 10^pos */
701         for (i = 0; i <= pos; i++)
702             strcat(dst, "[0-9]");
703         strcat(dst, "[0-9]*");
704     }
705     strcat(dst, "))");
706 }
707
/* Copy one logical character from src[*indx] to **term_p, advancing
 * both.  A backslash is copied together with the character that
 * follows it, so an escape pair always moves as a unit.
 */
void string_rel_add_char(char **term_p, const char *src, int *indx)
{
    char *out = *term_p;
    int at = *indx;

    if (src[at] == '\\')
        *out++ = src[at++];
    *out++ = src[at++];

    *term_p = out;
    *indx = at;
}
714
715 /*
716  *   >  abc     ([b-].*|a[c-].*|ab[d-].*|abc.+)
717  *              ([^-a].*|a[^-b].*ab[^-c].*|abc.+)
718  *   >= abc     ([b-].*|a[c-].*|ab[c-].*)
719  *              ([^-a].*|a[^-b].*|ab[c-].*)
720  *   <  abc     ([-0].*|a[-a].*|ab[-b].*)
721  *              ([^a-].*|a[^b-].*|ab[^c-].*)
722  *   <= abc     ([-0].*|a[-a].*|ab[-b].*|abc)
723  *              ([^a-].*|a[^b-].*|ab[^c-].*|abc)
724  */
725 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
726                            const char **term_sub, char *term_dict,
727                            oid_value attributeSet,
728                            int reg_type, int space_split, char *term_dst,
729                            int *error_code)
730 {
731     AttrType relation;
732     int relation_value;
733     int i;
734     char *term_tmp = term_dict + strlen(term_dict);
735     char term_component[2*IT_MAX_WORD+20];
736
737     attr_init_APT(&relation, zapt, 2);
738     relation_value = attr_find(&relation, NULL);
739
740     *error_code = 0;
741     yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
742     switch (relation_value)
743     {
744     case 1:
745         if (!term_100(zh->reg->zebra_maps, reg_type,
746                       term_sub, term_component,
747                       space_split, term_dst))
748             return 0;
749         yaz_log(log_level_rpn, "Relation <");
750         
751         *term_tmp++ = '(';
752         for (i = 0; term_component[i]; )
753         {
754             int j = 0;
755
756             if (i)
757                 *term_tmp++ = '|';
758             while (j < i)
759                 string_rel_add_char(&term_tmp, term_component, &j);
760
761             *term_tmp++ = '[';
762
763             *term_tmp++ = '^';
764             string_rel_add_char(&term_tmp, term_component, &i);
765             *term_tmp++ = '-';
766
767             *term_tmp++ = ']';
768             *term_tmp++ = '.';
769             *term_tmp++ = '*';
770
771             if ((term_tmp - term_dict) > IT_MAX_WORD)
772                 break;
773         }
774         *term_tmp++ = ')';
775         *term_tmp = '\0';
776         break;
777     case 2:
778         if (!term_100(zh->reg->zebra_maps, reg_type,
779                       term_sub, term_component,
780                       space_split, term_dst))
781             return 0;
782         yaz_log(log_level_rpn, "Relation <=");
783
784         *term_tmp++ = '(';
785         for (i = 0; term_component[i]; )
786         {
787             int j = 0;
788
789             while (j < i)
790                 string_rel_add_char(&term_tmp, term_component, &j);
791             *term_tmp++ = '[';
792
793             *term_tmp++ = '^';
794             string_rel_add_char(&term_tmp, term_component, &i);
795             *term_tmp++ = '-';
796
797             *term_tmp++ = ']';
798             *term_tmp++ = '.';
799             *term_tmp++ = '*';
800
801             *term_tmp++ = '|';
802
803             if ((term_tmp - term_dict) > IT_MAX_WORD)
804                 break;
805         }
806         for (i = 0; term_component[i]; )
807             string_rel_add_char(&term_tmp, term_component, &i);
808         *term_tmp++ = ')';
809         *term_tmp = '\0';
810         break;
811     case 5:
812         if (!term_100 (zh->reg->zebra_maps, reg_type,
813                        term_sub, term_component, space_split, term_dst))
814             return 0;
815         yaz_log(log_level_rpn, "Relation >");
816
817         *term_tmp++ = '(';
818         for (i = 0; term_component[i];)
819         {
820             int j = 0;
821
822             while (j < i)
823                 string_rel_add_char(&term_tmp, term_component, &j);
824             *term_tmp++ = '[';
825             
826             *term_tmp++ = '^';
827             *term_tmp++ = '-';
828             string_rel_add_char(&term_tmp, term_component, &i);
829
830             *term_tmp++ = ']';
831             *term_tmp++ = '.';
832             *term_tmp++ = '*';
833
834             *term_tmp++ = '|';
835
836             if ((term_tmp - term_dict) > IT_MAX_WORD)
837                 break;
838         }
839         for (i = 0; term_component[i];)
840             string_rel_add_char(&term_tmp, term_component, &i);
841         *term_tmp++ = '.';
842         *term_tmp++ = '+';
843         *term_tmp++ = ')';
844         *term_tmp = '\0';
845         break;
846     case 4:
847         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
848                       term_component, space_split, term_dst))
849             return 0;
850         yaz_log(log_level_rpn, "Relation >=");
851
852         *term_tmp++ = '(';
853         for (i = 0; term_component[i];)
854         {
855             int j = 0;
856
857             if (i)
858                 *term_tmp++ = '|';
859             while (j < i)
860                 string_rel_add_char(&term_tmp, term_component, &j);
861             *term_tmp++ = '[';
862
863             if (term_component[i+1])
864             {
865                 *term_tmp++ = '^';
866                 *term_tmp++ = '-';
867                 string_rel_add_char(&term_tmp, term_component, &i);
868             }
869             else
870             {
871                 string_rel_add_char(&term_tmp, term_component, &i);
872                 *term_tmp++ = '-';
873             }
874             *term_tmp++ = ']';
875             *term_tmp++ = '.';
876             *term_tmp++ = '*';
877
878             if ((term_tmp - term_dict) > IT_MAX_WORD)
879                 break;
880         }
881         *term_tmp++ = ')';
882         *term_tmp = '\0';
883         break;
884     case 3:
885     case 102:
886     case 103:
887     case -1:
888         if (!**term_sub)
889             return 1;
890         yaz_log(log_level_rpn, "Relation =");
891         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
892                       term_component, space_split, term_dst))
893             return 0;
894         strcat(term_tmp, "(");
895         strcat(term_tmp, term_component);
896         strcat(term_tmp, ")");
897         break;
898     default:
899         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
900         return 0;
901     }
902     return 1;
903 }
904
905 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
906                              const char **term_sub, 
907                              oid_value attributeSet, NMEM stream,
908                              struct grep_info *grep_info,
909                              int reg_type, int complete_flag,
910                              int num_bases, char **basenames,
911                              char *term_dst,
912                              const char *xpath_use,
913                              struct ord_list **ol);
914
915 static ZEBRA_RES term_limits_APT(ZebraHandle zh,
916                                  Z_AttributesPlusTerm *zapt,
917                                  zint *hits_limit_value,
918                                  const char **term_ref_id_str,
919                                  NMEM nmem)
920 {
921     AttrType term_ref_id_attr;
922     AttrType hits_limit_attr;
923     int term_ref_id_int;
924  
925     attr_init_APT(&hits_limit_attr, zapt, 9);
926     *hits_limit_value  = attr_find(&hits_limit_attr, NULL);
927
928     attr_init_APT(&term_ref_id_attr, zapt, 10);
929     term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
930     if (term_ref_id_int >= 0)
931     {
932         char *res = nmem_malloc(nmem, 20);
933         sprintf(res, "%d", term_ref_id_int);
934         *term_ref_id_str = res;
935     }
936
937     /* no limit given ? */
938     if (*hits_limit_value == -1)
939     {
940         if (*term_ref_id_str)
941         {
942             /* use global if term_ref is present */
943             *hits_limit_value = zh->approx_limit;
944         }
945         else
946         {
947             /* no counting if term_ref is not present */
948             *hits_limit_value = 0;
949         }
950     }
951     else if (*hits_limit_value == 0)
952     {
953         /* 0 is the same as global limit */
954         *hits_limit_value = zh->approx_limit;
955     }
956     yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
957             *term_ref_id_str ? *term_ref_id_str : "none",
958             *hits_limit_value);
959     return ZEBRA_OK;
960 }
961
962 static ZEBRA_RES term_trunc(ZebraHandle zh,
963                             Z_AttributesPlusTerm *zapt,
964                             const char **term_sub, 
965                             oid_value attributeSet, NMEM stream,
966                             struct grep_info *grep_info,
967                             int reg_type, int complete_flag,
968                             int num_bases, char **basenames,
969                             char *term_dst,
970                             const char *rank_type, 
971                             const char *xpath_use,
972                             NMEM rset_nmem,
973                             RSET *rset,
974                             struct rset_key_control *kc)
975 {
976     ZEBRA_RES res;
977     struct ord_list *ol;
978     zint hits_limit_value;
979     const char *term_ref_id_str = 0;
980     *rset = 0;
981
982     term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
983                     stream);
984     grep_info->isam_p_indx = 0;
985     res = string_term(zh, zapt, term_sub, attributeSet, stream, grep_info,
986                       reg_type, complete_flag, num_bases, basenames,
987                       term_dst, xpath_use, &ol);
988     if (res != ZEBRA_OK)
989         return res;
990     if (!*term_sub)  /* no more terms ? */
991         return res;
992     yaz_log(log_level_rpn, "term: %s", term_dst);
993     *rset = rset_trunc(zh, grep_info->isam_p_buf,
994                        grep_info->isam_p_indx, term_dst,
995                        strlen(term_dst), rank_type, 1 /* preserve pos */,
996                        zapt->term->which, rset_nmem,
997                        kc, kc->scope, ol, reg_type, hits_limit_value,
998                        term_ref_id_str);
999     if (!*rset)
1000         return ZEBRA_FAIL;
1001     return ZEBRA_OK;
1002 }
1003
1004 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1005                              const char **term_sub, 
1006                              oid_value attributeSet, NMEM stream,
1007                              struct grep_info *grep_info,
1008                              int reg_type, int complete_flag,
1009                              int num_bases, char **basenames,
1010                              char *term_dst,
1011                              const char *xpath_use,
1012                              struct ord_list **ol)
1013 {
1014     char term_dict[2*IT_MAX_WORD+4000];
1015     int j, r, base_no;
1016     AttrType truncation;
1017     int truncation_value;
1018     oid_value curAttributeSet = attributeSet;
1019     const char *termp;
1020     struct rpn_char_map_info rcmi;
1021     int space_split = complete_flag ? 0 : 1;
1022
1023     int bases_ok = 0;     /* no of databases with OK attribute */
1024
1025     *ol = ord_list_create(stream);
1026
1027     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1028     attr_init_APT(&truncation, zapt, 5);
1029     truncation_value = attr_find(&truncation, NULL);
1030     yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1031
1032     for (base_no = 0; base_no < num_bases; base_no++)
1033     {
1034         int ord = -1;
1035         int attr_ok = 0;
1036         int regex_range = 0;
1037         int init_pos = 0;
1038         int max_pos, prefix_len = 0;
1039         int relation_error;
1040         char ord_buf[32];
1041         int ord_len, i;
1042
1043         termp = *term_sub;
1044
1045         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1046         {
1047             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1048                            basenames[base_no]);
1049             return ZEBRA_FAIL;
1050         }
1051         
1052         if (zebra_apt_get_ord(zh, zapt, zinfo_index_category_index,
1053                               reg_type, xpath_use, 
1054                               curAttributeSet, &ord) 
1055             != ZEBRA_OK)
1056         {
1057             break;
1058         }
1059         *ol = ord_list_append(stream, *ol, ord);
1060         
1061         if (prefix_len)
1062             term_dict[prefix_len++] = '|';
1063         else
1064             term_dict[prefix_len++] = '(';
1065         
1066         ord_len = key_SU_encode (ord, ord_buf);
1067         for (i = 0; i<ord_len; i++)
1068         {
1069             term_dict[prefix_len++] = 1;
1070                 term_dict[prefix_len++] = ord_buf[i];
1071         }
1072         if (ord_len > init_pos)
1073             init_pos = ord_len;
1074         
1075         bases_ok++;
1076         if (prefix_len)
1077             attr_ok = 1;
1078
1079         term_dict[prefix_len++] = ')';
1080         term_dict[prefix_len] = '\0';
1081         j = prefix_len;
1082         switch (truncation_value)
1083         {
1084         case -1:         /* not specified */
1085         case 100:        /* do not truncate */
1086             if (!string_relation (zh, zapt, &termp, term_dict,
1087                                   attributeSet,
1088                                   reg_type, space_split, term_dst,
1089                                   &relation_error))
1090             {
1091                 if (relation_error)
1092                 {
1093                     zebra_setError(zh, relation_error, 0);
1094                     return ZEBRA_FAIL;
1095                 }
1096                 *term_sub = 0;
1097                 return ZEBRA_OK;
1098             }
1099             break;
1100         case 1:          /* right truncation */
1101             term_dict[j++] = '(';
1102             if (!term_100(zh->reg->zebra_maps, reg_type,
1103                           &termp, term_dict + j, space_split, term_dst))
1104             {
1105                 *term_sub = 0;
1106                 return ZEBRA_OK;
1107             }
1108             strcat(term_dict, ".*)");
1109             break;
1110         case 2:          /* keft truncation */
1111             term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1112             if (!term_100(zh->reg->zebra_maps, reg_type,
1113                           &termp, term_dict + j, space_split, term_dst))
1114             {
1115                 *term_sub = 0;
1116                 return ZEBRA_OK;
1117             }
1118             strcat(term_dict, ")");
1119             break;
1120         case 3:          /* left&right truncation */
1121             term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1122             if (!term_100(zh->reg->zebra_maps, reg_type,
1123                           &termp, term_dict + j, space_split, term_dst))
1124             {
1125                 *term_sub = 0;
1126                 return ZEBRA_OK;
1127             }
1128             strcat(term_dict, ".*)");
1129             break;
1130         case 101:        /* process # in term */
1131             term_dict[j++] = '(';
1132             if (!term_101(zh->reg->zebra_maps, reg_type,
1133                           &termp, term_dict + j, space_split, term_dst))
1134             {
1135                 *term_sub = 0;
1136                 return ZEBRA_OK;
1137             }
1138             strcat(term_dict, ")");
1139             break;
1140         case 102:        /* Regexp-1 */
1141             term_dict[j++] = '(';
1142             if (!term_102(zh->reg->zebra_maps, reg_type,
1143                           &termp, term_dict + j, space_split, term_dst))
1144             {
1145                 *term_sub = 0;
1146                 return ZEBRA_OK;
1147             }
1148             strcat(term_dict, ")");
1149             break;
1150         case 103:       /* Regexp-2 */
1151             regex_range = 1;
1152             term_dict[j++] = '(';
1153             if (!term_103(zh->reg->zebra_maps, reg_type,
1154                           &termp, term_dict + j, &regex_range,
1155                           space_split, term_dst))
1156             {
1157                 *term_sub = 0;
1158                 return ZEBRA_OK;
1159             }
1160             strcat(term_dict, ")");
1161             break;
1162         case 104:        /* process # and ! in term */
1163             term_dict[j++] = '(';
1164             if (!term_104(zh->reg->zebra_maps, reg_type,
1165                           &termp, term_dict + j, space_split, term_dst))
1166             {
1167                 *term_sub = 0;
1168                 return ZEBRA_OK;
1169             }
1170             strcat(term_dict, ")");
1171             break;
1172         case 105:        /* process * and ! in term */
1173             term_dict[j++] = '(';
1174             if (!term_105(zh->reg->zebra_maps, reg_type,
1175                           &termp, term_dict + j, space_split, term_dst, 1))
1176             {
1177                 *term_sub = 0;
1178                 return ZEBRA_OK;
1179             }
1180             strcat(term_dict, ")");
1181             break;
1182         case 106:        /* process * and ! in term */
1183             term_dict[j++] = '(';
1184             if (!term_105(zh->reg->zebra_maps, reg_type,
1185                           &termp, term_dict + j, space_split, term_dst, 0))
1186             {
1187                 *term_sub = 0;
1188                 return ZEBRA_OK;
1189             }
1190             strcat(term_dict, ")");
1191             break;
1192         default:
1193             zebra_setError_zint(zh,
1194                                 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1195                                 truncation_value);
1196             return ZEBRA_FAIL;
1197         }
1198         if (attr_ok)
1199         {
1200             char buf[80];
1201             const char *input = term_dict + prefix_len;
1202             esc_str(buf, sizeof(buf), input, strlen(input));
1203         }
1204         if (attr_ok)
1205         {
1206             yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_dict+prefix_len);
1207             r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
1208                                  grep_info, &max_pos, init_pos,
1209                                  grep_handle);
1210             if (r)
1211                 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1212         }
1213     }
1214     if (!bases_ok)
1215         return ZEBRA_FAIL;
1216     *term_sub = termp;
1217     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1218     return ZEBRA_OK;
1219 }
1220
1221
1222 /* convert APT search term to UTF8 */
1223 static ZEBRA_RES zapt_term_to_utf8(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1224                                    char *termz)
1225 {
1226     size_t sizez;
1227     Z_Term *term = zapt->term;
1228
1229     switch (term->which)
1230     {
1231     case Z_Term_general:
1232         if (zh->iconv_to_utf8 != 0)
1233         {
1234             char *inbuf = (char *) term->u.general->buf;
1235             size_t inleft = term->u.general->len;
1236             char *outbuf = termz;
1237             size_t outleft = IT_MAX_WORD-1;
1238             size_t ret;
1239
1240             ret = yaz_iconv(zh->iconv_to_utf8, &inbuf, &inleft,
1241                         &outbuf, &outleft);
1242             if (ret == (size_t)(-1))
1243             {
1244                 ret = yaz_iconv(zh->iconv_to_utf8, 0, 0, 0, 0);
1245                 zebra_setError(
1246                     zh, 
1247                     YAZ_BIB1_QUERY_TERM_INCLUDES_CHARS_THAT_DO_NOT_TRANSLATE_INTO_,
1248                     0);
1249                 return ZEBRA_FAIL;
1250             }
1251             *outbuf = 0;
1252         }
1253         else
1254         {
1255             sizez = term->u.general->len;
1256             if (sizez > IT_MAX_WORD-1)
1257                 sizez = IT_MAX_WORD-1;
1258             memcpy (termz, term->u.general->buf, sizez);
1259             termz[sizez] = '\0';
1260         }
1261         break;
1262     case Z_Term_characterString:
1263         sizez = strlen(term->u.characterString);
1264         if (sizez > IT_MAX_WORD-1)
1265             sizez = IT_MAX_WORD-1;
1266         memcpy (termz, term->u.characterString, sizez);
1267         termz[sizez] = '\0';
1268         break;
1269     default:
1270         zebra_setError(zh, YAZ_BIB1_UNSUPP_CODED_VALUE_FOR_TERM, 0);
1271         return ZEBRA_FAIL;
1272     }
1273     return ZEBRA_OK;
1274 }
1275
1276 /* convert APT SCAN term to internal cmap */
1277 static ZEBRA_RES trans_scan_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1278                                  char *termz, int reg_type)
1279 {
1280     char termz0[IT_MAX_WORD];
1281
1282     if (zapt_term_to_utf8(zh, zapt, termz0) == ZEBRA_FAIL)
1283         return ZEBRA_FAIL;    /* error */
1284     else
1285     {
1286         const char **map;
1287         const char *cp = (const char *) termz0;
1288         const char *cp_end = cp + strlen(cp);
1289         const char *src;
1290         int i = 0;
1291         const char *space_map = NULL;
1292         int len;
1293             
1294         while ((len = (cp_end - cp)) > 0)
1295         {
1296             map = zebra_maps_input(zh->reg->zebra_maps, reg_type, &cp, len, 0);
1297             if (**map == *CHR_SPACE)
1298                 space_map = *map;
1299             else
1300             {
1301                 if (i && space_map)
1302                     for (src = space_map; *src; src++)
1303                         termz[i++] = *src;
1304                 space_map = NULL;
1305                 for (src = *map; *src; src++)
1306                     termz[i++] = *src;
1307             }
1308         }
1309         termz[i] = '\0';
1310     }
1311     return ZEBRA_OK;
1312 }
1313
1314 static void grep_info_delete(struct grep_info *grep_info)
1315 {
1316 #ifdef TERM_COUNT
1317     xfree(grep_info->term_no);
1318 #endif
1319     xfree(grep_info->isam_p_buf);
1320 }
1321
1322 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1323                                    Z_AttributesPlusTerm *zapt,
1324                                    struct grep_info *grep_info,
1325                                    int reg_type)
1326 {
1327     AttrType termset;
1328     int termset_value_numeric;
1329     const char *termset_value_string;
1330
1331 #ifdef TERM_COUNT
1332     grep_info->term_no = 0;
1333 #endif
1334     grep_info->isam_p_size = 0;
1335     grep_info->isam_p_buf = NULL;
1336     grep_info->zh = zh;
1337     grep_info->reg_type = reg_type;
1338     grep_info->termset = 0;
1339
1340     if (!zapt)
1341         return ZEBRA_OK;
1342     attr_init_APT(&termset, zapt, 8);
1343     termset_value_numeric =
1344         attr_find_ex(&termset, NULL, &termset_value_string);
1345     if (termset_value_numeric != -1)
1346     {
1347         char resname[32];
1348         const char *termset_name = 0;
1349         if (termset_value_numeric != -2)
1350         {
1351     
1352             sprintf(resname, "%d", termset_value_numeric);
1353             termset_name = resname;
1354         }
1355         else
1356             termset_name = termset_value_string;
1357         yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1358         grep_info->termset = resultSetAdd(zh, termset_name, 1);
1359         if (!grep_info->termset)
1360         {
1361             zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1362             return ZEBRA_FAIL;
1363         }
1364     }
1365     return ZEBRA_OK;
1366 }
1367                                
1368 /**
1369   \brief Create result set(s) for list of terms
1370   \param zh Zebra Handle
1371   \param termz term as used in query but converted to UTF-8
1372   \param attributeSet default attribute set
1373   \param stream memory for result
1374   \param reg_type register type ('w', 'p',..)
1375   \param complete_flag whether it's phrases or not
1376   \param rank_type term flags for ranking
1377   \param xpath_use use attribute for X-Path (-1 for no X-path)
1378   \param num_bases number of databases
1379   \param basenames array of databases
1380   \param rset_mem memory for result sets
1381   \param result_sets output result set for each term in list (output)
1382   \param number number of output result sets
1383   \param kc rset key control to be used for created result sets
1384 */
1385 static ZEBRA_RES term_list_trunc(ZebraHandle zh,
1386                                  Z_AttributesPlusTerm *zapt,
1387                                  const char *termz,
1388                                  oid_value attributeSet,
1389                                  NMEM stream,
1390                                  int reg_type, int complete_flag,
1391                                  const char *rank_type,
1392                                  const char *xpath_use,
1393                                  int num_bases, char **basenames, 
1394                                  NMEM rset_nmem,
1395                                  RSET **result_sets, int *num_result_sets,
1396                                  struct rset_key_control *kc)
1397 {
1398     char term_dst[IT_MAX_WORD+1];
1399     struct grep_info grep_info;
1400     const char *termp = termz;
1401     int alloc_sets = 0;
1402     int empty_term = *termz ? 0 : 1;
1403
1404     empty_term = 0;
1405     *num_result_sets = 0;
1406     *term_dst = 0;
1407     if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1408         return ZEBRA_FAIL;
1409     while(1)
1410     { 
1411         ZEBRA_RES res;
1412
1413         if (alloc_sets == *num_result_sets)
1414         {
1415             int add = 10;
1416             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1417                                               sizeof(*rnew));
1418             if (alloc_sets)
1419                 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1420             alloc_sets = alloc_sets + add;
1421             *result_sets = rnew;
1422         }
1423         res = term_trunc(zh, zapt, &termp, attributeSet,
1424                          stream, &grep_info,
1425                          reg_type, complete_flag,
1426                          num_bases, basenames,
1427                          term_dst, rank_type,
1428                          xpath_use, rset_nmem,
1429                          &(*result_sets)[*num_result_sets],
1430                          kc);
1431         if (res != ZEBRA_OK)
1432         {
1433             int i;
1434             for (i = 0; i < *num_result_sets; i++)
1435                 rset_delete((*result_sets)[i]);
1436             grep_info_delete (&grep_info);
1437             return res;
1438         }
1439         if ((*result_sets)[*num_result_sets] == 0)
1440             break;
1441         (*num_result_sets)++;
1442
1443         if (empty_term)
1444             break;
1445         if (!*termp)
1446             break;
1447     }
1448     grep_info_delete(&grep_info);
1449     return ZEBRA_OK;
1450 }
1451
1452
1453 static ZEBRA_RES always_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1454                              oid_value attributeSet, NMEM stream,
1455                              struct grep_info *grep_info,
1456                              int reg_type, int complete_flag,
1457                              int num_bases, char **basenames,
1458                              const char *xpath_use,
1459                              struct ord_list **ol)
1460 {
1461     char term_dict[2*IT_MAX_WORD+4000];
1462     int r, base_no;
1463     struct rpn_char_map_info rcmi;
1464
1465     int bases_ok = 0;     /* no of databases with OK attribute */
1466
1467     *ol = ord_list_create(stream);
1468
1469     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1470
1471     for (base_no = 0; base_no < num_bases; base_no++)
1472     {
1473         int ord = -1;
1474         int regex_range = 0;
1475         int init_pos = 0;
1476         int max_pos, prefix_len = 0;
1477         char ord_buf[32];
1478         int ord_len, i;
1479
1480         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1481         {
1482             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1483                            basenames[base_no]);
1484             return ZEBRA_FAIL;
1485         }
1486         
1487         if (zebra_apt_get_ord(zh, zapt, zinfo_index_category_alwaysmatches,
1488                               reg_type, xpath_use, 
1489                               attributeSet, &ord) != ZEBRA_OK)
1490             return ZEBRA_FAIL;
1491         yaz_log(YLOG_LOG, "Got ordinal value: %d", ord);
1492         *ol = ord_list_append(stream, *ol, ord);
1493         
1494         if (prefix_len)
1495             term_dict[prefix_len++] = '|';
1496         else
1497             term_dict[prefix_len++] = '(';
1498         
1499         ord_len = key_SU_encode (ord, ord_buf);
1500         for (i = 0; i<ord_len; i++)
1501         {
1502             term_dict[prefix_len++] = 1;
1503             term_dict[prefix_len++] = ord_buf[i];
1504         }
1505         if (ord_len > init_pos)
1506             init_pos = ord_len;
1507         
1508         bases_ok++;
1509
1510         term_dict[prefix_len++] = ')';
1511         term_dict[prefix_len] = '\0';
1512         
1513         r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
1514                              grep_info, &max_pos, init_pos,
1515                              grep_handle);
1516     }
1517     if (!bases_ok)
1518         return ZEBRA_FAIL;
1519     yaz_log(YLOG_LOG, "always_term: %d positions", grep_info->isam_p_indx);
1520     return ZEBRA_OK;
1521 }
1522
1523 static ZEBRA_RES rpn_search_APT_alwaysmatches(ZebraHandle zh,
1524                                               Z_AttributesPlusTerm *zapt,
1525                                               const char *termz_org,
1526                                               oid_value attributeSet,
1527                                               NMEM stream,
1528                                               int reg_type, int complete_flag,
1529                                               const char *rank_type,
1530                                               const char *xpath_use,
1531                                               int num_bases, char **basenames, 
1532                                               NMEM rset_nmem,
1533                                               RSET *rset,
1534                                               struct rset_key_control *kc)
1535 {
1536     const char *term_dst = "always";
1537     struct grep_info grep_info;
1538     zint hits_limit_value;
1539     const char *term_ref_id_str = 0;
1540     ZEBRA_RES res;
1541     struct ord_list *ol;
1542
1543     term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
1544                     stream);
1545     if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1546         return ZEBRA_FAIL;
1547
1548     grep_info.isam_p_indx = 0;
1549
1550     res = always_term(zh, zapt, attributeSet, stream, &grep_info,
1551                       reg_type, complete_flag, num_bases, basenames,
1552                       xpath_use, &ol);
1553     if (res == ZEBRA_OK)
1554     {
1555         *rset = rset_trunc(zh, grep_info.isam_p_buf,
1556                            grep_info.isam_p_indx, term_dst, strlen(term_dst),
1557                            rank_type, 1 /* preserve pos */,
1558                            zapt->term->which, rset_nmem,
1559                            kc, kc->scope, ol, reg_type, hits_limit_value,
1560                            term_ref_id_str);
1561         if (!*rset)
1562             res = ZEBRA_FAIL;
1563     }
1564     grep_info_delete (&grep_info);
1565     return res;
1566 }
1567
1568 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1569                                        Z_AttributesPlusTerm *zapt,
1570                                        const char *termz_org,
1571                                        oid_value attributeSet,
1572                                        NMEM stream,
1573                                        int reg_type, int complete_flag,
1574                                        const char *rank_type,
1575                                        const char *xpath_use,
1576                                        int num_bases, char **basenames, 
1577                                        NMEM rset_nmem,
1578                                        RSET *rset,
1579                                        struct rset_key_control *kc)
1580 {
1581     RSET *result_sets = 0;
1582     int num_result_sets = 0;
1583     ZEBRA_RES res =
1584         term_list_trunc(zh, zapt, termz_org, attributeSet,
1585                         stream, reg_type, complete_flag,
1586                         rank_type, xpath_use,
1587                         num_bases, basenames,
1588                         rset_nmem,
1589                         &result_sets, &num_result_sets, kc);
1590     if (res != ZEBRA_OK)
1591         return res;
1592     if (num_result_sets == 0)
1593         *rset = rset_create_null(rset_nmem, kc, 0); 
1594     else if (num_result_sets == 1)
1595         *rset = result_sets[0];
1596     else
1597         *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1598                                  num_result_sets, result_sets,
1599                                  1 /* ordered */, 0 /* exclusion */,
1600                                  3 /* relation */, 1 /* distance */);
1601     if (!*rset)
1602         return ZEBRA_FAIL;
1603     return ZEBRA_OK;
1604 }
1605
1606 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1607                                         Z_AttributesPlusTerm *zapt,
1608                                         const char *termz_org,
1609                                         oid_value attributeSet,
1610                                         NMEM stream,
1611                                         int reg_type, int complete_flag,
1612                                         const char *rank_type,
1613                                         const char *xpath_use,
1614                                         int num_bases, char **basenames,
1615                                         NMEM rset_nmem,
1616                                         RSET *rset,
1617                                         struct rset_key_control *kc)
1618 {
1619     RSET *result_sets = 0;
1620     int num_result_sets = 0;
1621     ZEBRA_RES res =
1622         term_list_trunc(zh, zapt, termz_org, attributeSet,
1623                         stream, reg_type, complete_flag,
1624                         rank_type, xpath_use,
1625                         num_bases, basenames,
1626                         rset_nmem,
1627                         &result_sets, &num_result_sets, kc);
1628     if (res != ZEBRA_OK)
1629         return res;
1630     if (num_result_sets == 0)
1631         *rset = rset_create_null(rset_nmem, kc, 0); 
1632     else if (num_result_sets == 1)
1633         *rset = result_sets[0];
1634     else
1635         *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1636                                num_result_sets, result_sets);
1637     if (!*rset)
1638         return ZEBRA_FAIL;
1639     return ZEBRA_OK;
1640 }
1641
1642 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1643                                          Z_AttributesPlusTerm *zapt,
1644                                          const char *termz_org,
1645                                          oid_value attributeSet,
1646                                          NMEM stream,
1647                                          int reg_type, int complete_flag,
1648                                          const char *rank_type, 
1649                                          const char *xpath_use,
1650                                          int num_bases, char **basenames,
1651                                          NMEM rset_nmem,
1652                                          RSET *rset,
1653                                          struct rset_key_control *kc)
1654 {
1655     RSET *result_sets = 0;
1656     int num_result_sets = 0;
1657     ZEBRA_RES res =
1658         term_list_trunc(zh, zapt, termz_org, attributeSet,
1659                         stream, reg_type, complete_flag,
1660                         rank_type, xpath_use,
1661                         num_bases, basenames,
1662                         rset_nmem,
1663                         &result_sets, &num_result_sets,
1664                         kc);
1665     if (res != ZEBRA_OK)
1666         return res;
1667     if (num_result_sets == 0)
1668         *rset = rset_create_null(rset_nmem, kc, 0); 
1669     else if (num_result_sets == 1)
1670         *rset = result_sets[0];
1671     else
1672         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1673                                 num_result_sets, result_sets);
1674     if (!*rset)
1675         return ZEBRA_FAIL;
1676     return ZEBRA_OK;
1677 }
1678
1679 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1680                             const char **term_sub,
1681                             char *term_dict,
1682                             oid_value attributeSet,
1683                             struct grep_info *grep_info,
1684                             int *max_pos,
1685                             int reg_type,
1686                             char *term_dst,
1687                             int *error_code)
1688 {
1689     AttrType relation;
1690     int relation_value;
1691     int term_value;
1692     int r;
1693     char *term_tmp = term_dict + strlen(term_dict);
1694
1695     *error_code = 0;
1696     attr_init_APT(&relation, zapt, 2);
1697     relation_value = attr_find(&relation, NULL);
1698
1699     yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1700
1701     if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1702                   term_dst))
1703         return 0;
1704     term_value = atoi (term_tmp);
1705     switch (relation_value)
1706     {
1707     case 1:
1708         yaz_log(log_level_rpn, "Relation <");
1709         gen_regular_rel(term_tmp, term_value-1, 1);
1710         break;
1711     case 2:
1712         yaz_log(log_level_rpn, "Relation <=");
1713         gen_regular_rel(term_tmp, term_value, 1);
1714         break;
1715     case 4:
1716         yaz_log(log_level_rpn, "Relation >=");
1717         gen_regular_rel(term_tmp, term_value, 0);
1718         break;
1719     case 5:
1720         yaz_log(log_level_rpn, "Relation >");
1721         gen_regular_rel(term_tmp, term_value+1, 0);
1722         break;
1723     case -1:
1724     case 3:
1725         yaz_log(log_level_rpn, "Relation =");
1726         sprintf(term_tmp, "(0*%d)", term_value);
1727         break;
1728     default:
1729         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1730         return 0;
1731     }
1732     yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_tmp);
1733     r = dict_lookup_grep(zh->reg->dict, term_dict, 0, grep_info, max_pos,
1734                           0, grep_handle);
1735     if (r)
1736         yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1737     yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1738     return 1;
1739 }
1740
1741 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1742                               const char **term_sub, 
1743                               oid_value attributeSet,
1744                               struct grep_info *grep_info,
1745                               int reg_type, int complete_flag,
1746                               int num_bases, char **basenames,
1747                               char *term_dst, 
1748                               const char *xpath_use,
1749                               NMEM stream)
1750 {
1751     char term_dict[2*IT_MAX_WORD+2];
1752     int base_no;
1753     oid_value curAttributeSet = attributeSet;
1754     const char *termp;
1755     struct rpn_char_map_info rcmi;
1756
1757     int bases_ok = 0;     /* no of databases with OK attribute */
1758
1759     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1760
1761     for (base_no = 0; base_no < num_bases; base_no++)
1762     {
1763         int max_pos, prefix_len = 0;
1764         int relation_error = 0;
1765         int ord, ord_len, i;
1766         char ord_buf[32];
1767
1768         termp = *term_sub;
1769
1770         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1771         {
1772             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1773                            basenames[base_no]);
1774             return ZEBRA_FAIL;
1775         }
1776
1777         if (zebra_apt_get_ord(zh, zapt, zinfo_index_category_index,
1778                               reg_type, xpath_use, curAttributeSet, &ord) 
1779             != ZEBRA_OK)
1780         {
1781             break;
1782         }
1783
1784         if (prefix_len)
1785             term_dict[prefix_len++] = '|';
1786         else
1787             term_dict[prefix_len++] = '(';
1788         
1789         ord_len = key_SU_encode (ord, ord_buf);
1790         for (i = 0; i < ord_len; i++)
1791         {
1792             term_dict[prefix_len++] = 1;
1793                 term_dict[prefix_len++] = ord_buf[i];
1794         }
1795         bases_ok++;
1796         term_dict[prefix_len++] = ')';
1797         term_dict[prefix_len] = '\0';
1798         if (!numeric_relation(zh, zapt, &termp, term_dict,
1799                               attributeSet, grep_info, &max_pos, reg_type,
1800                               term_dst, &relation_error))
1801         {
1802             if (relation_error)
1803             {
1804                 zebra_setError(zh, relation_error, 0);
1805                 return ZEBRA_FAIL;
1806             }
1807             *term_sub = 0;
1808             return ZEBRA_OK;
1809         }
1810     }
1811     if (!bases_ok)
1812         return ZEBRA_FAIL;
1813     *term_sub = termp;
1814     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1815     return ZEBRA_OK;
1816 }
1817
1818                                  
1819 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1820                                         Z_AttributesPlusTerm *zapt,
1821                                         const char *termz,
1822                                         oid_value attributeSet,
1823                                         NMEM stream,
1824                                         int reg_type, int complete_flag,
1825                                         const char *rank_type, 
1826                                         const char *xpath_use,
1827                                         int num_bases, char **basenames,
1828                                         NMEM rset_nmem,
1829                                         RSET *rset,
1830                                         struct rset_key_control *kc)
1831 {
1832     char term_dst[IT_MAX_WORD+1];
1833     const char *termp = termz;
1834     RSET *result_sets = 0;
1835     int num_result_sets = 0;
1836     ZEBRA_RES res;
1837     struct grep_info grep_info;
1838     int alloc_sets = 0;
1839     zint hits_limit_value;
1840     const char *term_ref_id_str = 0;
1841
1842     term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
1843
1844     yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1845     if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1846         return ZEBRA_FAIL;
1847     while (1)
1848     { 
1849         if (alloc_sets == num_result_sets)
1850         {
1851             int add = 10;
1852             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1853                                               sizeof(*rnew));
1854             if (alloc_sets)
1855                 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1856             alloc_sets = alloc_sets + add;
1857             result_sets = rnew;
1858         }
1859         yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1860         grep_info.isam_p_indx = 0;
1861         res = numeric_term(zh, zapt, &termp, attributeSet, &grep_info,
1862                            reg_type, complete_flag, num_bases, basenames,
1863                            term_dst, xpath_use,
1864                            stream);
1865         if (res == ZEBRA_FAIL || termp == 0)
1866             break;
1867         yaz_log(YLOG_DEBUG, "term: %s", term_dst);
1868         result_sets[num_result_sets] =
1869             rset_trunc(zh, grep_info.isam_p_buf,
1870                        grep_info.isam_p_indx, term_dst,
1871                        strlen(term_dst), rank_type,
1872                        0 /* preserve position */,
1873                        zapt->term->which, rset_nmem, 
1874                        kc, kc->scope, 0, reg_type,
1875                        hits_limit_value,
1876                        term_ref_id_str);
1877         if (!result_sets[num_result_sets])
1878             break;
1879         num_result_sets++;
1880     }
1881     grep_info_delete(&grep_info);
1882     if (termp)
1883     {
1884         int i;
1885         for (i = 0; i<num_result_sets; i++)
1886             rset_delete(result_sets[i]);
1887         return ZEBRA_FAIL;
1888     }
1889     if (num_result_sets == 0)
1890         *rset = rset_create_null(rset_nmem, kc, 0);
1891     if (num_result_sets == 1)
1892         *rset = result_sets[0];
1893     else
1894         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1895                                 num_result_sets, result_sets);
1896     if (!*rset)
1897         return ZEBRA_FAIL;
1898     return ZEBRA_OK;
1899 }
1900
1901 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1902                                       Z_AttributesPlusTerm *zapt,
1903                                       const char *termz,
1904                                       oid_value attributeSet,
1905                                       NMEM stream,
1906                                       const char *rank_type, NMEM rset_nmem,
1907                                       RSET *rset,
1908                                       struct rset_key_control *kc)
1909 {
1910     RSFD rsfd;
1911     struct it_key key;
1912     int sys;
1913     *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1914                              res_get (zh->res, "setTmpDir"),0 );
1915     rsfd = rset_open(*rset, RSETF_WRITE);
1916     
1917     sys = atoi(termz);
1918     if (sys <= 0)
1919         sys = 1;
1920     key.mem[0] = sys;
1921     key.mem[1] = 1;
1922     key.len = 2;
1923     rset_write (rsfd, &key);
1924     rset_close (rsfd);
1925     return ZEBRA_OK;
1926 }
1927
1928 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1929                                oid_value attributeSet, NMEM stream,
1930                                Z_SortKeySpecList *sort_sequence,
1931                                const char *rank_type,
1932                                NMEM rset_nmem,
1933                                RSET *rset,
1934                                struct rset_key_control *kc)
1935 {
1936     int i;
1937     int sort_relation_value;
1938     AttrType sort_relation_type;
1939     Z_SortKeySpec *sks;
1940     Z_SortKey *sk;
1941     int oid[OID_SIZE];
1942     oident oe;
1943     char termz[20];
1944     
1945     attr_init_APT(&sort_relation_type, zapt, 7);
1946     sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
1947
1948     if (!sort_sequence->specs)
1949     {
1950         sort_sequence->num_specs = 10;
1951         sort_sequence->specs = (Z_SortKeySpec **)
1952             nmem_malloc(stream, sort_sequence->num_specs *
1953                          sizeof(*sort_sequence->specs));
1954         for (i = 0; i<sort_sequence->num_specs; i++)
1955             sort_sequence->specs[i] = 0;
1956     }
1957     if (zapt->term->which != Z_Term_general)
1958         i = 0;
1959     else
1960         i = atoi_n ((char *) zapt->term->u.general->buf,
1961                     zapt->term->u.general->len);
1962     if (i >= sort_sequence->num_specs)
1963         i = 0;
1964     sprintf(termz, "%d", i);
1965
1966     oe.proto = PROTO_Z3950;
1967     oe.oclass = CLASS_ATTSET;
1968     oe.value = attributeSet;
1969     if (!oid_ent_to_oid (&oe, oid))
1970         return ZEBRA_FAIL;
1971
1972     sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1973     sks->sortElement = (Z_SortElement *)
1974         nmem_malloc(stream, sizeof(*sks->sortElement));
1975     sks->sortElement->which = Z_SortElement_generic;
1976     sk = sks->sortElement->u.generic = (Z_SortKey *)
1977         nmem_malloc(stream, sizeof(*sk));
1978     sk->which = Z_SortKey_sortAttributes;
1979     sk->u.sortAttributes = (Z_SortAttributes *)
1980         nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
1981
1982     sk->u.sortAttributes->id = oid;
1983     sk->u.sortAttributes->list = zapt->attributes;
1984
1985     sks->sortRelation = (int *)
1986         nmem_malloc(stream, sizeof(*sks->sortRelation));
1987     if (sort_relation_value == 1)
1988         *sks->sortRelation = Z_SortKeySpec_ascending;
1989     else if (sort_relation_value == 2)
1990         *sks->sortRelation = Z_SortKeySpec_descending;
1991     else 
1992         *sks->sortRelation = Z_SortKeySpec_ascending;
1993
1994     sks->caseSensitivity = (int *)
1995         nmem_malloc(stream, sizeof(*sks->caseSensitivity));
1996     *sks->caseSensitivity = 0;
1997
1998     sks->which = Z_SortKeySpec_null;
1999     sks->u.null = odr_nullval ();
2000     sort_sequence->specs[i] = sks;
2001     *rset = rset_create_null(rset_nmem, kc, 0);
2002     return ZEBRA_OK;
2003 }
2004
2005
2006 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2007                            oid_value attributeSet,
2008                            struct xpath_location_step *xpath, int max,
2009                            NMEM mem)
2010 {
2011     oid_value curAttributeSet = attributeSet;
2012     AttrType use;
2013     const char *use_string = 0;
2014     
2015     attr_init_APT(&use, zapt, 1);
2016     attr_find_ex(&use, &curAttributeSet, &use_string);
2017
2018     if (!use_string || *use_string != '/')
2019         return -1;
2020
2021     return zebra_parse_xpath_str(use_string, xpath, max, mem);
2022 }
2023  
2024                
2025
2026 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
2027                         int reg_type, const char *term, 
2028                         const char *xpath_use,
2029                         NMEM rset_nmem,
2030                         struct rset_key_control *kc)
2031 {
2032     RSET rset;
2033     struct grep_info grep_info;
2034     char term_dict[2048];
2035     char ord_buf[32];
2036     int prefix_len = 0;
2037     int ord = zebraExplain_lookup_attr_str(zh->reg->zei, 
2038                                            zinfo_index_category_index,
2039                                            reg_type,
2040                                            xpath_use);
2041     int ord_len, i, r, max_pos;
2042     int term_type = Z_Term_characterString;
2043     const char *flags = "void";
2044
2045     if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, '0') == ZEBRA_FAIL)
2046         return rset_create_null(rset_nmem, kc, 0);
2047     
2048     if (ord < 0)
2049         return rset_create_null(rset_nmem, kc, 0);
2050     if (prefix_len)
2051         term_dict[prefix_len++] = '|';
2052     else
2053         term_dict[prefix_len++] = '(';
2054     
2055     ord_len = key_SU_encode (ord, ord_buf);
2056     for (i = 0; i<ord_len; i++)
2057     {
2058         term_dict[prefix_len++] = 1;
2059         term_dict[prefix_len++] = ord_buf[i];
2060     }
2061     term_dict[prefix_len++] = ')';
2062     strcpy(term_dict+prefix_len, term);
2063     
2064     grep_info.isam_p_indx = 0;
2065     r = dict_lookup_grep(zh->reg->dict, term_dict, 0,
2066                           &grep_info, &max_pos, 0, grep_handle);
2067     yaz_log(YLOG_DEBUG, "%s %d positions", term,
2068              grep_info.isam_p_indx);
2069     rset = rset_trunc(zh, grep_info.isam_p_buf,
2070                       grep_info.isam_p_indx, term, strlen(term),
2071                       flags, 1, term_type,rset_nmem,
2072                       kc, kc->scope, 0, reg_type, 0 /* hits_limit */,
2073                       0 /* term_ref_id_str */);
2074     grep_info_delete(&grep_info);
2075     return rset;
2076 }
2077
2078 static
2079 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2080                            int num_bases, char **basenames,
2081                            NMEM stream, const char *rank_type, RSET rset,
2082                            int xpath_len, struct xpath_location_step *xpath,
2083                            NMEM rset_nmem,
2084                            RSET *rset_out,
2085                            struct rset_key_control *kc)
2086 {
2087     int base_no;
2088     int i;
2089     int always_matches = rset ? 0 : 1;
2090
2091     if (xpath_len < 0)
2092     {
2093         *rset_out = rset;
2094         return ZEBRA_OK;
2095     }
2096
2097     yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2098     for (i = 0; i<xpath_len; i++)
2099     {
2100         yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2101
2102     }
2103
2104     /*
2105       //a    ->    a/.*
2106       //a/b  ->    b/a/.*
2107       /a     ->    a/
2108       /a/b   ->    b/a/
2109
2110       /      ->    none
2111
2112    a[@attr = value]/b[@other = othervalue]
2113
2114  /e/@a val      range(e/,range(@a,freetext(w,1015,val),@a),e/)
2115  /a/b val       range(b/a/,freetext(w,1016,val),b/a/)
2116  /a/b/@c val    range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2117  /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2118  /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2119  /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
2120       
2121     */
2122
2123     dict_grep_cmap (zh->reg->dict, 0, 0);
2124
2125     for (base_no = 0; base_no < num_bases; base_no++)
2126     {
2127         int level = xpath_len;
2128         int first_path = 1;
2129         
2130         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2131         {
2132             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2133                            basenames[base_no]);
2134             *rset_out = rset;
2135             return ZEBRA_FAIL;
2136         }
2137         while (--level >= 0)
2138         {
2139             WRBUF xpath_rev = wrbuf_alloc();
2140             int i;
2141             RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
2142
2143             for (i = level; i >= 1; --i)
2144             {
2145                 const char *cp = xpath[i].part;
2146                 if (*cp)
2147                 {
2148                     for (; *cp; cp++)
2149                     {
2150                         if (*cp == '*')
2151                             wrbuf_puts(xpath_rev, "[^/]*");
2152                         else if (*cp == ' ')
2153                             wrbuf_puts(xpath_rev, "\001 ");
2154                         else
2155                             wrbuf_putc(xpath_rev, *cp);
2156
2157                         /* wrbuf_putc does not null-terminate , but
2158                            wrbuf_puts below ensures it does.. so xpath_rev
2159                            is OK iff length is > 0 */
2160                     }
2161                     wrbuf_puts(xpath_rev, "/");
2162                 }
2163                 else if (i == 1)  /* // case */
2164                     wrbuf_puts(xpath_rev, ".*");
2165             }
2166             if (xpath[level].predicate &&
2167                 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2168                 xpath[level].predicate->u.relation.name[0])
2169             {
2170                 WRBUF wbuf = wrbuf_alloc();
2171                 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2172                 if (xpath[level].predicate->u.relation.value)
2173                 {
2174                     const char *cp = xpath[level].predicate->u.relation.value;
2175                     wrbuf_putc(wbuf, '=');
2176                     
2177                     while (*cp)
2178                     {
2179                         if (strchr(REGEX_CHARS, *cp))
2180                             wrbuf_putc(wbuf, '\\');
2181                         wrbuf_putc(wbuf, *cp);
2182                         cp++;
2183                     }
2184                 }
2185                 wrbuf_puts(wbuf, "");
2186                 rset_attr = xpath_trunc(
2187                     zh, stream, '0', wrbuf_buf(wbuf), ZEBRA_XPATH_ATTR_NAME, 
2188                     rset_nmem, kc);
2189                 wrbuf_free(wbuf, 1);
2190             } 
2191             else 
2192             {
2193                 if (!first_path)
2194                 {
2195                     wrbuf_free(xpath_rev, 1);
2196                     continue;
2197                 }
2198             }
2199             yaz_log(log_level_rpn, "xpath_rev (%d) = %.*s", level, 
2200                     wrbuf_len(xpath_rev), wrbuf_buf(xpath_rev));
2201             if (wrbuf_len(xpath_rev))
2202             {
2203                 rset_start_tag = xpath_trunc(zh, stream, '0', 
2204                                              wrbuf_buf(xpath_rev),
2205                                              ZEBRA_XPATH_ELM_BEGIN, 
2206                                              rset_nmem, kc);
2207                 if (always_matches)
2208                     rset = rset_start_tag;
2209                 else
2210                 {
2211                     rset_end_tag = xpath_trunc(zh, stream, '0', 
2212                                                wrbuf_buf(xpath_rev),
2213                                                ZEBRA_XPATH_ELM_END, 
2214                                                rset_nmem, kc);
2215                     
2216                     rset = rset_create_between(rset_nmem, kc, kc->scope,
2217                                                rset_start_tag, rset,
2218                                                rset_end_tag, rset_attr);
2219                 }
2220             }
2221             wrbuf_free(xpath_rev, 1);
2222             first_path = 0;
2223         }
2224     }
2225     *rset_out = rset;
2226     return ZEBRA_OK;
2227 }
2228
2229 #define MAX_XPATH_STEPS 10
2230
2231 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2232                                 oid_value attributeSet, NMEM stream,
2233                                 Z_SortKeySpecList *sort_sequence,
2234                                 int num_bases, char **basenames, 
2235                                 NMEM rset_nmem,
2236                                 RSET *rset,
2237                                 struct rset_key_control *kc)
2238 {
2239     ZEBRA_RES res = ZEBRA_OK;
2240     unsigned reg_id;
2241     char *search_type = NULL;
2242     char rank_type[128];
2243     int complete_flag;
2244     int sort_flag;
2245     char termz[IT_MAX_WORD+1];
2246     int xpath_len;
2247     const char *xpath_use = 0;
2248     struct xpath_location_step xpath[MAX_XPATH_STEPS];
2249
2250     if (!log_level_set)
2251     {
2252         log_level_rpn = yaz_log_module_level("rpn");
2253         log_level_set = 1;
2254     }
2255     zebra_maps_attr(zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2256                     rank_type, &complete_flag, &sort_flag);
2257     
2258     yaz_log(YLOG_DEBUG, "reg_id=%c", reg_id);
2259     yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2260     yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2261     yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2262
2263     if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2264         return ZEBRA_FAIL;
2265
2266     if (sort_flag)
2267         return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2268                              rank_type, rset_nmem, rset, kc);
2269     /* consider if an X-Path query is used */
2270     xpath_len = rpn_check_xpath(zh, zapt, attributeSet, 
2271                                 xpath, MAX_XPATH_STEPS, stream);
2272     if (xpath_len >= 0)
2273     {
2274         if (xpath[xpath_len-1].part[0] == '@') 
2275             xpath_use = ZEBRA_XPATH_ATTR_CDATA;  /* last step is attribute  */
2276         else
2277             xpath_use = ZEBRA_XPATH_CDATA;  /* searching for cdata */        
2278     }
2279
2280     /* search using one of the various search type strategies
2281        termz is our UTF-8 search term
2282        attributeSet is top-level default attribute set 
2283        stream is ODR for search
2284        reg_id is the register type
2285        complete_flag is 1 for complete subfield, 0 for incomplete
2286        xpath_use is use-attribute to be used for X-Path search, 0 for none
2287     */
2288     if (!strcmp(search_type, "phrase"))
2289     {
2290         res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2291                                     reg_id, complete_flag, rank_type,
2292                                     xpath_use,
2293                                     num_bases, basenames, rset_nmem,
2294                                     rset, kc);
2295     }
2296     else if (!strcmp(search_type, "and-list"))
2297     {
2298         res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2299                                       reg_id, complete_flag, rank_type,
2300                                       xpath_use,
2301                                       num_bases, basenames, rset_nmem,
2302                                       rset, kc);
2303     }
2304     else if (!strcmp(search_type, "or-list"))
2305     {
2306         res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2307                                      reg_id, complete_flag, rank_type,
2308                                      xpath_use,
2309                                      num_bases, basenames, rset_nmem,
2310                                      rset, kc);
2311     }
2312     else if (!strcmp(search_type, "local"))
2313     {
2314         res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2315                                    rank_type, rset_nmem, rset, kc);
2316     }
2317     else if (!strcmp(search_type, "numeric"))
2318     {
2319         res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2320                                      reg_id, complete_flag, rank_type,
2321                                      xpath_use,
2322                                      num_bases, basenames, rset_nmem,
2323                                      rset, kc);
2324     }
2325     else if (!strcmp(search_type, "always"))
2326     {
2327         if (xpath_len >= 0) /* alwaysmatches and X-Path ? */
2328         {
2329             *rset = 0; /* signal no "term" set */
2330             return rpn_search_xpath(zh, num_bases, basenames,
2331                             stream, rank_type, *rset, 
2332                             xpath_len, xpath, rset_nmem, rset, kc);
2333         }
2334         else
2335         {
2336             res = rpn_search_APT_alwaysmatches(zh, zapt, termz,
2337                                                attributeSet, stream,
2338                                                reg_id, complete_flag,
2339                                                rank_type,
2340                                                xpath_use,
2341                                                num_bases, basenames, rset_nmem,
2342                                                rset, kc);
2343         }
2344     }
2345     else
2346     {
2347         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2348         res = ZEBRA_FAIL;
2349     }
2350     if (res != ZEBRA_OK)
2351         return res;
2352     if (!*rset)
2353         return ZEBRA_FAIL;
2354     return rpn_search_xpath(zh, num_bases, basenames,
2355                             stream, rank_type, *rset, 
2356                             xpath_len, xpath, rset_nmem, rset, kc);
2357 }
2358
2359 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2360                                       oid_value attributeSet, 
2361                                       NMEM stream, NMEM rset_nmem,
2362                                       Z_SortKeySpecList *sort_sequence,
2363                                       int num_bases, char **basenames,
2364                                       RSET **result_sets, int *num_result_sets,
2365                                       Z_Operator *parent_op,
2366                                       struct rset_key_control *kc);
2367
2368 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2369                          oid_value attributeSet, 
2370                          NMEM stream, NMEM rset_nmem,
2371                          Z_SortKeySpecList *sort_sequence,
2372                          int num_bases, char **basenames,
2373                          RSET *result_set)
2374 {
2375     RSET *result_sets = 0;
2376     int num_result_sets = 0;
2377     ZEBRA_RES res;
2378     struct rset_key_control *kc = zebra_key_control_create(zh);
2379
2380     res = rpn_search_structure(zh, zs, attributeSet,
2381                                stream, rset_nmem,
2382                                sort_sequence, 
2383                                num_bases, basenames,
2384                                &result_sets, &num_result_sets,
2385                                0 /* no parent op */,
2386                                kc);
2387     if (res != ZEBRA_OK)
2388     {
2389         int i;
2390         for (i = 0; i<num_result_sets; i++)
2391             rset_delete(result_sets[i]);
2392         *result_set = 0;
2393     }
2394     else
2395     {
2396         assert(num_result_sets == 1);
2397         assert(result_sets);
2398         assert(*result_sets);
2399         *result_set = *result_sets;
2400     }
2401     (*kc->dec)(kc);
2402     return res;
2403 }
2404
2405 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2406                                oid_value attributeSet, 
2407                                NMEM stream, NMEM rset_nmem,
2408                                Z_SortKeySpecList *sort_sequence,
2409                                int num_bases, char **basenames,
2410                                RSET **result_sets, int *num_result_sets,
2411                                Z_Operator *parent_op,
2412                                struct rset_key_control *kc)
2413 {
2414     *num_result_sets = 0;
2415     if (zs->which == Z_RPNStructure_complex)
2416     {
2417         ZEBRA_RES res;
2418         Z_Operator *zop = zs->u.complex->roperator;
2419         RSET *result_sets_l = 0;
2420         int num_result_sets_l = 0;
2421         RSET *result_sets_r = 0;
2422         int num_result_sets_r = 0;
2423
2424         res = rpn_search_structure(zh, zs->u.complex->s1,
2425                                    attributeSet, stream, rset_nmem,
2426                                    sort_sequence,
2427                                    num_bases, basenames,
2428                                    &result_sets_l, &num_result_sets_l,
2429                                    zop, kc);
2430         if (res != ZEBRA_OK)
2431         {
2432             int i;
2433             for (i = 0; i<num_result_sets_l; i++)
2434                 rset_delete(result_sets_l[i]);
2435             return res;
2436         }
2437         res = rpn_search_structure(zh, zs->u.complex->s2,
2438                                    attributeSet, stream, rset_nmem,
2439                                    sort_sequence,
2440                                    num_bases, basenames,
2441                                    &result_sets_r, &num_result_sets_r,
2442                                    zop, kc);
2443         if (res != ZEBRA_OK)
2444         {
2445             int i;
2446             for (i = 0; i<num_result_sets_l; i++)
2447                 rset_delete(result_sets_l[i]);
2448             for (i = 0; i<num_result_sets_r; i++)
2449                 rset_delete(result_sets_r[i]);
2450             return res;
2451         }
2452
2453         /* make a new list of result for all children */
2454         *num_result_sets = num_result_sets_l + num_result_sets_r;
2455         *result_sets = nmem_malloc(stream, *num_result_sets * 
2456                                    sizeof(**result_sets));
2457         memcpy(*result_sets, result_sets_l, 
2458                num_result_sets_l * sizeof(**result_sets));
2459         memcpy(*result_sets + num_result_sets_l, result_sets_r, 
2460                num_result_sets_r * sizeof(**result_sets));
2461
2462         if (!parent_op || parent_op->which != zop->which
2463             || (zop->which != Z_Operator_and &&
2464                 zop->which != Z_Operator_or))
2465         {
2466             /* parent node different from this one (or non-present) */
2467             /* we must combine result sets now */
2468             RSET rset;
2469             switch (zop->which)
2470             {
2471             case Z_Operator_and:
2472                 rset = rset_create_and(rset_nmem, kc,
2473                                        kc->scope,
2474                                        *num_result_sets, *result_sets);
2475                 break;
2476             case Z_Operator_or:
2477                 rset = rset_create_or(rset_nmem, kc,
2478                                       kc->scope, 0, /* termid */
2479                                       *num_result_sets, *result_sets);
2480                 break;
2481             case Z_Operator_and_not:
2482                 rset = rset_create_not(rset_nmem, kc,
2483                                        kc->scope,
2484                                        (*result_sets)[0],
2485                                        (*result_sets)[1]);
2486                 break;
2487             case Z_Operator_prox:
2488                 if (zop->u.prox->which != Z_ProximityOperator_known)
2489                 {
2490                     zebra_setError(zh, 
2491                                    YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2492                                    0);
2493                     return ZEBRA_FAIL;
2494                 }
2495                 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2496                 {
2497                     zebra_setError_zint(zh,
2498                                         YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2499                                         *zop->u.prox->u.known);
2500                     return ZEBRA_FAIL;
2501                 }
2502                 else
2503                 {
2504                     rset = rset_create_prox(rset_nmem, kc,
2505                                             kc->scope,
2506                                             *num_result_sets, *result_sets, 
2507                                             *zop->u.prox->ordered,
2508                                             (!zop->u.prox->exclusion ? 
2509                                              0 : *zop->u.prox->exclusion),
2510                                             *zop->u.prox->relationType,
2511                                             *zop->u.prox->distance );
2512                 }
2513                 break;
2514             default:
2515                 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
2516                 return ZEBRA_FAIL;
2517             }
2518             *num_result_sets = 1;
2519             *result_sets = nmem_malloc(stream, *num_result_sets * 
2520                                        sizeof(**result_sets));
2521             (*result_sets)[0] = rset;
2522         }
2523     }
2524     else if (zs->which == Z_RPNStructure_simple)
2525     {
2526         RSET rset;
2527         ZEBRA_RES res;
2528
2529         if (zs->u.simple->which == Z_Operand_APT)
2530         {
2531             yaz_log(YLOG_DEBUG, "rpn_search_APT");
2532             res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2533                                  attributeSet, stream, sort_sequence,
2534                                  num_bases, basenames, rset_nmem, &rset,
2535                                  kc);
2536             if (res != ZEBRA_OK)
2537                 return res;
2538         }
2539         else if (zs->u.simple->which == Z_Operand_resultSetId)
2540         {
2541             yaz_log(YLOG_DEBUG, "rpn_search_ref");
2542             rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2543             if (!rset)
2544             {
2545                 zebra_setError(zh, 
2546                                YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2547                                zs->u.simple->u.resultSetId);
2548                 return ZEBRA_FAIL;
2549             }
2550             rset_dup(rset);
2551         }
2552         else
2553         {
2554             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2555             return ZEBRA_FAIL;
2556         }
2557         *num_result_sets = 1;
2558         *result_sets = nmem_malloc(stream, *num_result_sets * 
2559                                    sizeof(**result_sets));
2560         (*result_sets)[0] = rset;
2561     }
2562     else
2563     {
2564         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2565         return ZEBRA_FAIL;
2566     }
2567     return ZEBRA_OK;
2568 }
2569
2570 struct scan_info_entry {
2571     char *term;
2572     ISAM_P isam_p;
2573 };
2574
2575 struct scan_info {
2576     struct scan_info_entry *list;
2577     ODR odr;
2578     int before, after;
2579     char prefix[20];
2580 };
2581
2582 static int scan_handle (char *name, const char *info, int pos, void *client)
2583 {
2584     int len_prefix, idx;
2585     struct scan_info *scan_info = (struct scan_info *) client;
2586
2587     len_prefix = strlen(scan_info->prefix);
2588     if (memcmp (name, scan_info->prefix, len_prefix))
2589         return 1;
2590     if (pos > 0)
2591         idx = scan_info->after - pos + scan_info->before;
2592     else
2593         idx = - pos - 1;
2594
2595     if (idx < 0)
2596         return 0;
2597     scan_info->list[idx].term = (char *)
2598         odr_malloc(scan_info->odr, strlen(name + len_prefix)+1);
2599     strcpy(scan_info->list[idx].term, name + len_prefix);
2600     assert (*info == sizeof(ISAM_P));
2601     memcpy (&scan_info->list[idx].isam_p, info+1, sizeof(ISAM_P));
2602     return 0;
2603 }
2604
2605 void zebra_term_untrans_iconv(ZebraHandle zh, NMEM stream, int reg_type,
2606                               char **dst, const char *src)
2607 {
2608     char term_src[IT_MAX_WORD];
2609     char term_dst[IT_MAX_WORD];
2610     
2611     zebra_term_untrans (zh, reg_type, term_src, src);
2612
2613     if (zh->iconv_from_utf8 != 0)
2614     {
2615         int len;
2616         char *inbuf = term_src;
2617         size_t inleft = strlen(term_src);
2618         char *outbuf = term_dst;
2619         size_t outleft = sizeof(term_dst)-1;
2620         size_t ret;
2621         
2622         ret = yaz_iconv (zh->iconv_from_utf8, &inbuf, &inleft,
2623                          &outbuf, &outleft);
2624         if (ret == (size_t)(-1))
2625             len = 0;
2626         else
2627             len = outbuf - term_dst;
2628         *dst = nmem_malloc(stream, len + 1);
2629         if (len > 0)
2630             memcpy (*dst, term_dst, len);
2631         (*dst)[len] = '\0';
2632     }
2633     else
2634         *dst = nmem_strdup(stream, term_src);
2635 }
2636
2637 static void count_set(ZebraHandle zh, RSET rset, zint *count)
2638 {
2639     zint psysno = 0;
2640     struct it_key key;
2641     RSFD rfd;
2642
2643     yaz_log(YLOG_DEBUG, "count_set");
2644
2645     rset->hits_limit = zh->approx_limit;
2646
2647     *count = 0;
2648     rfd = rset_open(rset, RSETF_READ);
2649     while (rset_read(rfd, &key,0 /* never mind terms */))
2650     {
2651         if (key.mem[0] != psysno)
2652         {
2653             psysno = key.mem[0];
2654             if (rfd->counted_items >= rset->hits_limit)
2655                 break;
2656         }
2657     }
2658     rset_close (rfd);
2659     *count = rset->hits_count;
2660 }
2661
2662 ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
2663                    oid_value attributeset,
2664                    int num_bases, char **basenames,
2665                    int *position, int *num_entries, ZebraScanEntry **list,
2666                    int *is_partial, RSET limit_set, int return_zero)
2667 {
2668     int i;
2669     int pos = *position;
2670     int num = *num_entries;
2671     int before;
2672     int after;
2673     int base_no;
2674     char termz[IT_MAX_WORD+20];
2675     struct scan_info *scan_info_array;
2676     ZebraScanEntry *glist;
2677     int ords[32], ord_no = 0;
2678     int ptr[32];
2679
2680     int bases_ok = 0;     /* no of databases with OK attribute */
2681     int errCode = 0;      /* err code (if any is not OK) */
2682     char *errString = 0;  /* addinfo */
2683
2684     unsigned index_type;
2685     char *search_type = NULL;
2686     char rank_type[128];
2687     int complete_flag;
2688     int sort_flag;
2689     NMEM rset_nmem = NULL; 
2690     struct rset_key_control *kc = 0;
2691
2692     *list = 0;
2693     *is_partial = 0;
2694
2695     if (attributeset == VAL_NONE)
2696         attributeset = VAL_BIB1;
2697
2698     if (!limit_set)
2699     {
2700         AttrType termset;
2701         int termset_value_numeric;
2702         const char *termset_value_string;
2703         attr_init_APT(&termset, zapt, 8);
2704         termset_value_numeric =
2705             attr_find_ex(&termset, NULL, &termset_value_string);
2706         if (termset_value_numeric != -1)
2707         {
2708             char resname[32];
2709             const char *termset_name = 0;
2710             
2711             if (termset_value_numeric != -2)
2712             {
2713                 
2714                 sprintf(resname, "%d", termset_value_numeric);
2715                 termset_name = resname;
2716             }
2717             else
2718                 termset_name = termset_value_string;
2719             
2720             limit_set = resultSetRef (zh, termset_name);
2721         }
2722     }
2723         
2724     yaz_log(YLOG_DEBUG, "position = %d, num = %d set=%d",
2725             pos, num, attributeset);
2726         
2727     if (zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type,
2728                         rank_type, &complete_flag, &sort_flag))
2729     {
2730         *num_entries = 0;
2731         zebra_setError(zh, YAZ_BIB1_UNSUPP_ATTRIBUTE_TYPE, 0);
2732         return ZEBRA_FAIL;
2733     }
2734     for (base_no = 0; base_no < num_bases && ord_no < 32; base_no++)
2735     {
2736         int ord;
2737
2738         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2739         {
2740             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2741                            basenames[base_no]);
2742             *num_entries = 0;
2743             return ZEBRA_FAIL;
2744         }
2745
2746         if (zebra_apt_get_ord(zh, zapt, zinfo_index_category_index,
2747                               index_type, 0, attributeset, &ord) 
2748             != ZEBRA_OK)
2749         {
2750             break;
2751         }
2752         ords[ord_no++] = ord;
2753     }
2754     if (!bases_ok && errCode)
2755     {
2756         zebra_setError(zh, errCode, errString);
2757         *num_entries = 0;
2758         return ZEBRA_FAIL;
2759     }
2760     if (ord_no == 0)
2761     {
2762         *num_entries = 0;
2763         return ZEBRA_OK;
2764     }
2765     /* prepare dictionary scanning */
2766     if (num < 1)
2767     {
2768         *num_entries = 0;
2769         return ZEBRA_OK;
2770     }
2771     before = pos-1;
2772     if (before < 0)
2773         before = 0;
2774     after = 1+num-pos;
2775     if (after < 0)
2776         after = 0;
2777     yaz_log(YLOG_DEBUG, "rpn_scan pos=%d num=%d before=%d "
2778             "after=%d before+after=%d",
2779             pos, num, before, after, before+after);
2780     scan_info_array = (struct scan_info *)
2781         odr_malloc(stream, ord_no * sizeof(*scan_info_array));
2782     for (i = 0; i < ord_no; i++)
2783     {
2784         int j, prefix_len = 0;
2785         int before_tmp = before, after_tmp = after;
2786         struct scan_info *scan_info = scan_info_array + i;
2787         struct rpn_char_map_info rcmi;
2788
2789         rpn_char_map_prepare (zh->reg, index_type, &rcmi);
2790
2791         scan_info->before = before;
2792         scan_info->after = after;
2793         scan_info->odr = stream;
2794
2795         scan_info->list = (struct scan_info_entry *)
2796             odr_malloc(stream, (before+after) * sizeof(*scan_info->list));
2797         for (j = 0; j<before+after; j++)
2798             scan_info->list[j].term = NULL;
2799
2800         prefix_len += key_SU_encode (ords[i], termz + prefix_len);
2801         termz[prefix_len] = 0;
2802         strcpy(scan_info->prefix, termz);
2803
2804         if (trans_scan_term(zh, zapt, termz+prefix_len, index_type) == 
2805             ZEBRA_FAIL)
2806             return ZEBRA_FAIL;
2807         
2808         dict_scan(zh->reg->dict, termz, &before_tmp, &after_tmp,
2809                   scan_info, scan_handle);
2810     }
2811     glist = (ZebraScanEntry *)
2812         odr_malloc(stream, (before+after)*sizeof(*glist));
2813
2814     rset_nmem = nmem_create();
2815     kc = zebra_key_control_create(zh);
2816
2817     /* consider terms after main term */
2818     for (i = 0; i < ord_no; i++)
2819         ptr[i] = before;
2820     
2821     *is_partial = 0;
2822     for (i = 0; i<after; i++)
2823     {
2824         int j, j0 = -1;
2825         const char *mterm = NULL;
2826         const char *tst;
2827         RSET rset = 0;
2828         int lo = i + pos-1; /* offset in result list */
2829
2830         /* find: j0 is the first of the minimal values */
2831         for (j = 0; j < ord_no; j++)
2832         {
2833             if (ptr[j] < before+after && ptr[j] >= 0 &&
2834                 (tst = scan_info_array[j].list[ptr[j]].term) &&
2835                 (!mterm || strcmp (tst, mterm) < 0))
2836             {
2837                 j0 = j;
2838                 mterm = tst;
2839             }
2840         }
2841         if (j0 == -1)
2842             break;  /* no value found, stop */
2843
2844         /* get result set for first one , but only if it's within bounds */
2845         if (lo >= 0)
2846         {
2847             /* get result set for first term */
2848             zebra_term_untrans_iconv(zh, stream->mem, index_type,
2849                                      &glist[lo].term, mterm);
2850             rset = rset_trunc(zh, &scan_info_array[j0].list[ptr[j0]].isam_p, 1,
2851                               glist[lo].term, strlen(glist[lo].term),
2852                               NULL, 0, zapt->term->which, rset_nmem, 
2853                               kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2854                               0 /* term_ref_id_str */);
2855         }
2856         ptr[j0]++; /* move index for this set .. */
2857         /* get result set for remaining scan terms */
2858         for (j = j0+1; j<ord_no; j++)
2859         {
2860             if (ptr[j] < before+after && ptr[j] >= 0 &&
2861                 (tst = scan_info_array[j].list[ptr[j]].term) &&
2862                 !strcmp (tst, mterm))
2863             {
2864                 if (lo >= 0)
2865                 {
2866                     RSET rsets[2];
2867                     
2868                     rsets[0] = rset;
2869                     rsets[1] =
2870                         rset_trunc(
2871                             zh, &scan_info_array[j].list[ptr[j]].isam_p, 1,
2872                             glist[lo].term,
2873                             strlen(glist[lo].term), NULL, 0,
2874                             zapt->term->which,rset_nmem,
2875                             kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2876                             0 /* term_ref_id_str */ );
2877                     rset = rset_create_or(rset_nmem, kc,
2878                                           kc->scope, 0 /* termid */,
2879                                           2, rsets);
2880                 }
2881                 ptr[j]++;
2882             }
2883         }
2884         if (lo >= 0)
2885         {
2886             zint count;
2887             /* merge with limit_set if given */
2888             if (limit_set)
2889             {
2890                 RSET rsets[2];
2891                 rsets[0] = rset;
2892                 rsets[1] = rset_dup(limit_set);
2893                 
2894                 rset = rset_create_and(rset_nmem, kc, kc->scope, 2, rsets);
2895             }
2896             /* count it */
2897             count_set(zh, rset, &count);
2898             glist[lo].occurrences = count;
2899             rset_delete(rset);
2900         }
2901     }
2902     if (i < after)
2903     {
2904         *num_entries -= (after-i);
2905         *is_partial = 1;
2906         if (*num_entries < 0)
2907         {
2908             (*kc->dec)(kc);
2909             nmem_destroy(rset_nmem);
2910             *num_entries = 0;
2911             return ZEBRA_OK;
2912         }
2913     }
2914     /* consider terms before main term */
2915     for (i = 0; i<ord_no; i++)
2916         ptr[i] = 0;
2917     
2918     for (i = 0; i<before; i++)
2919     {
2920         int j, j0 = -1;
2921         const char *mterm = NULL;
2922         const char *tst;
2923         RSET rset;
2924         int lo = before-1-i; /* offset in result list */
2925         zint count;
2926         
2927         for (j = 0; j <ord_no; j++)
2928         {
2929             if (ptr[j] < before && ptr[j] >= 0 &&
2930                 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
2931                 (!mterm || strcmp (tst, mterm) > 0))
2932             {
2933                 j0 = j;
2934                     mterm = tst;
2935             }
2936         }
2937         if (j0 == -1)
2938             break;
2939         
2940         zebra_term_untrans_iconv(zh, stream->mem, index_type,
2941                                  &glist[lo].term, mterm);
2942         
2943         rset = rset_trunc
2944             (zh, &scan_info_array[j0].list[before-1-ptr[j0]].isam_p, 1,
2945              glist[lo].term, strlen(glist[lo].term),
2946              NULL, 0, zapt->term->which, rset_nmem,
2947              kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2948              0 /* term_ref_id_str */);
2949         
2950         ptr[j0]++;
2951         
2952         for (j = j0+1; j<ord_no; j++)
2953         {
2954             if (ptr[j] < before && ptr[j] >= 0 &&
2955                 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
2956                 !strcmp (tst, mterm))
2957             {
2958                 RSET rsets[2];
2959                 
2960                 rsets[0] = rset;
2961                 rsets[1] = rset_trunc(
2962                     zh,
2963                     &scan_info_array[j].list[before-1-ptr[j]].isam_p, 1,
2964                     glist[lo].term,
2965                     strlen(glist[lo].term), NULL, 0,
2966                     zapt->term->which, rset_nmem,
2967                     kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2968                     0 /* term_ref_id_str */);
2969                 rset = rset_create_or(rset_nmem, kc,
2970                                       kc->scope, 0 /* termid */, 2, rsets);
2971                 
2972                 ptr[j]++;
2973             }
2974         }
2975         if (limit_set)
2976         {
2977             RSET rsets[2];
2978             rsets[0] = rset;
2979             rsets[1] = rset_dup(limit_set);
2980             
2981             rset = rset_create_and(rset_nmem, kc, kc->scope, 2, rsets);
2982         }
2983         count_set(zh, rset, &count);
2984         glist[lo].occurrences = count;
2985         rset_delete (rset);
2986     }
2987     (*kc->dec)(kc);
2988     nmem_destroy(rset_nmem);
2989     i = before-i;
2990     if (i)
2991     {
2992         *is_partial = 1;
2993         *position -= i;
2994         *num_entries -= i;
2995         if (*num_entries <= 0)
2996         {
2997             *num_entries = 0;
2998             return ZEBRA_OK;
2999         }
3000     }
3001     
3002     *list = glist + i;               /* list is set to first 'real' entry */
3003     
3004     yaz_log(YLOG_DEBUG, "position = %d, num_entries = %d",
3005             *position, *num_entries);
3006     return ZEBRA_OK;
3007 }
3008
3009 /*
3010  * Local variables:
3011  * c-basic-offset: 4
3012  * indent-tabs-mode: nil
3013  * End:
3014  * vim: shiftwidth=4 tabstop=8 expandtab
3015  */
3016