Align code for regular word search and numeric search a bit.
[idzebra-moved-to-github.git] / index / zrpn.c
1 /* $Id: zrpn.c,v 1.220 2006-06-22 23:06:06 adam Exp $
2    Copyright (C) 1995-2006
3    Index Data ApS
4
5 This file is part of the Zebra server.
6
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra.  If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA.
21 */
22
23 #include <stdio.h>
24 #include <assert.h>
25 #ifdef WIN32
26 #include <io.h>
27 #endif
28 #if HAVE_UNISTD_H
29 #include <unistd.h>
30 #endif
31 #include <ctype.h>
32
33 #include <yaz/diagbib1.h>
34 #include "index.h"
35 #include <zebra_xpath.h>
36 #include <attrfind.h>
37 #include <charmap.h>
38 #include <rset.h>
39
/* Context handed to rpn_char_map_handler via dict_grep_cmap(); filled in
   by rpn_char_map_prepare(). */
struct rpn_char_map_info
{
    ZebraMaps zm;       /* character maps passed to zebra_maps_input() */
    int reg_type;       /* register type passed to zebra_maps_input() */
};
45
/* Becomes 1 once log_level_rpn has been looked up (done lazily in
   add_isam_p). */
static int log_level_set = 0;
/* Log level of the "rpn" module as returned by yaz_log_module_level(). */
static int log_level_rpn = 0;
48
49 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
50 {
51     struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
52     const char **out = zebra_maps_input(p->zm, p->reg_type, from, len, 0);
53 #if 0
54     if (out && *out)
55     {
56         const char *outp = *out;
57         yaz_log(YLOG_LOG, "---");
58         while (*outp)
59         {
60             yaz_log(YLOG_LOG, "%02X", *outp);
61             outp++;
62         }
63     }
64 #endif
65     return out;
66 }
67
68 static void rpn_char_map_prepare(struct zebra_register *reg, int reg_type,
69                                  struct rpn_char_map_info *map_info)
70 {
71     map_info->zm = reg->zebra_maps;
72     map_info->reg_type = reg_type;
73     dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
74 }
75
/* When defined, struct grep_info carries a term_no array that add_isam_p
   grows in step with isam_p_buf. */
#define TERM_COUNT

/* Accumulator for dictionary hits; entries are appended by add_isam_p. */
struct grep_info {
#ifdef TERM_COUNT
    int *term_no;        /* same capacity as isam_p_buf */
#endif
    ISAM_P *isam_p_buf;  /* collected ISAM positions */
    int isam_p_size;     /* capacity of isam_p_buf, in entries */
    int isam_p_indx;     /* number of entries in use */
    ZebraHandle zh;
    int reg_type;        /* register type used when un-translating terms */
    ZebraSet termset;    /* if set, each hit is also added to it as a term */
};
89
90 void zebra_term_untrans(ZebraHandle zh, int reg_type,
91                         char *dst, const char *src)
92 {
93     int len = 0;
94     while (*src)
95     {
96         const char *cp = zebra_maps_output(zh->reg->zebra_maps,
97                                            reg_type, &src);
98         if (!cp)
99         {
100             if (len < IT_MAX_WORD-1)
101                 dst[len++] = *src;
102             src++;
103         }
104         else
105             while (*cp && len < IT_MAX_WORD-1)
106                 dst[len++] = *cp++;
107     }
108     dst[len] = '\0';
109 }
110
111 static void add_isam_p(const char *name, const char *info,
112                        struct grep_info *p)
113 {
114     if (!log_level_set)
115     {
116         log_level_rpn = yaz_log_module_level("rpn");
117         log_level_set = 1;
118     }
119     if (p->isam_p_indx == p->isam_p_size)
120     {
121         ISAM_P *new_isam_p_buf;
122 #ifdef TERM_COUNT        
123         int *new_term_no;        
124 #endif
125         p->isam_p_size = 2*p->isam_p_size + 100;
126         new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
127                                             p->isam_p_size);
128         if (p->isam_p_buf)
129         {
130             memcpy(new_isam_p_buf, p->isam_p_buf,
131                     p->isam_p_indx * sizeof(*p->isam_p_buf));
132             xfree(p->isam_p_buf);
133         }
134         p->isam_p_buf = new_isam_p_buf;
135
136 #ifdef TERM_COUNT
137         new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
138         if (p->term_no)
139         {
140             memcpy(new_term_no, p->isam_p_buf,
141                     p->isam_p_indx * sizeof(*p->term_no));
142             xfree(p->term_no);
143         }
144         p->term_no = new_term_no;
145 #endif
146     }
147     assert(*info == sizeof(*p->isam_p_buf));
148     memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
149
150 #if 1
151     if (p->termset)
152     {
153         const char *db;
154         char term_tmp[IT_MAX_WORD];
155         int ord = 0;
156         const char *index_name;
157         int len = key_SU_decode (&ord, (const unsigned char *) name);
158         
159         zebra_term_untrans  (p->zh, p->reg_type, term_tmp, name+len+1);
160         yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
161         zebraExplain_lookup_ord(p->zh->reg->zei,
162                                 ord, 0 /* index_type */, &db, &index_name);
163         yaz_log(log_level_rpn, "grep:  db=%s index=%s", db, index_name);
164         
165         resultSetAddTerm(p->zh, p->termset, name[len], db,
166                          index_name, term_tmp);
167     }
168 #endif
169     (p->isam_p_indx)++;
170 }
171
/* Callback-shaped wrapper around add_isam_p: p is the struct grep_info
 * that receives the hit (name, info). Always returns 0.
 */
static int grep_handle(char *name, const char *info, void *p)
{
    struct grep_info *gi = (struct grep_info *) p;

    add_isam_p(name, info, gi);
    return 0;
}
177
178 static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src,
179                     const char *ct1, const char *ct2, int first)
180 {
181     const char *s1, *s0 = *src;
182     const char **map;
183
184     /* skip white space */
185     while (*s0)
186     {
187         if (ct1 && strchr(ct1, *s0))
188             break;
189         if (ct2 && strchr(ct2, *s0))
190             break;
191         s1 = s0;
192         map = zebra_maps_input(zebra_maps, reg_type, &s1, strlen(s1), first);
193         if (**map != *CHR_SPACE)
194             break;
195         s0 = s1;
196     }
197     *src = s0;
198     return *s0;
199 }
200
201
/* Render in_size bytes of in_buf into out_buf for diagnostics.
 * Each byte becomes "XX:c  ": its value in upper-case hex, a colon, the
 * character itself (or '?' when outside 32..126) and two spaces.
 * out_size must be greater than 20. As soon as the text grows beyond
 * out_size-20 bytes, ".." is appended and the rest of the input is
 * ignored.
 */
static void esc_str(char *out_buf, size_t out_size,
                    const char *in_buf, int in_size)
{
    char *wr = out_buf;     /* current end of the output string */
    int idx = 0;

    assert(out_buf);
    assert(in_buf);
    assert(out_size > 20);
    *wr = '\0';
    while (idx < in_size)
    {
        int byte = in_buf[idx++] & 0xff;
        int shown = (byte >= 32 && byte <= 126) ? byte : '?';

        wr += sprintf(wr, "%02X:%c  ", byte, shown);
        if ((size_t) (wr - out_buf) > out_size - 20)
        {
            strcpy(wr, "..");
            break;
        }
    }
}
227
228 #define REGEX_CHARS " []()|.*+?!"
229
230 /* term_100: handle term, where trunc = none(no operators at all) */
231 static int term_100(ZebraMaps zebra_maps, int reg_type,
232                     const char **src, char *dst, int space_split,
233                     char *dst_term)
234 {
235     const char *s0;
236     const char **map;
237     int i = 0;
238     int j = 0;
239
240     const char *space_start = 0;
241     const char *space_end = 0;
242
243     if (!term_pre(zebra_maps, reg_type, src, NULL, NULL, !space_split))
244         return 0;
245     s0 = *src;
246     while (*s0)
247     {
248         const char *s1 = s0;
249         int q_map_match = 0;
250         map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
251                                 &q_map_match);
252         if (space_split)
253         {
254             if (**map == *CHR_SPACE)
255                 break;
256         }
257         else  /* complete subfield only. */
258         {
259             if (**map == *CHR_SPACE)
260             {   /* save space mapping for later  .. */
261                 space_start = s1;
262                 space_end = s0;
263                 continue;
264             }
265             else if (space_start)
266             {   /* reload last space */
267                 while (space_start < space_end)
268                 {
269                     if (strchr(REGEX_CHARS, *space_start))
270                         dst[i++] = '\\';
271                     dst_term[j++] = *space_start;
272                     dst[i++] = *space_start++;
273                 }
274                 /* and reset */
275                 space_start = space_end = 0;
276             }
277         }
278         /* add non-space char */
279         memcpy(dst_term+j, s1, s0 - s1);
280         j += (s0 - s1);
281         if (!q_map_match)
282         {
283             while (s1 < s0)
284             {
285                 if (strchr(REGEX_CHARS, *s1))
286                     dst[i++] = '\\';
287                 dst[i++] = *s1++;
288             }
289         }
290         else
291         {
292             char tmpbuf[80];
293             esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
294             
295             strcpy(dst + i, map[0]);
296             i += strlen(map[0]);
297         }
298     }
299     dst[i] = '\0';
300     dst_term[j] = '\0';
301     *src = s0;
302     return i;
303 }
304
305 /* term_101: handle term, where trunc = Process # */
306 static int term_101(ZebraMaps zebra_maps, int reg_type,
307                     const char **src, char *dst, int space_split,
308                     char *dst_term)
309 {
310     const char *s0;
311     const char **map;
312     int i = 0;
313     int j = 0;
314
315     if (!term_pre(zebra_maps, reg_type, src, "#", "#", !space_split))
316         return 0;
317     s0 = *src;
318     while (*s0)
319     {
320         if (*s0 == '#')
321         {
322             dst[i++] = '.';
323             dst[i++] = '*';
324             dst_term[j++] = *s0++;
325         }
326         else
327         {
328             const char *s1 = s0;
329             int q_map_match = 0;
330             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
331                                     &q_map_match);
332             if (space_split && **map == *CHR_SPACE)
333                 break;
334
335             /* add non-space char */
336             memcpy(dst_term+j, s1, s0 - s1);
337             j += (s0 - s1);
338             if (!q_map_match)
339             {
340                 while (s1 < s0)
341                 {
342                     if (strchr(REGEX_CHARS, *s1))
343                         dst[i++] = '\\';
344                     dst[i++] = *s1++;
345                 }
346             }
347             else
348             {
349                 char tmpbuf[80];
350                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
351                 
352                 strcpy(dst + i, map[0]);
353                 i += strlen(map[0]);
354             }
355         }
356     }
357     dst[i] = '\0';
358     dst_term[j++] = '\0';
359     *src = s0;
360     return i;
361 }
362
363 /* term_103: handle term, where trunc = re-2 (regular expressions) */
364 static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src,
365                     char *dst, int *errors, int space_split,
366                     char *dst_term)
367 {
368     int i = 0;
369     int j = 0;
370     const char *s0;
371     const char **map;
372
373     if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split))
374         return 0;
375     s0 = *src;
376     if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
377         isdigit(((const unsigned char *)s0)[1]))
378     {
379         *errors = s0[1] - '0';
380         s0 += 3;
381         if (*errors > 3)
382             *errors = 3;
383     }
384     while (*s0)
385     {
386         if (strchr("^\\()[].*+?|-", *s0))
387         {
388             dst_term[j++] = *s0;
389             dst[i++] = *s0++;
390         }
391         else
392         {
393             const char *s1 = s0;
394             int q_map_match = 0;
395             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
396                                     &q_map_match);
397             if (space_split && **map == *CHR_SPACE)
398                 break;
399
400             /* add non-space char */
401             memcpy(dst_term+j, s1, s0 - s1);
402             j += (s0 - s1);
403             if (!q_map_match)
404             {
405                 while (s1 < s0)
406                 {
407                     if (strchr(REGEX_CHARS, *s1))
408                         dst[i++] = '\\';
409                     dst[i++] = *s1++;
410                 }
411             }
412             else
413             {
414                 char tmpbuf[80];
415                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
416                 
417                 strcpy(dst + i, map[0]);
418                 i += strlen(map[0]);
419             }
420         }
421     }
422     dst[i] = '\0';
423     dst_term[j] = '\0';
424     *src = s0;
425     
426     return i;
427 }
428
429 /* term_103: handle term, where trunc = re-1 (regular expressions) */
430 static int term_102(ZebraMaps zebra_maps, int reg_type, const char **src,
431                     char *dst, int space_split, char *dst_term)
432 {
433     return term_103(zebra_maps, reg_type, src, dst, NULL, space_split,
434                     dst_term);
435 }
436
437
438 /* term_104: handle term, where trunc = Process # and ! */
439 static int term_104(ZebraMaps zebra_maps, int reg_type,
440                     const char **src, char *dst, int space_split,
441                     char *dst_term)
442 {
443     const char *s0;
444     const char **map;
445     int i = 0;
446     int j = 0;
447
448     if (!term_pre(zebra_maps, reg_type, src, "?*#", "?*#", !space_split))
449         return 0;
450     s0 = *src;
451     while (*s0)
452     {
453         if (*s0 == '?')
454         {
455             dst_term[j++] = *s0++;
456             if (*s0 >= '0' && *s0 <= '9')
457             {
458                 int limit = 0;
459                 while (*s0 >= '0' && *s0 <= '9')
460                 {
461                     limit = limit * 10 + (*s0 - '0');
462                     dst_term[j++] = *s0++;
463                 }
464                 if (limit > 20)
465                     limit = 20;
466                 while (--limit >= 0)
467                 {
468                     dst[i++] = '.';
469                     dst[i++] = '?';
470                 }
471             }
472             else
473             {
474                 dst[i++] = '.';
475                 dst[i++] = '*';
476             }
477         }
478         else if (*s0 == '*')
479         {
480             dst[i++] = '.';
481             dst[i++] = '*';
482             dst_term[j++] = *s0++;
483         }
484         else if (*s0 == '#')
485         {
486             dst[i++] = '.';
487             dst_term[j++] = *s0++;
488         }
489         else
490         {
491             const char *s1 = s0;
492             int q_map_match = 0;
493             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
494                                     &q_map_match);
495             if (space_split && **map == *CHR_SPACE)
496                 break;
497
498             /* add non-space char */
499             memcpy(dst_term+j, s1, s0 - s1);
500             j += (s0 - s1);
501             if (!q_map_match)
502             {
503                 while (s1 < s0)
504                 {
505                     if (strchr(REGEX_CHARS, *s1))
506                         dst[i++] = '\\';
507                     dst[i++] = *s1++;
508                 }
509             }
510             else
511             {
512                 char tmpbuf[80];
513                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
514                 
515                 strcpy(dst + i, map[0]);
516                 i += strlen(map[0]);
517             }
518         }
519     }
520     dst[i] = '\0';
521     dst_term[j++] = '\0';
522     *src = s0;
523     return i;
524 }
525
526 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
527 static int term_105(ZebraMaps zebra_maps, int reg_type,
528                     const char **src, char *dst, int space_split,
529                     char *dst_term, int right_truncate)
530 {
531     const char *s0;
532     const char **map;
533     int i = 0;
534     int j = 0;
535
536     if (!term_pre(zebra_maps, reg_type, src, "*!", "*!", !space_split))
537         return 0;
538     s0 = *src;
539     while (*s0)
540     {
541         if (*s0 == '*')
542         {
543             dst[i++] = '.';
544             dst[i++] = '*';
545             dst_term[j++] = *s0++;
546         }
547         else if (*s0 == '!')
548         {
549             dst[i++] = '.';
550             dst_term[j++] = *s0++;
551         }
552         else
553         {
554             const char *s1 = s0;
555             int q_map_match = 0;
556             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
557                                     &q_map_match);
558             if (space_split && **map == *CHR_SPACE)
559                 break;
560
561             /* add non-space char */
562             memcpy(dst_term+j, s1, s0 - s1);
563             j += (s0 - s1);
564             if (!q_map_match)
565             {
566                 while (s1 < s0)
567                 {
568                     if (strchr(REGEX_CHARS, *s1))
569                         dst[i++] = '\\';
570                     dst[i++] = *s1++;
571                 }
572             }
573             else
574             {
575                 char tmpbuf[80];
576                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
577                 
578                 strcpy(dst + i, map[0]);
579                 i += strlen(map[0]);
580             }
581         }
582     }
583     if (right_truncate)
584     {
585         dst[i++] = '.';
586         dst[i++] = '*';
587     }
588     dst[i] = '\0';
589     
590     dst_term[j++] = '\0';
591     *src = s0;
592     return i;
593 }
594
595
/* gen_regular_rel - generate regular expression from relation
 *  dst:     output buffer for the expression (not bounds-checked here)
 *  val:     border value (inclusive)
 *  islt:    1 if <=; 0 if >=.
 *
 * The expression is an alternation. Working from the least significant
 * digit of |val| upwards, each alternative keeps the higher digits of
 * val, puts a digit range at the current position and allows any digit
 * ([0-9]) in every lower position. A final alternative covers numbers
 * with fewer (<=) or more (>=) digits. For negative val the relation is
 * flipped and applied to -val behind a '-' sign.
 */
static void gen_regular_rel(char *dst, int val, int islt)
{
    int dst_p;              /* write offset into dst */
    int w, d, i;
    int pos = 0;            /* digit position, 0 = least significant */
    char numstr[20];        /* decimal text of |val| */

    yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
    if (val >= 0)
    {
        if (islt)
            /* every negative number is <= a non-negative val */
            strcpy(dst, "(-[0-9]+|(");
        else
            strcpy(dst, "((");
    }
    else
    {
        if (!islt)
        {
            /* every non-negative number is >= a negative val; among the
               negatives the magnitude must be <= |val| */
            strcpy(dst, "([0-9]+|-(");
            dst_p = strlen(dst);    /* redundant: recomputed below */
            islt = 1;
        }
        else
        {
            /* <= negative val: negatives with magnitude >= |val| */
            strcpy(dst, "(-(");
            islt = 0;
        }
        val = -val;
    }
    dst_p = strlen(dst);
    sprintf(numstr, "%d", val);
    for (w = strlen(numstr); --w >= 0; pos++)
    {
        d = numstr[w];
        if (pos > 0)
        {
            /* above the last digit the range must be strict, so step the
               digit by one; skip the position if no such digit exists */
            if (islt)
            {
                if (d == '0')
                    continue;
                d--;
            }
            else
            {
                if (d == '9')
                    continue;
                d++;
            }
        }

        /* start the alternative with all digits of val, then back up to
           the position of the digit being replaced */
        strcpy(dst + dst_p, numstr);
        dst_p = strlen(dst) - pos - 1;

        if (islt)
        {
            if (d != '0')
            {
                dst[dst_p++] = '[';
                dst[dst_p++] = '0';
                dst[dst_p++] = '-';
                dst[dst_p++] = d;
                dst[dst_p++] = ']';
            }
            else
                dst[dst_p++] = d;
        }
        else
        {
            if (d != '9')
            {
                dst[dst_p++] = '[';
                dst[dst_p++] = d;
                dst[dst_p++] = '-';
                dst[dst_p++] = '9';
                dst[dst_p++] = ']';
            }
            else
                dst[dst_p++] = d;
        }
        /* lower positions may hold any digit */
        for (i = 0; i<pos; i++)
        {
            dst[dst_p++] = '[';
            dst[dst_p++] = '0';
            dst[dst_p++] = '-';
            dst[dst_p++] = '9';
            dst[dst_p++] = ']';
        }
        dst[dst_p++] = '|';
    }
    dst[dst_p] = '\0';
    if (islt)
    {
        /* match everything less than 10^(pos-1) */
        strcat(dst, "0*");
        for (i = 1; i<pos; i++)
            strcat(dst, "[0-9]?");
    }
    else
    {
        /* match everything greater than 10^pos */
        for (i = 0; i <= pos; i++)
            strcat(dst, "[0-9]");
        strcat(dst, "[0-9]*");
    }
    strcat(dst, "))");
}
707
/* Copy one regex character from src[*indx] to **term_p and advance both.
 * A backslash is copied together with the character that follows it, so
 * an escape sequence always stays in one piece.
 */
void string_rel_add_char(char **term_p, const char *src, int *indx)
{
    char *out = *term_p;
    int pos = *indx;

    if (src[pos] == '\\')
        *out++ = src[pos++];
    *out++ = src[pos++];

    *term_p = out;
    *indx = pos;
}
714
/*
 * Regular expressions for string relations, shown for the term "abc".
 * The first line of each pair is the idea, the second is the form the
 * code below generates.
 *
 *   >  abc     ([b-].*|a[c-].*|ab[d-].*|abc.+)
 *              ([^-a].*|a[^-b].*|ab[^-c].*|abc.+)
 *   >= abc     ([b-].*|a[c-].*|ab[c-].*)
 *              ([^-a].*|a[^-b].*|ab[c-].*)
 *   <  abc     ([-0].*|a[-a].*|ab[-b].*)
 *              ([^a-].*|a[^b-].*|ab[^c-].*)
 *   <= abc     ([-0].*|a[-a].*|ab[-b].*|abc)
 *              ([^a-].*|a[^b-].*|ab[^c-].*|abc)
 *
 * string_relation: append the expression for the next term of *term_sub,
 * under the relation attribute (type 2) of zapt, to term_dict.
 *  term_sub:     in/out query position, advanced by term_100()
 *  term_dict:    NUL-terminated expression built so far; output is
 *                appended at its end
 *  attributeSet: not referenced in this function
 *  space_split:  forwarded to term_100()
 *  term_dst:     receives the raw term (from term_100())
 *  error_code:   set to 0, or to YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE for
 *                an unsupported relation
 * Returns 1 on success (also for an empty term under the equality
 * relations, in which case nothing is appended); 0 when term_100() finds
 * no term or the relation is unsupported.
 */
static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
                           const char **term_sub, char *term_dict,
                           oid_value attributeSet,
                           int reg_type, int space_split, char *term_dst,
                           int *error_code)
{
    AttrType relation;
    int relation_value;
    int i;
    char *term_tmp = term_dict + strlen(term_dict);   /* append position */
    char term_component[2*IT_MAX_WORD+20];   /* term as escaped regex */

    attr_init_APT(&relation, zapt, 2);
    relation_value = attr_find(&relation, NULL);

    *error_code = 0;
    yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
    switch (relation_value)
    {
    case 1:
        if (!term_100(zh->reg->zebra_maps, reg_type,
                      term_sub, term_component,
                      space_split, term_dst))
            return 0;
        yaz_log(log_level_rpn, "Relation <");

        *term_tmp++ = '(';
        /* one alternative per character: prefix, then [^c-].* */
        for (i = 0; term_component[i]; )
        {
            int j = 0;

            if (i)
                *term_tmp++ = '|';
            /* copy the prefix term_component[0..i) */
            while (j < i)
                string_rel_add_char(&term_tmp, term_component, &j);

            *term_tmp++ = '[';

            *term_tmp++ = '^';
            string_rel_add_char(&term_tmp, term_component, &i);
            *term_tmp++ = '-';

            *term_tmp++ = ']';
            *term_tmp++ = '.';
            *term_tmp++ = '*';

            /* give up on further alternatives once the expression has
               grown past IT_MAX_WORD bytes */
            if ((term_tmp - term_dict) > IT_MAX_WORD)
                break;
        }
        *term_tmp++ = ')';
        *term_tmp = '\0';
        break;
    case 2:
        if (!term_100(zh->reg->zebra_maps, reg_type,
                      term_sub, term_component,
                      space_split, term_dst))
            return 0;
        yaz_log(log_level_rpn, "Relation <=");

        *term_tmp++ = '(';
        /* as for <, but every alternative is followed by '|' ... */
        for (i = 0; term_component[i]; )
        {
            int j = 0;

            while (j < i)
                string_rel_add_char(&term_tmp, term_component, &j);
            *term_tmp++ = '[';

            *term_tmp++ = '^';
            string_rel_add_char(&term_tmp, term_component, &i);
            *term_tmp++ = '-';

            *term_tmp++ = ']';
            *term_tmp++ = '.';
            *term_tmp++ = '*';

            *term_tmp++ = '|';

            if ((term_tmp - term_dict) > IT_MAX_WORD)
                break;
        }
        /* ... and the term itself is the last alternative */
        for (i = 0; term_component[i]; )
            string_rel_add_char(&term_tmp, term_component, &i);
        *term_tmp++ = ')';
        *term_tmp = '\0';
        break;
    case 5:
        if (!term_100 (zh->reg->zebra_maps, reg_type,
                       term_sub, term_component, space_split, term_dst))
            return 0;
        yaz_log(log_level_rpn, "Relation >");

        *term_tmp++ = '(';
        /* one alternative per character: prefix, then [^-c].* */
        for (i = 0; term_component[i];)
        {
            int j = 0;

            while (j < i)
                string_rel_add_char(&term_tmp, term_component, &j);
            *term_tmp++ = '[';

            *term_tmp++ = '^';
            *term_tmp++ = '-';
            string_rel_add_char(&term_tmp, term_component, &i);

            *term_tmp++ = ']';
            *term_tmp++ = '.';
            *term_tmp++ = '*';

            *term_tmp++ = '|';

            if ((term_tmp - term_dict) > IT_MAX_WORD)
                break;
        }
        /* last alternative: the term followed by at least one character */
        for (i = 0; term_component[i];)
            string_rel_add_char(&term_tmp, term_component, &i);
        *term_tmp++ = '.';
        *term_tmp++ = '+';
        *term_tmp++ = ')';
        *term_tmp = '\0';
        break;
    case 4:
        if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
                      term_component, space_split, term_dst))
            return 0;
        yaz_log(log_level_rpn, "Relation >=");

        *term_tmp++ = '(';
        for (i = 0; term_component[i];)
        {
            int j = 0;

            if (i)
                *term_tmp++ = '|';
            while (j < i)
                string_rel_add_char(&term_tmp, term_component, &j);
            *term_tmp++ = '[';

            if (term_component[i+1])
            {
                /* more bytes follow: [^-c] */
                *term_tmp++ = '^';
                *term_tmp++ = '-';
                string_rel_add_char(&term_tmp, term_component, &i);
            }
            else
            {
                /* last byte: [c-], which includes c itself */
                string_rel_add_char(&term_tmp, term_component, &i);
                *term_tmp++ = '-';
            }
            *term_tmp++ = ']';
            *term_tmp++ = '.';
            *term_tmp++ = '*';

            if ((term_tmp - term_dict) > IT_MAX_WORD)
                break;
        }
        *term_tmp++ = ')';
        *term_tmp = '\0';
        break;
    case 3:
    case 102:
    case 103:
    case -1:
        /* equality (also used when attr_find() yields -1) */
        if (!**term_sub)
            return 1;
        yaz_log(log_level_rpn, "Relation =");
        if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
                      term_component, space_split, term_dst))
            return 0;
        strcat(term_tmp, "(");
        strcat(term_tmp, term_component);
        strcat(term_tmp, ")");
        break;
    default:
        *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
        return 0;
    }
    return 1;
}
904
/* Forward declaration: string_term() is defined further down but is
   already called by term_trunc(). */
static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
                             const char **term_sub, 
                             oid_value attributeSet, NMEM stream,
                             struct grep_info *grep_info,
                             int reg_type, int complete_flag,
                             int num_bases, char **basenames,
                             char *term_dst,
                             const char *xpath_use,
                             struct ord_list **ol);
914
915 static ZEBRA_RES term_limits_APT(ZebraHandle zh,
916                                  Z_AttributesPlusTerm *zapt,
917                                  zint *hits_limit_value,
918                                  const char **term_ref_id_str,
919                                  NMEM nmem)
920 {
921     AttrType term_ref_id_attr;
922     AttrType hits_limit_attr;
923     int term_ref_id_int;
924  
925     attr_init_APT(&hits_limit_attr, zapt, 9);
926     *hits_limit_value  = attr_find(&hits_limit_attr, NULL);
927
928     attr_init_APT(&term_ref_id_attr, zapt, 10);
929     term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
930     if (term_ref_id_int >= 0)
931     {
932         char *res = nmem_malloc(nmem, 20);
933         sprintf(res, "%d", term_ref_id_int);
934         *term_ref_id_str = res;
935     }
936
937     /* no limit given ? */
938     if (*hits_limit_value == -1)
939     {
940         if (*term_ref_id_str)
941         {
942             /* use global if term_ref is present */
943             *hits_limit_value = zh->approx_limit;
944         }
945         else
946         {
947             /* no counting if term_ref is not present */
948             *hits_limit_value = 0;
949         }
950     }
951     else if (*hits_limit_value == 0)
952     {
953         /* 0 is the same as global limit */
954         *hits_limit_value = zh->approx_limit;
955     }
956     yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
957             *term_ref_id_str ? *term_ref_id_str : "none",
958             *hits_limit_value);
959     return ZEBRA_OK;
960 }
961
962 static ZEBRA_RES term_trunc(ZebraHandle zh,
963                             Z_AttributesPlusTerm *zapt,
964                             const char **term_sub, 
965                             oid_value attributeSet, NMEM stream,
966                             struct grep_info *grep_info,
967                             int reg_type, int complete_flag,
968                             int num_bases, char **basenames,
969                             char *term_dst,
970                             const char *rank_type, 
971                             const char *xpath_use,
972                             NMEM rset_nmem,
973                             RSET *rset,
974                             struct rset_key_control *kc)
975 {
976     ZEBRA_RES res;
977     struct ord_list *ol;
978     zint hits_limit_value;
979     const char *term_ref_id_str = 0;
980     *rset = 0;
981
982     term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
983                     stream);
984     grep_info->isam_p_indx = 0;
985     res = string_term(zh, zapt, term_sub, attributeSet, stream, grep_info,
986                       reg_type, complete_flag, num_bases, basenames,
987                       term_dst, xpath_use, &ol);
988     if (res != ZEBRA_OK)
989         return res;
990     if (!*term_sub)  /* no more terms ? */
991         return res;
992     yaz_log(log_level_rpn, "term: %s", term_dst);
993     *rset = rset_trunc(zh, grep_info->isam_p_buf,
994                        grep_info->isam_p_indx, term_dst,
995                        strlen(term_dst), rank_type, 1 /* preserve pos */,
996                        zapt->term->which, rset_nmem,
997                        kc, kc->scope, ol, reg_type, hits_limit_value,
998                        term_ref_id_str);
999     if (!*rset)
1000         return ZEBRA_FAIL;
1001     return ZEBRA_OK;
1002 }
1003
1004 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1005                              const char **term_sub, 
1006                              oid_value attributeSet, NMEM stream,
1007                              struct grep_info *grep_info,
1008                              int reg_type, int complete_flag,
1009                              int num_bases, char **basenames,
1010                              char *term_dst,
1011                              const char *xpath_use,
1012                              struct ord_list **ol)
1013 {
1014     char term_dict[2*IT_MAX_WORD+4000];
1015     int j, r, base_no;
1016     AttrType truncation;
1017     int truncation_value;
1018     const char *termp;
1019     struct rpn_char_map_info rcmi;
1020     int space_split = complete_flag ? 0 : 1;
1021
1022     int bases_ok = 0;     /* no of databases with OK attribute */
1023
1024     *ol = ord_list_create(stream);
1025
1026     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1027     attr_init_APT(&truncation, zapt, 5);
1028     truncation_value = attr_find(&truncation, NULL);
1029     yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1030
1031     for (base_no = 0; base_no < num_bases; base_no++)
1032     {
1033         int ord = -1;
1034         int regex_range = 0;
1035         int max_pos, prefix_len = 0;
1036         int relation_error;
1037         char ord_buf[32];
1038         int ord_len, i;
1039
1040         termp = *term_sub;
1041
1042         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1043         {
1044             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1045                            basenames[base_no]);
1046             return ZEBRA_FAIL;
1047         }
1048         
1049         if (zebra_apt_get_ord(zh, zapt, zinfo_index_category_index,
1050                               reg_type, xpath_use,
1051                               attributeSet, &ord) != ZEBRA_OK)
1052             continue;
1053
1054         *ol = ord_list_append(stream, *ol, ord);
1055         
1056         if (prefix_len)
1057             term_dict[prefix_len++] = '|';
1058         else
1059             term_dict[prefix_len++] = '(';
1060         
1061         ord_len = key_SU_encode (ord, ord_buf);
1062         for (i = 0; i<ord_len; i++)
1063         {
1064             term_dict[prefix_len++] = 1;
1065             term_dict[prefix_len++] = ord_buf[i];
1066         }
1067         
1068         bases_ok++;
1069
1070         term_dict[prefix_len++] = ')';
1071         term_dict[prefix_len] = '\0';
1072         j = prefix_len;
1073         switch (truncation_value)
1074         {
1075         case -1:         /* not specified */
1076         case 100:        /* do not truncate */
1077             if (!string_relation (zh, zapt, &termp, term_dict,
1078                                   attributeSet,
1079                                   reg_type, space_split, term_dst,
1080                                   &relation_error))
1081             {
1082                 if (relation_error)
1083                 {
1084                     zebra_setError(zh, relation_error, 0);
1085                     return ZEBRA_FAIL;
1086                 }
1087                 *term_sub = 0;
1088                 return ZEBRA_OK;
1089             }
1090             break;
1091         case 1:          /* right truncation */
1092             term_dict[j++] = '(';
1093             if (!term_100(zh->reg->zebra_maps, reg_type,
1094                           &termp, term_dict + j, space_split, term_dst))
1095             {
1096                 *term_sub = 0;
1097                 return ZEBRA_OK;
1098             }
1099             strcat(term_dict, ".*)");
1100             break;
1101         case 2:          /* keft truncation */
1102             term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1103             if (!term_100(zh->reg->zebra_maps, reg_type,
1104                           &termp, term_dict + j, space_split, term_dst))
1105             {
1106                 *term_sub = 0;
1107                 return ZEBRA_OK;
1108             }
1109             strcat(term_dict, ")");
1110             break;
1111         case 3:          /* left&right truncation */
1112             term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1113             if (!term_100(zh->reg->zebra_maps, reg_type,
1114                           &termp, term_dict + j, space_split, term_dst))
1115             {
1116                 *term_sub = 0;
1117                 return ZEBRA_OK;
1118             }
1119             strcat(term_dict, ".*)");
1120             break;
1121         case 101:        /* process # in term */
1122             term_dict[j++] = '(';
1123             if (!term_101(zh->reg->zebra_maps, reg_type,
1124                           &termp, term_dict + j, space_split, term_dst))
1125             {
1126                 *term_sub = 0;
1127                 return ZEBRA_OK;
1128             }
1129             strcat(term_dict, ")");
1130             break;
1131         case 102:        /* Regexp-1 */
1132             term_dict[j++] = '(';
1133             if (!term_102(zh->reg->zebra_maps, reg_type,
1134                           &termp, term_dict + j, space_split, term_dst))
1135             {
1136                 *term_sub = 0;
1137                 return ZEBRA_OK;
1138             }
1139             strcat(term_dict, ")");
1140             break;
1141         case 103:       /* Regexp-2 */
1142             regex_range = 1;
1143             term_dict[j++] = '(';
1144             if (!term_103(zh->reg->zebra_maps, reg_type,
1145                           &termp, term_dict + j, &regex_range,
1146                           space_split, term_dst))
1147             {
1148                 *term_sub = 0;
1149                 return ZEBRA_OK;
1150             }
1151             strcat(term_dict, ")");
1152             break;
1153         case 104:        /* process # and ! in term */
1154             term_dict[j++] = '(';
1155             if (!term_104(zh->reg->zebra_maps, reg_type,
1156                           &termp, term_dict + j, space_split, term_dst))
1157             {
1158                 *term_sub = 0;
1159                 return ZEBRA_OK;
1160             }
1161             strcat(term_dict, ")");
1162             break;
1163         case 105:        /* process * and ! in term */
1164             term_dict[j++] = '(';
1165             if (!term_105(zh->reg->zebra_maps, reg_type,
1166                           &termp, term_dict + j, space_split, term_dst, 1))
1167             {
1168                 *term_sub = 0;
1169                 return ZEBRA_OK;
1170             }
1171             strcat(term_dict, ")");
1172             break;
1173         case 106:        /* process * and ! in term */
1174             term_dict[j++] = '(';
1175             if (!term_105(zh->reg->zebra_maps, reg_type,
1176                           &termp, term_dict + j, space_split, term_dst, 0))
1177             {
1178                 *term_sub = 0;
1179                 return ZEBRA_OK;
1180             }
1181             strcat(term_dict, ")");
1182             break;
1183         default:
1184             zebra_setError_zint(zh,
1185                                 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1186                                 truncation_value);
1187             return ZEBRA_FAIL;
1188         }
1189         if (1)
1190         {
1191             char buf[80];
1192             const char *input = term_dict + prefix_len;
1193             esc_str(buf, sizeof(buf), input, strlen(input));
1194         }
1195         yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_dict+prefix_len);
1196         r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
1197                              grep_info, &max_pos, 
1198                              ord_len /* number of "exact" chars */,
1199                              grep_handle);
1200         if (r)
1201             yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1202     }
1203     if (!bases_ok)
1204         return ZEBRA_FAIL;
1205     *term_sub = termp;
1206     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1207     return ZEBRA_OK;
1208 }
1209
1210
1211 /* convert APT search term to UTF8 */
1212 static ZEBRA_RES zapt_term_to_utf8(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1213                                    char *termz)
1214 {
1215     size_t sizez;
1216     Z_Term *term = zapt->term;
1217
1218     switch (term->which)
1219     {
1220     case Z_Term_general:
1221         if (zh->iconv_to_utf8 != 0)
1222         {
1223             char *inbuf = (char *) term->u.general->buf;
1224             size_t inleft = term->u.general->len;
1225             char *outbuf = termz;
1226             size_t outleft = IT_MAX_WORD-1;
1227             size_t ret;
1228
1229             ret = yaz_iconv(zh->iconv_to_utf8, &inbuf, &inleft,
1230                         &outbuf, &outleft);
1231             if (ret == (size_t)(-1))
1232             {
1233                 ret = yaz_iconv(zh->iconv_to_utf8, 0, 0, 0, 0);
1234                 zebra_setError(
1235                     zh, 
1236                     YAZ_BIB1_QUERY_TERM_INCLUDES_CHARS_THAT_DO_NOT_TRANSLATE_INTO_,
1237                     0);
1238                 return ZEBRA_FAIL;
1239             }
1240             *outbuf = 0;
1241         }
1242         else
1243         {
1244             sizez = term->u.general->len;
1245             if (sizez > IT_MAX_WORD-1)
1246                 sizez = IT_MAX_WORD-1;
1247             memcpy (termz, term->u.general->buf, sizez);
1248             termz[sizez] = '\0';
1249         }
1250         break;
1251     case Z_Term_characterString:
1252         sizez = strlen(term->u.characterString);
1253         if (sizez > IT_MAX_WORD-1)
1254             sizez = IT_MAX_WORD-1;
1255         memcpy (termz, term->u.characterString, sizez);
1256         termz[sizez] = '\0';
1257         break;
1258     default:
1259         zebra_setError(zh, YAZ_BIB1_UNSUPP_CODED_VALUE_FOR_TERM, 0);
1260         return ZEBRA_FAIL;
1261     }
1262     return ZEBRA_OK;
1263 }
1264
1265 /* convert APT SCAN term to internal cmap */
1266 static ZEBRA_RES trans_scan_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1267                                  char *termz, int reg_type)
1268 {
1269     char termz0[IT_MAX_WORD];
1270
1271     if (zapt_term_to_utf8(zh, zapt, termz0) == ZEBRA_FAIL)
1272         return ZEBRA_FAIL;    /* error */
1273     else
1274     {
1275         const char **map;
1276         const char *cp = (const char *) termz0;
1277         const char *cp_end = cp + strlen(cp);
1278         const char *src;
1279         int i = 0;
1280         const char *space_map = NULL;
1281         int len;
1282             
1283         while ((len = (cp_end - cp)) > 0)
1284         {
1285             map = zebra_maps_input(zh->reg->zebra_maps, reg_type, &cp, len, 0);
1286             if (**map == *CHR_SPACE)
1287                 space_map = *map;
1288             else
1289             {
1290                 if (i && space_map)
1291                     for (src = space_map; *src; src++)
1292                         termz[i++] = *src;
1293                 space_map = NULL;
1294                 for (src = *map; *src; src++)
1295                     termz[i++] = *src;
1296             }
1297         }
1298         termz[i] = '\0';
1299     }
1300     return ZEBRA_OK;
1301 }
1302
1303 static void grep_info_delete(struct grep_info *grep_info)
1304 {
1305 #ifdef TERM_COUNT
1306     xfree(grep_info->term_no);
1307 #endif
1308     xfree(grep_info->isam_p_buf);
1309 }
1310
1311 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1312                                    Z_AttributesPlusTerm *zapt,
1313                                    struct grep_info *grep_info,
1314                                    int reg_type)
1315 {
1316     AttrType termset;
1317     int termset_value_numeric;
1318     const char *termset_value_string;
1319
1320 #ifdef TERM_COUNT
1321     grep_info->term_no = 0;
1322 #endif
1323     grep_info->isam_p_size = 0;
1324     grep_info->isam_p_buf = NULL;
1325     grep_info->zh = zh;
1326     grep_info->reg_type = reg_type;
1327     grep_info->termset = 0;
1328
1329     if (!zapt)
1330         return ZEBRA_OK;
1331     attr_init_APT(&termset, zapt, 8);
1332     termset_value_numeric =
1333         attr_find_ex(&termset, NULL, &termset_value_string);
1334     if (termset_value_numeric != -1)
1335     {
1336         char resname[32];
1337         const char *termset_name = 0;
1338         if (termset_value_numeric != -2)
1339         {
1340     
1341             sprintf(resname, "%d", termset_value_numeric);
1342             termset_name = resname;
1343         }
1344         else
1345             termset_name = termset_value_string;
1346         yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1347         grep_info->termset = resultSetAdd(zh, termset_name, 1);
1348         if (!grep_info->termset)
1349         {
1350             zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1351             return ZEBRA_FAIL;
1352         }
1353     }
1354     return ZEBRA_OK;
1355 }
1356                                
1357 /**
1358   \brief Create result set(s) for list of terms
1359   \param zh Zebra Handle
1360   \param termz term as used in query but converted to UTF-8
1361   \param attributeSet default attribute set
1362   \param stream memory for result
1363   \param reg_type register type ('w', 'p',..)
1364   \param complete_flag whether it's phrases or not
1365   \param rank_type term flags for ranking
1366   \param xpath_use use attribute for X-Path (-1 for no X-path)
1367   \param num_bases number of databases
1368   \param basenames array of databases
1369   \param rset_mem memory for result sets
1370   \param result_sets output result set for each term in list (output)
1371   \param number number of output result sets
1372   \param kc rset key control to be used for created result sets
1373 */
1374 static ZEBRA_RES term_list_trunc(ZebraHandle zh,
1375                                  Z_AttributesPlusTerm *zapt,
1376                                  const char *termz,
1377                                  oid_value attributeSet,
1378                                  NMEM stream,
1379                                  int reg_type, int complete_flag,
1380                                  const char *rank_type,
1381                                  const char *xpath_use,
1382                                  int num_bases, char **basenames, 
1383                                  NMEM rset_nmem,
1384                                  RSET **result_sets, int *num_result_sets,
1385                                  struct rset_key_control *kc)
1386 {
1387     char term_dst[IT_MAX_WORD+1];
1388     struct grep_info grep_info;
1389     const char *termp = termz;
1390     int alloc_sets = 0;
1391     int empty_term = *termz ? 0 : 1;
1392
1393     empty_term = 0;
1394     *num_result_sets = 0;
1395     *term_dst = 0;
1396     if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1397         return ZEBRA_FAIL;
1398     while(1)
1399     { 
1400         ZEBRA_RES res;
1401
1402         if (alloc_sets == *num_result_sets)
1403         {
1404             int add = 10;
1405             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1406                                               sizeof(*rnew));
1407             if (alloc_sets)
1408                 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1409             alloc_sets = alloc_sets + add;
1410             *result_sets = rnew;
1411         }
1412         res = term_trunc(zh, zapt, &termp, attributeSet,
1413                          stream, &grep_info,
1414                          reg_type, complete_flag,
1415                          num_bases, basenames,
1416                          term_dst, rank_type,
1417                          xpath_use, rset_nmem,
1418                          &(*result_sets)[*num_result_sets],
1419                          kc);
1420         if (res != ZEBRA_OK)
1421         {
1422             int i;
1423             for (i = 0; i < *num_result_sets; i++)
1424                 rset_delete((*result_sets)[i]);
1425             grep_info_delete (&grep_info);
1426             return res;
1427         }
1428         if ((*result_sets)[*num_result_sets] == 0)
1429             break;
1430         (*num_result_sets)++;
1431
1432         if (empty_term)
1433             break;
1434         if (!*termp)
1435             break;
1436     }
1437     grep_info_delete(&grep_info);
1438     return ZEBRA_OK;
1439 }
1440
1441
1442 static ZEBRA_RES always_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1443                              oid_value attributeSet, NMEM stream,
1444                              struct grep_info *grep_info,
1445                              int reg_type, int complete_flag,
1446                              int num_bases, char **basenames,
1447                              const char *xpath_use,
1448                              struct ord_list **ol)
1449 {
1450     char term_dict[2*IT_MAX_WORD+4000];
1451     int r, base_no;
1452     struct rpn_char_map_info rcmi;
1453
1454     int bases_ok = 0;     /* no of databases with OK attribute */
1455
1456     *ol = ord_list_create(stream);
1457
1458     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1459
1460     for (base_no = 0; base_no < num_bases; base_no++)
1461     {
1462         int ord = -1;
1463         int regex_range = 0;
1464         int max_pos, prefix_len = 0;
1465         char ord_buf[32];
1466         int ord_len, i;
1467
1468         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1469         {
1470             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1471                            basenames[base_no]);
1472             return ZEBRA_FAIL;
1473         }
1474         
1475         if (zebra_apt_get_ord(zh, zapt, zinfo_index_category_alwaysmatches,
1476                               reg_type, xpath_use, 
1477                               attributeSet, &ord) != ZEBRA_OK)
1478             continue;
1479
1480         *ol = ord_list_append(stream, *ol, ord);
1481         
1482         if (prefix_len)
1483             term_dict[prefix_len++] = '|';
1484         else
1485             term_dict[prefix_len++] = '(';
1486         
1487         ord_len = key_SU_encode (ord, ord_buf);
1488         for (i = 0; i<ord_len; i++)
1489         {
1490             term_dict[prefix_len++] = 1;
1491             term_dict[prefix_len++] = ord_buf[i];
1492         }
1493         
1494         term_dict[prefix_len++] = ')';
1495         term_dict[prefix_len] = '\0';
1496
1497         bases_ok++;
1498         
1499         r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
1500                              grep_info, &max_pos, 
1501                              ord_len /* number of "exact" chars */,
1502                              grep_handle);
1503     }
1504     if (!bases_ok)
1505         return ZEBRA_FAIL;
1506     return ZEBRA_OK;
1507 }
1508
1509 static ZEBRA_RES rpn_search_APT_alwaysmatches(ZebraHandle zh,
1510                                               Z_AttributesPlusTerm *zapt,
1511                                               const char *termz_org,
1512                                               oid_value attributeSet,
1513                                               NMEM stream,
1514                                               int reg_type, int complete_flag,
1515                                               const char *rank_type,
1516                                               const char *xpath_use,
1517                                               int num_bases, char **basenames, 
1518                                               NMEM rset_nmem,
1519                                               RSET *rset,
1520                                               struct rset_key_control *kc)
1521 {
1522     const char *term_dst = "always";
1523     struct grep_info grep_info;
1524     zint hits_limit_value;
1525     const char *term_ref_id_str = 0;
1526     ZEBRA_RES res;
1527     struct ord_list *ol;
1528
1529     term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
1530                     stream);
1531     if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1532         return ZEBRA_FAIL;
1533
1534     grep_info.isam_p_indx = 0;
1535
1536     res = always_term(zh, zapt, attributeSet, stream, &grep_info,
1537                       reg_type, complete_flag, num_bases, basenames,
1538                       xpath_use, &ol);
1539     if (res == ZEBRA_OK)
1540     {
1541         *rset = rset_trunc(zh, grep_info.isam_p_buf,
1542                            grep_info.isam_p_indx, term_dst, strlen(term_dst),
1543                            rank_type, 1 /* preserve pos */,
1544                            zapt->term->which, rset_nmem,
1545                            kc, kc->scope, ol, reg_type, hits_limit_value,
1546                            term_ref_id_str);
1547         if (!*rset)
1548             res = ZEBRA_FAIL;
1549     }
1550     grep_info_delete (&grep_info);
1551     return res;
1552 }
1553
1554 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1555                                        Z_AttributesPlusTerm *zapt,
1556                                        const char *termz_org,
1557                                        oid_value attributeSet,
1558                                        NMEM stream,
1559                                        int reg_type, int complete_flag,
1560                                        const char *rank_type,
1561                                        const char *xpath_use,
1562                                        int num_bases, char **basenames, 
1563                                        NMEM rset_nmem,
1564                                        RSET *rset,
1565                                        struct rset_key_control *kc)
1566 {
1567     RSET *result_sets = 0;
1568     int num_result_sets = 0;
1569     ZEBRA_RES res =
1570         term_list_trunc(zh, zapt, termz_org, attributeSet,
1571                         stream, reg_type, complete_flag,
1572                         rank_type, xpath_use,
1573                         num_bases, basenames,
1574                         rset_nmem,
1575                         &result_sets, &num_result_sets, kc);
1576     if (res != ZEBRA_OK)
1577         return res;
1578     if (num_result_sets == 0)
1579         *rset = rset_create_null(rset_nmem, kc, 0); 
1580     else if (num_result_sets == 1)
1581         *rset = result_sets[0];
1582     else
1583         *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1584                                  num_result_sets, result_sets,
1585                                  1 /* ordered */, 0 /* exclusion */,
1586                                  3 /* relation */, 1 /* distance */);
1587     if (!*rset)
1588         return ZEBRA_FAIL;
1589     return ZEBRA_OK;
1590 }
1591
1592 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1593                                         Z_AttributesPlusTerm *zapt,
1594                                         const char *termz_org,
1595                                         oid_value attributeSet,
1596                                         NMEM stream,
1597                                         int reg_type, int complete_flag,
1598                                         const char *rank_type,
1599                                         const char *xpath_use,
1600                                         int num_bases, char **basenames,
1601                                         NMEM rset_nmem,
1602                                         RSET *rset,
1603                                         struct rset_key_control *kc)
1604 {
1605     RSET *result_sets = 0;
1606     int num_result_sets = 0;
1607     ZEBRA_RES res =
1608         term_list_trunc(zh, zapt, termz_org, attributeSet,
1609                         stream, reg_type, complete_flag,
1610                         rank_type, xpath_use,
1611                         num_bases, basenames,
1612                         rset_nmem,
1613                         &result_sets, &num_result_sets, kc);
1614     if (res != ZEBRA_OK)
1615         return res;
1616     if (num_result_sets == 0)
1617         *rset = rset_create_null(rset_nmem, kc, 0); 
1618     else if (num_result_sets == 1)
1619         *rset = result_sets[0];
1620     else
1621         *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1622                                num_result_sets, result_sets);
1623     if (!*rset)
1624         return ZEBRA_FAIL;
1625     return ZEBRA_OK;
1626 }
1627
1628 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1629                                          Z_AttributesPlusTerm *zapt,
1630                                          const char *termz_org,
1631                                          oid_value attributeSet,
1632                                          NMEM stream,
1633                                          int reg_type, int complete_flag,
1634                                          const char *rank_type, 
1635                                          const char *xpath_use,
1636                                          int num_bases, char **basenames,
1637                                          NMEM rset_nmem,
1638                                          RSET *rset,
1639                                          struct rset_key_control *kc)
1640 {
1641     RSET *result_sets = 0;
1642     int num_result_sets = 0;
1643     ZEBRA_RES res =
1644         term_list_trunc(zh, zapt, termz_org, attributeSet,
1645                         stream, reg_type, complete_flag,
1646                         rank_type, xpath_use,
1647                         num_bases, basenames,
1648                         rset_nmem,
1649                         &result_sets, &num_result_sets,
1650                         kc);
1651     if (res != ZEBRA_OK)
1652         return res;
1653     if (num_result_sets == 0)
1654         *rset = rset_create_null(rset_nmem, kc, 0); 
1655     else if (num_result_sets == 1)
1656         *rset = result_sets[0];
1657     else
1658         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1659                                 num_result_sets, result_sets);
1660     if (!*rset)
1661         return ZEBRA_FAIL;
1662     return ZEBRA_OK;
1663 }
1664
1665 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1666                             const char **term_sub,
1667                             char *term_dict,
1668                             oid_value attributeSet,
1669                             struct grep_info *grep_info,
1670                             int *max_pos,
1671                             int reg_type,
1672                             char *term_dst,
1673                             int *error_code)
1674 {
1675     AttrType relation;
1676     int relation_value;
1677     int term_value;
1678     int r;
1679     char *term_tmp = term_dict + strlen(term_dict);
1680
1681     *error_code = 0;
1682     attr_init_APT(&relation, zapt, 2);
1683     relation_value = attr_find(&relation, NULL);
1684
1685     yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1686
1687     if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1688                   term_dst))
1689         return 0;
1690     term_value = atoi (term_tmp);
1691     switch (relation_value)
1692     {
1693     case 1:
1694         yaz_log(log_level_rpn, "Relation <");
1695         gen_regular_rel(term_tmp, term_value-1, 1);
1696         break;
1697     case 2:
1698         yaz_log(log_level_rpn, "Relation <=");
1699         gen_regular_rel(term_tmp, term_value, 1);
1700         break;
1701     case 4:
1702         yaz_log(log_level_rpn, "Relation >=");
1703         gen_regular_rel(term_tmp, term_value, 0);
1704         break;
1705     case 5:
1706         yaz_log(log_level_rpn, "Relation >");
1707         gen_regular_rel(term_tmp, term_value+1, 0);
1708         break;
1709     case -1:
1710     case 3:
1711         yaz_log(log_level_rpn, "Relation =");
1712         sprintf(term_tmp, "(0*%d)", term_value);
1713         break;
1714     default:
1715         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1716         return 0;
1717     }
1718     yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_tmp);
1719     r = dict_lookup_grep(zh->reg->dict, term_dict, 0, grep_info, max_pos,
1720                           0, grep_handle);
1721     if (r)
1722         yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1723     yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1724     return 1;
1725 }
1726
1727 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1728                               const char **term_sub, 
1729                               oid_value attributeSet, NMEM stream,
1730                               struct grep_info *grep_info,
1731                               int reg_type, int complete_flag,
1732                               int num_bases, char **basenames,
1733                               char *term_dst, 
1734                               const char *xpath_use,
1735                               struct ord_list **ol)
1736 {
1737     char term_dict[2*IT_MAX_WORD+2];
1738     int base_no;
1739     const char *termp;
1740     struct rpn_char_map_info rcmi;
1741
1742     int bases_ok = 0;     /* no of databases with OK attribute */
1743
1744     *ol = ord_list_create(stream);
1745
1746     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1747
1748     for (base_no = 0; base_no < num_bases; base_no++)
1749     {
1750         int max_pos, prefix_len = 0;
1751         int relation_error = 0;
1752         int ord, ord_len, i;
1753         char ord_buf[32];
1754
1755         termp = *term_sub;
1756
1757         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1758         {
1759             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1760                            basenames[base_no]);
1761             return ZEBRA_FAIL;
1762         }
1763
1764         if (zebra_apt_get_ord(zh, zapt, zinfo_index_category_index,
1765                               reg_type, xpath_use, attributeSet, &ord) 
1766             != ZEBRA_OK)
1767             continue;
1768
1769         *ol = ord_list_append(stream, *ol, ord);
1770
1771         if (prefix_len)
1772             term_dict[prefix_len++] = '|';
1773         else
1774             term_dict[prefix_len++] = '(';
1775         
1776         ord_len = key_SU_encode (ord, ord_buf);
1777         for (i = 0; i < ord_len; i++)
1778         {
1779             term_dict[prefix_len++] = 1;
1780             term_dict[prefix_len++] = ord_buf[i];
1781         }
1782         term_dict[prefix_len++] = ')';
1783         term_dict[prefix_len] = '\0';
1784         bases_ok++;
1785         if (!numeric_relation(zh, zapt, &termp, term_dict,
1786                               attributeSet, grep_info, &max_pos, reg_type,
1787                               term_dst, &relation_error))
1788         {
1789             if (relation_error)
1790             {
1791                 zebra_setError(zh, relation_error, 0);
1792                 return ZEBRA_FAIL;
1793             }
1794             *term_sub = 0;
1795             return ZEBRA_OK;
1796         }
1797     }
1798     if (!bases_ok)
1799         return ZEBRA_FAIL;
1800     *term_sub = termp;
1801     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1802     return ZEBRA_OK;
1803 }
1804
1805                                  
1806 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1807                                         Z_AttributesPlusTerm *zapt,
1808                                         const char *termz,
1809                                         oid_value attributeSet,
1810                                         NMEM stream,
1811                                         int reg_type, int complete_flag,
1812                                         const char *rank_type, 
1813                                         const char *xpath_use,
1814                                         int num_bases, char **basenames,
1815                                         NMEM rset_nmem,
1816                                         RSET *rset,
1817                                         struct rset_key_control *kc)
1818 {
1819     char term_dst[IT_MAX_WORD+1];
1820     const char *termp = termz;
1821     RSET *result_sets = 0;
1822     int num_result_sets = 0;
1823     ZEBRA_RES res;
1824     struct grep_info grep_info;
1825     int alloc_sets = 0;
1826     zint hits_limit_value;
1827     const char *term_ref_id_str = 0;
1828
1829     term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
1830
1831     yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1832     if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1833         return ZEBRA_FAIL;
1834     while (1)
1835     { 
1836         struct ord_list *ol;
1837         if (alloc_sets == num_result_sets)
1838         {
1839             int add = 10;
1840             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1841                                               sizeof(*rnew));
1842             if (alloc_sets)
1843                 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1844             alloc_sets = alloc_sets + add;
1845             result_sets = rnew;
1846         }
1847         yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1848         grep_info.isam_p_indx = 0;
1849         res = numeric_term(zh, zapt, &termp, attributeSet, stream, &grep_info,
1850                            reg_type, complete_flag, num_bases, basenames,
1851                            term_dst, xpath_use, &ol);
1852         if (res == ZEBRA_FAIL || termp == 0)
1853             break;
1854         yaz_log(YLOG_DEBUG, "term: %s", term_dst);
1855         result_sets[num_result_sets] =
1856             rset_trunc(zh, grep_info.isam_p_buf,
1857                        grep_info.isam_p_indx, term_dst,
1858                        strlen(term_dst), rank_type,
1859                        0 /* preserve position */,
1860                        zapt->term->which, rset_nmem, 
1861                        kc, kc->scope, ol, reg_type,
1862                        hits_limit_value,
1863                        term_ref_id_str);
1864         if (!result_sets[num_result_sets])
1865             break;
1866         num_result_sets++;
1867     }
1868     grep_info_delete(&grep_info);
1869     if (termp)
1870     {
1871         int i;
1872         for (i = 0; i<num_result_sets; i++)
1873             rset_delete(result_sets[i]);
1874         return ZEBRA_FAIL;
1875     }
1876     if (num_result_sets == 0)
1877         *rset = rset_create_null(rset_nmem, kc, 0);
1878     if (num_result_sets == 1)
1879         *rset = result_sets[0];
1880     else
1881         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1882                                 num_result_sets, result_sets);
1883     if (!*rset)
1884         return ZEBRA_FAIL;
1885     return ZEBRA_OK;
1886 }
1887
1888 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1889                                       Z_AttributesPlusTerm *zapt,
1890                                       const char *termz,
1891                                       oid_value attributeSet,
1892                                       NMEM stream,
1893                                       const char *rank_type, NMEM rset_nmem,
1894                                       RSET *rset,
1895                                       struct rset_key_control *kc)
1896 {
1897     RSFD rsfd;
1898     struct it_key key;
1899     int sys;
1900     *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1901                              res_get (zh->res, "setTmpDir"),0 );
1902     rsfd = rset_open(*rset, RSETF_WRITE);
1903     
1904     sys = atoi(termz);
1905     if (sys <= 0)
1906         sys = 1;
1907     key.mem[0] = sys;
1908     key.mem[1] = 1;
1909     key.len = 2;
1910     rset_write (rsfd, &key);
1911     rset_close (rsfd);
1912     return ZEBRA_OK;
1913 }
1914
1915 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1916                                oid_value attributeSet, NMEM stream,
1917                                Z_SortKeySpecList *sort_sequence,
1918                                const char *rank_type,
1919                                NMEM rset_nmem,
1920                                RSET *rset,
1921                                struct rset_key_control *kc)
1922 {
1923     int i;
1924     int sort_relation_value;
1925     AttrType sort_relation_type;
1926     Z_SortKeySpec *sks;
1927     Z_SortKey *sk;
1928     int oid[OID_SIZE];
1929     oident oe;
1930     char termz[20];
1931     
1932     attr_init_APT(&sort_relation_type, zapt, 7);
1933     sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
1934
1935     if (!sort_sequence->specs)
1936     {
1937         sort_sequence->num_specs = 10;
1938         sort_sequence->specs = (Z_SortKeySpec **)
1939             nmem_malloc(stream, sort_sequence->num_specs *
1940                          sizeof(*sort_sequence->specs));
1941         for (i = 0; i<sort_sequence->num_specs; i++)
1942             sort_sequence->specs[i] = 0;
1943     }
1944     if (zapt->term->which != Z_Term_general)
1945         i = 0;
1946     else
1947         i = atoi_n ((char *) zapt->term->u.general->buf,
1948                     zapt->term->u.general->len);
1949     if (i >= sort_sequence->num_specs)
1950         i = 0;
1951     sprintf(termz, "%d", i);
1952
1953     oe.proto = PROTO_Z3950;
1954     oe.oclass = CLASS_ATTSET;
1955     oe.value = attributeSet;
1956     if (!oid_ent_to_oid (&oe, oid))
1957         return ZEBRA_FAIL;
1958
1959     sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1960     sks->sortElement = (Z_SortElement *)
1961         nmem_malloc(stream, sizeof(*sks->sortElement));
1962     sks->sortElement->which = Z_SortElement_generic;
1963     sk = sks->sortElement->u.generic = (Z_SortKey *)
1964         nmem_malloc(stream, sizeof(*sk));
1965     sk->which = Z_SortKey_sortAttributes;
1966     sk->u.sortAttributes = (Z_SortAttributes *)
1967         nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
1968
1969     sk->u.sortAttributes->id = oid;
1970     sk->u.sortAttributes->list = zapt->attributes;
1971
1972     sks->sortRelation = (int *)
1973         nmem_malloc(stream, sizeof(*sks->sortRelation));
1974     if (sort_relation_value == 1)
1975         *sks->sortRelation = Z_SortKeySpec_ascending;
1976     else if (sort_relation_value == 2)
1977         *sks->sortRelation = Z_SortKeySpec_descending;
1978     else 
1979         *sks->sortRelation = Z_SortKeySpec_ascending;
1980
1981     sks->caseSensitivity = (int *)
1982         nmem_malloc(stream, sizeof(*sks->caseSensitivity));
1983     *sks->caseSensitivity = 0;
1984
1985     sks->which = Z_SortKeySpec_null;
1986     sks->u.null = odr_nullval ();
1987     sort_sequence->specs[i] = sks;
1988     *rset = rset_create_null(rset_nmem, kc, 0);
1989     return ZEBRA_OK;
1990 }
1991
1992
1993 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1994                            oid_value attributeSet,
1995                            struct xpath_location_step *xpath, int max,
1996                            NMEM mem)
1997 {
1998     oid_value curAttributeSet = attributeSet;
1999     AttrType use;
2000     const char *use_string = 0;
2001     
2002     attr_init_APT(&use, zapt, 1);
2003     attr_find_ex(&use, &curAttributeSet, &use_string);
2004
2005     if (!use_string || *use_string != '/')
2006         return -1;
2007
2008     return zebra_parse_xpath_str(use_string, xpath, max, mem);
2009 }
2010  
2011                
2012
2013 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
2014                         int reg_type, const char *term, 
2015                         const char *xpath_use,
2016                         NMEM rset_nmem,
2017                         struct rset_key_control *kc)
2018 {
2019     RSET rset;
2020     struct grep_info grep_info;
2021     char term_dict[2048];
2022     char ord_buf[32];
2023     int prefix_len = 0;
2024     int ord = zebraExplain_lookup_attr_str(zh->reg->zei, 
2025                                            zinfo_index_category_index,
2026                                            reg_type,
2027                                            xpath_use);
2028     int ord_len, i, r, max_pos;
2029     int term_type = Z_Term_characterString;
2030     const char *flags = "void";
2031
2032     if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, '0') == ZEBRA_FAIL)
2033         return rset_create_null(rset_nmem, kc, 0);
2034     
2035     if (ord < 0)
2036         return rset_create_null(rset_nmem, kc, 0);
2037     if (prefix_len)
2038         term_dict[prefix_len++] = '|';
2039     else
2040         term_dict[prefix_len++] = '(';
2041     
2042     ord_len = key_SU_encode (ord, ord_buf);
2043     for (i = 0; i<ord_len; i++)
2044     {
2045         term_dict[prefix_len++] = 1;
2046         term_dict[prefix_len++] = ord_buf[i];
2047     }
2048     term_dict[prefix_len++] = ')';
2049     strcpy(term_dict+prefix_len, term);
2050     
2051     grep_info.isam_p_indx = 0;
2052     r = dict_lookup_grep(zh->reg->dict, term_dict, 0,
2053                           &grep_info, &max_pos, 0, grep_handle);
2054     yaz_log(YLOG_DEBUG, "%s %d positions", term,
2055              grep_info.isam_p_indx);
2056     rset = rset_trunc(zh, grep_info.isam_p_buf,
2057                       grep_info.isam_p_indx, term, strlen(term),
2058                       flags, 1, term_type,rset_nmem,
2059                       kc, kc->scope, 0, reg_type, 0 /* hits_limit */,
2060                       0 /* term_ref_id_str */);
2061     grep_info_delete(&grep_info);
2062     return rset;
2063 }
2064
2065 static
2066 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2067                            int num_bases, char **basenames,
2068                            NMEM stream, const char *rank_type, RSET rset,
2069                            int xpath_len, struct xpath_location_step *xpath,
2070                            NMEM rset_nmem,
2071                            RSET *rset_out,
2072                            struct rset_key_control *kc)
2073 {
2074     int base_no;
2075     int i;
2076     int always_matches = rset ? 0 : 1;
2077
2078     if (xpath_len < 0)
2079     {
2080         *rset_out = rset;
2081         return ZEBRA_OK;
2082     }
2083
2084     yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2085     for (i = 0; i<xpath_len; i++)
2086     {
2087         yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2088
2089     }
2090
2091     /*
2092       //a    ->    a/.*
2093       //a/b  ->    b/a/.*
2094       /a     ->    a/
2095       /a/b   ->    b/a/
2096
2097       /      ->    none
2098
2099    a[@attr = value]/b[@other = othervalue]
2100
2101  /e/@a val      range(e/,range(@a,freetext(w,1015,val),@a),e/)
2102  /a/b val       range(b/a/,freetext(w,1016,val),b/a/)
2103  /a/b/@c val    range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2104  /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2105  /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2106  /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
2107       
2108     */
2109
2110     dict_grep_cmap (zh->reg->dict, 0, 0);
2111
2112     for (base_no = 0; base_no < num_bases; base_no++)
2113     {
2114         int level = xpath_len;
2115         int first_path = 1;
2116         
2117         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2118         {
2119             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2120                            basenames[base_no]);
2121             *rset_out = rset;
2122             return ZEBRA_FAIL;
2123         }
2124         while (--level >= 0)
2125         {
2126             WRBUF xpath_rev = wrbuf_alloc();
2127             int i;
2128             RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
2129
2130             for (i = level; i >= 1; --i)
2131             {
2132                 const char *cp = xpath[i].part;
2133                 if (*cp)
2134                 {
2135                     for (; *cp; cp++)
2136                     {
2137                         if (*cp == '*')
2138                             wrbuf_puts(xpath_rev, "[^/]*");
2139                         else if (*cp == ' ')
2140                             wrbuf_puts(xpath_rev, "\001 ");
2141                         else
2142                             wrbuf_putc(xpath_rev, *cp);
2143
2144                         /* wrbuf_putc does not null-terminate , but
2145                            wrbuf_puts below ensures it does.. so xpath_rev
2146                            is OK iff length is > 0 */
2147                     }
2148                     wrbuf_puts(xpath_rev, "/");
2149                 }
2150                 else if (i == 1)  /* // case */
2151                     wrbuf_puts(xpath_rev, ".*");
2152             }
2153             if (xpath[level].predicate &&
2154                 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2155                 xpath[level].predicate->u.relation.name[0])
2156             {
2157                 WRBUF wbuf = wrbuf_alloc();
2158                 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2159                 if (xpath[level].predicate->u.relation.value)
2160                 {
2161                     const char *cp = xpath[level].predicate->u.relation.value;
2162                     wrbuf_putc(wbuf, '=');
2163                     
2164                     while (*cp)
2165                     {
2166                         if (strchr(REGEX_CHARS, *cp))
2167                             wrbuf_putc(wbuf, '\\');
2168                         wrbuf_putc(wbuf, *cp);
2169                         cp++;
2170                     }
2171                 }
2172                 wrbuf_puts(wbuf, "");
2173                 rset_attr = xpath_trunc(
2174                     zh, stream, '0', wrbuf_buf(wbuf), ZEBRA_XPATH_ATTR_NAME, 
2175                     rset_nmem, kc);
2176                 wrbuf_free(wbuf, 1);
2177             } 
2178             else 
2179             {
2180                 if (!first_path)
2181                 {
2182                     wrbuf_free(xpath_rev, 1);
2183                     continue;
2184                 }
2185             }
2186             yaz_log(log_level_rpn, "xpath_rev (%d) = %.*s", level, 
2187                     wrbuf_len(xpath_rev), wrbuf_buf(xpath_rev));
2188             if (wrbuf_len(xpath_rev))
2189             {
2190                 rset_start_tag = xpath_trunc(zh, stream, '0', 
2191                                              wrbuf_buf(xpath_rev),
2192                                              ZEBRA_XPATH_ELM_BEGIN, 
2193                                              rset_nmem, kc);
2194                 if (always_matches)
2195                     rset = rset_start_tag;
2196                 else
2197                 {
2198                     rset_end_tag = xpath_trunc(zh, stream, '0', 
2199                                                wrbuf_buf(xpath_rev),
2200                                                ZEBRA_XPATH_ELM_END, 
2201                                                rset_nmem, kc);
2202                     
2203                     rset = rset_create_between(rset_nmem, kc, kc->scope,
2204                                                rset_start_tag, rset,
2205                                                rset_end_tag, rset_attr);
2206                 }
2207             }
2208             wrbuf_free(xpath_rev, 1);
2209             first_path = 0;
2210         }
2211     }
2212     *rset_out = rset;
2213     return ZEBRA_OK;
2214 }
2215
2216 #define MAX_XPATH_STEPS 10
2217
2218 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2219                                 oid_value attributeSet, NMEM stream,
2220                                 Z_SortKeySpecList *sort_sequence,
2221                                 int num_bases, char **basenames, 
2222                                 NMEM rset_nmem,
2223                                 RSET *rset,
2224                                 struct rset_key_control *kc)
2225 {
2226     ZEBRA_RES res = ZEBRA_OK;
2227     unsigned reg_id;
2228     char *search_type = NULL;
2229     char rank_type[128];
2230     int complete_flag;
2231     int sort_flag;
2232     char termz[IT_MAX_WORD+1];
2233     int xpath_len;
2234     const char *xpath_use = 0;
2235     struct xpath_location_step xpath[MAX_XPATH_STEPS];
2236
2237     if (!log_level_set)
2238     {
2239         log_level_rpn = yaz_log_module_level("rpn");
2240         log_level_set = 1;
2241     }
2242     zebra_maps_attr(zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2243                     rank_type, &complete_flag, &sort_flag);
2244     
2245     yaz_log(YLOG_DEBUG, "reg_id=%c", reg_id);
2246     yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2247     yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2248     yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2249
2250     if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2251         return ZEBRA_FAIL;
2252
2253     if (sort_flag)
2254         return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2255                              rank_type, rset_nmem, rset, kc);
2256     /* consider if an X-Path query is used */
2257     xpath_len = rpn_check_xpath(zh, zapt, attributeSet, 
2258                                 xpath, MAX_XPATH_STEPS, stream);
2259     if (xpath_len >= 0)
2260     {
2261         if (xpath[xpath_len-1].part[0] == '@') 
2262             xpath_use = ZEBRA_XPATH_ATTR_CDATA;  /* last step is attribute  */
2263         else
2264             xpath_use = ZEBRA_XPATH_CDATA;  /* searching for cdata */        
2265     }
2266
2267     /* search using one of the various search type strategies
2268        termz is our UTF-8 search term
2269        attributeSet is top-level default attribute set 
2270        stream is ODR for search
2271        reg_id is the register type
2272        complete_flag is 1 for complete subfield, 0 for incomplete
2273        xpath_use is use-attribute to be used for X-Path search, 0 for none
2274     */
2275     if (!strcmp(search_type, "phrase"))
2276     {
2277         res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2278                                     reg_id, complete_flag, rank_type,
2279                                     xpath_use,
2280                                     num_bases, basenames, rset_nmem,
2281                                     rset, kc);
2282     }
2283     else if (!strcmp(search_type, "and-list"))
2284     {
2285         res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2286                                       reg_id, complete_flag, rank_type,
2287                                       xpath_use,
2288                                       num_bases, basenames, rset_nmem,
2289                                       rset, kc);
2290     }
2291     else if (!strcmp(search_type, "or-list"))
2292     {
2293         res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2294                                      reg_id, complete_flag, rank_type,
2295                                      xpath_use,
2296                                      num_bases, basenames, rset_nmem,
2297                                      rset, kc);
2298     }
2299     else if (!strcmp(search_type, "local"))
2300     {
2301         res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2302                                    rank_type, rset_nmem, rset, kc);
2303     }
2304     else if (!strcmp(search_type, "numeric"))
2305     {
2306         res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2307                                      reg_id, complete_flag, rank_type,
2308                                      xpath_use,
2309                                      num_bases, basenames, rset_nmem,
2310                                      rset, kc);
2311     }
2312     else if (!strcmp(search_type, "always"))
2313     {
2314         if (xpath_len >= 0) /* alwaysmatches and X-Path ? */
2315         {
2316             *rset = 0; /* signal no "term" set */
2317             return rpn_search_xpath(zh, num_bases, basenames,
2318                             stream, rank_type, *rset, 
2319                             xpath_len, xpath, rset_nmem, rset, kc);
2320         }
2321         else
2322         {
2323             res = rpn_search_APT_alwaysmatches(zh, zapt, termz,
2324                                                attributeSet, stream,
2325                                                reg_id, complete_flag,
2326                                                rank_type,
2327                                                xpath_use,
2328                                                num_bases, basenames, rset_nmem,
2329                                                rset, kc);
2330         }
2331     }
2332     else
2333     {
2334         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2335         res = ZEBRA_FAIL;
2336     }
2337     if (res != ZEBRA_OK)
2338         return res;
2339     if (!*rset)
2340         return ZEBRA_FAIL;
2341     return rpn_search_xpath(zh, num_bases, basenames,
2342                             stream, rank_type, *rset, 
2343                             xpath_len, xpath, rset_nmem, rset, kc);
2344 }
2345
2346 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2347                                       oid_value attributeSet, 
2348                                       NMEM stream, NMEM rset_nmem,
2349                                       Z_SortKeySpecList *sort_sequence,
2350                                       int num_bases, char **basenames,
2351                                       RSET **result_sets, int *num_result_sets,
2352                                       Z_Operator *parent_op,
2353                                       struct rset_key_control *kc);
2354
2355 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2356                          oid_value attributeSet, 
2357                          NMEM stream, NMEM rset_nmem,
2358                          Z_SortKeySpecList *sort_sequence,
2359                          int num_bases, char **basenames,
2360                          RSET *result_set)
2361 {
2362     RSET *result_sets = 0;
2363     int num_result_sets = 0;
2364     ZEBRA_RES res;
2365     struct rset_key_control *kc = zebra_key_control_create(zh);
2366
2367     res = rpn_search_structure(zh, zs, attributeSet,
2368                                stream, rset_nmem,
2369                                sort_sequence, 
2370                                num_bases, basenames,
2371                                &result_sets, &num_result_sets,
2372                                0 /* no parent op */,
2373                                kc);
2374     if (res != ZEBRA_OK)
2375     {
2376         int i;
2377         for (i = 0; i<num_result_sets; i++)
2378             rset_delete(result_sets[i]);
2379         *result_set = 0;
2380     }
2381     else
2382     {
2383         assert(num_result_sets == 1);
2384         assert(result_sets);
2385         assert(*result_sets);
2386         *result_set = *result_sets;
2387     }
2388     (*kc->dec)(kc);
2389     return res;
2390 }
2391
2392 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2393                                oid_value attributeSet, 
2394                                NMEM stream, NMEM rset_nmem,
2395                                Z_SortKeySpecList *sort_sequence,
2396                                int num_bases, char **basenames,
2397                                RSET **result_sets, int *num_result_sets,
2398                                Z_Operator *parent_op,
2399                                struct rset_key_control *kc)
2400 {
2401     *num_result_sets = 0;
2402     if (zs->which == Z_RPNStructure_complex)
2403     {
2404         ZEBRA_RES res;
2405         Z_Operator *zop = zs->u.complex->roperator;
2406         RSET *result_sets_l = 0;
2407         int num_result_sets_l = 0;
2408         RSET *result_sets_r = 0;
2409         int num_result_sets_r = 0;
2410
2411         res = rpn_search_structure(zh, zs->u.complex->s1,
2412                                    attributeSet, stream, rset_nmem,
2413                                    sort_sequence,
2414                                    num_bases, basenames,
2415                                    &result_sets_l, &num_result_sets_l,
2416                                    zop, kc);
2417         if (res != ZEBRA_OK)
2418         {
2419             int i;
2420             for (i = 0; i<num_result_sets_l; i++)
2421                 rset_delete(result_sets_l[i]);
2422             return res;
2423         }
2424         res = rpn_search_structure(zh, zs->u.complex->s2,
2425                                    attributeSet, stream, rset_nmem,
2426                                    sort_sequence,
2427                                    num_bases, basenames,
2428                                    &result_sets_r, &num_result_sets_r,
2429                                    zop, kc);
2430         if (res != ZEBRA_OK)
2431         {
2432             int i;
2433             for (i = 0; i<num_result_sets_l; i++)
2434                 rset_delete(result_sets_l[i]);
2435             for (i = 0; i<num_result_sets_r; i++)
2436                 rset_delete(result_sets_r[i]);
2437             return res;
2438         }
2439
2440         /* make a new list of result for all children */
2441         *num_result_sets = num_result_sets_l + num_result_sets_r;
2442         *result_sets = nmem_malloc(stream, *num_result_sets * 
2443                                    sizeof(**result_sets));
2444         memcpy(*result_sets, result_sets_l, 
2445                num_result_sets_l * sizeof(**result_sets));
2446         memcpy(*result_sets + num_result_sets_l, result_sets_r, 
2447                num_result_sets_r * sizeof(**result_sets));
2448
2449         if (!parent_op || parent_op->which != zop->which
2450             || (zop->which != Z_Operator_and &&
2451                 zop->which != Z_Operator_or))
2452         {
2453             /* parent node different from this one (or non-present) */
2454             /* we must combine result sets now */
2455             RSET rset;
2456             switch (zop->which)
2457             {
2458             case Z_Operator_and:
2459                 rset = rset_create_and(rset_nmem, kc,
2460                                        kc->scope,
2461                                        *num_result_sets, *result_sets);
2462                 break;
2463             case Z_Operator_or:
2464                 rset = rset_create_or(rset_nmem, kc,
2465                                       kc->scope, 0, /* termid */
2466                                       *num_result_sets, *result_sets);
2467                 break;
2468             case Z_Operator_and_not:
2469                 rset = rset_create_not(rset_nmem, kc,
2470                                        kc->scope,
2471                                        (*result_sets)[0],
2472                                        (*result_sets)[1]);
2473                 break;
2474             case Z_Operator_prox:
2475                 if (zop->u.prox->which != Z_ProximityOperator_known)
2476                 {
2477                     zebra_setError(zh, 
2478                                    YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2479                                    0);
2480                     return ZEBRA_FAIL;
2481                 }
2482                 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2483                 {
2484                     zebra_setError_zint(zh,
2485                                         YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2486                                         *zop->u.prox->u.known);
2487                     return ZEBRA_FAIL;
2488                 }
2489                 else
2490                 {
2491                     rset = rset_create_prox(rset_nmem, kc,
2492                                             kc->scope,
2493                                             *num_result_sets, *result_sets, 
2494                                             *zop->u.prox->ordered,
2495                                             (!zop->u.prox->exclusion ? 
2496                                              0 : *zop->u.prox->exclusion),
2497                                             *zop->u.prox->relationType,
2498                                             *zop->u.prox->distance );
2499                 }
2500                 break;
2501             default:
2502                 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
2503                 return ZEBRA_FAIL;
2504             }
2505             *num_result_sets = 1;
2506             *result_sets = nmem_malloc(stream, *num_result_sets * 
2507                                        sizeof(**result_sets));
2508             (*result_sets)[0] = rset;
2509         }
2510     }
2511     else if (zs->which == Z_RPNStructure_simple)
2512     {
2513         RSET rset;
2514         ZEBRA_RES res;
2515
2516         if (zs->u.simple->which == Z_Operand_APT)
2517         {
2518             yaz_log(YLOG_DEBUG, "rpn_search_APT");
2519             res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2520                                  attributeSet, stream, sort_sequence,
2521                                  num_bases, basenames, rset_nmem, &rset,
2522                                  kc);
2523             if (res != ZEBRA_OK)
2524                 return res;
2525         }
2526         else if (zs->u.simple->which == Z_Operand_resultSetId)
2527         {
2528             yaz_log(YLOG_DEBUG, "rpn_search_ref");
2529             rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2530             if (!rset)
2531             {
2532                 zebra_setError(zh, 
2533                                YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2534                                zs->u.simple->u.resultSetId);
2535                 return ZEBRA_FAIL;
2536             }
2537             rset_dup(rset);
2538         }
2539         else
2540         {
2541             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2542             return ZEBRA_FAIL;
2543         }
2544         *num_result_sets = 1;
2545         *result_sets = nmem_malloc(stream, *num_result_sets * 
2546                                    sizeof(**result_sets));
2547         (*result_sets)[0] = rset;
2548     }
2549     else
2550     {
2551         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2552         return ZEBRA_FAIL;
2553     }
2554     return ZEBRA_OK;
2555 }
2556
2557 struct scan_info_entry {
2558     char *term;
2559     ISAM_P isam_p;
2560 };
2561
2562 struct scan_info {
2563     struct scan_info_entry *list;
2564     ODR odr;
2565     int before, after;
2566     char prefix[20];
2567 };
2568
2569 static int scan_handle (char *name, const char *info, int pos, void *client)
2570 {
2571     int len_prefix, idx;
2572     struct scan_info *scan_info = (struct scan_info *) client;
2573
2574     len_prefix = strlen(scan_info->prefix);
2575     if (memcmp (name, scan_info->prefix, len_prefix))
2576         return 1;
2577     if (pos > 0)
2578         idx = scan_info->after - pos + scan_info->before;
2579     else
2580         idx = - pos - 1;
2581
2582     if (idx < 0)
2583         return 0;
2584     scan_info->list[idx].term = (char *)
2585         odr_malloc(scan_info->odr, strlen(name + len_prefix)+1);
2586     strcpy(scan_info->list[idx].term, name + len_prefix);
2587     assert (*info == sizeof(ISAM_P));
2588     memcpy (&scan_info->list[idx].isam_p, info+1, sizeof(ISAM_P));
2589     return 0;
2590 }
2591
2592 void zebra_term_untrans_iconv(ZebraHandle zh, NMEM stream, int reg_type,
2593                               char **dst, const char *src)
2594 {
2595     char term_src[IT_MAX_WORD];
2596     char term_dst[IT_MAX_WORD];
2597     
2598     zebra_term_untrans (zh, reg_type, term_src, src);
2599
2600     if (zh->iconv_from_utf8 != 0)
2601     {
2602         int len;
2603         char *inbuf = term_src;
2604         size_t inleft = strlen(term_src);
2605         char *outbuf = term_dst;
2606         size_t outleft = sizeof(term_dst)-1;
2607         size_t ret;
2608         
2609         ret = yaz_iconv (zh->iconv_from_utf8, &inbuf, &inleft,
2610                          &outbuf, &outleft);
2611         if (ret == (size_t)(-1))
2612             len = 0;
2613         else
2614             len = outbuf - term_dst;
2615         *dst = nmem_malloc(stream, len + 1);
2616         if (len > 0)
2617             memcpy (*dst, term_dst, len);
2618         (*dst)[len] = '\0';
2619     }
2620     else
2621         *dst = nmem_strdup(stream, term_src);
2622 }
2623
2624 static void count_set(ZebraHandle zh, RSET rset, zint *count)
2625 {
2626     zint psysno = 0;
2627     struct it_key key;
2628     RSFD rfd;
2629
2630     yaz_log(YLOG_DEBUG, "count_set");
2631
2632     rset->hits_limit = zh->approx_limit;
2633
2634     *count = 0;
2635     rfd = rset_open(rset, RSETF_READ);
2636     while (rset_read(rfd, &key,0 /* never mind terms */))
2637     {
2638         if (key.mem[0] != psysno)
2639         {
2640             psysno = key.mem[0];
2641             if (rfd->counted_items >= rset->hits_limit)
2642                 break;
2643         }
2644     }
2645     rset_close (rfd);
2646     *count = rset->hits_count;
2647 }
2648
2649 #define RPN_MAX_ORDS 32
2650
2651 ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
2652                    oid_value attributeset,
2653                    int num_bases, char **basenames,
2654                    int *position, int *num_entries, ZebraScanEntry **list,
2655                    int *is_partial, RSET limit_set, int return_zero)
2656 {
2657     int i;
2658     int pos = *position;
2659     int num = *num_entries;
2660     int before;
2661     int after;
2662     int base_no;
2663     char termz[IT_MAX_WORD+20];
2664     struct scan_info *scan_info_array;
2665     ZebraScanEntry *glist;
2666     int ords[RPN_MAX_ORDS], ord_no = 0;
2667     int ptr[RPN_MAX_ORDS];
2668
2669     unsigned index_type;
2670     char *search_type = NULL;
2671     char rank_type[128];
2672     int complete_flag;
2673     int sort_flag;
2674     NMEM rset_nmem = NULL; 
2675     struct rset_key_control *kc = 0;
2676
2677     *list = 0;
2678     *is_partial = 0;
2679
2680     if (attributeset == VAL_NONE)
2681         attributeset = VAL_BIB1;
2682
2683     if (!limit_set)
2684     {
2685         AttrType termset;
2686         int termset_value_numeric;
2687         const char *termset_value_string;
2688         attr_init_APT(&termset, zapt, 8);
2689         termset_value_numeric =
2690             attr_find_ex(&termset, NULL, &termset_value_string);
2691         if (termset_value_numeric != -1)
2692         {
2693             char resname[32];
2694             const char *termset_name = 0;
2695             
2696             if (termset_value_numeric != -2)
2697             {
2698                 
2699                 sprintf(resname, "%d", termset_value_numeric);
2700                 termset_name = resname;
2701             }
2702             else
2703                 termset_name = termset_value_string;
2704             
2705             limit_set = resultSetRef (zh, termset_name);
2706         }
2707     }
2708         
2709     yaz_log(YLOG_DEBUG, "position = %d, num = %d set=%d",
2710             pos, num, attributeset);
2711         
2712     if (zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type,
2713                         rank_type, &complete_flag, &sort_flag))
2714     {
2715         *num_entries = 0;
2716         zebra_setError(zh, YAZ_BIB1_UNSUPP_ATTRIBUTE_TYPE, 0);
2717         return ZEBRA_FAIL;
2718     }
2719     for (base_no = 0; base_no < num_bases && ord_no < RPN_MAX_ORDS; base_no++)
2720     {
2721         int ord;
2722
2723         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2724         {
2725             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2726                            basenames[base_no]);
2727             *num_entries = 0;
2728             return ZEBRA_FAIL;
2729         }
2730         if (zebra_apt_get_ord(zh, zapt, zinfo_index_category_index,
2731                               index_type, 0, attributeset, &ord) 
2732             != ZEBRA_OK)
2733             continue;
2734         ords[ord_no++] = ord;
2735     }
2736     if (ord_no == 0)
2737     {
2738         *num_entries = 0;
2739         return ZEBRA_OK;
2740     }
2741     /* prepare dictionary scanning */
2742     if (num < 1)
2743     {
2744         *num_entries = 0;
2745         return ZEBRA_OK;
2746     }
2747     before = pos-1;
2748     if (before < 0)
2749         before = 0;
2750     after = 1+num-pos;
2751     if (after < 0)
2752         after = 0;
2753     yaz_log(YLOG_DEBUG, "rpn_scan pos=%d num=%d before=%d "
2754             "after=%d before+after=%d",
2755             pos, num, before, after, before+after);
2756     scan_info_array = (struct scan_info *)
2757         odr_malloc(stream, ord_no * sizeof(*scan_info_array));
2758     for (i = 0; i < ord_no; i++)
2759     {
2760         int j, prefix_len = 0;
2761         int before_tmp = before, after_tmp = after;
2762         struct scan_info *scan_info = scan_info_array + i;
2763         struct rpn_char_map_info rcmi;
2764
2765         rpn_char_map_prepare (zh->reg, index_type, &rcmi);
2766
2767         scan_info->before = before;
2768         scan_info->after = after;
2769         scan_info->odr = stream;
2770
2771         scan_info->list = (struct scan_info_entry *)
2772             odr_malloc(stream, (before+after) * sizeof(*scan_info->list));
2773         for (j = 0; j<before+after; j++)
2774             scan_info->list[j].term = NULL;
2775
2776         prefix_len += key_SU_encode (ords[i], termz + prefix_len);
2777         termz[prefix_len] = 0;
2778         strcpy(scan_info->prefix, termz);
2779
2780         if (trans_scan_term(zh, zapt, termz+prefix_len, index_type) == 
2781             ZEBRA_FAIL)
2782             return ZEBRA_FAIL;
2783         
2784         dict_scan(zh->reg->dict, termz, &before_tmp, &after_tmp,
2785                   scan_info, scan_handle);
2786     }
2787     glist = (ZebraScanEntry *)
2788         odr_malloc(stream, (before+after)*sizeof(*glist));
2789
2790     rset_nmem = nmem_create();
2791     kc = zebra_key_control_create(zh);
2792
2793     /* consider terms after main term */
2794     for (i = 0; i < ord_no; i++)
2795         ptr[i] = before;
2796     
2797     *is_partial = 0;
2798     for (i = 0; i<after; i++)
2799     {
2800         int j, j0 = -1;
2801         const char *mterm = NULL;
2802         const char *tst;
2803         RSET rset = 0;
2804         int lo = i + pos-1; /* offset in result list */
2805
2806         /* find: j0 is the first of the minimal values */
2807         for (j = 0; j < ord_no; j++)
2808         {
2809             if (ptr[j] < before+after && ptr[j] >= 0 &&
2810                 (tst = scan_info_array[j].list[ptr[j]].term) &&
2811                 (!mterm || strcmp (tst, mterm) < 0))
2812             {
2813                 j0 = j;
2814                 mterm = tst;
2815             }
2816         }
2817         if (j0 == -1)
2818             break;  /* no value found, stop */
2819
2820         /* get result set for first one , but only if it's within bounds */
2821         if (lo >= 0)
2822         {
2823             /* get result set for first term */
2824             zebra_term_untrans_iconv(zh, stream->mem, index_type,
2825                                      &glist[lo].term, mterm);
2826             rset = rset_trunc(zh, &scan_info_array[j0].list[ptr[j0]].isam_p, 1,
2827                               glist[lo].term, strlen(glist[lo].term),
2828                               NULL, 0, zapt->term->which, rset_nmem, 
2829                               kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2830                               0 /* term_ref_id_str */);
2831         }
2832         ptr[j0]++; /* move index for this set .. */
2833         /* get result set for remaining scan terms */
2834         for (j = j0+1; j<ord_no; j++)
2835         {
2836             if (ptr[j] < before+after && ptr[j] >= 0 &&
2837                 (tst = scan_info_array[j].list[ptr[j]].term) &&
2838                 !strcmp (tst, mterm))
2839             {
2840                 if (lo >= 0)
2841                 {
2842                     RSET rsets[2];
2843                     
2844                     rsets[0] = rset;
2845                     rsets[1] =
2846                         rset_trunc(
2847                             zh, &scan_info_array[j].list[ptr[j]].isam_p, 1,
2848                             glist[lo].term,
2849                             strlen(glist[lo].term), NULL, 0,
2850                             zapt->term->which,rset_nmem,
2851                             kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2852                             0 /* term_ref_id_str */ );
2853                     rset = rset_create_or(rset_nmem, kc,
2854                                           kc->scope, 0 /* termid */,
2855                                           2, rsets);
2856                 }
2857                 ptr[j]++;
2858             }
2859         }
2860         if (lo >= 0)
2861         {
2862             zint count;
2863             /* merge with limit_set if given */
2864             if (limit_set)
2865             {
2866                 RSET rsets[2];
2867                 rsets[0] = rset;
2868                 rsets[1] = rset_dup(limit_set);
2869                 
2870                 rset = rset_create_and(rset_nmem, kc, kc->scope, 2, rsets);
2871             }
2872             /* count it */
2873             count_set(zh, rset, &count);
2874             glist[lo].occurrences = count;
2875             rset_delete(rset);
2876         }
2877     }
2878     if (i < after)
2879     {
2880         *num_entries -= (after-i);
2881         *is_partial = 1;
2882         if (*num_entries < 0)
2883         {
2884             (*kc->dec)(kc);
2885             nmem_destroy(rset_nmem);
2886             *num_entries = 0;
2887             return ZEBRA_OK;
2888         }
2889     }
2890     /* consider terms before main term */
2891     for (i = 0; i<ord_no; i++)
2892         ptr[i] = 0;
2893     
2894     for (i = 0; i<before; i++)
2895     {
2896         int j, j0 = -1;
2897         const char *mterm = NULL;
2898         const char *tst;
2899         RSET rset;
2900         int lo = before-1-i; /* offset in result list */
2901         zint count;
2902         
2903         for (j = 0; j <ord_no; j++)
2904         {
2905             if (ptr[j] < before && ptr[j] >= 0 &&
2906                 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
2907                 (!mterm || strcmp (tst, mterm) > 0))
2908             {
2909                 j0 = j;
2910                     mterm = tst;
2911             }
2912         }
2913         if (j0 == -1)
2914             break;
2915         
2916         zebra_term_untrans_iconv(zh, stream->mem, index_type,
2917                                  &glist[lo].term, mterm);
2918         
2919         rset = rset_trunc
2920             (zh, &scan_info_array[j0].list[before-1-ptr[j0]].isam_p, 1,
2921              glist[lo].term, strlen(glist[lo].term),
2922              NULL, 0, zapt->term->which, rset_nmem,
2923              kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2924              0 /* term_ref_id_str */);
2925         
2926         ptr[j0]++;
2927         
2928         for (j = j0+1; j<ord_no; j++)
2929         {
2930             if (ptr[j] < before && ptr[j] >= 0 &&
2931                 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
2932                 !strcmp (tst, mterm))
2933             {
2934                 RSET rsets[2];
2935                 
2936                 rsets[0] = rset;
2937                 rsets[1] = rset_trunc(
2938                     zh,
2939                     &scan_info_array[j].list[before-1-ptr[j]].isam_p, 1,
2940                     glist[lo].term,
2941                     strlen(glist[lo].term), NULL, 0,
2942                     zapt->term->which, rset_nmem,
2943                     kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2944                     0 /* term_ref_id_str */);
2945                 rset = rset_create_or(rset_nmem, kc,
2946                                       kc->scope, 0 /* termid */, 2, rsets);
2947                 
2948                 ptr[j]++;
2949             }
2950         }
2951         if (limit_set)
2952         {
2953             RSET rsets[2];
2954             rsets[0] = rset;
2955             rsets[1] = rset_dup(limit_set);
2956             
2957             rset = rset_create_and(rset_nmem, kc, kc->scope, 2, rsets);
2958         }
2959         count_set(zh, rset, &count);
2960         glist[lo].occurrences = count;
2961         rset_delete (rset);
2962     }
2963     (*kc->dec)(kc);
2964     nmem_destroy(rset_nmem);
2965     i = before-i;
2966     if (i)
2967     {
2968         *is_partial = 1;
2969         *position -= i;
2970         *num_entries -= i;
2971         if (*num_entries <= 0)
2972         {
2973             *num_entries = 0;
2974             return ZEBRA_OK;
2975         }
2976     }
2977     
2978     *list = glist + i;               /* list is set to first 'real' entry */
2979     
2980     yaz_log(YLOG_DEBUG, "position = %d, num_entries = %d",
2981             *position, *num_entries);
2982     return ZEBRA_OK;
2983 }
2984
2985 /*
2986  * Local variables:
2987  * c-basic-offset: 4
2988  * indent-tabs-mode: nil
2989  * End:
2990  * vim: shiftwidth=4 tabstop=8 expandtab
2991  */
2992