Update copyright year + FSF address
[idzebra-moved-to-github.git] / index / zrpn.c
1 /* $Id: zrpn.c,v 1.226 2006-08-14 10:40:15 adam Exp $
2    Copyright (C) 1995-2006
3    Index Data ApS
4
5 This file is part of the Zebra server.
6
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
20
21 */
22
23 #include <stdio.h>
24 #include <assert.h>
25 #ifdef WIN32
26 #include <io.h>
27 #endif
28 #if HAVE_UNISTD_H
29 #include <unistd.h>
30 #endif
31 #include <ctype.h>
32
33 #include <yaz/diagbib1.h>
34 #include "index.h"
35 #include <zebra_xpath.h>
36 #include <attrfind.h>
37 #include <charmap.h>
38 #include <rset.h>
39
40 struct rpn_char_map_info
41 {
42     ZebraMaps zm;
43     int reg_type;
44 };
45
/* Log level of the yaz "rpn" log module.  It is resolved lazily (see
 * add_isam_p()); log_level_set records whether that has happened yet. */
static int log_level_rpn = 0;
static int log_level_set = 0;

/* NOTE(review): not referenced in the part of the file visible here -
 * presumably consulted further down; confirm before removing. */
#define TERMSET_DISABLE 1
50
51 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
52 {
53     struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
54     const char **out = zebra_maps_input(p->zm, p->reg_type, from, len, 0);
55 #if 0
56     if (out && *out)
57     {
58         const char *outp = *out;
59         yaz_log(YLOG_LOG, "---");
60         while (*outp)
61         {
62             yaz_log(YLOG_LOG, "%02X", *outp);
63             outp++;
64         }
65     }
66 #endif
67     return out;
68 }
69
70 static void rpn_char_map_prepare(struct zebra_register *reg, int reg_type,
71                                  struct rpn_char_map_info *map_info)
72 {
73     map_info->zm = reg->zebra_maps;
74     map_info->reg_type = reg_type;
75     dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
76 }
77
78 #define TERM_COUNT        
79        
80 struct grep_info {        
81 #ifdef TERM_COUNT        
82     int *term_no;        
83 #endif        
84     ISAM_P *isam_p_buf;
85     int isam_p_size;        
86     int isam_p_indx;
87     ZebraHandle zh;
88     int reg_type;
89     ZebraSet termset;
90 };        
91
92 void zebra_term_untrans(ZebraHandle zh, int reg_type,
93                         char *dst, const char *src)
94 {
95     int len = 0;
96     while (*src)
97     {
98         const char *cp = zebra_maps_output(zh->reg->zebra_maps,
99                                            reg_type, &src);
100         if (!cp)
101         {
102             if (len < IT_MAX_WORD-1)
103                 dst[len++] = *src;
104             src++;
105         }
106         else
107             while (*cp && len < IT_MAX_WORD-1)
108                 dst[len++] = *cp++;
109     }
110     dst[len] = '\0';
111 }
112
113 static void add_isam_p(const char *name, const char *info,
114                        struct grep_info *p)
115 {
116     if (!log_level_set)
117     {
118         log_level_rpn = yaz_log_module_level("rpn");
119         log_level_set = 1;
120     }
121     if (p->isam_p_indx == p->isam_p_size)
122     {
123         ISAM_P *new_isam_p_buf;
124 #ifdef TERM_COUNT        
125         int *new_term_no;        
126 #endif
127         p->isam_p_size = 2*p->isam_p_size + 100;
128         new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
129                                             p->isam_p_size);
130         if (p->isam_p_buf)
131         {
132             memcpy(new_isam_p_buf, p->isam_p_buf,
133                     p->isam_p_indx * sizeof(*p->isam_p_buf));
134             xfree(p->isam_p_buf);
135         }
136         p->isam_p_buf = new_isam_p_buf;
137
138 #ifdef TERM_COUNT
139         new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
140         if (p->term_no)
141         {
142             memcpy(new_term_no, p->isam_p_buf,
143                     p->isam_p_indx * sizeof(*p->term_no));
144             xfree(p->term_no);
145         }
146         p->term_no = new_term_no;
147 #endif
148     }
149     assert(*info == sizeof(*p->isam_p_buf));
150     memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
151
152     if (p->termset)
153     {
154         const char *db;
155         char term_tmp[IT_MAX_WORD];
156         int ord = 0;
157         const char *index_name;
158         int len = key_SU_decode (&ord, (const unsigned char *) name);
159         
160         zebra_term_untrans  (p->zh, p->reg_type, term_tmp, name+len);
161         yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
162         zebraExplain_lookup_ord(p->zh->reg->zei,
163                                 ord, 0 /* index_type */, &db, &index_name);
164         yaz_log(log_level_rpn, "grep:  db=%s index=%s", db, index_name);
165         
166         resultSetAddTerm(p->zh, p->termset, name[len], db,
167                          index_name, term_tmp);
168     }
169     (p->isam_p_indx)++;
170 }
171
/* Dictionary grep callback: p is the struct grep_info collecting hits.
 * Always returns 0. */
static int grep_handle(char *name, const char *info, void *p)
{
    struct grep_info *gi = (struct grep_info *) p;

    add_isam_p(name, info, gi);
    return 0;
}
177
178 static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src,
179                     const char *ct1, const char *ct2, int first)
180 {
181     const char *s1, *s0 = *src;
182     const char **map;
183
184     /* skip white space */
185     while (*s0)
186     {
187         if (ct1 && strchr(ct1, *s0))
188             break;
189         if (ct2 && strchr(ct2, *s0))
190             break;
191         s1 = s0;
192         map = zebra_maps_input(zebra_maps, reg_type, &s1, strlen(s1), first);
193         if (**map != *CHR_SPACE)
194             break;
195         s0 = s1;
196     }
197     *src = s0;
198     return *s0;
199 }
200
201
/* esc_str: render a byte buffer as "HH:c  " groups for diagnostics.
 *  out_buf, out_size:  destination; out_size must exceed 20
 *  in_buf, in_size:    bytes to render
 * Each byte becomes its two-digit upper-case hex value, a colon, the byte
 * itself (or '?' when outside 32..126) and two blanks.  Once the output
 * gets within 20 bytes of out_size, ".." is appended and the rest of the
 * input is dropped. */
static void esc_str(char *out_buf, size_t out_size,
                    const char *in_buf, int in_size)
{
    size_t used = 0;
    int k;

    assert(out_buf);
    assert(in_buf);
    assert(out_size > 20);

    out_buf[0] = '\0';
    for (k = 0; k < in_size; k++)
    {
        const int c = in_buf[k] & 0xff;
        const int pc = (c >= 32 && c <= 126) ? c : '?';

        used += (size_t) sprintf(out_buf + used, "%02X:%c  ", c, pc);
        if (used > out_size-20)
        {
            strcpy(out_buf + used, "..");
            break;
        }
    }
}
227
228 #define REGEX_CHARS " []()|.*+?!"
229
230 /* term_100: handle term, where trunc = none(no operators at all) */
231 static int term_100(ZebraMaps zebra_maps, int reg_type,
232                     const char **src, char *dst, int space_split,
233                     char *dst_term)
234 {
235     const char *s0;
236     const char **map;
237     int i = 0;
238     int j = 0;
239
240     const char *space_start = 0;
241     const char *space_end = 0;
242
243     if (!term_pre(zebra_maps, reg_type, src, NULL, NULL, !space_split))
244         return 0;
245     s0 = *src;
246     while (*s0)
247     {
248         const char *s1 = s0;
249         int q_map_match = 0;
250         map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
251                                 &q_map_match);
252         if (space_split)
253         {
254             if (**map == *CHR_SPACE)
255                 break;
256         }
257         else  /* complete subfield only. */
258         {
259             if (**map == *CHR_SPACE)
260             {   /* save space mapping for later  .. */
261                 space_start = s1;
262                 space_end = s0;
263                 continue;
264             }
265             else if (space_start)
266             {   /* reload last space */
267                 while (space_start < space_end)
268                 {
269                     if (strchr(REGEX_CHARS, *space_start))
270                         dst[i++] = '\\';
271                     dst_term[j++] = *space_start;
272                     dst[i++] = *space_start++;
273                 }
274                 /* and reset */
275                 space_start = space_end = 0;
276             }
277         }
278         /* add non-space char */
279         memcpy(dst_term+j, s1, s0 - s1);
280         j += (s0 - s1);
281         if (!q_map_match)
282         {
283             while (s1 < s0)
284             {
285                 if (strchr(REGEX_CHARS, *s1))
286                     dst[i++] = '\\';
287                 dst[i++] = *s1++;
288             }
289         }
290         else
291         {
292             char tmpbuf[80];
293             esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
294             
295             strcpy(dst + i, map[0]);
296             i += strlen(map[0]);
297         }
298     }
299     dst[i] = '\0';
300     dst_term[j] = '\0';
301     *src = s0;
302     return i;
303 }
304
305 /* term_101: handle term, where trunc = Process # */
306 static int term_101(ZebraMaps zebra_maps, int reg_type,
307                     const char **src, char *dst, int space_split,
308                     char *dst_term)
309 {
310     const char *s0;
311     const char **map;
312     int i = 0;
313     int j = 0;
314
315     if (!term_pre(zebra_maps, reg_type, src, "#", "#", !space_split))
316         return 0;
317     s0 = *src;
318     while (*s0)
319     {
320         if (*s0 == '#')
321         {
322             dst[i++] = '.';
323             dst[i++] = '*';
324             dst_term[j++] = *s0++;
325         }
326         else
327         {
328             const char *s1 = s0;
329             int q_map_match = 0;
330             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
331                                     &q_map_match);
332             if (space_split && **map == *CHR_SPACE)
333                 break;
334
335             /* add non-space char */
336             memcpy(dst_term+j, s1, s0 - s1);
337             j += (s0 - s1);
338             if (!q_map_match)
339             {
340                 while (s1 < s0)
341                 {
342                     if (strchr(REGEX_CHARS, *s1))
343                         dst[i++] = '\\';
344                     dst[i++] = *s1++;
345                 }
346             }
347             else
348             {
349                 char tmpbuf[80];
350                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
351                 
352                 strcpy(dst + i, map[0]);
353                 i += strlen(map[0]);
354             }
355         }
356     }
357     dst[i] = '\0';
358     dst_term[j++] = '\0';
359     *src = s0;
360     return i;
361 }
362
363 /* term_103: handle term, where trunc = re-2 (regular expressions) */
364 static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src,
365                     char *dst, int *errors, int space_split,
366                     char *dst_term)
367 {
368     int i = 0;
369     int j = 0;
370     const char *s0;
371     const char **map;
372
373     if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split))
374         return 0;
375     s0 = *src;
376     if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
377         isdigit(((const unsigned char *)s0)[1]))
378     {
379         *errors = s0[1] - '0';
380         s0 += 3;
381         if (*errors > 3)
382             *errors = 3;
383     }
384     while (*s0)
385     {
386         if (strchr("^\\()[].*+?|-", *s0))
387         {
388             dst_term[j++] = *s0;
389             dst[i++] = *s0++;
390         }
391         else
392         {
393             const char *s1 = s0;
394             int q_map_match = 0;
395             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
396                                     &q_map_match);
397             if (space_split && **map == *CHR_SPACE)
398                 break;
399
400             /* add non-space char */
401             memcpy(dst_term+j, s1, s0 - s1);
402             j += (s0 - s1);
403             if (!q_map_match)
404             {
405                 while (s1 < s0)
406                 {
407                     if (strchr(REGEX_CHARS, *s1))
408                         dst[i++] = '\\';
409                     dst[i++] = *s1++;
410                 }
411             }
412             else
413             {
414                 char tmpbuf[80];
415                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
416                 
417                 strcpy(dst + i, map[0]);
418                 i += strlen(map[0]);
419             }
420         }
421     }
422     dst[i] = '\0';
423     dst_term[j] = '\0';
424     *src = s0;
425     
426     return i;
427 }
428
429 /* term_103: handle term, where trunc = re-1 (regular expressions) */
430 static int term_102(ZebraMaps zebra_maps, int reg_type, const char **src,
431                     char *dst, int space_split, char *dst_term)
432 {
433     return term_103(zebra_maps, reg_type, src, dst, NULL, space_split,
434                     dst_term);
435 }
436
437
438 /* term_104: handle term, where trunc = Process # and ! */
439 static int term_104(ZebraMaps zebra_maps, int reg_type,
440                     const char **src, char *dst, int space_split,
441                     char *dst_term)
442 {
443     const char *s0;
444     const char **map;
445     int i = 0;
446     int j = 0;
447
448     if (!term_pre(zebra_maps, reg_type, src, "?*#", "?*#", !space_split))
449         return 0;
450     s0 = *src;
451     while (*s0)
452     {
453         if (*s0 == '?')
454         {
455             dst_term[j++] = *s0++;
456             if (*s0 >= '0' && *s0 <= '9')
457             {
458                 int limit = 0;
459                 while (*s0 >= '0' && *s0 <= '9')
460                 {
461                     limit = limit * 10 + (*s0 - '0');
462                     dst_term[j++] = *s0++;
463                 }
464                 if (limit > 20)
465                     limit = 20;
466                 while (--limit >= 0)
467                 {
468                     dst[i++] = '.';
469                     dst[i++] = '?';
470                 }
471             }
472             else
473             {
474                 dst[i++] = '.';
475                 dst[i++] = '*';
476             }
477         }
478         else if (*s0 == '*')
479         {
480             dst[i++] = '.';
481             dst[i++] = '*';
482             dst_term[j++] = *s0++;
483         }
484         else if (*s0 == '#')
485         {
486             dst[i++] = '.';
487             dst_term[j++] = *s0++;
488         }
489         else
490         {
491             const char *s1 = s0;
492             int q_map_match = 0;
493             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
494                                     &q_map_match);
495             if (space_split && **map == *CHR_SPACE)
496                 break;
497
498             /* add non-space char */
499             memcpy(dst_term+j, s1, s0 - s1);
500             j += (s0 - s1);
501             if (!q_map_match)
502             {
503                 while (s1 < s0)
504                 {
505                     if (strchr(REGEX_CHARS, *s1))
506                         dst[i++] = '\\';
507                     dst[i++] = *s1++;
508                 }
509             }
510             else
511             {
512                 char tmpbuf[80];
513                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
514                 
515                 strcpy(dst + i, map[0]);
516                 i += strlen(map[0]);
517             }
518         }
519     }
520     dst[i] = '\0';
521     dst_term[j++] = '\0';
522     *src = s0;
523     return i;
524 }
525
526 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
527 static int term_105(ZebraMaps zebra_maps, int reg_type,
528                     const char **src, char *dst, int space_split,
529                     char *dst_term, int right_truncate)
530 {
531     const char *s0;
532     const char **map;
533     int i = 0;
534     int j = 0;
535
536     if (!term_pre(zebra_maps, reg_type, src, "*!", "*!", !space_split))
537         return 0;
538     s0 = *src;
539     while (*s0)
540     {
541         if (*s0 == '*')
542         {
543             dst[i++] = '.';
544             dst[i++] = '*';
545             dst_term[j++] = *s0++;
546         }
547         else if (*s0 == '!')
548         {
549             dst[i++] = '.';
550             dst_term[j++] = *s0++;
551         }
552         else
553         {
554             const char *s1 = s0;
555             int q_map_match = 0;
556             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
557                                     &q_map_match);
558             if (space_split && **map == *CHR_SPACE)
559                 break;
560
561             /* add non-space char */
562             memcpy(dst_term+j, s1, s0 - s1);
563             j += (s0 - s1);
564             if (!q_map_match)
565             {
566                 while (s1 < s0)
567                 {
568                     if (strchr(REGEX_CHARS, *s1))
569                         dst[i++] = '\\';
570                     dst[i++] = *s1++;
571                 }
572             }
573             else
574             {
575                 char tmpbuf[80];
576                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
577                 
578                 strcpy(dst + i, map[0]);
579                 i += strlen(map[0]);
580             }
581         }
582     }
583     if (right_truncate)
584     {
585         dst[i++] = '.';
586         dst[i++] = '*';
587     }
588     dst[i] = '\0';
589     
590     dst_term[j++] = '\0';
591     *src = s0;
592     return i;
593 }
594
595
596 /* gen_regular_rel - generate regular expression from relation
597  *  val:     border value (inclusive)
598  *  islt:    1 if <=; 0 if >=.
599  */
600 static void gen_regular_rel(char *dst, int val, int islt)
601 {
602     int dst_p;
603     int w, d, i;
604     int pos = 0;
605     char numstr[20];
606
607     yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
608     if (val >= 0)
609     {
610         if (islt)
611             strcpy(dst, "(-[0-9]+|(");
612         else
613             strcpy(dst, "((");
614     } 
615     else
616     {
617         if (!islt)
618         {
619             strcpy(dst, "([0-9]+|-(");
620             dst_p = strlen(dst);
621             islt = 1;
622         }
623         else
624         {
625             strcpy(dst, "(-(");
626             islt = 0;
627         }
628         val = -val;
629     }
630     dst_p = strlen(dst);
631     sprintf(numstr, "%d", val);
632     for (w = strlen(numstr); --w >= 0; pos++)
633     {
634         d = numstr[w];
635         if (pos > 0)
636         {
637             if (islt)
638             {
639                 if (d == '0')
640                     continue;
641                 d--;
642             } 
643             else
644             {
645                 if (d == '9')
646                     continue;
647                 d++;
648             }
649         }
650         
651         strcpy(dst + dst_p, numstr);
652         dst_p = strlen(dst) - pos - 1;
653
654         if (islt)
655         {
656             if (d != '0')
657             {
658                 dst[dst_p++] = '[';
659                 dst[dst_p++] = '0';
660                 dst[dst_p++] = '-';
661                 dst[dst_p++] = d;
662                 dst[dst_p++] = ']';
663             }
664             else
665                 dst[dst_p++] = d;
666         }
667         else
668         {
669             if (d != '9')
670             { 
671                 dst[dst_p++] = '[';
672                 dst[dst_p++] = d;
673                 dst[dst_p++] = '-';
674                 dst[dst_p++] = '9';
675                 dst[dst_p++] = ']';
676             }
677             else
678                 dst[dst_p++] = d;
679         }
680         for (i = 0; i<pos; i++)
681         {
682             dst[dst_p++] = '[';
683             dst[dst_p++] = '0';
684             dst[dst_p++] = '-';
685             dst[dst_p++] = '9';
686             dst[dst_p++] = ']';
687         }
688         dst[dst_p++] = '|';
689     }
690     dst[dst_p] = '\0';
691     if (islt)
692     {
693         /* match everything less than 10^(pos-1) */
694         strcat(dst, "0*");
695         for (i = 1; i<pos; i++)
696             strcat(dst, "[0-9]?");
697     }
698     else
699     {
700         /* match everything greater than 10^pos */
701         for (i = 0; i <= pos; i++)
702             strcat(dst, "[0-9]");
703         strcat(dst, "[0-9]*");
704     }
705     strcat(dst, "))");
706 }
707
/* string_rel_add_char: copy one (possibly backslash-escaped) character.
 *  term_p:  in/out write position; advanced past what was written
 *  src:     NUL-terminated source string
 *  indx:    in/out read index into src; advanced past what was read
 * A backslash followed by another character is copied as a two-byte
 * unit.  A backslash that is the last byte of src is copied on its own,
 * so that *indx never moves past the terminating NUL. */
void string_rel_add_char(char **term_p, const char *src, int *indx)
{
    if (src[*indx] == '\\' && src[*indx + 1] != '\0')
        *(*term_p)++ = src[(*indx)++];
    *(*term_p)++ = src[(*indx)++];
}
714
715 /*
716  *   >  abc     ([b-].*|a[c-].*|ab[d-].*|abc.+)
717  *              ([^-a].*|a[^-b].*ab[^-c].*|abc.+)
718  *   >= abc     ([b-].*|a[c-].*|ab[c-].*)
719  *              ([^-a].*|a[^-b].*|ab[c-].*)
720  *   <  abc     ([-0].*|a[-a].*|ab[-b].*)
721  *              ([^a-].*|a[^b-].*|ab[^c-].*)
722  *   <= abc     ([-0].*|a[-a].*|ab[-b].*|abc)
723  *              ([^a-].*|a[^b-].*|ab[^c-].*|abc)
724  */
725 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
726                            const char **term_sub, char *term_dict,
727                            oid_value attributeSet,
728                            int reg_type, int space_split, char *term_dst,
729                            int *error_code)
730 {
731     AttrType relation;
732     int relation_value;
733     int i;
734     char *term_tmp = term_dict + strlen(term_dict);
735     char term_component[2*IT_MAX_WORD+20];
736
737     attr_init_APT(&relation, zapt, 2);
738     relation_value = attr_find(&relation, NULL);
739
740     *error_code = 0;
741     yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
742     switch (relation_value)
743     {
744     case 1:
745         if (!term_100(zh->reg->zebra_maps, reg_type,
746                       term_sub, term_component,
747                       space_split, term_dst))
748             return 0;
749         yaz_log(log_level_rpn, "Relation <");
750         
751         *term_tmp++ = '(';
752         for (i = 0; term_component[i]; )
753         {
754             int j = 0;
755
756             if (i)
757                 *term_tmp++ = '|';
758             while (j < i)
759                 string_rel_add_char(&term_tmp, term_component, &j);
760
761             *term_tmp++ = '[';
762
763             *term_tmp++ = '^';
764             string_rel_add_char(&term_tmp, term_component, &i);
765             *term_tmp++ = '-';
766
767             *term_tmp++ = ']';
768             *term_tmp++ = '.';
769             *term_tmp++ = '*';
770
771             if ((term_tmp - term_dict) > IT_MAX_WORD)
772                 break;
773         }
774         *term_tmp++ = ')';
775         *term_tmp = '\0';
776         break;
777     case 2:
778         if (!term_100(zh->reg->zebra_maps, reg_type,
779                       term_sub, term_component,
780                       space_split, term_dst))
781             return 0;
782         yaz_log(log_level_rpn, "Relation <=");
783
784         *term_tmp++ = '(';
785         for (i = 0; term_component[i]; )
786         {
787             int j = 0;
788
789             while (j < i)
790                 string_rel_add_char(&term_tmp, term_component, &j);
791             *term_tmp++ = '[';
792
793             *term_tmp++ = '^';
794             string_rel_add_char(&term_tmp, term_component, &i);
795             *term_tmp++ = '-';
796
797             *term_tmp++ = ']';
798             *term_tmp++ = '.';
799             *term_tmp++ = '*';
800
801             *term_tmp++ = '|';
802
803             if ((term_tmp - term_dict) > IT_MAX_WORD)
804                 break;
805         }
806         for (i = 0; term_component[i]; )
807             string_rel_add_char(&term_tmp, term_component, &i);
808         *term_tmp++ = ')';
809         *term_tmp = '\0';
810         break;
811     case 5:
812         if (!term_100 (zh->reg->zebra_maps, reg_type,
813                        term_sub, term_component, space_split, term_dst))
814             return 0;
815         yaz_log(log_level_rpn, "Relation >");
816
817         *term_tmp++ = '(';
818         for (i = 0; term_component[i];)
819         {
820             int j = 0;
821
822             while (j < i)
823                 string_rel_add_char(&term_tmp, term_component, &j);
824             *term_tmp++ = '[';
825             
826             *term_tmp++ = '^';
827             *term_tmp++ = '-';
828             string_rel_add_char(&term_tmp, term_component, &i);
829
830             *term_tmp++ = ']';
831             *term_tmp++ = '.';
832             *term_tmp++ = '*';
833
834             *term_tmp++ = '|';
835
836             if ((term_tmp - term_dict) > IT_MAX_WORD)
837                 break;
838         }
839         for (i = 0; term_component[i];)
840             string_rel_add_char(&term_tmp, term_component, &i);
841         *term_tmp++ = '.';
842         *term_tmp++ = '+';
843         *term_tmp++ = ')';
844         *term_tmp = '\0';
845         break;
846     case 4:
847         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
848                       term_component, space_split, term_dst))
849             return 0;
850         yaz_log(log_level_rpn, "Relation >=");
851
852         *term_tmp++ = '(';
853         for (i = 0; term_component[i];)
854         {
855             int j = 0;
856
857             if (i)
858                 *term_tmp++ = '|';
859             while (j < i)
860                 string_rel_add_char(&term_tmp, term_component, &j);
861             *term_tmp++ = '[';
862
863             if (term_component[i+1])
864             {
865                 *term_tmp++ = '^';
866                 *term_tmp++ = '-';
867                 string_rel_add_char(&term_tmp, term_component, &i);
868             }
869             else
870             {
871                 string_rel_add_char(&term_tmp, term_component, &i);
872                 *term_tmp++ = '-';
873             }
874             *term_tmp++ = ']';
875             *term_tmp++ = '.';
876             *term_tmp++ = '*';
877
878             if ((term_tmp - term_dict) > IT_MAX_WORD)
879                 break;
880         }
881         *term_tmp++ = ')';
882         *term_tmp = '\0';
883         break;
884     case 3:
885     case 102:
886     case -1:
887         if (!**term_sub)
888             return 1;
889         yaz_log(log_level_rpn, "Relation =");
890         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
891                       term_component, space_split, term_dst))
892             return 0;
893         strcat(term_tmp, "(");
894         strcat(term_tmp, term_component);
895         strcat(term_tmp, ")");
896         break;
897     case 103:
898         yaz_log(log_level_rpn, "Relation always matches");
899         /* skip to end of term (we don't care what it is) */
900         while (**term_sub != '\0')
901             (*term_sub)++;
902         break;
903     default:
904         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
905         return 0;
906     }
907     return 1;
908 }
909
910 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
911                              const char **term_sub, 
912                              oid_value attributeSet, NMEM stream,
913                              struct grep_info *grep_info,
914                              int reg_type, int complete_flag,
915                              int num_bases, char **basenames,
916                              char *term_dst,
917                              const char *xpath_use,
918                              struct ord_list **ol);
919
920 static ZEBRA_RES term_limits_APT(ZebraHandle zh,
921                                  Z_AttributesPlusTerm *zapt,
922                                  zint *hits_limit_value,
923                                  const char **term_ref_id_str,
924                                  NMEM nmem)
925 {
926     AttrType term_ref_id_attr;
927     AttrType hits_limit_attr;
928     int term_ref_id_int;
929  
930     attr_init_APT(&hits_limit_attr, zapt, 11);
931     *hits_limit_value  = attr_find(&hits_limit_attr, NULL);
932
933     attr_init_APT(&term_ref_id_attr, zapt, 10);
934     term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
935     if (term_ref_id_int >= 0)
936     {
937         char *res = nmem_malloc(nmem, 20);
938         sprintf(res, "%d", term_ref_id_int);
939         *term_ref_id_str = res;
940     }
941
942     /* no limit given ? */
943     if (*hits_limit_value == -1)
944     {
945         if (*term_ref_id_str)
946         {
947             /* use global if term_ref is present */
948             *hits_limit_value = zh->approx_limit;
949         }
950         else
951         {
952             /* no counting if term_ref is not present */
953             *hits_limit_value = 0;
954         }
955     }
956     else if (*hits_limit_value == 0)
957     {
958         /* 0 is the same as global limit */
959         *hits_limit_value = zh->approx_limit;
960     }
961     yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
962             *term_ref_id_str ? *term_ref_id_str : "none",
963             *hits_limit_value);
964     return ZEBRA_OK;
965 }
966
967 static ZEBRA_RES term_trunc(ZebraHandle zh,
968                             Z_AttributesPlusTerm *zapt,
969                             const char **term_sub, 
970                             oid_value attributeSet, NMEM stream,
971                             struct grep_info *grep_info,
972                             int reg_type, int complete_flag,
973                             int num_bases, char **basenames,
974                             char *term_dst,
975                             const char *rank_type, 
976                             const char *xpath_use,
977                             NMEM rset_nmem,
978                             RSET *rset,
979                             struct rset_key_control *kc)
980 {
981     ZEBRA_RES res;
982     struct ord_list *ol;
983     zint hits_limit_value;
984     const char *term_ref_id_str = 0;
985     *rset = 0;
986
987     term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
988     grep_info->isam_p_indx = 0;
989     res = string_term(zh, zapt, term_sub, attributeSet, stream, grep_info,
990                       reg_type, complete_flag, num_bases, basenames,
991                       term_dst, xpath_use, &ol);
992     if (res != ZEBRA_OK)
993         return res;
994     if (!*term_sub)  /* no more terms ? */
995         return res;
996     yaz_log(log_level_rpn, "term: %s", term_dst);
997     *rset = rset_trunc(zh, grep_info->isam_p_buf,
998                        grep_info->isam_p_indx, term_dst,
999                        strlen(term_dst), rank_type, 1 /* preserve pos */,
1000                        zapt->term->which, rset_nmem,
1001                        kc, kc->scope, ol, reg_type, hits_limit_value,
1002                        term_ref_id_str);
1003     if (!*rset)
1004         return ZEBRA_FAIL;
1005     return ZEBRA_OK;
1006 }
1007
1008 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1009                              const char **term_sub, 
1010                              oid_value attributeSet, NMEM stream,
1011                              struct grep_info *grep_info,
1012                              int reg_type, int complete_flag,
1013                              int num_bases, char **basenames,
1014                              char *term_dst,
1015                              const char *xpath_use,
1016                              struct ord_list **ol)
1017 {
1018     char term_dict[2*IT_MAX_WORD+4000];
1019     int j, r, base_no;
1020     AttrType truncation;
1021     int truncation_value;
1022     const char *termp;
1023     struct rpn_char_map_info rcmi;
1024     int space_split = complete_flag ? 0 : 1;
1025
1026     int bases_ok = 0;     /* no of databases with OK attribute */
1027
1028     *ol = ord_list_create(stream);
1029
1030     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1031     attr_init_APT(&truncation, zapt, 5);
1032     truncation_value = attr_find(&truncation, NULL);
1033     yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1034
1035     for (base_no = 0; base_no < num_bases; base_no++)
1036     {
1037         int ord = -1;
1038         int regex_range = 0;
1039         int max_pos, prefix_len = 0;
1040         int relation_error;
1041         char ord_buf[32];
1042         int ord_len, i;
1043
1044         termp = *term_sub; /* start of term for each database */
1045
1046         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1047         {
1048             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1049                            basenames[base_no]);
1050             return ZEBRA_FAIL;
1051         }
1052         
1053         if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use,
1054                               attributeSet, &ord) != ZEBRA_OK)
1055             continue;
1056
1057         bases_ok++;
1058
1059         *ol = ord_list_append(stream, *ol, ord);
1060         ord_len = key_SU_encode (ord, ord_buf);
1061         
1062         term_dict[prefix_len++] = '(';
1063         for (i = 0; i<ord_len; i++)
1064         {
1065             term_dict[prefix_len++] = 1;  /* our internal regexp escape char */
1066             term_dict[prefix_len++] = ord_buf[i];
1067         }
1068         term_dict[prefix_len++] = ')';
1069         term_dict[prefix_len] = '\0';
1070         j = prefix_len;
1071         switch (truncation_value)
1072         {
1073         case -1:         /* not specified */
1074         case 100:        /* do not truncate */
1075             if (!string_relation(zh, zapt, &termp, term_dict,
1076                                  attributeSet,
1077                                  reg_type, space_split, term_dst,
1078                                  &relation_error))
1079             {
1080                 if (relation_error)
1081                 {
1082                     zebra_setError(zh, relation_error, 0);
1083                     return ZEBRA_FAIL;
1084                 }
1085                 *term_sub = 0;
1086                 return ZEBRA_OK;
1087             }
1088             break;
1089         case 1:          /* right truncation */
1090             term_dict[j++] = '(';
1091             if (!term_100(zh->reg->zebra_maps, reg_type,
1092                           &termp, term_dict + j, space_split, term_dst))
1093             {
1094                 *term_sub = 0;
1095                 return ZEBRA_OK;
1096             }
1097             strcat(term_dict, ".*)");
1098             break;
1099         case 2:          /* keft truncation */
1100             term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1101             if (!term_100(zh->reg->zebra_maps, reg_type,
1102                           &termp, term_dict + j, space_split, term_dst))
1103             {
1104                 *term_sub = 0;
1105                 return ZEBRA_OK;
1106             }
1107             strcat(term_dict, ")");
1108             break;
1109         case 3:          /* left&right truncation */
1110             term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1111             if (!term_100(zh->reg->zebra_maps, reg_type,
1112                           &termp, term_dict + j, space_split, term_dst))
1113             {
1114                 *term_sub = 0;
1115                 return ZEBRA_OK;
1116             }
1117             strcat(term_dict, ".*)");
1118             break;
1119         case 101:        /* process # in term */
1120             term_dict[j++] = '(';
1121             if (!term_101(zh->reg->zebra_maps, reg_type,
1122                           &termp, term_dict + j, space_split, term_dst))
1123             {
1124                 *term_sub = 0;
1125                 return ZEBRA_OK;
1126             }
1127             strcat(term_dict, ")");
1128             break;
1129         case 102:        /* Regexp-1 */
1130             term_dict[j++] = '(';
1131             if (!term_102(zh->reg->zebra_maps, reg_type,
1132                           &termp, term_dict + j, space_split, term_dst))
1133             {
1134                 *term_sub = 0;
1135                 return ZEBRA_OK;
1136             }
1137             strcat(term_dict, ")");
1138             break;
1139         case 103:       /* Regexp-2 */
1140             regex_range = 1;
1141             term_dict[j++] = '(';
1142             if (!term_103(zh->reg->zebra_maps, reg_type,
1143                           &termp, term_dict + j, &regex_range,
1144                           space_split, term_dst))
1145             {
1146                 *term_sub = 0;
1147                 return ZEBRA_OK;
1148             }
1149             strcat(term_dict, ")");
1150             break;
1151         case 104:        /* process # and ! in term */
1152             term_dict[j++] = '(';
1153             if (!term_104(zh->reg->zebra_maps, reg_type,
1154                           &termp, term_dict + j, space_split, term_dst))
1155             {
1156                 *term_sub = 0;
1157                 return ZEBRA_OK;
1158             }
1159             strcat(term_dict, ")");
1160             break;
1161         case 105:        /* process * and ! in term */
1162             term_dict[j++] = '(';
1163             if (!term_105(zh->reg->zebra_maps, reg_type,
1164                           &termp, term_dict + j, space_split, term_dst, 1))
1165             {
1166                 *term_sub = 0;
1167                 return ZEBRA_OK;
1168             }
1169             strcat(term_dict, ")");
1170             break;
1171         case 106:        /* process * and ! in term */
1172             term_dict[j++] = '(';
1173             if (!term_105(zh->reg->zebra_maps, reg_type,
1174                           &termp, term_dict + j, space_split, term_dst, 0))
1175             {
1176                 *term_sub = 0;
1177                 return ZEBRA_OK;
1178             }
1179             strcat(term_dict, ")");
1180             break;
1181         default:
1182             zebra_setError_zint(zh,
1183                                 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1184                                 truncation_value);
1185             return ZEBRA_FAIL;
1186         }
1187         if (1)
1188         {
1189             char buf[80];
1190             const char *input = term_dict + prefix_len;
1191             esc_str(buf, sizeof(buf), input, strlen(input));
1192         }
1193         yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_dict+prefix_len);
1194         r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
1195                              grep_info, &max_pos, 
1196                              ord_len /* number of "exact" chars */,
1197                              grep_handle);
1198         if (r)
1199             yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1200     }
1201     if (!bases_ok)
1202         return ZEBRA_FAIL;
1203     *term_sub = termp;
1204     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1205     return ZEBRA_OK;
1206 }
1207
1208
1209 /* convert APT search term to UTF8 */
1210 static ZEBRA_RES zapt_term_to_utf8(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1211                                    char *termz)
1212 {
1213     size_t sizez;
1214     Z_Term *term = zapt->term;
1215
1216     switch (term->which)
1217     {
1218     case Z_Term_general:
1219         if (zh->iconv_to_utf8 != 0)
1220         {
1221             char *inbuf = (char *) term->u.general->buf;
1222             size_t inleft = term->u.general->len;
1223             char *outbuf = termz;
1224             size_t outleft = IT_MAX_WORD-1;
1225             size_t ret;
1226
1227             ret = yaz_iconv(zh->iconv_to_utf8, &inbuf, &inleft,
1228                         &outbuf, &outleft);
1229             if (ret == (size_t)(-1))
1230             {
1231                 ret = yaz_iconv(zh->iconv_to_utf8, 0, 0, 0, 0);
1232                 zebra_setError(
1233                     zh, 
1234                     YAZ_BIB1_QUERY_TERM_INCLUDES_CHARS_THAT_DO_NOT_TRANSLATE_INTO_,
1235                     0);
1236                 return ZEBRA_FAIL;
1237             }
1238             *outbuf = 0;
1239         }
1240         else
1241         {
1242             sizez = term->u.general->len;
1243             if (sizez > IT_MAX_WORD-1)
1244                 sizez = IT_MAX_WORD-1;
1245             memcpy (termz, term->u.general->buf, sizez);
1246             termz[sizez] = '\0';
1247         }
1248         break;
1249     case Z_Term_characterString:
1250         sizez = strlen(term->u.characterString);
1251         if (sizez > IT_MAX_WORD-1)
1252             sizez = IT_MAX_WORD-1;
1253         memcpy (termz, term->u.characterString, sizez);
1254         termz[sizez] = '\0';
1255         break;
1256     default:
1257         zebra_setError(zh, YAZ_BIB1_UNSUPP_CODED_VALUE_FOR_TERM, 0);
1258         return ZEBRA_FAIL;
1259     }
1260     return ZEBRA_OK;
1261 }
1262
1263 /* convert APT SCAN term to internal cmap */
1264 static ZEBRA_RES trans_scan_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1265                                  char *termz, int reg_type)
1266 {
1267     char termz0[IT_MAX_WORD];
1268
1269     if (zapt_term_to_utf8(zh, zapt, termz0) == ZEBRA_FAIL)
1270         return ZEBRA_FAIL;    /* error */
1271     else
1272     {
1273         const char **map;
1274         const char *cp = (const char *) termz0;
1275         const char *cp_end = cp + strlen(cp);
1276         const char *src;
1277         int i = 0;
1278         const char *space_map = NULL;
1279         int len;
1280             
1281         while ((len = (cp_end - cp)) > 0)
1282         {
1283             map = zebra_maps_input(zh->reg->zebra_maps, reg_type, &cp, len, 0);
1284             if (**map == *CHR_SPACE)
1285                 space_map = *map;
1286             else
1287             {
1288                 if (i && space_map)
1289                     for (src = space_map; *src; src++)
1290                         termz[i++] = *src;
1291                 space_map = NULL;
1292                 for (src = *map; *src; src++)
1293                     termz[i++] = *src;
1294             }
1295         }
1296         termz[i] = '\0';
1297     }
1298     return ZEBRA_OK;
1299 }
1300
1301 static void grep_info_delete(struct grep_info *grep_info)
1302 {
1303 #ifdef TERM_COUNT
1304     xfree(grep_info->term_no);
1305 #endif
1306     xfree(grep_info->isam_p_buf);
1307 }
1308
1309 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1310                                    Z_AttributesPlusTerm *zapt,
1311                                    struct grep_info *grep_info,
1312                                    int reg_type)
1313 {
1314     AttrType termset;
1315     int termset_value_numeric;
1316     const char *termset_value_string;
1317
1318 #ifdef TERM_COUNT
1319     grep_info->term_no = 0;
1320 #endif
1321     grep_info->isam_p_size = 0;
1322     grep_info->isam_p_buf = NULL;
1323     grep_info->zh = zh;
1324     grep_info->reg_type = reg_type;
1325     grep_info->termset = 0;
1326     if (!zapt)
1327         return ZEBRA_OK;
1328     attr_init_APT(&termset, zapt, 8);
1329     termset_value_numeric =
1330         attr_find_ex(&termset, NULL, &termset_value_string);
1331     if (termset_value_numeric != -1)
1332     {
1333 #if TERMSET_DISABLE
1334         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1335         return ZEBRA_FAIL;
1336 #else
1337         char resname[32];
1338         const char *termset_name = 0;
1339         if (termset_value_numeric != -2)
1340         {
1341     
1342             sprintf(resname, "%d", termset_value_numeric);
1343             termset_name = resname;
1344         }
1345         else
1346             termset_name = termset_value_string;
1347         yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1348         grep_info->termset = resultSetAdd(zh, termset_name, 1);
1349         if (!grep_info->termset)
1350         {
1351             zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1352             return ZEBRA_FAIL;
1353         }
1354 #endif
1355     }
1356     return ZEBRA_OK;
1357 }
1358                                
1359 /**
1360   \brief Create result set(s) for list of terms
1361   \param zh Zebra Handle
1362   \param zapt Attributes Plust Term (RPN leaf)
1363   \param termz term as used in query but converted to UTF-8
1364   \param attributeSet default attribute set
1365   \param stream memory for result
1366   \param reg_type register type ('w', 'p',..)
1367   \param complete_flag whether it's phrases or not
1368   \param rank_type term flags for ranking
1369   \param xpath_use use attribute for X-Path (-1 for no X-path)
1370   \param num_bases number of databases
1371   \param basenames array of databases
1372   \param rset_nmem memory for result sets
1373   \param result_sets output result set for each term in list (output)
1374   \param num_result_sets number of output result sets
1375   \param kc rset key control to be used for created result sets
1376 */
1377 static ZEBRA_RES term_list_trunc(ZebraHandle zh,
1378                                  Z_AttributesPlusTerm *zapt,
1379                                  const char *termz,
1380                                  oid_value attributeSet,
1381                                  NMEM stream,
1382                                  int reg_type, int complete_flag,
1383                                  const char *rank_type,
1384                                  const char *xpath_use,
1385                                  int num_bases, char **basenames, 
1386                                  NMEM rset_nmem,
1387                                  RSET **result_sets, int *num_result_sets,
1388                                  struct rset_key_control *kc)
1389 {
1390     char term_dst[IT_MAX_WORD+1];
1391     struct grep_info grep_info;
1392     const char *termp = termz;
1393     int alloc_sets = 0;
1394
1395     *num_result_sets = 0;
1396     *term_dst = 0;
1397     if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1398         return ZEBRA_FAIL;
1399     while(1)
1400     { 
1401         ZEBRA_RES res;
1402
1403         if (alloc_sets == *num_result_sets)
1404         {
1405             int add = 10;
1406             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1407                                               sizeof(*rnew));
1408             if (alloc_sets)
1409                 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1410             alloc_sets = alloc_sets + add;
1411             *result_sets = rnew;
1412         }
1413         res = term_trunc(zh, zapt, &termp, attributeSet,
1414                          stream, &grep_info,
1415                          reg_type, complete_flag,
1416                          num_bases, basenames,
1417                          term_dst, rank_type,
1418                          xpath_use, rset_nmem,
1419                          &(*result_sets)[*num_result_sets],
1420                          kc);
1421         if (res != ZEBRA_OK)
1422         {
1423             int i;
1424             for (i = 0; i < *num_result_sets; i++)
1425                 rset_delete((*result_sets)[i]);
1426             grep_info_delete (&grep_info);
1427             return res;
1428         }
1429         if ((*result_sets)[*num_result_sets] == 0)
1430             break;
1431         (*num_result_sets)++;
1432
1433         if (!*termp)
1434             break;
1435     }
1436     grep_info_delete(&grep_info);
1437     return ZEBRA_OK;
1438 }
1439
1440 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1441                                        Z_AttributesPlusTerm *zapt,
1442                                        const char *termz_org,
1443                                        oid_value attributeSet,
1444                                        NMEM stream,
1445                                        int reg_type, int complete_flag,
1446                                        const char *rank_type,
1447                                        const char *xpath_use,
1448                                        int num_bases, char **basenames, 
1449                                        NMEM rset_nmem,
1450                                        RSET *rset,
1451                                        struct rset_key_control *kc)
1452 {
1453     RSET *result_sets = 0;
1454     int num_result_sets = 0;
1455     ZEBRA_RES res =
1456         term_list_trunc(zh, zapt, termz_org, attributeSet,
1457                         stream, reg_type, complete_flag,
1458                         rank_type, xpath_use,
1459                         num_bases, basenames,
1460                         rset_nmem,
1461                         &result_sets, &num_result_sets, kc);
1462     if (res != ZEBRA_OK)
1463         return res;
1464     if (num_result_sets == 0)
1465         *rset = rset_create_null(rset_nmem, kc, 0); 
1466     else if (num_result_sets == 1)
1467         *rset = result_sets[0];
1468     else
1469         *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1470                                  num_result_sets, result_sets,
1471                                  1 /* ordered */, 0 /* exclusion */,
1472                                  3 /* relation */, 1 /* distance */);
1473     if (!*rset)
1474         return ZEBRA_FAIL;
1475     return ZEBRA_OK;
1476 }
1477
1478 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1479                                         Z_AttributesPlusTerm *zapt,
1480                                         const char *termz_org,
1481                                         oid_value attributeSet,
1482                                         NMEM stream,
1483                                         int reg_type, int complete_flag,
1484                                         const char *rank_type,
1485                                         const char *xpath_use,
1486                                         int num_bases, char **basenames,
1487                                         NMEM rset_nmem,
1488                                         RSET *rset,
1489                                         struct rset_key_control *kc)
1490 {
1491     RSET *result_sets = 0;
1492     int num_result_sets = 0;
1493     ZEBRA_RES res =
1494         term_list_trunc(zh, zapt, termz_org, attributeSet,
1495                         stream, reg_type, complete_flag,
1496                         rank_type, xpath_use,
1497                         num_bases, basenames,
1498                         rset_nmem,
1499                         &result_sets, &num_result_sets, kc);
1500     if (res != ZEBRA_OK)
1501         return res;
1502     if (num_result_sets == 0)
1503         *rset = rset_create_null(rset_nmem, kc, 0); 
1504     else if (num_result_sets == 1)
1505         *rset = result_sets[0];
1506     else
1507         *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1508                                num_result_sets, result_sets);
1509     if (!*rset)
1510         return ZEBRA_FAIL;
1511     return ZEBRA_OK;
1512 }
1513
1514 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1515                                          Z_AttributesPlusTerm *zapt,
1516                                          const char *termz_org,
1517                                          oid_value attributeSet,
1518                                          NMEM stream,
1519                                          int reg_type, int complete_flag,
1520                                          const char *rank_type, 
1521                                          const char *xpath_use,
1522                                          int num_bases, char **basenames,
1523                                          NMEM rset_nmem,
1524                                          RSET *rset,
1525                                          struct rset_key_control *kc)
1526 {
1527     RSET *result_sets = 0;
1528     int num_result_sets = 0;
1529     ZEBRA_RES res =
1530         term_list_trunc(zh, zapt, termz_org, attributeSet,
1531                         stream, reg_type, complete_flag,
1532                         rank_type, xpath_use,
1533                         num_bases, basenames,
1534                         rset_nmem,
1535                         &result_sets, &num_result_sets,
1536                         kc);
1537     if (res != ZEBRA_OK)
1538         return res;
1539     if (num_result_sets == 0)
1540         *rset = rset_create_null(rset_nmem, kc, 0); 
1541     else if (num_result_sets == 1)
1542         *rset = result_sets[0];
1543     else
1544         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1545                                 num_result_sets, result_sets);
1546     if (!*rset)
1547         return ZEBRA_FAIL;
1548     return ZEBRA_OK;
1549 }
1550
1551 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1552                             const char **term_sub,
1553                             char *term_dict,
1554                             oid_value attributeSet,
1555                             struct grep_info *grep_info,
1556                             int *max_pos,
1557                             int reg_type,
1558                             char *term_dst,
1559                             int *error_code)
1560 {
1561     AttrType relation;
1562     int relation_value;
1563     int term_value;
1564     int r;
1565     char *term_tmp = term_dict + strlen(term_dict);
1566
1567     *error_code = 0;
1568     attr_init_APT(&relation, zapt, 2);
1569     relation_value = attr_find(&relation, NULL);
1570
1571     yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1572
1573     switch (relation_value)
1574     {
1575     case 1:
1576         yaz_log(log_level_rpn, "Relation <");
1577         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1578                       term_dst))
1579             return 0;
1580         term_value = atoi (term_tmp);
1581         gen_regular_rel(term_tmp, term_value-1, 1);
1582         break;
1583     case 2:
1584         yaz_log(log_level_rpn, "Relation <=");
1585         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1586                       term_dst))
1587             return 0;
1588         term_value = atoi (term_tmp);
1589         gen_regular_rel(term_tmp, term_value, 1);
1590         break;
1591     case 4:
1592         yaz_log(log_level_rpn, "Relation >=");
1593         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1594                       term_dst))
1595             return 0;
1596         term_value = atoi (term_tmp);
1597         gen_regular_rel(term_tmp, term_value, 0);
1598         break;
1599     case 5:
1600         yaz_log(log_level_rpn, "Relation >");
1601         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1602                       term_dst))
1603             return 0;
1604         term_value = atoi (term_tmp);
1605         gen_regular_rel(term_tmp, term_value+1, 0);
1606         break;
1607     case -1:
1608     case 3:
1609         yaz_log(log_level_rpn, "Relation =");
1610         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1611                       term_dst))
1612             return 0;
1613         term_value = atoi (term_tmp);
1614         sprintf(term_tmp, "(0*%d)", term_value);
1615         break;
1616     case 103:
1617         /* term_tmp untouched.. */
1618         while (**term_sub != '\0')
1619             (*term_sub)++;
1620         break;
1621     default:
1622         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1623         return 0;
1624     }
1625     yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_tmp);
1626     r = dict_lookup_grep(zh->reg->dict, term_dict, 0, grep_info, max_pos,
1627                           0, grep_handle);
1628     if (r)
1629         yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1630     yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1631     return 1;
1632 }
1633
1634 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1635                               const char **term_sub, 
1636                               oid_value attributeSet, NMEM stream,
1637                               struct grep_info *grep_info,
1638                               int reg_type, int complete_flag,
1639                               int num_bases, char **basenames,
1640                               char *term_dst, 
1641                               const char *xpath_use,
1642                               struct ord_list **ol)
1643 {
1644     char term_dict[2*IT_MAX_WORD+2];
1645     int base_no;
1646     const char *termp;
1647     struct rpn_char_map_info rcmi;
1648
1649     int bases_ok = 0;     /* no of databases with OK attribute */
1650
1651     *ol = ord_list_create(stream);
1652
1653     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1654
1655     for (base_no = 0; base_no < num_bases; base_no++)
1656     {
1657         int max_pos, prefix_len = 0;
1658         int relation_error = 0;
1659         int ord, ord_len, i;
1660         char ord_buf[32];
1661
1662         termp = *term_sub;
1663
1664         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1665         {
1666             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1667                            basenames[base_no]);
1668             return ZEBRA_FAIL;
1669         }
1670
1671         if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use,
1672                               attributeSet, &ord) != ZEBRA_OK)
1673             continue;
1674         bases_ok++;
1675
1676         *ol = ord_list_append(stream, *ol, ord);
1677
1678         ord_len = key_SU_encode (ord, ord_buf);
1679
1680         term_dict[prefix_len++] = '(';
1681         for (i = 0; i < ord_len; i++)
1682         {
1683             term_dict[prefix_len++] = 1;
1684             term_dict[prefix_len++] = ord_buf[i];
1685         }
1686         term_dict[prefix_len++] = ')';
1687         term_dict[prefix_len] = '\0';
1688
1689         if (!numeric_relation(zh, zapt, &termp, term_dict,
1690                               attributeSet, grep_info, &max_pos, reg_type,
1691                               term_dst, &relation_error))
1692         {
1693             if (relation_error)
1694             {
1695                 zebra_setError(zh, relation_error, 0);
1696                 return ZEBRA_FAIL;
1697             }
1698             *term_sub = 0;
1699             return ZEBRA_OK;
1700         }
1701     }
1702     if (!bases_ok)
1703         return ZEBRA_FAIL;
1704     *term_sub = termp;
1705     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1706     return ZEBRA_OK;
1707 }
1708
1709                                  
1710 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1711                                         Z_AttributesPlusTerm *zapt,
1712                                         const char *termz,
1713                                         oid_value attributeSet,
1714                                         NMEM stream,
1715                                         int reg_type, int complete_flag,
1716                                         const char *rank_type, 
1717                                         const char *xpath_use,
1718                                         int num_bases, char **basenames,
1719                                         NMEM rset_nmem,
1720                                         RSET *rset,
1721                                         struct rset_key_control *kc)
1722 {
1723     char term_dst[IT_MAX_WORD+1];
1724     const char *termp = termz;
1725     RSET *result_sets = 0;
1726     int num_result_sets = 0;
1727     ZEBRA_RES res;
1728     struct grep_info grep_info;
1729     int alloc_sets = 0;
1730     zint hits_limit_value;
1731     const char *term_ref_id_str = 0;
1732
1733     term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
1734
1735     yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1736     if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1737         return ZEBRA_FAIL;
1738     while (1)
1739     { 
1740         struct ord_list *ol;
1741         if (alloc_sets == num_result_sets)
1742         {
1743             int add = 10;
1744             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1745                                               sizeof(*rnew));
1746             if (alloc_sets)
1747                 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1748             alloc_sets = alloc_sets + add;
1749             result_sets = rnew;
1750         }
1751         yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1752         grep_info.isam_p_indx = 0;
1753         res = numeric_term(zh, zapt, &termp, attributeSet, stream, &grep_info,
1754                            reg_type, complete_flag, num_bases, basenames,
1755                            term_dst, xpath_use, &ol);
1756         if (res == ZEBRA_FAIL || termp == 0)
1757             break;
1758         yaz_log(YLOG_DEBUG, "term: %s", term_dst);
1759         result_sets[num_result_sets] =
1760             rset_trunc(zh, grep_info.isam_p_buf,
1761                        grep_info.isam_p_indx, term_dst,
1762                        strlen(term_dst), rank_type,
1763                        0 /* preserve position */,
1764                        zapt->term->which, rset_nmem, 
1765                        kc, kc->scope, ol, reg_type,
1766                        hits_limit_value,
1767                        term_ref_id_str);
1768         if (!result_sets[num_result_sets])
1769             break;
1770         num_result_sets++;
1771         if (!*termp)
1772             break;
1773     }
1774     grep_info_delete(&grep_info);
1775
1776     if (res != ZEBRA_OK)
1777         return res;
1778     if (num_result_sets == 0)
1779         *rset = rset_create_null(rset_nmem, kc, 0);
1780     else if (num_result_sets == 1)
1781         *rset = result_sets[0];
1782     else
1783         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1784                                 num_result_sets, result_sets);
1785     if (!*rset)
1786         return ZEBRA_FAIL;
1787     return ZEBRA_OK;
1788 }
1789
1790 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1791                                       Z_AttributesPlusTerm *zapt,
1792                                       const char *termz,
1793                                       oid_value attributeSet,
1794                                       NMEM stream,
1795                                       const char *rank_type, NMEM rset_nmem,
1796                                       RSET *rset,
1797                                       struct rset_key_control *kc)
1798 {
1799     RSFD rsfd;
1800     struct it_key key;
1801     int sys;
1802     *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1803                              res_get (zh->res, "setTmpDir"),0 );
1804     rsfd = rset_open(*rset, RSETF_WRITE);
1805     
1806     sys = atoi(termz);
1807     if (sys <= 0)
1808         sys = 1;
1809     key.mem[0] = sys;
1810     key.mem[1] = 1;
1811     key.len = 2;
1812     rset_write (rsfd, &key);
1813     rset_close (rsfd);
1814     return ZEBRA_OK;
1815 }
1816
1817 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1818                                oid_value attributeSet, NMEM stream,
1819                                Z_SortKeySpecList *sort_sequence,
1820                                const char *rank_type,
1821                                NMEM rset_nmem,
1822                                RSET *rset,
1823                                struct rset_key_control *kc)
1824 {
1825     int i;
1826     int sort_relation_value;
1827     AttrType sort_relation_type;
1828     Z_SortKeySpec *sks;
1829     Z_SortKey *sk;
1830     int oid[OID_SIZE];
1831     oident oe;
1832     char termz[20];
1833     
1834     attr_init_APT(&sort_relation_type, zapt, 7);
1835     sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
1836
1837     if (!sort_sequence->specs)
1838     {
1839         sort_sequence->num_specs = 10;
1840         sort_sequence->specs = (Z_SortKeySpec **)
1841             nmem_malloc(stream, sort_sequence->num_specs *
1842                          sizeof(*sort_sequence->specs));
1843         for (i = 0; i<sort_sequence->num_specs; i++)
1844             sort_sequence->specs[i] = 0;
1845     }
1846     if (zapt->term->which != Z_Term_general)
1847         i = 0;
1848     else
1849         i = atoi_n ((char *) zapt->term->u.general->buf,
1850                     zapt->term->u.general->len);
1851     if (i >= sort_sequence->num_specs)
1852         i = 0;
1853     sprintf(termz, "%d", i);
1854
1855     oe.proto = PROTO_Z3950;
1856     oe.oclass = CLASS_ATTSET;
1857     oe.value = attributeSet;
1858     if (!oid_ent_to_oid (&oe, oid))
1859         return ZEBRA_FAIL;
1860
1861     sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1862     sks->sortElement = (Z_SortElement *)
1863         nmem_malloc(stream, sizeof(*sks->sortElement));
1864     sks->sortElement->which = Z_SortElement_generic;
1865     sk = sks->sortElement->u.generic = (Z_SortKey *)
1866         nmem_malloc(stream, sizeof(*sk));
1867     sk->which = Z_SortKey_sortAttributes;
1868     sk->u.sortAttributes = (Z_SortAttributes *)
1869         nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
1870
1871     sk->u.sortAttributes->id = oid;
1872     sk->u.sortAttributes->list = zapt->attributes;
1873
1874     sks->sortRelation = (int *)
1875         nmem_malloc(stream, sizeof(*sks->sortRelation));
1876     if (sort_relation_value == 1)
1877         *sks->sortRelation = Z_SortKeySpec_ascending;
1878     else if (sort_relation_value == 2)
1879         *sks->sortRelation = Z_SortKeySpec_descending;
1880     else 
1881         *sks->sortRelation = Z_SortKeySpec_ascending;
1882
1883     sks->caseSensitivity = (int *)
1884         nmem_malloc(stream, sizeof(*sks->caseSensitivity));
1885     *sks->caseSensitivity = 0;
1886
1887     sks->which = Z_SortKeySpec_null;
1888     sks->u.null = odr_nullval ();
1889     sort_sequence->specs[i] = sks;
1890     *rset = rset_create_null(rset_nmem, kc, 0);
1891     return ZEBRA_OK;
1892 }
1893
1894
1895 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1896                            oid_value attributeSet,
1897                            struct xpath_location_step *xpath, int max,
1898                            NMEM mem)
1899 {
1900     oid_value curAttributeSet = attributeSet;
1901     AttrType use;
1902     const char *use_string = 0;
1903     
1904     attr_init_APT(&use, zapt, 1);
1905     attr_find_ex(&use, &curAttributeSet, &use_string);
1906
1907     if (!use_string || *use_string != '/')
1908         return -1;
1909
1910     return zebra_parse_xpath_str(use_string, xpath, max, mem);
1911 }
1912  
1913                
1914
1915 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
1916                         int reg_type, const char *term, 
1917                         const char *xpath_use,
1918                         NMEM rset_nmem,
1919                         struct rset_key_control *kc)
1920 {
1921     RSET rset;
1922     struct grep_info grep_info;
1923     char term_dict[2048];
1924     char ord_buf[32];
1925     int prefix_len = 0;
1926     int ord = zebraExplain_lookup_attr_str(zh->reg->zei, 
1927                                            zinfo_index_category_index,
1928                                            reg_type,
1929                                            xpath_use);
1930     int ord_len, i, r, max_pos;
1931     int term_type = Z_Term_characterString;
1932     const char *flags = "void";
1933
1934     if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, '0') == ZEBRA_FAIL)
1935         return rset_create_null(rset_nmem, kc, 0);
1936     
1937     if (ord < 0)
1938         return rset_create_null(rset_nmem, kc, 0);
1939     if (prefix_len)
1940         term_dict[prefix_len++] = '|';
1941     else
1942         term_dict[prefix_len++] = '(';
1943     
1944     ord_len = key_SU_encode (ord, ord_buf);
1945     for (i = 0; i<ord_len; i++)
1946     {
1947         term_dict[prefix_len++] = 1;
1948         term_dict[prefix_len++] = ord_buf[i];
1949     }
1950     term_dict[prefix_len++] = ')';
1951     strcpy(term_dict+prefix_len, term);
1952     
1953     grep_info.isam_p_indx = 0;
1954     r = dict_lookup_grep(zh->reg->dict, term_dict, 0,
1955                           &grep_info, &max_pos, 0, grep_handle);
1956     yaz_log(YLOG_DEBUG, "%s %d positions", term,
1957              grep_info.isam_p_indx);
1958     rset = rset_trunc(zh, grep_info.isam_p_buf,
1959                       grep_info.isam_p_indx, term, strlen(term),
1960                       flags, 1, term_type,rset_nmem,
1961                       kc, kc->scope, 0, reg_type, 0 /* hits_limit */,
1962                       0 /* term_ref_id_str */);
1963     grep_info_delete(&grep_info);
1964     return rset;
1965 }
1966
1967 static
1968 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
1969                            int num_bases, char **basenames,
1970                            NMEM stream, const char *rank_type, RSET rset,
1971                            int xpath_len, struct xpath_location_step *xpath,
1972                            NMEM rset_nmem,
1973                            RSET *rset_out,
1974                            struct rset_key_control *kc)
1975 {
1976     int base_no;
1977     int i;
1978     int always_matches = rset ? 0 : 1;
1979
1980     if (xpath_len < 0)
1981     {
1982         *rset_out = rset;
1983         return ZEBRA_OK;
1984     }
1985
1986     yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
1987     for (i = 0; i<xpath_len; i++)
1988     {
1989         yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
1990
1991     }
1992
1993     /*
1994       //a    ->    a/.*
1995       //a/b  ->    b/a/.*
1996       /a     ->    a/
1997       /a/b   ->    b/a/
1998
1999       /      ->    none
2000
2001    a[@attr = value]/b[@other = othervalue]
2002
2003  /e/@a val      range(e/,range(@a,freetext(w,1015,val),@a),e/)
2004  /a/b val       range(b/a/,freetext(w,1016,val),b/a/)
2005  /a/b/@c val    range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2006  /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2007  /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2008  /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
2009       
2010     */
2011
2012     dict_grep_cmap (zh->reg->dict, 0, 0);
2013
2014     for (base_no = 0; base_no < num_bases; base_no++)
2015     {
2016         int level = xpath_len;
2017         int first_path = 1;
2018         
2019         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2020         {
2021             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2022                            basenames[base_no]);
2023             *rset_out = rset;
2024             return ZEBRA_FAIL;
2025         }
2026         while (--level >= 0)
2027         {
2028             WRBUF xpath_rev = wrbuf_alloc();
2029             int i;
2030             RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
2031
2032             for (i = level; i >= 1; --i)
2033             {
2034                 const char *cp = xpath[i].part;
2035                 if (*cp)
2036                 {
2037                     for (; *cp; cp++)
2038                     {
2039                         if (*cp == '*')
2040                             wrbuf_puts(xpath_rev, "[^/]*");
2041                         else if (*cp == ' ')
2042                             wrbuf_puts(xpath_rev, "\001 ");
2043                         else
2044                             wrbuf_putc(xpath_rev, *cp);
2045
2046                         /* wrbuf_putc does not null-terminate , but
2047                            wrbuf_puts below ensures it does.. so xpath_rev
2048                            is OK iff length is > 0 */
2049                     }
2050                     wrbuf_puts(xpath_rev, "/");
2051                 }
2052                 else if (i == 1)  /* // case */
2053                     wrbuf_puts(xpath_rev, ".*");
2054             }
2055             if (xpath[level].predicate &&
2056                 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2057                 xpath[level].predicate->u.relation.name[0])
2058             {
2059                 WRBUF wbuf = wrbuf_alloc();
2060                 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2061                 if (xpath[level].predicate->u.relation.value)
2062                 {
2063                     const char *cp = xpath[level].predicate->u.relation.value;
2064                     wrbuf_putc(wbuf, '=');
2065                     
2066                     while (*cp)
2067                     {
2068                         if (strchr(REGEX_CHARS, *cp))
2069                             wrbuf_putc(wbuf, '\\');
2070                         wrbuf_putc(wbuf, *cp);
2071                         cp++;
2072                     }
2073                 }
2074                 wrbuf_puts(wbuf, "");
2075                 rset_attr = xpath_trunc(
2076                     zh, stream, '0', wrbuf_buf(wbuf), ZEBRA_XPATH_ATTR_NAME, 
2077                     rset_nmem, kc);
2078                 wrbuf_free(wbuf, 1);
2079             } 
2080             else 
2081             {
2082                 if (!first_path)
2083                 {
2084                     wrbuf_free(xpath_rev, 1);
2085                     continue;
2086                 }
2087             }
2088             yaz_log(log_level_rpn, "xpath_rev (%d) = %.*s", level, 
2089                     wrbuf_len(xpath_rev), wrbuf_buf(xpath_rev));
2090             if (wrbuf_len(xpath_rev))
2091             {
2092                 rset_start_tag = xpath_trunc(zh, stream, '0', 
2093                                              wrbuf_buf(xpath_rev),
2094                                              ZEBRA_XPATH_ELM_BEGIN, 
2095                                              rset_nmem, kc);
2096                 if (always_matches)
2097                     rset = rset_start_tag;
2098                 else
2099                 {
2100                     rset_end_tag = xpath_trunc(zh, stream, '0', 
2101                                                wrbuf_buf(xpath_rev),
2102                                                ZEBRA_XPATH_ELM_END, 
2103                                                rset_nmem, kc);
2104                     
2105                     rset = rset_create_between(rset_nmem, kc, kc->scope,
2106                                                rset_start_tag, rset,
2107                                                rset_end_tag, rset_attr);
2108                 }
2109             }
2110             wrbuf_free(xpath_rev, 1);
2111             first_path = 0;
2112         }
2113     }
2114     *rset_out = rset;
2115     return ZEBRA_OK;
2116 }
2117
2118 #define MAX_XPATH_STEPS 10
2119
2120 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2121                                 oid_value attributeSet, NMEM stream,
2122                                 Z_SortKeySpecList *sort_sequence,
2123                                 int num_bases, char **basenames, 
2124                                 NMEM rset_nmem,
2125                                 RSET *rset,
2126                                 struct rset_key_control *kc)
2127 {
2128     ZEBRA_RES res = ZEBRA_OK;
2129     unsigned reg_id;
2130     char *search_type = NULL;
2131     char rank_type[128];
2132     int complete_flag;
2133     int sort_flag;
2134     char termz[IT_MAX_WORD+1];
2135     int xpath_len;
2136     const char *xpath_use = 0;
2137     struct xpath_location_step xpath[MAX_XPATH_STEPS];
2138
2139     if (!log_level_set)
2140     {
2141         log_level_rpn = yaz_log_module_level("rpn");
2142         log_level_set = 1;
2143     }
2144     zebra_maps_attr(zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2145                     rank_type, &complete_flag, &sort_flag);
2146     
2147     yaz_log(YLOG_DEBUG, "reg_id=%c", reg_id);
2148     yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2149     yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2150     yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2151
2152     if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2153         return ZEBRA_FAIL;
2154
2155     if (sort_flag)
2156         return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2157                              rank_type, rset_nmem, rset, kc);
2158     /* consider if an X-Path query is used */
2159     xpath_len = rpn_check_xpath(zh, zapt, attributeSet, 
2160                                 xpath, MAX_XPATH_STEPS, stream);
2161     if (xpath_len >= 0)
2162     {
2163         if (xpath[xpath_len-1].part[0] == '@') 
2164             xpath_use = ZEBRA_XPATH_ATTR_CDATA;  /* last step is attribute  */
2165         else
2166             xpath_use = ZEBRA_XPATH_CDATA;  /* searching for cdata */        
2167
2168         if (1)
2169         {
2170             AttrType relation;
2171             int relation_value;
2172
2173             attr_init_APT(&relation, zapt, 2);
2174             relation_value = attr_find(&relation, NULL);
2175
2176             if (relation_value == 103) /* alwaysmatches */
2177             {
2178                 *rset = 0; /* signal no "term" set */
2179                 return rpn_search_xpath(zh, num_bases, basenames,
2180                                         stream, rank_type, *rset, 
2181                                         xpath_len, xpath, rset_nmem, rset, kc);
2182             }
2183         }
2184     }
2185
2186     /* search using one of the various search type strategies
2187        termz is our UTF-8 search term
2188        attributeSet is top-level default attribute set 
2189        stream is ODR for search
2190        reg_id is the register type
2191        complete_flag is 1 for complete subfield, 0 for incomplete
2192        xpath_use is use-attribute to be used for X-Path search, 0 for none
2193     */
2194     if (!strcmp(search_type, "phrase"))
2195     {
2196         res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2197                                     reg_id, complete_flag, rank_type,
2198                                     xpath_use,
2199                                     num_bases, basenames, rset_nmem,
2200                                     rset, kc);
2201     }
2202     else if (!strcmp(search_type, "and-list"))
2203     {
2204         res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2205                                       reg_id, complete_flag, rank_type,
2206                                       xpath_use,
2207                                       num_bases, basenames, rset_nmem,
2208                                       rset, kc);
2209     }
2210     else if (!strcmp(search_type, "or-list"))
2211     {
2212         res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2213                                      reg_id, complete_flag, rank_type,
2214                                      xpath_use,
2215                                      num_bases, basenames, rset_nmem,
2216                                      rset, kc);
2217     }
2218     else if (!strcmp(search_type, "local"))
2219     {
2220         res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2221                                    rank_type, rset_nmem, rset, kc);
2222     }
2223     else if (!strcmp(search_type, "numeric"))
2224     {
2225         res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2226                                      reg_id, complete_flag, rank_type,
2227                                      xpath_use,
2228                                      num_bases, basenames, rset_nmem,
2229                                      rset, kc);
2230     }
2231     else
2232     {
2233         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2234         res = ZEBRA_FAIL;
2235     }
2236     if (res != ZEBRA_OK)
2237         return res;
2238     if (!*rset)
2239         return ZEBRA_FAIL;
2240     return rpn_search_xpath(zh, num_bases, basenames,
2241                             stream, rank_type, *rset, 
2242                             xpath_len, xpath, rset_nmem, rset, kc);
2243 }
2244
2245 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2246                                       oid_value attributeSet, 
2247                                       NMEM stream, NMEM rset_nmem,
2248                                       Z_SortKeySpecList *sort_sequence,
2249                                       int num_bases, char **basenames,
2250                                       RSET **result_sets, int *num_result_sets,
2251                                       Z_Operator *parent_op,
2252                                       struct rset_key_control *kc);
2253
2254 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2255                          oid_value attributeSet, 
2256                          NMEM stream, NMEM rset_nmem,
2257                          Z_SortKeySpecList *sort_sequence,
2258                          int num_bases, char **basenames,
2259                          RSET *result_set)
2260 {
2261     RSET *result_sets = 0;
2262     int num_result_sets = 0;
2263     ZEBRA_RES res;
2264     struct rset_key_control *kc = zebra_key_control_create(zh);
2265
2266     res = rpn_search_structure(zh, zs, attributeSet,
2267                                stream, rset_nmem,
2268                                sort_sequence, 
2269                                num_bases, basenames,
2270                                &result_sets, &num_result_sets,
2271                                0 /* no parent op */,
2272                                kc);
2273     if (res != ZEBRA_OK)
2274     {
2275         int i;
2276         for (i = 0; i<num_result_sets; i++)
2277             rset_delete(result_sets[i]);
2278         *result_set = 0;
2279     }
2280     else
2281     {
2282         assert(num_result_sets == 1);
2283         assert(result_sets);
2284         assert(*result_sets);
2285         *result_set = *result_sets;
2286     }
2287     (*kc->dec)(kc);
2288     return res;
2289 }
2290
2291 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2292                                oid_value attributeSet, 
2293                                NMEM stream, NMEM rset_nmem,
2294                                Z_SortKeySpecList *sort_sequence,
2295                                int num_bases, char **basenames,
2296                                RSET **result_sets, int *num_result_sets,
2297                                Z_Operator *parent_op,
2298                                struct rset_key_control *kc)
2299 {
2300     *num_result_sets = 0;
2301     if (zs->which == Z_RPNStructure_complex)
2302     {
2303         ZEBRA_RES res;
2304         Z_Operator *zop = zs->u.complex->roperator;
2305         RSET *result_sets_l = 0;
2306         int num_result_sets_l = 0;
2307         RSET *result_sets_r = 0;
2308         int num_result_sets_r = 0;
2309
2310         res = rpn_search_structure(zh, zs->u.complex->s1,
2311                                    attributeSet, stream, rset_nmem,
2312                                    sort_sequence,
2313                                    num_bases, basenames,
2314                                    &result_sets_l, &num_result_sets_l,
2315                                    zop, kc);
2316         if (res != ZEBRA_OK)
2317         {
2318             int i;
2319             for (i = 0; i<num_result_sets_l; i++)
2320                 rset_delete(result_sets_l[i]);
2321             return res;
2322         }
2323         res = rpn_search_structure(zh, zs->u.complex->s2,
2324                                    attributeSet, stream, rset_nmem,
2325                                    sort_sequence,
2326                                    num_bases, basenames,
2327                                    &result_sets_r, &num_result_sets_r,
2328                                    zop, kc);
2329         if (res != ZEBRA_OK)
2330         {
2331             int i;
2332             for (i = 0; i<num_result_sets_l; i++)
2333                 rset_delete(result_sets_l[i]);
2334             for (i = 0; i<num_result_sets_r; i++)
2335                 rset_delete(result_sets_r[i]);
2336             return res;
2337         }
2338
2339         /* make a new list of result for all children */
2340         *num_result_sets = num_result_sets_l + num_result_sets_r;
2341         *result_sets = nmem_malloc(stream, *num_result_sets * 
2342                                    sizeof(**result_sets));
2343         memcpy(*result_sets, result_sets_l, 
2344                num_result_sets_l * sizeof(**result_sets));
2345         memcpy(*result_sets + num_result_sets_l, result_sets_r, 
2346                num_result_sets_r * sizeof(**result_sets));
2347
2348         if (!parent_op || parent_op->which != zop->which
2349             || (zop->which != Z_Operator_and &&
2350                 zop->which != Z_Operator_or))
2351         {
2352             /* parent node different from this one (or non-present) */
2353             /* we must combine result sets now */
2354             RSET rset;
2355             switch (zop->which)
2356             {
2357             case Z_Operator_and:
2358                 rset = rset_create_and(rset_nmem, kc,
2359                                        kc->scope,
2360                                        *num_result_sets, *result_sets);
2361                 break;
2362             case Z_Operator_or:
2363                 rset = rset_create_or(rset_nmem, kc,
2364                                       kc->scope, 0, /* termid */
2365                                       *num_result_sets, *result_sets);
2366                 break;
2367             case Z_Operator_and_not:
2368                 rset = rset_create_not(rset_nmem, kc,
2369                                        kc->scope,
2370                                        (*result_sets)[0],
2371                                        (*result_sets)[1]);
2372                 break;
2373             case Z_Operator_prox:
2374                 if (zop->u.prox->which != Z_ProximityOperator_known)
2375                 {
2376                     zebra_setError(zh, 
2377                                    YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2378                                    0);
2379                     return ZEBRA_FAIL;
2380                 }
2381                 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2382                 {
2383                     zebra_setError_zint(zh,
2384                                         YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2385                                         *zop->u.prox->u.known);
2386                     return ZEBRA_FAIL;
2387                 }
2388                 else
2389                 {
2390                     rset = rset_create_prox(rset_nmem, kc,
2391                                             kc->scope,
2392                                             *num_result_sets, *result_sets, 
2393                                             *zop->u.prox->ordered,
2394                                             (!zop->u.prox->exclusion ? 
2395                                              0 : *zop->u.prox->exclusion),
2396                                             *zop->u.prox->relationType,
2397                                             *zop->u.prox->distance );
2398                 }
2399                 break;
2400             default:
2401                 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
2402                 return ZEBRA_FAIL;
2403             }
2404             *num_result_sets = 1;
2405             *result_sets = nmem_malloc(stream, *num_result_sets * 
2406                                        sizeof(**result_sets));
2407             (*result_sets)[0] = rset;
2408         }
2409     }
2410     else if (zs->which == Z_RPNStructure_simple)
2411     {
2412         RSET rset;
2413         ZEBRA_RES res;
2414
2415         if (zs->u.simple->which == Z_Operand_APT)
2416         {
2417             yaz_log(YLOG_DEBUG, "rpn_search_APT");
2418             res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2419                                  attributeSet, stream, sort_sequence,
2420                                  num_bases, basenames, rset_nmem, &rset,
2421                                  kc);
2422             if (res != ZEBRA_OK)
2423                 return res;
2424         }
2425         else if (zs->u.simple->which == Z_Operand_resultSetId)
2426         {
2427             yaz_log(YLOG_DEBUG, "rpn_search_ref");
2428             rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2429             if (!rset)
2430             {
2431                 zebra_setError(zh, 
2432                                YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2433                                zs->u.simple->u.resultSetId);
2434                 return ZEBRA_FAIL;
2435             }
2436             rset_dup(rset);
2437         }
2438         else
2439         {
2440             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2441             return ZEBRA_FAIL;
2442         }
2443         *num_result_sets = 1;
2444         *result_sets = nmem_malloc(stream, *num_result_sets * 
2445                                    sizeof(**result_sets));
2446         (*result_sets)[0] = rset;
2447     }
2448     else
2449     {
2450         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2451         return ZEBRA_FAIL;
2452     }
2453     return ZEBRA_OK;
2454 }
2455
2456 struct scan_info_entry {
2457     char *term;
2458     ISAM_P isam_p;
2459 };
2460
2461 struct scan_info {
2462     struct scan_info_entry *list;
2463     ODR odr;
2464     int before, after;
2465     char prefix[20];
2466 };
2467
2468 static int scan_handle (char *name, const char *info, int pos, void *client)
2469 {
2470     int len_prefix, idx;
2471     struct scan_info *scan_info = (struct scan_info *) client;
2472
2473     len_prefix = strlen(scan_info->prefix);
2474     if (memcmp (name, scan_info->prefix, len_prefix))
2475         return 1;
2476     if (pos > 0)
2477         idx = scan_info->after - pos + scan_info->before;
2478     else
2479         idx = - pos - 1;
2480
2481     if (idx < 0)
2482         return 0;
2483     scan_info->list[idx].term = (char *)
2484         odr_malloc(scan_info->odr, strlen(name + len_prefix)+1);
2485     strcpy(scan_info->list[idx].term, name + len_prefix);
2486     assert (*info == sizeof(ISAM_P));
2487     memcpy (&scan_info->list[idx].isam_p, info+1, sizeof(ISAM_P));
2488     return 0;
2489 }
2490
2491 void zebra_term_untrans_iconv(ZebraHandle zh, NMEM stream, int reg_type,
2492                               char **dst, const char *src)
2493 {
2494     char term_src[IT_MAX_WORD];
2495     char term_dst[IT_MAX_WORD];
2496     
2497     zebra_term_untrans (zh, reg_type, term_src, src);
2498
2499     if (zh->iconv_from_utf8 != 0)
2500     {
2501         int len;
2502         char *inbuf = term_src;
2503         size_t inleft = strlen(term_src);
2504         char *outbuf = term_dst;
2505         size_t outleft = sizeof(term_dst)-1;
2506         size_t ret;
2507         
2508         ret = yaz_iconv (zh->iconv_from_utf8, &inbuf, &inleft,
2509                          &outbuf, &outleft);
2510         if (ret == (size_t)(-1))
2511             len = 0;
2512         else
2513             len = outbuf - term_dst;
2514         *dst = nmem_malloc(stream, len + 1);
2515         if (len > 0)
2516             memcpy (*dst, term_dst, len);
2517         (*dst)[len] = '\0';
2518     }
2519     else
2520         *dst = nmem_strdup(stream, term_src);
2521 }
2522
2523 static void count_set(ZebraHandle zh, RSET rset, zint *count)
2524 {
2525     zint psysno = 0;
2526     struct it_key key;
2527     RSFD rfd;
2528
2529     yaz_log(YLOG_DEBUG, "count_set");
2530
2531     rset->hits_limit = zh->approx_limit;
2532
2533     *count = 0;
2534     rfd = rset_open(rset, RSETF_READ);
2535     while (rset_read(rfd, &key,0 /* never mind terms */))
2536     {
2537         if (key.mem[0] != psysno)
2538         {
2539             psysno = key.mem[0];
2540             if (rfd->counted_items >= rset->hits_limit)
2541                 break;
2542         }
2543     }
2544     rset_close (rfd);
2545     *count = rset->hits_count;
2546 }
2547
2548 #define RPN_MAX_ORDS 32
2549
/*
 * rpn_scan: build a scan (term browse) list around the term given in zapt,
 * merging the dictionary terms of all selected databases and computing an
 * occurrence count for each returned term.
 *
 *  zh            Zebra handle; zh->reg supplies maps, explain info and the
 *                dictionary that is scanned.
 *  stream        ODR stream; the scan arrays and the returned list are
 *                allocated from it with odr_malloc.
 *  zapt          attributes plus term to scan from.
 *  attributeset  attribute set; VAL_NONE is replaced by VAL_BIB1.
 *  num_bases,
 *  basenames     databases to scan. At most RPN_MAX_ORDS ordinals are
 *                collected; databases for which no ordinal can be resolved
 *                are skipped.
 *  position      in: wanted position of the scan term within the list
 *                (before = position-1 entries precede it).
 *                out: reduced by the number of preceding entries that
 *                could not be found.
 *  num_entries   in: number of entries wanted.
 *                out: number of entries made available via *list
 *                (0 on most early exits).
 *  list          out: first valid entry, or 0 when nothing is returned.
 *  is_partial    out: set to 1 when fewer entries than requested exist
 *                after and/or before the scan term.
 *  limit_set     optional result set; when given, each term's count is
 *                taken on the intersection with it. When it is NULL, an
 *                attribute of type 8 in zapt may name a result set instead.
 *  return_zero   not referenced anywhere in this function.
 *
 * Returns ZEBRA_FAIL when the attributes cannot be mapped, a database is
 * unavailable or the scan term cannot be translated; ZEBRA_OK otherwise
 * (including the cases where zero entries are returned).
 */
2550 ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
2551                    oid_value attributeset,
2552                    int num_bases, char **basenames,
2553                    int *position, int *num_entries, ZebraScanEntry **list,
2554                    int *is_partial, RSET limit_set, int return_zero)
2555 {
2556     int i;
2557     int pos = *position;
2558     int num = *num_entries;
2559     int before;                 /* entries wanted before the scan term */
2560     int after;                  /* entries wanted from the scan term on */
2561     int base_no;
2562     char termz[IT_MAX_WORD+20]; /* ordinal prefix followed by scan term */
2563     struct scan_info *scan_info_array; /* one scan result per ordinal */
2564     ZebraScanEntry *glist;      /* merged list, before+after entries */
2565     int ords[RPN_MAX_ORDS], ord_no = 0;
2566     int ptr[RPN_MAX_ORDS];      /* per-ordinal cursor used while merging */
2567
2568     unsigned index_type;
2569     char *search_type = NULL;
2570     char rank_type[128];
2571     int complete_flag;
2572     int sort_flag;
2573     NMEM rset_nmem = NULL; 
2574     struct rset_key_control *kc = 0;
2575
2576     *list = 0;
2577     *is_partial = 0;
2578
2579     if (attributeset == VAL_NONE)
2580         attributeset = VAL_BIB1;
2581
         /* No explicit limit set: look for attribute type 8 in the query and,
            if present, use the result set it names as the limit set. */
2582     if (!limit_set)
2583     {
2584         AttrType termset;
2585         int termset_value_numeric;
2586         const char *termset_value_string;
2587         attr_init_APT(&termset, zapt, 8);
2588         termset_value_numeric =
2589             attr_find_ex(&termset, NULL, &termset_value_string);
             /* presumably -1 means "attribute absent" and -2 means "value is
                a string" -- inferred from the branches below; confirm
                against attr_find_ex */
2590         if (termset_value_numeric != -1)
2591         {
2592             char resname[32];
2593             const char *termset_name = 0;
2594             
2595             if (termset_value_numeric != -2)
2596             {
2597                 
                     /* numeric value: its decimal form is the set name */
2598                 sprintf(resname, "%d", termset_value_numeric);
2599                 termset_name = resname;
2600             }
2601             else
2602                 termset_name = termset_value_string;
2603             
2604             limit_set = resultSetRef (zh, termset_name);
2605         }
2606     }
2607         
2608     yaz_log(YLOG_DEBUG, "position = %d, num = %d set=%d",
2609             pos, num, attributeset);
2610         
         /* derive index type and related flags from the attributes; a
            non-zero return is reported as an unsupported attribute type */
2611     if (zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type,
2612                         rank_type, &complete_flag, &sort_flag))
2613     {
2614         *num_entries = 0;
2615         zebra_setError(zh, YAZ_BIB1_UNSUPP_ATTRIBUTE_TYPE, 0);
2616         return ZEBRA_FAIL;
2617     }
         /* Collect one ordinal per database. NOTE(review): databases beyond
            RPN_MAX_ORDS resolved ordinals are silently left out. */
2618     for (base_no = 0; base_no < num_bases && ord_no < RPN_MAX_ORDS; base_no++)
2619     {
2620         int ord;
2621
2622         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2623         {
2624             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2625                            basenames[base_no]);
2626             *num_entries = 0;
2627             return ZEBRA_FAIL;
2628         }
             /* no ordinal for this database: skip it, not an error */
2629         if (zebra_apt_get_ord(zh, zapt, index_type, 0, attributeset, &ord) 
2630             != ZEBRA_OK)
2631             continue;
2632         ords[ord_no++] = ord;
2633     }
         /* nothing to scan in any database: empty, successful result */
2634     if (ord_no == 0)
2635     {
2636         *num_entries = 0;
2637         return ZEBRA_OK;
2638     }
2639     /* prepare dictionary scanning */
2640     if (num < 1)
2641     {
2642         *num_entries = 0;
2643         return ZEBRA_OK;
2644     }
         /* split the request into entries before the scan term and entries
            from the scan term onwards; both are clamped to be non-negative */
2645     before = pos-1;
2646     if (before < 0)
2647         before = 0;
2648     after = 1+num-pos;
2649     if (after < 0)
2650         after = 0;
2651     yaz_log(YLOG_DEBUG, "rpn_scan pos=%d num=%d before=%d "
2652             "after=%d before+after=%d",
2653             pos, num, before, after, before+after);
2654     scan_info_array = (struct scan_info *)
2655         odr_malloc(stream, ord_no * sizeof(*scan_info_array));
         /* scan the dictionary once per ordinal, each into its own list */
2656     for (i = 0; i < ord_no; i++)
2657     {
2658         int j, prefix_len = 0;
2659         int before_tmp = before, after_tmp = after;
2660         struct scan_info *scan_info = scan_info_array + i;
2661         struct rpn_char_map_info rcmi;
2662
             /* rcmi is not read again in this function; presumably the call
                registers the character map with the dictionary -- confirm */
2663         rpn_char_map_prepare (zh->reg, index_type, &rcmi);
2664
2665         scan_info->before = before;
2666         scan_info->after = after;
2667         scan_info->odr = stream;
2668
             /* all slots start as "no term"; the merge loops below treat a
                NULL term as an unfilled slot */
2669         scan_info->list = (struct scan_info_entry *)
2670             odr_malloc(stream, (before+after) * sizeof(*scan_info->list));
2671         for (j = 0; j<before+after; j++)
2672             scan_info->list[j].term = NULL;
2673
             /* dictionary key = encoded ordinal prefix + translated term */
2674         prefix_len += key_SU_encode (ords[i], termz + prefix_len);
2675         termz[prefix_len] = 0;
             /* NOTE(review): relies on scan_info->prefix being large enough
                for the encoded ordinal; field size not visible here */
2676         strcpy(scan_info->prefix, termz);
2677
             /* NOTE(review): unlike the other failure exits, *num_entries
                is left unchanged on this path */
2678         if (trans_scan_term(zh, zapt, termz+prefix_len, index_type) == 
2679             ZEBRA_FAIL)
2680             return ZEBRA_FAIL;
2681         
             /* scan_handle is the per-term callback; presumably it fills
                scan_info->list -- confirm against scan_handle */
2682         dict_scan(zh->reg->dict, termz, &before_tmp, &after_tmp,
2683                   scan_info, scan_handle);
2684     }
2685     glist = (ZebraScanEntry *)
2686         odr_malloc(stream, (before+after)*sizeof(*glist));
2687
         /* result sets built while counting live in their own NMEM so they
            can be released in one go before returning */
2688     rset_nmem = nmem_create();
2689     kc = zebra_key_control_create(zh);
2690
2691     /* consider terms after main term */
         /* entries from the scan term onwards are read from index 'before'
            upwards in each per-ordinal list */
2692     for (i = 0; i < ord_no; i++)
2693         ptr[i] = before;
2694     
2695     *is_partial = 0;
2696     for (i = 0; i<after; i++)
2697     {
2698         int j, j0 = -1;
2699         const char *mterm = NULL;
2700         const char *tst;
2701         RSET rset = 0;
             /* negative while pos < 1: such terms are consumed from the
                per-ordinal lists but produce no output entry */
2702         int lo = i + pos-1; /* offset in result list */
2703
2704         /* find: j0 is the first of the minimal values */
2705         for (j = 0; j < ord_no; j++)
2706         {
2707             if (ptr[j] < before+after && ptr[j] >= 0 &&
2708                 (tst = scan_info_array[j].list[ptr[j]].term) &&
2709                 (!mterm || strcmp (tst, mterm) < 0))
2710             {
2711                 j0 = j;
2712                 mterm = tst;
2713             }
2714         }
2715         if (j0 == -1)
2716             break;  /* no value found, stop */
2717
2718         /* get result set for first one , but only if it's within bounds */
2719         if (lo >= 0)
2720         {
2721             /* get result set for first term */
2722             zebra_term_untrans_iconv(zh, stream->mem, index_type,
2723                                      &glist[lo].term, mterm);
2724             rset = rset_trunc(zh, &scan_info_array[j0].list[ptr[j0]].isam_p, 1,
2725                               glist[lo].term, strlen(glist[lo].term),
2726                               NULL, 0, zapt->term->which, rset_nmem, 
2727                               kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2728                               0 /* term_ref_id_str */);
2729         }
2730         ptr[j0]++; /* move index for this set .. */
2731         /* get result set for remaining scan terms */
             /* ordinals after j0 holding the same term are OR'ed into rset
                and advanced, so the term appears only once in the output */
2732         for (j = j0+1; j<ord_no; j++)
2733         {
2734             if (ptr[j] < before+after && ptr[j] >= 0 &&
2735                 (tst = scan_info_array[j].list[ptr[j]].term) &&
2736                 !strcmp (tst, mterm))
2737             {
2738                 if (lo >= 0)
2739                 {
2740                     RSET rsets[2];
2741                     
2742                     rsets[0] = rset;
2743                     rsets[1] =
2744                         rset_trunc(
2745                             zh, &scan_info_array[j].list[ptr[j]].isam_p, 1,
2746                             glist[lo].term,
2747                             strlen(glist[lo].term), NULL, 0,
2748                             zapt->term->which,rset_nmem,
2749                             kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2750                             0 /* term_ref_id_str */ );
2751                     rset = rset_create_or(rset_nmem, kc,
2752                                           kc->scope, 0 /* termid */,
2753                                           2, rsets);
2754                 }
2755                 ptr[j]++;
2756             }
2757         }
2758         if (lo >= 0)
2759         {
2760             zint count;
2761             /* merge with limit_set if given */
2762             if (limit_set)
2763             {
2764                 RSET rsets[2];
2765                 rsets[0] = rset;
2766                 rsets[1] = rset_dup(limit_set);
2767                 
2768                 rset = rset_create_and(rset_nmem, kc, kc->scope, 2, rsets);
2769             }
2770             /* count it */
2771             count_set(zh, rset, &count);
2772             glist[lo].occurrences = count;
2773             rset_delete(rset);
2774         }
2775     }
         /* the loop stopped early: fewer terms than requested exist from the
            scan term onwards, so shrink the reported count accordingly */
2776     if (i < after)
2777     {
2778         *num_entries -= (after-i);
2779         *is_partial = 1;
2780         if (*num_entries < 0)
2781         {
                 /* release key control and rset memory before leaving */
2782             (*kc->dec)(kc);
2783             nmem_destroy(rset_nmem);
2784             *num_entries = 0;
2785             return ZEBRA_OK;
2786         }
2787     }
2788     /* consider terms before main term */
         /* here ptr[j] counts how many preceding terms of ordinal j have
            been consumed; they are read from index before-1 downwards */
2789     for (i = 0; i<ord_no; i++)
2790         ptr[i] = 0;
2791     
2792     for (i = 0; i<before; i++)
2793     {
2794         int j, j0 = -1;
2795         const char *mterm = NULL;
2796         const char *tst;
2797         RSET rset;
2798         int lo = before-1-i; /* offset in result list */
2799         zint count;
2800         
             /* find: j0 is the first ordinal holding the greatest term, so
                the output is filled backwards from the scan term */
2801         for (j = 0; j <ord_no; j++)
2802         {
2803             if (ptr[j] < before && ptr[j] >= 0 &&
2804                 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
2805                 (!mterm || strcmp (tst, mterm) > 0))
2806             {
2807                 j0 = j;
2808                     mterm = tst;
2809             }
2810         }
2811         if (j0 == -1)
2812             break;
2813         
2814         zebra_term_untrans_iconv(zh, stream->mem, index_type,
2815                                  &glist[lo].term, mterm);
2816         
2817         rset = rset_trunc
2818             (zh, &scan_info_array[j0].list[before-1-ptr[j0]].isam_p, 1,
2819              glist[lo].term, strlen(glist[lo].term),
2820              NULL, 0, zapt->term->which, rset_nmem,
2821              kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2822              0 /* term_ref_id_str */);
2823         
2824         ptr[j0]++;
2825         
             /* OR in the same term from the remaining ordinals */
2826         for (j = j0+1; j<ord_no; j++)
2827         {
2828             if (ptr[j] < before && ptr[j] >= 0 &&
2829                 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
2830                 !strcmp (tst, mterm))
2831             {
2832                 RSET rsets[2];
2833                 
2834                 rsets[0] = rset;
2835                 rsets[1] = rset_trunc(
2836                     zh,
2837                     &scan_info_array[j].list[before-1-ptr[j]].isam_p, 1,
2838                     glist[lo].term,
2839                     strlen(glist[lo].term), NULL, 0,
2840                     zapt->term->which, rset_nmem,
2841                     kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2842                     0 /* term_ref_id_str */);
2843                 rset = rset_create_or(rset_nmem, kc,
2844                                       kc->scope, 0 /* termid */, 2, rsets);
2845                 
2846                 ptr[j]++;
2847             }
2848         }
             /* restrict to limit_set (if any) before counting */
2849         if (limit_set)
2850         {
2851             RSET rsets[2];
2852             rsets[0] = rset;
2853             rsets[1] = rset_dup(limit_set);
2854             
2855             rset = rset_create_and(rset_nmem, kc, kc->scope, 2, rsets);
2856         }
2857         count_set(zh, rset, &count);
2858         glist[lo].occurrences = count;
2859         rset_delete (rset);
2860     }
2861     (*kc->dec)(kc);
2862     nmem_destroy(rset_nmem);
         /* i becomes the number of wanted preceding entries that were not
            found; those leading slots of glist were never filled */
2863     i = before-i;
2864     if (i)
2865     {
2866         *is_partial = 1;
2867         *position -= i;
2868         *num_entries -= i;
2869         if (*num_entries <= 0)
2870         {
2871             *num_entries = 0;
2872             return ZEBRA_OK;
2873         }
2874     }
2875     
2876     *list = glist + i;               /* list is set to first 'real' entry */
2877     
2878     yaz_log(YLOG_DEBUG, "position = %d, num_entries = %d",
2879             *position, *num_entries);
2880     return ZEBRA_OK;
2881 }
2882
2883 /*
2884  * Local variables:
2885  * c-basic-offset: 4
2886  * indent-tabs-mode: nil
2887  * End:
2888  * vim: shiftwidth=4 tabstop=8 expandtab
2889  */
2890