Fixes for hit estimates. Added zebra_set_approx_limit.
[idzebra-moved-to-github.git] / index / zrpn.c
1 /* $Id: zrpn.c,v 1.198 2005-06-09 10:39:53 adam Exp $
2    Copyright (C) 1995-2005
3    Index Data ApS
4
5 This file is part of the Zebra server.
6
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra.  If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA.
21 */
22
23 #include <stdio.h>
24 #include <assert.h>
25 #ifdef WIN32
26 #include <io.h>
27 #else
28 #include <unistd.h>
29 #endif
30 #include <ctype.h>
31
32 #include <yaz/diagbib1.h>
33 #include "index.h"
34 #include <zebra_xpath.h>
35
36 #include <charmap.h>
37 #include <rset.h>
38
/* Context handed to rpn_char_map_handler through dict_grep_cmap; it
   identifies which character map is applied during dictionary lookups. */
struct rpn_char_map_info
{
    ZebraMaps zm;    /* maps handle, copied from the register's zebra_maps */
    int reg_type;    /* register type passed on to zebra_maps_input */
};
44
/* Cursor used by attr_init / attr_find_ex to walk the attribute list of
   a term, looking for values of one attribute type. */
typedef struct
{
    int type;    /* attribute type being searched for */
    int major;   /* index of the next attribute element to inspect */
    int minor;   /* index into the value list of a complex attribute */
    Z_AttributesPlusTerm *zapt;  /* term whose attributes are scanned */
} AttrType;
52
/* Non-zero once log_level_rpn has been looked up (done lazily in
   add_isam_p). */
static int log_level_set = 0;
/* Log level of the "rpn" log module; 0 until it has been looked up. */
static int log_level_rpn = 0;
55
56 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
57 {
58     struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
59     const char **out = zebra_maps_input(p->zm, p->reg_type, from, len, 0);
60 #if 0
61     if (out && *out)
62     {
63         const char *outp = *out;
64         yaz_log(YLOG_LOG, "---");
65         while (*outp)
66         {
67             yaz_log(YLOG_LOG, "%02X", *outp);
68             outp++;
69         }
70     }
71 #endif
72     return out;
73 }
74
75 static void rpn_char_map_prepare(struct zebra_register *reg, int reg_type,
76                                   struct rpn_char_map_info *map_info)
77 {
78     map_info->zm = reg->zebra_maps;
79     map_info->reg_type = reg_type;
80     dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
81 }
82
83 static int attr_find_ex(AttrType *src, oid_value *attributeSetP,
84                          const char **string_value)
85 {
86     int num_attributes;
87
88     num_attributes = src->zapt->attributes->num_attributes;
89     while (src->major < num_attributes)
90     {
91         Z_AttributeElement *element;
92
93         element = src->zapt->attributes->attributes[src->major];
94         if (src->type == *element->attributeType)
95         {
96             switch (element->which) 
97             {
98             case Z_AttributeValue_numeric:
99                 ++(src->major);
100                 if (element->attributeSet && attributeSetP)
101                 {
102                     oident *attrset;
103
104                     attrset = oid_getentbyoid(element->attributeSet);
105                     *attributeSetP = attrset->value;
106                 }
107                 return *element->value.numeric;
108                 break;
109             case Z_AttributeValue_complex:
110                 if (src->minor >= element->value.complex->num_list)
111                     break;
112                 if (element->attributeSet && attributeSetP)
113                 {
114                     oident *attrset;
115                     
116                     attrset = oid_getentbyoid(element->attributeSet);
117                     *attributeSetP = attrset->value;
118                 }
119                 if (element->value.complex->list[src->minor]->which ==  
120                     Z_StringOrNumeric_numeric)
121                 {
122                     ++(src->minor);
123                     return
124                         *element->value.complex->list[src->minor-1]->u.numeric;
125                 }
126                 else if (element->value.complex->list[src->minor]->which ==  
127                          Z_StringOrNumeric_string)
128                 {
129                     if (!string_value)
130                         break;
131                     ++(src->minor);
132                     *string_value = 
133                         element->value.complex->list[src->minor-1]->u.string;
134                     return -2;
135                 }
136                 else
137                     break;
138             default:
139                 assert(0);
140             }
141         }
142         ++(src->major);
143     }
144     return -1;
145 }
146
147 static int attr_find(AttrType *src, oid_value *attributeSetP)
148 {
149     return attr_find_ex(src, attributeSetP, 0);
150 }
151
152 static void attr_init(AttrType *src, Z_AttributesPlusTerm *zapt,
153                        int type)
154 {
155     src->zapt = zapt;
156     src->type = type;
157     src->major = 0;
158     src->minor = 0;
159 }
160
/* When defined, grep_info carries a term_no array that add_isam_p grows
   in parallel with isam_p_buf. */
#define TERM_COUNT

/* Accumulator for the dictionary hits collected while a term is looked
   up; filled by add_isam_p. */
struct grep_info {
#ifdef TERM_COUNT
    int *term_no;        /* array sized like isam_p_buf */
#endif
    ISAM_P *isam_p_buf;  /* collected ISAM_P values */
    int isam_p_size;     /* number of entries allocated */
    int isam_p_indx;     /* number of entries in use */
    ZebraHandle zh;
    int reg_type;        /* register type used when untranslating terms */
    ZebraSet termset;    /* if non-NULL, every hit term is added to it */
};
174
175 void zebra_term_untrans(ZebraHandle zh, int reg_type,
176                         char *dst, const char *src)
177 {
178     int len = 0;
179     while (*src)
180     {
181         const char *cp = zebra_maps_output(zh->reg->zebra_maps,
182                                            reg_type, &src);
183         if (!cp && len < IT_MAX_WORD-1)
184             dst[len++] = *src++;
185         else
186             while (*cp && len < IT_MAX_WORD-1)
187                 dst[len++] = *cp++;
188     }
189     dst[len] = '\0';
190 }
191
192 static void add_isam_p(const char *name, const char *info,
193                        struct grep_info *p)
194 {
195     if (!log_level_set)
196     {
197         log_level_rpn = yaz_log_module_level("rpn");
198         log_level_set = 1;
199     }
200     if (p->isam_p_indx == p->isam_p_size)
201     {
202         ISAM_P *new_isam_p_buf;
203 #ifdef TERM_COUNT        
204         int *new_term_no;        
205 #endif
206         p->isam_p_size = 2*p->isam_p_size + 100;
207         new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
208                                             p->isam_p_size);
209         if (p->isam_p_buf)
210         {
211             memcpy(new_isam_p_buf, p->isam_p_buf,
212                     p->isam_p_indx * sizeof(*p->isam_p_buf));
213             xfree(p->isam_p_buf);
214         }
215         p->isam_p_buf = new_isam_p_buf;
216
217 #ifdef TERM_COUNT
218         new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
219         if (p->term_no)
220         {
221             memcpy(new_term_no, p->isam_p_buf,
222                     p->isam_p_indx * sizeof(*p->term_no));
223             xfree(p->term_no);
224         }
225         p->term_no = new_term_no;
226 #endif
227     }
228     assert(*info == sizeof(*p->isam_p_buf));
229     memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
230
231 #if 1
232     if (p->termset)
233     {
234         const char *db;
235         int set, use;
236         char term_tmp[IT_MAX_WORD];
237         int su_code = 0;
238         int len = key_SU_decode (&su_code, name);
239         
240         zebra_term_untrans  (p->zh, p->reg_type, term_tmp, name+len+1);
241         yaz_log(log_level_rpn, "grep: %d %c %s", su_code, name[len], term_tmp);
242         zebraExplain_lookup_ord (p->zh->reg->zei,
243                                  su_code, &db, &set, &use);
244         yaz_log(log_level_rpn, "grep:  set=%d use=%d db=%s", set, use, db);
245         
246         resultSetAddTerm(p->zh, p->termset, name[len], db,
247                          set, use, term_tmp);
248     }
249 #endif
250     (p->isam_p_indx)++;
251 }
252
/* grep_handle: callback taking a dictionary hit (name, info) and a
   struct grep_info passed as p; records the hit with add_isam_p and
   always returns 0. */
static int grep_handle(char *name, const char *info, void *p)
{
    struct grep_info *grep_info = p;

    add_isam_p(name, info, grep_info);
    return 0;
}
258
259 static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src,
260                     const char *ct1, const char *ct2, int first)
261 {
262     const char *s1, *s0 = *src;
263     const char **map;
264
265     /* skip white space */
266     while (*s0)
267     {
268         if (ct1 && strchr(ct1, *s0))
269             break;
270         if (ct2 && strchr(ct2, *s0))
271             break;
272         s1 = s0;
273         map = zebra_maps_input(zebra_maps, reg_type, &s1, strlen(s1), first);
274         if (**map != *CHR_SPACE)
275             break;
276         s0 = s1;
277     }
278     *src = s0;
279     return *s0;
280 }
281
282
/* esc_str: render in_size bytes of in_buf as "HH:c  " groups (hex code,
 * colon, the character itself or '?' when it is outside 32..126).
 *  out_buf:   receives the NUL-terminated text
 *  out_size:  size of out_buf; must exceed 20
 * Once the text grows beyond out_size-20 bytes, ".." is appended and the
 * remaining input is dropped.
 */
static void esc_str(char *out_buf, int out_size,
                    const char *in_buf, int in_size)
{
    int pos = 0;   /* current length of the text in out_buf */
    int k = 0;

    assert(out_buf);
    assert(in_buf);
    assert(out_size > 20);
    out_buf[0] = '\0';
    while (k < in_size)
    {
        int c = in_buf[k++] & 0xff;
        int printable = (c >= 32 && c <= 126);

        pos += sprintf(out_buf + pos, "%02X:%c  ", c, printable ? c : '?');
        if (pos > out_size - 20)
        {
            strcat(out_buf, "..");
            break;
        }
    }
}
308
/* Characters that the term_1xx functions prefix with a backslash when
   they copy unmapped input into the dictionary pattern. */
#define REGEX_CHARS " []()|.*+?!"
310
/* term_100: handle term, where trunc = none(no operators at all)
 *  src:          in/out position within the query term
 *  dst:          receives the dictionary pattern; characters listed in
 *                REGEX_CHARS are backslash-escaped
 *  space_split:  non-zero: stop at the first character mapping to space;
 *                zero: take the complete field, keeping interior space
 *  dst_term:     receives the raw (unescaped) text that was consumed
 * Returns the length of dst, or 0 if term_pre finds no input.
 * No bounds checks are made on dst or dst_term.
 */
static int term_100(ZebraMaps zebra_maps, int reg_type,
                    const char **src, char *dst, int space_split,
                    char *dst_term)
{
    const char *s0;
    const char **map;
    int i = 0;   /* length of dst so far */
    int j = 0;   /* length of dst_term so far */

    /* most recent run of input that mapped to space, not yet emitted */
    const char *space_start = 0;
    const char *space_end = 0;

    if (!term_pre(zebra_maps, reg_type, src, NULL, NULL, !space_split))
        return 0;
    s0 = *src;
    while (*s0)
    {
        const char *s1 = s0;   /* start of the unit consumed this round */
        int q_map_match = 0;
        map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
                                &q_map_match);
        if (space_split)
        {
            if (**map == *CHR_SPACE)
                break;
        }
        else  /* complete subfield only. */
        {
            if (**map == *CHR_SPACE)
            {   /* save space mapping for later; it is only emitted if a
                   non-space character follows, so trailing space is
                   dropped and only the latest saved run survives */
                space_start = s1;
                space_end = s0;
                continue;
            }
            else if (space_start)
            {   /* reload last space */
                while (space_start < space_end)
                {
                    if (strchr(REGEX_CHARS, *space_start))
                        dst[i++] = '\\';
                    dst_term[j++] = *space_start;
                    dst[i++] = *space_start++;
                }
                /* and reset */
                space_start = space_end = 0;
            }
        }
        /* add non-space char */
        memcpy(dst_term+j, s1, s0 - s1);
        j += (s0 - s1);
        if (!q_map_match)
        {
            /* copy the raw input, escaping pattern operators */
            while (s1 < s0)
            {
                if (strchr(REGEX_CHARS, *s1))
                    dst[i++] = '\\';
                dst[i++] = *s1++;
            }
        }
        else
        {
            /* q_map_match set: map[0] is copied to dst as is.
               tmpbuf is filled but not read afterwards. */
            char tmpbuf[80];
            esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
            
            strcpy(dst + i, map[0]);
            i += strlen(map[0]);
        }
    }
    dst[i] = '\0';
    dst_term[j] = '\0';
    *src = s0;
    return i;
}
385
386 /* term_101: handle term, where trunc = Process # */
387 static int term_101(ZebraMaps zebra_maps, int reg_type,
388                     const char **src, char *dst, int space_split,
389                     char *dst_term)
390 {
391     const char *s0;
392     const char **map;
393     int i = 0;
394     int j = 0;
395
396     if (!term_pre(zebra_maps, reg_type, src, "#", "#", !space_split))
397         return 0;
398     s0 = *src;
399     while (*s0)
400     {
401         if (*s0 == '#')
402         {
403             dst[i++] = '.';
404             dst[i++] = '*';
405             dst_term[j++] = *s0++;
406         }
407         else
408         {
409             const char *s1 = s0;
410             int q_map_match = 0;
411             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
412                                     &q_map_match);
413             if (space_split && **map == *CHR_SPACE)
414                 break;
415
416             /* add non-space char */
417             memcpy(dst_term+j, s1, s0 - s1);
418             j += (s0 - s1);
419             if (!q_map_match)
420             {
421                 while (s1 < s0)
422                 {
423                     if (strchr(REGEX_CHARS, *s1))
424                         dst[i++] = '\\';
425                     dst[i++] = *s1++;
426                 }
427             }
428             else
429             {
430                 char tmpbuf[80];
431                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
432                 
433                 strcpy(dst + i, map[0]);
434                 i += strlen(map[0]);
435             }
436         }
437     }
438     dst[i] = '\0';
439     dst_term[j++] = '\0';
440     *src = s0;
441     return i;
442 }
443
444 /* term_103: handle term, where trunc = re-2 (regular expressions) */
445 static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src,
446                     char *dst, int *errors, int space_split,
447                     char *dst_term)
448 {
449     int i = 0;
450     int j = 0;
451     const char *s0;
452     const char **map;
453
454     if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split))
455         return 0;
456     s0 = *src;
457     if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
458         isdigit(((const unsigned char *)s0)[1]))
459     {
460         *errors = s0[1] - '0';
461         s0 += 3;
462         if (*errors > 3)
463             *errors = 3;
464     }
465     while (*s0)
466     {
467         if (strchr("^\\()[].*+?|-", *s0))
468         {
469             dst_term[j++] = *s0;
470             dst[i++] = *s0++;
471         }
472         else
473         {
474             const char *s1 = s0;
475             int q_map_match = 0;
476             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
477                                     &q_map_match);
478             if (space_split && **map == *CHR_SPACE)
479                 break;
480
481             /* add non-space char */
482             memcpy(dst_term+j, s1, s0 - s1);
483             j += (s0 - s1);
484             if (!q_map_match)
485             {
486                 while (s1 < s0)
487                 {
488                     if (strchr(REGEX_CHARS, *s1))
489                         dst[i++] = '\\';
490                     dst[i++] = *s1++;
491                 }
492             }
493             else
494             {
495                 char tmpbuf[80];
496                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
497                 
498                 strcpy(dst + i, map[0]);
499                 i += strlen(map[0]);
500             }
501         }
502     }
503     dst[i] = '\0';
504     dst_term[j] = '\0';
505     *src = s0;
506     
507     return i;
508 }
509
510 /* term_103: handle term, where trunc = re-1 (regular expressions) */
511 static int term_102(ZebraMaps zebra_maps, int reg_type, const char **src,
512                     char *dst, int space_split, char *dst_term)
513 {
514     return term_103(zebra_maps, reg_type, src, dst, NULL, space_split,
515                     dst_term);
516 }
517
518
519 /* term_104: handle term, where trunc = Process # and ! */
520 static int term_104(ZebraMaps zebra_maps, int reg_type,
521                     const char **src, char *dst, int space_split,
522                     char *dst_term)
523 {
524     const char *s0;
525     const char **map;
526     int i = 0;
527     int j = 0;
528
529     if (!term_pre(zebra_maps, reg_type, src, "?*#", "?*#", !space_split))
530         return 0;
531     s0 = *src;
532     while (*s0)
533     {
534         if (*s0 == '?')
535         {
536             dst_term[j++] = *s0++;
537             if (*s0 >= '0' && *s0 <= '9')
538             {
539                 int limit = 0;
540                 while (*s0 >= '0' && *s0 <= '9')
541                 {
542                     limit = limit * 10 + (*s0 - '0');
543                     dst_term[j++] = *s0++;
544                 }
545                 if (limit > 20)
546                     limit = 20;
547                 while (--limit >= 0)
548                 {
549                     dst[i++] = '.';
550                     dst[i++] = '?';
551                 }
552             }
553             else
554             {
555                 dst[i++] = '.';
556                 dst[i++] = '*';
557             }
558         }
559         else if (*s0 == '*')
560         {
561             dst[i++] = '.';
562             dst[i++] = '*';
563             dst_term[j++] = *s0++;
564         }
565         else if (*s0 == '#')
566         {
567             dst[i++] = '.';
568             dst_term[j++] = *s0++;
569         }
570         else
571         {
572             const char *s1 = s0;
573             int q_map_match = 0;
574             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
575                                     &q_map_match);
576             if (space_split && **map == *CHR_SPACE)
577                 break;
578
579             /* add non-space char */
580             memcpy(dst_term+j, s1, s0 - s1);
581             j += (s0 - s1);
582             if (!q_map_match)
583             {
584                 while (s1 < s0)
585                 {
586                     if (strchr(REGEX_CHARS, *s1))
587                         dst[i++] = '\\';
588                     dst[i++] = *s1++;
589                 }
590             }
591             else
592             {
593                 char tmpbuf[80];
594                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
595                 
596                 strcpy(dst + i, map[0]);
597                 i += strlen(map[0]);
598             }
599         }
600     }
601     dst[i] = '\0';
602     dst_term[j++] = '\0';
603     *src = s0;
604     return i;
605 }
606
607 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
608 static int term_105(ZebraMaps zebra_maps, int reg_type,
609                     const char **src, char *dst, int space_split,
610                     char *dst_term, int right_truncate)
611 {
612     const char *s0;
613     const char **map;
614     int i = 0;
615     int j = 0;
616
617     if (!term_pre(zebra_maps, reg_type, src, "*!", "*!", !space_split))
618         return 0;
619     s0 = *src;
620     while (*s0)
621     {
622         if (*s0 == '*')
623         {
624             dst[i++] = '.';
625             dst[i++] = '*';
626             dst_term[j++] = *s0++;
627         }
628         else if (*s0 == '!')
629         {
630             dst[i++] = '.';
631             dst_term[j++] = *s0++;
632         }
633         else
634         {
635             const char *s1 = s0;
636             int q_map_match = 0;
637             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
638                                     &q_map_match);
639             if (space_split && **map == *CHR_SPACE)
640                 break;
641
642             /* add non-space char */
643             memcpy(dst_term+j, s1, s0 - s1);
644             j += (s0 - s1);
645             if (!q_map_match)
646             {
647                 while (s1 < s0)
648                 {
649                     if (strchr(REGEX_CHARS, *s1))
650                         dst[i++] = '\\';
651                     dst[i++] = *s1++;
652                 }
653             }
654             else
655             {
656                 char tmpbuf[80];
657                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
658                 
659                 strcpy(dst + i, map[0]);
660                 i += strlen(map[0]);
661             }
662         }
663     }
664     if (right_truncate)
665     {
666         dst[i++] = '.';
667         dst[i++] = '*';
668     }
669     dst[i] = '\0';
670     
671     dst_term[j++] = '\0';
672     *src = s0;
673     return i;
674 }
675
676
/* gen_regular_rel - generate regular expression from relation
 *  dst:     receives the NUL-terminated expression; no bounds checks
 *           are made on it
 *  val:     border value (inclusive)
 *  islt:    1 if <=; 0 if >=.
 *
 * The expression is an alternation over decimal digit strings. For a
 * negative val the absolute value is used, the relation is inverted,
 * and a '-' is emitted in front of the inner group.
 */
static void gen_regular_rel(char *dst, int val, int islt)
{
    int dst_p;        /* write position in dst */
    int w, d, i;
    int pos = 0;      /* digits handled so far, counted from the right */
    char numstr[20];  /* decimal text of the (non-negative) val */

    yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
    if (val >= 0)
    {
        if (islt)
            /* every negative number is below a non-negative border */
            strcpy(dst, "(-[0-9]+|(");
        else
            strcpy(dst, "((");
    } 
    else
    {
        if (!islt)
        {
            /* >= negative border: every unsigned number, or a '-'
               followed by a magnitude <= |val| */
            strcpy(dst, "([0-9]+|-(");
            dst_p = strlen(dst);
            islt = 1;
        }
        else
        {
            /* <= negative border: '-' followed by a magnitude >= |val| */
            strcpy(dst, "(-(");
            islt = 0;
        }
        val = -val;
    }
    dst_p = strlen(dst);
    sprintf(numstr, "%d", val);
    /* walk the digits from least to most significant; each round that
       is not skipped appends one alternative followed by '|' */
    for (w = strlen(numstr); --w >= 0; pos++)
    {
        d = numstr[w];
        if (pos > 0)
        {
            /* above the last digit the class must exclude the digit
               itself; nothing to emit if no smaller/larger digit exists */
            if (islt)
            {
                if (d == '0')
                    continue;
                d--;
            } 
            else
            {
                if (d == '9')
                    continue;
                d++;
            }
        }
        
        /* write the whole number, then back up so that the digit at
           this position and everything after it get overwritten */
        strcpy(dst + dst_p, numstr);
        dst_p = strlen(dst) - pos - 1;

        if (islt)
        {
            if (d != '0')
            {
                dst[dst_p++] = '[';
                dst[dst_p++] = '0';
                dst[dst_p++] = '-';
                dst[dst_p++] = d;
                dst[dst_p++] = ']';
            }
            else
                dst[dst_p++] = d;
        }
        else
        {
            if (d != '9')
            { 
                dst[dst_p++] = '[';
                dst[dst_p++] = d;
                dst[dst_p++] = '-';
                dst[dst_p++] = '9';
                dst[dst_p++] = ']';
            }
            else
                dst[dst_p++] = d;
        }
        /* the lower positions may hold any digit */
        for (i = 0; i<pos; i++)
        {
            dst[dst_p++] = '[';
            dst[dst_p++] = '0';
            dst[dst_p++] = '-';
            dst[dst_p++] = '9';
            dst[dst_p++] = ']';
        }
        dst[dst_p++] = '|';
    }
    dst[dst_p] = '\0';
    /* here pos equals the number of digits in numstr */
    if (islt)
    {
        /* match everything less than 10^(pos-1) */
        strcat(dst, "0*");
        for (i = 1; i<pos; i++)
            strcat(dst, "[0-9]?");
    }
    else
    {
        /* match everything greater than 10^pos */
        for (i = 0; i <= pos; i++)
            strcat(dst, "[0-9]");
        strcat(dst, "[0-9]*");
    }
    strcat(dst, "))");
}
788
/* string_rel_add_char: copy one pattern character from src[*indx] to
   **term_p, advancing both. A backslash is copied together with the
   character that follows it, so an escaped character stays intact. */
void string_rel_add_char(char **term_p, const char *src, int *indx)
{
    char *out = *term_p;
    int pos = *indx;

    if (src[pos] == '\\')
        *out++ = src[pos++];
    *out++ = src[pos++];

    *term_p = out;
    *indx = pos;
}
795
/*
 * Patterns generated for the string relations, shown for the term abc.
 * The first line of each pair is the intent, the second line is the
 * form the code below emits:
 *
 *   >  abc     ([b-].*|a[c-].*|ab[d-].*|abc.+)
 *              ([^-a].*|a[^-b].*|ab[^-c].*|abc.+)
 *   >= abc     ([b-].*|a[c-].*|ab[c-].*)
 *              ([^-a].*|a[^-b].*|ab[c-].*)
 *   <  abc     ([-0].*|a[-a].*|ab[-b].*)
 *              ([^a-].*|a[^b-].*|ab[^c-].*)
 *   <= abc     ([-0].*|a[-a].*|ab[-b].*|abc)
 *              ([^a-].*|a[^b-].*|ab[^c-].*|abc)
 *
 * string_relation: append to term_dict the pattern for the next
 * component of the term under the relation attribute (type 2) of zapt.
 *  term_sub:     in/out position within the query term
 *  term_dict:    NUL-terminated pattern so far; the new part is appended
 *  attributeSet: not used by this function
 *  space_split:  passed on to term_100
 *  term_dst:     receives the raw term text (filled by term_100)
 *  error_code:   set to 0, or to YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE for
 *                an unsupported relation
 * Returns 1 if a pattern was appended; 0 if term_100 produced nothing or
 * the relation is unsupported.
 */
static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
                           const char **term_sub, char *term_dict,
                           oid_value attributeSet,
                           int reg_type, int space_split, char *term_dst,
                           int *error_code)
{
    AttrType relation;
    int relation_value;
    int i;
    char *term_tmp = term_dict + strlen(term_dict);  /* append position */
    char term_component[2*IT_MAX_WORD+20];  /* escaped term from term_100 */

    attr_init(&relation, zapt, 2);
    relation_value = attr_find(&relation, NULL);

    *error_code = 0;
    yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
    switch (relation_value)
    {
    case 1:
        if (!term_100(zh->reg->zebra_maps, reg_type,
                      term_sub, term_component,
                      space_split, term_dst))
            return 0;
        yaz_log(log_level_rpn, "Relation <");
        
        *term_tmp++ = '(';
        /* one alternative per character: the prefix before it, then
           "[^c-].*"; string_rel_add_char advances i */
        for (i = 0; term_component[i]; )
        {
            int j = 0;

            if (i)
                *term_tmp++ = '|';
            while (j < i)
                string_rel_add_char(&term_tmp, term_component, &j);

            *term_tmp++ = '[';

            *term_tmp++ = '^';
            string_rel_add_char(&term_tmp, term_component, &i);
            *term_tmp++ = '-';

            *term_tmp++ = ']';
            *term_tmp++ = '.';
            *term_tmp++ = '*';

            /* stop adding alternatives once the pattern gets too long */
            if ((term_tmp - term_dict) > IT_MAX_WORD)
                break;
        }
        *term_tmp++ = ')';
        *term_tmp = '\0';
        break;
    case 2:
        if (!term_100(zh->reg->zebra_maps, reg_type,
                      term_sub, term_component,
                      space_split, term_dst))
            return 0;
        yaz_log(log_level_rpn, "Relation <=");

        *term_tmp++ = '(';
        /* same alternatives as for <, each followed by '|' */
        for (i = 0; term_component[i]; )
        {
            int j = 0;

            while (j < i)
                string_rel_add_char(&term_tmp, term_component, &j);
            *term_tmp++ = '[';

            *term_tmp++ = '^';
            string_rel_add_char(&term_tmp, term_component, &i);
            *term_tmp++ = '-';

            *term_tmp++ = ']';
            *term_tmp++ = '.';
            *term_tmp++ = '*';

            *term_tmp++ = '|';

            if ((term_tmp - term_dict) > IT_MAX_WORD)
                break;
        }
        /* final alternative: the term itself */
        for (i = 0; term_component[i]; )
            string_rel_add_char(&term_tmp, term_component, &i);
        *term_tmp++ = ')';
        *term_tmp = '\0';
        break;
    case 5:
        if (!term_100 (zh->reg->zebra_maps, reg_type,
                       term_sub, term_component, space_split, term_dst))
            return 0;
        yaz_log(log_level_rpn, "Relation >");

        *term_tmp++ = '(';
        /* one alternative per character: prefix, then "[^-c].*|" */
        for (i = 0; term_component[i];)
        {
            int j = 0;

            while (j < i)
                string_rel_add_char(&term_tmp, term_component, &j);
            *term_tmp++ = '[';
            
            *term_tmp++ = '^';
            *term_tmp++ = '-';
            string_rel_add_char(&term_tmp, term_component, &i);

            *term_tmp++ = ']';
            *term_tmp++ = '.';
            *term_tmp++ = '*';

            *term_tmp++ = '|';

            if ((term_tmp - term_dict) > IT_MAX_WORD)
                break;
        }
        /* final alternative: the term followed by at least one more
           character */
        for (i = 0; term_component[i];)
            string_rel_add_char(&term_tmp, term_component, &i);
        *term_tmp++ = '.';
        *term_tmp++ = '+';
        *term_tmp++ = ')';
        *term_tmp = '\0';
        break;
    case 4:
        if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
                      term_component, space_split, term_dst))
            return 0;
        yaz_log(log_level_rpn, "Relation >=");

        *term_tmp++ = '(';
        for (i = 0; term_component[i];)
        {
            int j = 0;

            if (i)
                *term_tmp++ = '|';
            while (j < i)
                string_rel_add_char(&term_tmp, term_component, &j);
            *term_tmp++ = '[';

            /* "[^-c]" while another byte follows in term_component;
               "[c-]" for the last one */
            if (term_component[i+1])
            {
                *term_tmp++ = '^';
                *term_tmp++ = '-';
                string_rel_add_char(&term_tmp, term_component, &i);
            }
            else
            {
                string_rel_add_char(&term_tmp, term_component, &i);
                *term_tmp++ = '-';
            }
            *term_tmp++ = ']';
            *term_tmp++ = '.';
            *term_tmp++ = '*';

            if ((term_tmp - term_dict) > IT_MAX_WORD)
                break;
        }
        *term_tmp++ = ')';
        *term_tmp = '\0';
        break;
    case 3:
    case 102:
    case -1:
        /* -1 is what attr_find returns when no relation attribute is
           present */
        yaz_log(log_level_rpn, "Relation =");
        if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
                      term_component, space_split, term_dst))
            return 0;
        strcat(term_tmp, "(");
        strcat(term_tmp, term_component);
        strcat(term_tmp, ")");
        break;
    default:
        *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
        return 0;
    }
    return 1;
}
982
/* Forward declaration: string_term is defined after term_trunc, which
   calls it. */
static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
                             const char **term_sub, 
                             oid_value attributeSet, NMEM stream,
                             struct grep_info *grep_info,
                             int reg_type, int complete_flag,
                             int num_bases, char **basenames,
                             char *term_dst, int xpath_use,
                             struct ord_list **ol);
991
992 static ZEBRA_RES term_trunc(ZebraHandle zh,
993                             Z_AttributesPlusTerm *zapt,
994                             const char **term_sub, 
995                             oid_value attributeSet, NMEM stream,
996                             struct grep_info *grep_info,
997                             int reg_type, int complete_flag,
998                             int num_bases, char **basenames,
999                             char *term_dst,
1000                             const char *rank_type, int xpath_use,
1001                             NMEM rset_nmem,
1002                             RSET *rset,
1003                             struct rset_key_control *kc)
1004 {
1005     ZEBRA_RES res;
1006     struct ord_list *ol;
1007     *rset = 0;
1008     grep_info->isam_p_indx = 0;
1009     res = string_term(zh, zapt, term_sub, attributeSet, stream, grep_info,
1010                       reg_type, complete_flag, num_bases, basenames,
1011                       term_dst, xpath_use, &ol);
1012     if (res != ZEBRA_OK)
1013         return res;
1014     if (!*term_sub)  /* no more terms ? */
1015         return res;
1016     yaz_log(log_level_rpn, "term: %s", term_dst);
1017     *rset = rset_trunc(zh, grep_info->isam_p_buf,
1018                        grep_info->isam_p_indx, term_dst,
1019                        strlen(term_dst), rank_type, 1 /* preserve pos */,
1020                        zapt->term->which, rset_nmem,
1021                        kc, kc->scope, ol, reg_type);
1022     if (!*rset)
1023         return ZEBRA_FAIL;
1024     return ZEBRA_OK;
1025 }
1026
1027 static char *nmem_strdup_i(NMEM nmem, int v)
1028 {
1029     char val_str[64];
1030     sprintf(val_str, "%d", v);
1031     return nmem_strdup(nmem, val_str);
1032 }
1033
1034 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1035                              const char **term_sub, 
1036                              oid_value attributeSet, NMEM stream,
1037                              struct grep_info *grep_info,
1038                              int reg_type, int complete_flag,
1039                              int num_bases, char **basenames,
1040                              char *term_dst, int xpath_use,
1041                              struct ord_list **ol)
1042 {
1043     char term_dict[2*IT_MAX_WORD+4000];
1044     int j, r, base_no;
1045     AttrType truncation;
1046     int truncation_value;
1047     AttrType use;
1048     int use_value;
1049     const char *use_string = 0;
1050     oid_value curAttributeSet = attributeSet;
1051     const char *termp;
1052     struct rpn_char_map_info rcmi;
1053     int space_split = complete_flag ? 0 : 1;
1054
1055     int bases_ok = 0;     /* no of databases with OK attribute */
1056     int errCode = 0;      /* err code (if any is not OK) */
1057     char *errString = 0;  /* addinfo */
1058
1059
1060     *ol = ord_list_create(stream);
1061
1062     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1063     attr_init(&use, zapt, 1);
1064     use_value = attr_find_ex(&use, &curAttributeSet, &use_string);
1065     yaz_log(log_level_rpn, "string_term, use value %d", use_value);
1066     attr_init(&truncation, zapt, 5);
1067     truncation_value = attr_find(&truncation, NULL);
1068     yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1069
1070     if (use_value == -1)    /* no attribute - assumy "any" */
1071         use_value = 1016;
1072     for (base_no = 0; base_no < num_bases; base_no++)
1073     {
1074         int ord = -1;
1075         int attr_ok = 0;
1076         int regex_range = 0;
1077         int init_pos = 0;
1078         attent attp;
1079         data1_local_attribute id_xpath_attr;
1080         data1_local_attribute *local_attr;
1081         int max_pos, prefix_len = 0;
1082         int relation_error;
1083
1084         termp = *term_sub;
1085
1086         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1087         {
1088             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1089                            basenames[base_no]);
1090             return ZEBRA_FAIL;
1091         }
1092         if (xpath_use > 0 && use_value == -2) 
1093         {
1094             /* xpath mode and we have a string attribute */
1095             attp.local_attributes = &id_xpath_attr;
1096             attp.attset_ordinal = VAL_IDXPATH;
1097             id_xpath_attr.next = 0;
1098
1099             use_value = xpath_use;  /* xpath_use as use-attribute now */
1100             id_xpath_attr.local = use_value;
1101         }
1102         else if (curAttributeSet == VAL_IDXPATH && use_value >= 0)
1103         {
1104             /* X-Path attribute, use numeric value directly */
1105             attp.local_attributes = &id_xpath_attr;
1106             attp.attset_ordinal = VAL_IDXPATH;
1107             id_xpath_attr.next = 0;
1108             id_xpath_attr.local = use_value;
1109         }
1110         else if (use_string &&
1111                  (ord = zebraExplain_lookup_attr_str(zh->reg->zei,
1112                                                      use_string)) >= 0)
1113         {
1114             /* we have a match for a raw string attribute */
1115             char ord_buf[32];
1116             int i, ord_len;
1117
1118             if (prefix_len)
1119                 term_dict[prefix_len++] = '|';
1120             else
1121                 term_dict[prefix_len++] = '(';
1122             
1123             ord_len = key_SU_encode (ord, ord_buf);
1124             for (i = 0; i<ord_len; i++)
1125             {
1126                 term_dict[prefix_len++] = 1;
1127                 term_dict[prefix_len++] = ord_buf[i];
1128             }
1129             attp.local_attributes = 0;  /* no more attributes */
1130             *ol = ord_list_append(stream, *ol, ord);
1131         }
1132         else 
1133         {
1134             /* lookup in the .att files . Allow string as well */
1135             if ((r = att_getentbyatt (zh, &attp, curAttributeSet, use_value,
1136                                       use_string)))
1137             {
1138                 yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
1139                         curAttributeSet, use_value, r);
1140                 if (r == -1)
1141                 {
1142                     /* set was found, but value wasn't defined */
1143                     errCode = YAZ_BIB1_UNSUPP_USE_ATTRIBUTE;
1144                     if (use_string)
1145                         errString = nmem_strdup(stream, use_string);
1146                     else
1147                         errString = nmem_strdup_i (stream, use_value);
1148                 }
1149                 else
1150                 {
1151                     int oid[OID_SIZE];
1152                     struct oident oident;
1153                     
1154                     oident.proto = PROTO_Z3950;
1155                     oident.oclass = CLASS_ATTSET;
1156                     oident.value = curAttributeSet;
1157                     oid_ent_to_oid (&oident, oid);
1158                     
1159                     errCode = YAZ_BIB1_UNSUPP_ATTRIBUTE_SET;
1160                     errString = nmem_strdup(stream, oident.desc);
1161                 }
1162                 continue;
1163             }
1164         }
1165         for (local_attr = attp.local_attributes; local_attr;
1166              local_attr = local_attr->next)
1167         {
1168             char ord_buf[32];
1169             int i, ord_len;
1170             
1171             ord = zebraExplain_lookup_attr_su(zh->reg->zei,
1172                                               attp.attset_ordinal,
1173                                               local_attr->local);
1174             if (ord < 0)
1175                 continue;
1176             *ol = ord_list_append(stream, *ol, ord);
1177             if (prefix_len)
1178                 term_dict[prefix_len++] = '|';
1179             else
1180                 term_dict[prefix_len++] = '(';
1181             
1182             ord_len = key_SU_encode (ord, ord_buf);
1183             for (i = 0; i<ord_len; i++)
1184             {
1185                 term_dict[prefix_len++] = 1;
1186                 term_dict[prefix_len++] = ord_buf[i];
1187             }
1188         }
1189         bases_ok++;
1190         if (prefix_len)
1191             attr_ok = 1;
1192
1193         term_dict[prefix_len++] = ')';
1194         term_dict[prefix_len++] = 1;
1195         term_dict[prefix_len++] = reg_type;
1196         yaz_log(log_level_rpn, "reg_type = %d", term_dict[prefix_len-1]);
1197         term_dict[prefix_len] = '\0';
1198         j = prefix_len;
1199         switch (truncation_value)
1200         {
1201         case -1:         /* not specified */
1202         case 100:        /* do not truncate */
1203             if (!string_relation (zh, zapt, &termp, term_dict,
1204                                   attributeSet,
1205                                   reg_type, space_split, term_dst,
1206                                   &relation_error))
1207             {
1208                 if (relation_error)
1209                 {
1210                     zebra_setError(zh, relation_error, 0);
1211                     return ZEBRA_FAIL;
1212                 }
1213                 *term_sub = 0;
1214                 return ZEBRA_OK;
1215             }
1216             break;
1217         case 1:          /* right truncation */
1218             term_dict[j++] = '(';
1219             if (!term_100(zh->reg->zebra_maps, reg_type,
1220                           &termp, term_dict + j, space_split, term_dst))
1221             {
1222                 *term_sub = 0;
1223                 return ZEBRA_OK;
1224             }
1225             strcat(term_dict, ".*)");
1226             break;
1227         case 2:          /* keft truncation */
1228             term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1229             if (!term_100(zh->reg->zebra_maps, reg_type,
1230                           &termp, term_dict + j, space_split, term_dst))
1231             {
1232                 *term_sub = 0;
1233                 return ZEBRA_OK;
1234             }
1235             strcat(term_dict, ")");
1236             break;
1237         case 3:          /* left&right truncation */
1238             term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1239             if (!term_100(zh->reg->zebra_maps, reg_type,
1240                           &termp, term_dict + j, space_split, term_dst))
1241             {
1242                 *term_sub = 0;
1243                 return ZEBRA_OK;
1244             }
1245             strcat(term_dict, ".*)");
1246             break;
1247         case 101:        /* process # in term */
1248             term_dict[j++] = '(';
1249             if (!term_101(zh->reg->zebra_maps, reg_type,
1250                           &termp, term_dict + j, space_split, term_dst))
1251             {
1252                 *term_sub = 0;
1253                 return ZEBRA_OK;
1254             }
1255             strcat(term_dict, ")");
1256             break;
1257         case 102:        /* Regexp-1 */
1258             term_dict[j++] = '(';
1259             if (!term_102(zh->reg->zebra_maps, reg_type,
1260                           &termp, term_dict + j, space_split, term_dst))
1261             {
1262                 *term_sub = 0;
1263                 return ZEBRA_OK;
1264             }
1265             strcat(term_dict, ")");
1266             break;
1267         case 103:       /* Regexp-2 */
1268             regex_range = 1;
1269             term_dict[j++] = '(';
1270             init_pos = 2;
1271             if (!term_103(zh->reg->zebra_maps, reg_type,
1272                           &termp, term_dict + j, &regex_range,
1273                           space_split, term_dst))
1274             {
1275                 *term_sub = 0;
1276                 return ZEBRA_OK;
1277             }
1278             strcat(term_dict, ")");
1279             break;
1280         case 104:        /* process # and ! in term */
1281             term_dict[j++] = '(';
1282             if (!term_104(zh->reg->zebra_maps, reg_type,
1283                           &termp, term_dict + j, space_split, term_dst))
1284             {
1285                 *term_sub = 0;
1286                 return ZEBRA_OK;
1287             }
1288             strcat(term_dict, ")");
1289             break;
1290         case 105:        /* process * and ! in term */
1291             term_dict[j++] = '(';
1292             if (!term_105(zh->reg->zebra_maps, reg_type,
1293                           &termp, term_dict + j, space_split, term_dst, 1))
1294             {
1295                 *term_sub = 0;
1296                 return ZEBRA_OK;
1297             }
1298             strcat(term_dict, ")");
1299             break;
1300         case 106:        /* process * and ! in term */
1301             term_dict[j++] = '(';
1302             if (!term_105(zh->reg->zebra_maps, reg_type,
1303                           &termp, term_dict + j, space_split, term_dst, 0))
1304             {
1305                 *term_sub = 0;
1306                 return ZEBRA_OK;
1307             }
1308             strcat(term_dict, ")");
1309             break;
1310         default:
1311             zebra_setError_zint(zh,
1312                                 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1313                                 truncation_value);
1314             return ZEBRA_FAIL;
1315         }
1316         if (attr_ok)
1317         {
1318             char buf[80];
1319             const char *input = term_dict + prefix_len;
1320             esc_str(buf, sizeof(buf), input, strlen(input));
1321         }
1322         if (attr_ok)
1323         {
1324             yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_dict+prefix_len);
1325             r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
1326                                  grep_info, &max_pos, init_pos,
1327                                  grep_handle);
1328             if (r)
1329                 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1330         }
1331     }
1332     if (!bases_ok)
1333     {
1334         zebra_setError(zh, errCode, errString);
1335         return ZEBRA_FAIL;
1336     }
1337     *term_sub = termp;
1338     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1339     return ZEBRA_OK;
1340 }
1341
1342
1343 /* convert APT search term to UTF8 */
1344 static ZEBRA_RES zapt_term_to_utf8(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1345                                    char *termz)
1346 {
1347     size_t sizez;
1348     Z_Term *term = zapt->term;
1349
1350     switch (term->which)
1351     {
1352     case Z_Term_general:
1353         if (zh->iconv_to_utf8 != 0)
1354         {
1355             char *inbuf = term->u.general->buf;
1356             size_t inleft = term->u.general->len;
1357             char *outbuf = termz;
1358             size_t outleft = IT_MAX_WORD-1;
1359             size_t ret;
1360
1361             ret = yaz_iconv(zh->iconv_to_utf8, &inbuf, &inleft,
1362                         &outbuf, &outleft);
1363             if (ret == (size_t)(-1))
1364             {
1365                 ret = yaz_iconv(zh->iconv_to_utf8, 0, 0, 0, 0);
1366                 zebra_setError(
1367                     zh, 
1368                     YAZ_BIB1_QUERY_TERM_INCLUDES_CHARS_THAT_DO_NOT_TRANSLATE_INTO_,
1369                     0);
1370                 return ZEBRA_FAIL;
1371             }
1372             *outbuf = 0;
1373         }
1374         else
1375         {
1376             sizez = term->u.general->len;
1377             if (sizez > IT_MAX_WORD-1)
1378                 sizez = IT_MAX_WORD-1;
1379             memcpy (termz, term->u.general->buf, sizez);
1380             termz[sizez] = '\0';
1381         }
1382         break;
1383     case Z_Term_characterString:
1384         sizez = strlen(term->u.characterString);
1385         if (sizez > IT_MAX_WORD-1)
1386             sizez = IT_MAX_WORD-1;
1387         memcpy (termz, term->u.characterString, sizez);
1388         termz[sizez] = '\0';
1389         break;
1390     default:
1391         zebra_setError(zh, YAZ_BIB1_UNSUPP_CODED_VALUE_FOR_TERM, 0);
1392         return ZEBRA_FAIL;
1393     }
1394     return ZEBRA_OK;
1395 }
1396
1397 /* convert APT SCAN term to internal cmap */
1398 static ZEBRA_RES trans_scan_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1399                                  char *termz, int reg_type)
1400 {
1401     char termz0[IT_MAX_WORD];
1402
1403     if (zapt_term_to_utf8(zh, zapt, termz0) == ZEBRA_FAIL)
1404         return ZEBRA_FAIL;    /* error */
1405     else
1406     {
1407         const char **map;
1408         const char *cp = (const char *) termz0;
1409         const char *cp_end = cp + strlen(cp);
1410         const char *src;
1411         int i = 0;
1412         const char *space_map = NULL;
1413         int len;
1414             
1415         while ((len = (cp_end - cp)) > 0)
1416         {
1417             map = zebra_maps_input(zh->reg->zebra_maps, reg_type, &cp, len, 0);
1418             if (**map == *CHR_SPACE)
1419                 space_map = *map;
1420             else
1421             {
1422                 if (i && space_map)
1423                     for (src = space_map; *src; src++)
1424                         termz[i++] = *src;
1425                 space_map = NULL;
1426                 for (src = *map; *src; src++)
1427                     termz[i++] = *src;
1428             }
1429         }
1430         termz[i] = '\0';
1431     }
1432     return ZEBRA_OK;
1433 }
1434
1435 char *normalize_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1436                      const char *termz, NMEM stream, unsigned reg_id)
1437 {
1438     WRBUF wrbuf = 0;
1439     AttrType truncation;
1440     int truncation_value;
1441     char *ex_list = 0;
1442
1443     attr_init(&truncation, zapt, 5);
1444     truncation_value = attr_find(&truncation, NULL);
1445
1446     switch (truncation_value)
1447     {
1448     default:
1449         ex_list = "";
1450         break;
1451     case 101:
1452         ex_list = "#";
1453         break;
1454     case 102:
1455     case 103:
1456         ex_list = 0;
1457         break;
1458     case 104:
1459         ex_list = "!#";
1460         break;
1461     case 105:
1462         ex_list = "!*";
1463         break;
1464     }
1465     if (ex_list)
1466         wrbuf = zebra_replace(zh->reg->zebra_maps, reg_id, ex_list,
1467                               termz, strlen(termz));
1468     if (!wrbuf)
1469         return nmem_strdup(stream, termz);
1470     else
1471     {
1472         char *buf = (char*) nmem_malloc(stream, wrbuf_len(wrbuf)+1);
1473         memcpy (buf, wrbuf_buf(wrbuf), wrbuf_len(wrbuf));
1474         buf[wrbuf_len(wrbuf)] = '\0';
1475         return buf;
1476     }
1477 }
1478
1479 static void grep_info_delete(struct grep_info *grep_info)
1480 {
1481 #ifdef TERM_COUNT
1482     xfree(grep_info->term_no);
1483 #endif
1484     xfree(grep_info->isam_p_buf);
1485 }
1486
1487 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1488                                    Z_AttributesPlusTerm *zapt,
1489                                    struct grep_info *grep_info,
1490                                    int reg_type)
1491 {
1492     AttrType termset;
1493     int termset_value_numeric;
1494     const char *termset_value_string;
1495
1496 #ifdef TERM_COUNT
1497     grep_info->term_no = 0;
1498 #endif
1499     grep_info->isam_p_size = 0;
1500     grep_info->isam_p_buf = NULL;
1501     grep_info->zh = zh;
1502     grep_info->reg_type = reg_type;
1503     grep_info->termset = 0;
1504
1505     if (!zapt)
1506         return ZEBRA_OK;
1507     attr_init(&termset, zapt, 8);
1508     termset_value_numeric =
1509         attr_find_ex(&termset, NULL, &termset_value_string);
1510     if (termset_value_numeric != -1)
1511     {
1512         char resname[32];
1513         const char *termset_name = 0;
1514         if (termset_value_numeric != -2)
1515         {
1516     
1517             sprintf(resname, "%d", termset_value_numeric);
1518             termset_name = resname;
1519         }
1520         else
1521             termset_name = termset_value_string;
1522         yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1523         grep_info->termset = resultSetAdd(zh, termset_name, 1);
1524         if (!grep_info->termset)
1525         {
1526             zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1527             return ZEBRA_FAIL;
1528         }
1529     }
1530     return ZEBRA_OK;
1531 }
1532                                
1533 /**
1534   \brief Create result set(s) for list of terms
1535   \param zh Zebra Handle
1536   \param termz_org term as used in query but converted to UTF-8
1537   \param attributeSet default attribute set
1538   \param stream memory for result
1539   \param reg_type register type ('w', 'p',..)
1540   \param complete_flag whether it's phrases or not
1541   \param rank_type term flags for ranking
1542   \param xpath_use use attribute for X-Path (-1 for no X-path)
1543   \param num_bases number of databases
1544   \param basenames array of databases
1545   \param rset_mem memory for result sets
1546   \param result_sets output result set for each term in list (output)
1547   \param number number of output result sets
1548   \param kc rset key control to be used for created result sets
1549 */
1550 static ZEBRA_RES term_list_trunc(ZebraHandle zh,
1551                                  Z_AttributesPlusTerm *zapt,
1552                                  const char *termz_org,
1553                                  oid_value attributeSet,
1554                                  NMEM stream,
1555                                  int reg_type, int complete_flag,
1556                                  const char *rank_type, int xpath_use,
1557                                  int num_bases, char **basenames, 
1558                                  NMEM rset_nmem,
1559                                  RSET **result_sets, int *num_result_sets,
1560                                  struct rset_key_control *kc)
1561 {
1562     char term_dst[IT_MAX_WORD+1];
1563     struct grep_info grep_info;
1564     char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type);
1565     const char *termp = termz;
1566     int alloc_sets = 0;
1567
1568     *num_result_sets = 0;
1569     *term_dst = 0;
1570     if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1571         return ZEBRA_FAIL;
1572     while(1)
1573     { 
1574         ZEBRA_RES res;
1575
1576         if (alloc_sets == *num_result_sets)
1577         {
1578             int add = 10;
1579             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1580                                               sizeof(*rnew));
1581             if (alloc_sets)
1582                 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1583             alloc_sets = alloc_sets + add;
1584             *result_sets = rnew;
1585         }
1586         res = term_trunc(zh, zapt, &termp, attributeSet,
1587                          stream, &grep_info,
1588                          reg_type, complete_flag,
1589                          num_bases, basenames,
1590                          term_dst, rank_type,
1591                          xpath_use, rset_nmem,
1592                          &(*result_sets)[*num_result_sets],
1593                          kc);
1594         if (res != ZEBRA_OK)
1595         {
1596             int i;
1597             for (i = 0; i < *num_result_sets; i++)
1598                 rset_delete((*result_sets)[i]);
1599             grep_info_delete (&grep_info);
1600             return res;
1601         }
1602         if ((*result_sets)[*num_result_sets] == 0)
1603             break;
1604         (*num_result_sets)++;
1605     }
1606     grep_info_delete(&grep_info);
1607     return ZEBRA_OK;
1608 }
1609
1610 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1611                                        Z_AttributesPlusTerm *zapt,
1612                                        const char *termz_org,
1613                                        oid_value attributeSet,
1614                                        NMEM stream,
1615                                        int reg_type, int complete_flag,
1616                                        const char *rank_type, int xpath_use,
1617                                        int num_bases, char **basenames, 
1618                                        NMEM rset_nmem,
1619                                        RSET *rset,
1620                                        struct rset_key_control *kc)
1621 {
1622     RSET *result_sets = 0;
1623     int num_result_sets = 0;
1624     ZEBRA_RES res =
1625         term_list_trunc(zh, zapt, termz_org, attributeSet,
1626                         stream, reg_type, complete_flag,
1627                         rank_type, xpath_use,
1628                         num_bases, basenames,
1629                         rset_nmem,
1630                         &result_sets, &num_result_sets, kc);
1631     if (res != ZEBRA_OK)
1632         return res;
1633     if (num_result_sets == 0)
1634         *rset = rsnull_create (rset_nmem, kc, 0); 
1635     else if (num_result_sets == 1)
1636         *rset = result_sets[0];
1637     else
1638         *rset = rsprox_create(rset_nmem, kc, kc->scope,
1639                               num_result_sets, result_sets,
1640                               1 /* ordered */, 0 /* exclusion */,
1641                               3 /* relation */, 1 /* distance */);
1642     if (!*rset)
1643         return ZEBRA_FAIL;
1644     return ZEBRA_OK;
1645 }
1646
1647 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1648                                         Z_AttributesPlusTerm *zapt,
1649                                         const char *termz_org,
1650                                         oid_value attributeSet,
1651                                         NMEM stream,
1652                                         int reg_type, int complete_flag,
1653                                         const char *rank_type,
1654                                         int xpath_use,
1655                                         int num_bases, char **basenames,
1656                                         NMEM rset_nmem,
1657                                         RSET *rset,
1658                                         struct rset_key_control *kc)
1659 {
1660     RSET *result_sets = 0;
1661     int num_result_sets = 0;
1662     ZEBRA_RES res =
1663         term_list_trunc(zh, zapt, termz_org, attributeSet,
1664                         stream, reg_type, complete_flag,
1665                         rank_type, xpath_use,
1666                         num_bases, basenames,
1667                         rset_nmem,
1668                         &result_sets, &num_result_sets, kc);
1669     if (res != ZEBRA_OK)
1670         return res;
1671     if (num_result_sets == 0)
1672         *rset = rsnull_create (rset_nmem, kc, 0); 
1673     else if (num_result_sets == 1)
1674         *rset = result_sets[0];
1675     else
1676         *rset = rsmulti_or_create(rset_nmem, kc, kc->scope, 0 /* termid */,
1677                                   num_result_sets, result_sets);
1678     if (!*rset)
1679         return ZEBRA_FAIL;
1680     return ZEBRA_OK;
1681 }
1682
1683 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1684                                          Z_AttributesPlusTerm *zapt,
1685                                          const char *termz_org,
1686                                          oid_value attributeSet,
1687                                          NMEM stream,
1688                                          int reg_type, int complete_flag,
1689                                          const char *rank_type, 
1690                                          int xpath_use,
1691                                          int num_bases, char **basenames,
1692                                          NMEM rset_nmem,
1693                                          RSET *rset,
1694                                          struct rset_key_control *kc)
1695 {
1696     RSET *result_sets = 0;
1697     int num_result_sets = 0;
1698     ZEBRA_RES res =
1699         term_list_trunc(zh, zapt, termz_org, attributeSet,
1700                         stream, reg_type, complete_flag,
1701                         rank_type, xpath_use,
1702                         num_bases, basenames,
1703                         rset_nmem,
1704                         &result_sets, &num_result_sets,
1705                         kc);
1706     if (res != ZEBRA_OK)
1707         return res;
1708     if (num_result_sets == 0)
1709         *rset = rsnull_create (rset_nmem, kc, 0); 
1710     else if (num_result_sets == 1)
1711         *rset = result_sets[0];
1712     else
1713         *rset = rsmulti_and_create(rset_nmem, kc, kc->scope,
1714                                    num_result_sets, result_sets);
1715     if (!*rset)
1716         return ZEBRA_FAIL;
1717     return ZEBRA_OK;
1718 }
1719
1720 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1721                             const char **term_sub,
1722                             char *term_dict,
1723                             oid_value attributeSet,
1724                             struct grep_info *grep_info,
1725                             int *max_pos,
1726                             int reg_type,
1727                             char *term_dst,
1728                             int *error_code)
1729 {
1730     AttrType relation;
1731     int relation_value;
1732     int term_value;
1733     int r;
1734     char *term_tmp = term_dict + strlen(term_dict);
1735
1736     *error_code = 0;
1737     attr_init(&relation, zapt, 2);
1738     relation_value = attr_find(&relation, NULL);
1739
1740     yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1741
1742     if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1743                   term_dst))
1744         return 0;
1745     term_value = atoi (term_tmp);
1746     switch (relation_value)
1747     {
1748     case 1:
1749         yaz_log(log_level_rpn, "Relation <");
1750         gen_regular_rel(term_tmp, term_value-1, 1);
1751         break;
1752     case 2:
1753         yaz_log(log_level_rpn, "Relation <=");
1754         gen_regular_rel(term_tmp, term_value, 1);
1755         break;
1756     case 4:
1757         yaz_log(log_level_rpn, "Relation >=");
1758         gen_regular_rel(term_tmp, term_value, 0);
1759         break;
1760     case 5:
1761         yaz_log(log_level_rpn, "Relation >");
1762         gen_regular_rel(term_tmp, term_value+1, 0);
1763         break;
1764     case -1:
1765     case 3:
1766         yaz_log(log_level_rpn, "Relation =");
1767         sprintf(term_tmp, "(0*%d)", term_value);
1768         break;
1769     default:
1770         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1771         return 0;
1772     }
1773     yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_tmp);
1774     r = dict_lookup_grep(zh->reg->dict, term_dict, 0, grep_info, max_pos,
1775                           0, grep_handle);
1776     if (r)
1777         yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1778     yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1779     return 1;
1780 }
1781
1782 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1783                               const char **term_sub, 
1784                               oid_value attributeSet,
1785                               struct grep_info *grep_info,
1786                               int reg_type, int complete_flag,
1787                               int num_bases, char **basenames,
1788                               char *term_dst, int xpath_use, NMEM stream)
1789 {
1790     char term_dict[2*IT_MAX_WORD+2];
1791     int r, base_no;
1792     AttrType use;
1793     int use_value;
1794     const char *use_string = 0;
1795     oid_value curAttributeSet = attributeSet;
1796     const char *termp;
1797     struct rpn_char_map_info rcmi;
1798
1799     int bases_ok = 0;     /* no of databases with OK attribute */
1800     int errCode = 0;      /* err code (if any is not OK) */
1801     char *errString = 0;  /* addinfo */
1802
1803     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1804     attr_init(&use, zapt, 1);
1805     use_value = attr_find_ex(&use, &curAttributeSet, &use_string);
1806
1807     if (use_value == -1)
1808         use_value = 1016;
1809
1810     for (base_no = 0; base_no < num_bases; base_no++)
1811     {
1812         attent attp;
1813         data1_local_attribute id_xpath_attr;
1814         data1_local_attribute *local_attr;
1815         int max_pos, prefix_len = 0;
1816         int relation_error = 0;
1817
1818         termp = *term_sub;
1819         if (use_value == -2)  /* string attribute (assume IDXPATH/any) */
1820         {
1821             use_value = xpath_use;
1822             attp.local_attributes = &id_xpath_attr;
1823             attp.attset_ordinal = VAL_IDXPATH;
1824             id_xpath_attr.next = 0;
1825             id_xpath_attr.local = use_value;
1826         }
1827         else if (curAttributeSet == VAL_IDXPATH)
1828         {
1829             attp.local_attributes = &id_xpath_attr;
1830             attp.attset_ordinal = VAL_IDXPATH;
1831             id_xpath_attr.next = 0;
1832             id_xpath_attr.local = use_value;
1833         }
1834         else
1835         {
1836             if ((r = att_getentbyatt (zh, &attp, curAttributeSet, use_value,
1837                                             use_string)))
1838             {
1839                 yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
1840                       curAttributeSet, use_value, r);
1841                 if (r == -1)
1842                 {
1843                     errCode = YAZ_BIB1_UNSUPP_USE_ATTRIBUTE;
1844                     if (use_string)
1845                         errString = nmem_strdup(stream, use_string);
1846                     else
1847                         errString = nmem_strdup_i (stream, use_value);
1848                 }
1849                 else
1850                     errCode = YAZ_BIB1_UNSUPP_ATTRIBUTE_SET;
1851                 continue;
1852             }
1853         }
1854         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1855         {
1856             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1857                            basenames[base_no]);
1858             return ZEBRA_FAIL;
1859         }
1860         for (local_attr = attp.local_attributes; local_attr;
1861              local_attr = local_attr->next)
1862         {
1863             int ord;
1864             char ord_buf[32];
1865             int i, ord_len;
1866
1867             ord = zebraExplain_lookup_attr_su(zh->reg->zei,
1868                                               attp.attset_ordinal,
1869                                               local_attr->local);
1870             if (ord < 0)
1871                 continue;
1872             if (prefix_len)
1873                 term_dict[prefix_len++] = '|';
1874             else
1875                 term_dict[prefix_len++] = '(';
1876
1877             ord_len = key_SU_encode (ord, ord_buf);
1878             for (i = 0; i<ord_len; i++)
1879             {
1880                 term_dict[prefix_len++] = 1;
1881                 term_dict[prefix_len++] = ord_buf[i];
1882             }
1883         }
1884         if (!prefix_len)
1885         {
1886             zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_USE_ATTRIBUTE, use_value);
1887             continue;
1888         }
1889         bases_ok++;
1890         term_dict[prefix_len++] = ')';        
1891         term_dict[prefix_len++] = 1;
1892         term_dict[prefix_len++] = reg_type;
1893         yaz_log(YLOG_DEBUG, "reg_type = %d", term_dict[prefix_len-1]);
1894         term_dict[prefix_len] = '\0';
1895         if (!numeric_relation(zh, zapt, &termp, term_dict,
1896                               attributeSet, grep_info, &max_pos, reg_type,
1897                               term_dst, &relation_error))
1898         {
1899             if (relation_error)
1900             {
1901                 zebra_setError(zh, relation_error, 0);
1902                 return ZEBRA_FAIL;
1903             }
1904             *term_sub = 0;
1905             return ZEBRA_OK;
1906         }
1907     }
1908     if (!bases_ok)
1909     {
1910         zebra_setError(zh, errCode, errString);
1911         return ZEBRA_FAIL;
1912     }
1913     *term_sub = termp;
1914     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1915     return ZEBRA_OK;
1916 }
1917
1918 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1919                                         Z_AttributesPlusTerm *zapt,
1920                                         const char *termz,
1921                                         oid_value attributeSet,
1922                                         NMEM stream,
1923                                         int reg_type, int complete_flag,
1924                                         const char *rank_type, int xpath_use,
1925                                         int num_bases, char **basenames,
1926                                         NMEM rset_nmem,
1927                                         RSET *rset,
1928                                         struct rset_key_control *kc)
1929 {
1930     char term_dst[IT_MAX_WORD+1];
1931     const char *termp = termz;
1932     RSET *result_sets = 0;
1933     int num_result_sets = 0;
1934     ZEBRA_RES res;
1935     struct grep_info grep_info;
1936     int alloc_sets = 0;
1937
1938     yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1939     if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1940         return ZEBRA_FAIL;
1941     while (1)
1942     { 
1943         if (alloc_sets == num_result_sets)
1944         {
1945             int add = 10;
1946             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1947                                               sizeof(*rnew));
1948             if (alloc_sets)
1949                 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1950             alloc_sets = alloc_sets + add;
1951             result_sets = rnew;
1952         }
1953         yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1954         grep_info.isam_p_indx = 0;
1955         res = numeric_term(zh, zapt, &termp, attributeSet, &grep_info,
1956                            reg_type, complete_flag, num_bases, basenames,
1957                            term_dst, xpath_use,
1958                            stream);
1959         if (res == ZEBRA_FAIL || termp == 0)
1960             break;
1961         yaz_log(YLOG_DEBUG, "term: %s", term_dst);
1962         result_sets[num_result_sets] =
1963             rset_trunc(zh, grep_info.isam_p_buf,
1964                        grep_info.isam_p_indx, term_dst,
1965                        strlen(term_dst), rank_type,
1966                        0 /* preserve position */,
1967                        zapt->term->which, rset_nmem, 
1968                        kc, kc->scope, 0, reg_type);
1969         if (!result_sets[num_result_sets])
1970             break;
1971         num_result_sets++;
1972     }
1973     grep_info_delete(&grep_info);
1974     if (termp)
1975     {
1976         int i;
1977         for (i = 0; i<num_result_sets; i++)
1978             rset_delete(result_sets[i]);
1979         return ZEBRA_FAIL;
1980     }
1981     if (num_result_sets == 0)
1982         *rset = rsnull_create(rset_nmem, kc, 0);
1983     if (num_result_sets == 1)
1984         *rset = result_sets[0];
1985     else
1986         *rset = rsmulti_and_create(rset_nmem, kc, kc->scope,
1987                                    num_result_sets, result_sets);
1988     if (!*rset)
1989         return ZEBRA_FAIL;
1990     return ZEBRA_OK;
1991 }
1992
1993 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1994                                       Z_AttributesPlusTerm *zapt,
1995                                       const char *termz,
1996                                       oid_value attributeSet,
1997                                       NMEM stream,
1998                                       const char *rank_type, NMEM rset_nmem,
1999                                       RSET *rset,
2000                                       struct rset_key_control *kc)
2001 {
2002     RSFD rsfd;
2003     struct it_key key;
2004     int sys;
2005     *rset = rstemp_create(rset_nmem, kc, kc->scope,
2006                           res_get (zh->res, "setTmpDir"),0 );
2007     rsfd = rset_open(*rset, RSETF_WRITE);
2008     
2009     sys = atoi(termz);
2010     if (sys <= 0)
2011         sys = 1;
2012     key.mem[0] = sys;
2013     key.mem[1] = 1;
2014     key.len = 2;
2015     rset_write (rsfd, &key);
2016     rset_close (rsfd);
2017     return ZEBRA_OK;
2018 }
2019
2020 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2021                                oid_value attributeSet, NMEM stream,
2022                                Z_SortKeySpecList *sort_sequence,
2023                                const char *rank_type,
2024                                NMEM rset_nmem,
2025                                RSET *rset,
2026                                struct rset_key_control *kc)
2027 {
2028     int i;
2029     int sort_relation_value;
2030     AttrType sort_relation_type;
2031     Z_SortKeySpec *sks;
2032     Z_SortKey *sk;
2033     int oid[OID_SIZE];
2034     oident oe;
2035     char termz[20];
2036     
2037     attr_init(&sort_relation_type, zapt, 7);
2038     sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
2039
2040     if (!sort_sequence->specs)
2041     {
2042         sort_sequence->num_specs = 10;
2043         sort_sequence->specs = (Z_SortKeySpec **)
2044             nmem_malloc(stream, sort_sequence->num_specs *
2045                          sizeof(*sort_sequence->specs));
2046         for (i = 0; i<sort_sequence->num_specs; i++)
2047             sort_sequence->specs[i] = 0;
2048     }
2049     if (zapt->term->which != Z_Term_general)
2050         i = 0;
2051     else
2052         i = atoi_n ((char *) zapt->term->u.general->buf,
2053                     zapt->term->u.general->len);
2054     if (i >= sort_sequence->num_specs)
2055         i = 0;
2056     sprintf(termz, "%d", i);
2057
2058     oe.proto = PROTO_Z3950;
2059     oe.oclass = CLASS_ATTSET;
2060     oe.value = attributeSet;
2061     if (!oid_ent_to_oid (&oe, oid))
2062         return ZEBRA_FAIL;
2063
2064     sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
2065     sks->sortElement = (Z_SortElement *)
2066         nmem_malloc(stream, sizeof(*sks->sortElement));
2067     sks->sortElement->which = Z_SortElement_generic;
2068     sk = sks->sortElement->u.generic = (Z_SortKey *)
2069         nmem_malloc(stream, sizeof(*sk));
2070     sk->which = Z_SortKey_sortAttributes;
2071     sk->u.sortAttributes = (Z_SortAttributes *)
2072         nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
2073
2074     sk->u.sortAttributes->id = oid;
2075     sk->u.sortAttributes->list = zapt->attributes;
2076
2077     sks->sortRelation = (int *)
2078         nmem_malloc(stream, sizeof(*sks->sortRelation));
2079     if (sort_relation_value == 1)
2080         *sks->sortRelation = Z_SortKeySpec_ascending;
2081     else if (sort_relation_value == 2)
2082         *sks->sortRelation = Z_SortKeySpec_descending;
2083     else 
2084         *sks->sortRelation = Z_SortKeySpec_ascending;
2085
2086     sks->caseSensitivity = (int *)
2087         nmem_malloc(stream, sizeof(*sks->caseSensitivity));
2088     *sks->caseSensitivity = 0;
2089
2090     sks->which = Z_SortKeySpec_null;
2091     sks->u.null = odr_nullval ();
2092     sort_sequence->specs[i] = sks;
2093     *rset = rsnull_create (rset_nmem, kc, 0);
2094     return ZEBRA_OK;
2095 }
2096
2097
2098 static int parse_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2099                        oid_value attributeSet,
2100                        struct xpath_location_step *xpath, int max, NMEM mem)
2101 {
2102     oid_value curAttributeSet = attributeSet;
2103     AttrType use;
2104     const char *use_string = 0;
2105     
2106     attr_init(&use, zapt, 1);
2107     attr_find_ex(&use, &curAttributeSet, &use_string);
2108
2109     if (!use_string || *use_string != '/')
2110         return -1;
2111
2112     return zebra_parse_xpath_str(use_string, xpath, max, mem);
2113 }
2114  
2115                
2116
2117 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
2118                         int reg_type, const char *term, int use,
2119                         oid_value curAttributeSet, NMEM rset_nmem,
2120                         struct rset_key_control *kc)
2121 {
2122     RSET rset;
2123     struct grep_info grep_info;
2124     char term_dict[2048];
2125     char ord_buf[32];
2126     int prefix_len = 0;
2127     int ord = zebraExplain_lookup_attr_su(zh->reg->zei, curAttributeSet, use);
2128     int ord_len, i, r, max_pos;
2129     int term_type = Z_Term_characterString;
2130     const char *flags = "void";
2131
2132     if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, '0') == ZEBRA_FAIL)
2133         return rsnull_create(rset_nmem, kc, 0);
2134     
2135     if (ord < 0)
2136         return rsnull_create(rset_nmem, kc, 0);
2137     if (prefix_len)
2138         term_dict[prefix_len++] = '|';
2139     else
2140         term_dict[prefix_len++] = '(';
2141     
2142     ord_len = key_SU_encode (ord, ord_buf);
2143     for (i = 0; i<ord_len; i++)
2144     {
2145         term_dict[prefix_len++] = 1;
2146         term_dict[prefix_len++] = ord_buf[i];
2147     }
2148     term_dict[prefix_len++] = ')';
2149     term_dict[prefix_len++] = 1;
2150     term_dict[prefix_len++] = reg_type;
2151     
2152     strcpy(term_dict+prefix_len, term);
2153     
2154     grep_info.isam_p_indx = 0;
2155     r = dict_lookup_grep(zh->reg->dict, term_dict, 0,
2156                           &grep_info, &max_pos, 0, grep_handle);
2157     yaz_log(YLOG_DEBUG, "%s %d positions", term,
2158              grep_info.isam_p_indx);
2159     rset = rset_trunc(zh, grep_info.isam_p_buf,
2160                       grep_info.isam_p_indx, term, strlen(term),
2161                       flags, 1, term_type,rset_nmem,
2162                       kc, kc->scope, 0, reg_type);
2163     grep_info_delete(&grep_info);
2164     return rset;
2165 }
2166
2167 static
2168 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2169                            oid_value attributeSet,
2170                            int num_bases, char **basenames,
2171                            NMEM stream, const char *rank_type, RSET rset,
2172                            int xpath_len, struct xpath_location_step *xpath,
2173                            NMEM rset_nmem,
2174                            RSET *rset_out,
2175                            struct rset_key_control *kc)
2176 {
2177     oid_value curAttributeSet = attributeSet;
2178     int base_no;
2179     int i;
2180
2181     if (xpath_len < 0)
2182     {
2183         *rset_out = rset;
2184         return ZEBRA_OK;
2185     }
2186
2187     yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2188     for (i = 0; i<xpath_len; i++)
2189     {
2190         yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2191
2192     }
2193
2194     curAttributeSet = VAL_IDXPATH;
2195
2196     /*
2197       //a    ->    a/.*
2198       //a/b  ->    b/a/.*
2199       /a     ->    a/
2200       /a/b   ->    b/a/
2201
2202       /      ->    none
2203
2204    a[@attr = value]/b[@other = othervalue]
2205
2206  /e/@a val      range(e/,range(@a,freetext(w,1015,val),@a),e/)
2207  /a/b val       range(b/a/,freetext(w,1016,val),b/a/)
2208  /a/b/@c val    range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2209  /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2210  /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2211  /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
2212       
2213     */
2214
2215     dict_grep_cmap (zh->reg->dict, 0, 0);
2216
2217     for (base_no = 0; base_no < num_bases; base_no++)
2218     {
2219         int level = xpath_len;
2220         int first_path = 1;
2221         
2222         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2223         {
2224             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2225                            basenames[base_no]);
2226             *rset_out = rset;
2227             return ZEBRA_FAIL;
2228         }
2229         while (--level >= 0)
2230         {
2231             char xpath_rev[128];
2232             int i, len;
2233             RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
2234
2235             *xpath_rev = 0;
2236             len = 0;
2237             for (i = level; i >= 1; --i)
2238             {
2239                 const char *cp = xpath[i].part;
2240                 if (*cp)
2241                 {
2242                     for (;*cp; cp++)
2243                         if (*cp == '*')
2244                         {
2245                             memcpy (xpath_rev + len, "[^/]*", 5);
2246                             len += 5;
2247                         }
2248                         else if (*cp == ' ')
2249                         {
2250
2251                             xpath_rev[len++] = 1;
2252                             xpath_rev[len++] = ' ';
2253                         }
2254
2255                         else
2256                             xpath_rev[len++] = *cp;
2257                     xpath_rev[len++] = '/';
2258                 }
2259                 else if (i == 1)  /* // case */
2260                 {
2261                     xpath_rev[len++] = '.';
2262                     xpath_rev[len++] = '*';
2263                 }
2264             }
2265             xpath_rev[len] = 0;
2266
2267             if (xpath[level].predicate &&
2268                 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2269                 xpath[level].predicate->u.relation.name[0])
2270             {
2271                 WRBUF wbuf = wrbuf_alloc();
2272                 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2273                 if (xpath[level].predicate->u.relation.value)
2274                 {
2275                     const char *cp = xpath[level].predicate->u.relation.value;
2276                     wrbuf_putc(wbuf, '=');
2277                     
2278                     while (*cp)
2279                     {
2280                         if (strchr(REGEX_CHARS, *cp))
2281                             wrbuf_putc(wbuf, '\\');
2282                         wrbuf_putc(wbuf, *cp);
2283                         cp++;
2284                     }
2285                 }
2286                 wrbuf_puts(wbuf, "");
2287                 rset_attr = xpath_trunc(
2288                     zh, stream, '0', wrbuf_buf(wbuf), 3, 
2289                     curAttributeSet, rset_nmem, kc);
2290                 wrbuf_free(wbuf, 1);
2291             } 
2292             else 
2293             {
2294                 if (!first_path)
2295                     continue;
2296             }
2297             yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level, xpath_rev);
2298             if (strlen(xpath_rev))
2299             {
2300                 rset_start_tag = xpath_trunc(zh, stream, '0', 
2301                         xpath_rev, 1, curAttributeSet, rset_nmem, kc);
2302             
2303                 rset_end_tag = xpath_trunc(zh, stream, '0', 
2304                         xpath_rev, 2, curAttributeSet, rset_nmem, kc);
2305
2306                 rset = rsbetween_create(rset_nmem, kc, kc->scope,
2307                                         rset_start_tag, rset,
2308                                         rset_end_tag, rset_attr);
2309             }
2310             first_path = 0;
2311         }
2312     }
2313     *rset_out = rset;
2314     return ZEBRA_OK;
2315 }
2316
2317 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2318                                 oid_value attributeSet, NMEM stream,
2319                                 Z_SortKeySpecList *sort_sequence,
2320                                 int num_bases, char **basenames, 
2321                                 NMEM rset_nmem,
2322                                 RSET *rset,
2323                                 struct rset_key_control *kc)
2324 {
2325     ZEBRA_RES res = ZEBRA_OK;
2326     unsigned reg_id;
2327     char *search_type = NULL;
2328     char rank_type[128];
2329     int complete_flag;
2330     int sort_flag;
2331     char termz[IT_MAX_WORD+1];
2332     int xpath_len;
2333     int xpath_use = 0;
2334     struct xpath_location_step xpath[10];
2335
2336     if (!log_level_set)
2337     {
2338         log_level_rpn = yaz_log_module_level("rpn");
2339         log_level_set = 1;
2340     }
2341     zebra_maps_attr(zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2342                     rank_type, &complete_flag, &sort_flag);
2343     
2344     yaz_log(YLOG_DEBUG, "reg_id=%c", reg_id);
2345     yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2346     yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2347     yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2348
2349     if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2350         return ZEBRA_FAIL;
2351
2352     if (sort_flag)
2353         return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2354                              rank_type, rset_nmem, rset, kc);
2355     /* consider if an X-Path query is used */
2356     xpath_len = parse_xpath(zh, zapt, attributeSet, xpath, 10, stream);
2357     if (xpath_len >= 0)
2358     {
2359         xpath_use = 1016;  /* searching for element by default */
2360         if (xpath[xpath_len-1].part[0] == '@') 
2361             xpath_use = 1015;  /* last step an attribute .. */
2362     }
2363
2364     /* search using one of the various search type strategies
2365        termz is our UTF-8 search term
2366        attributeSet is top-level default attribute set 
2367        stream is ODR for search
2368        reg_id is the register type
2369        complete_flag is 1 for complete subfield, 0 for incomplete
2370        xpath_use is use-attribute to be used for X-Path search, 0 for none
2371     */
2372     if (!strcmp(search_type, "phrase"))
2373     {
2374         res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2375                                     reg_id, complete_flag, rank_type,
2376                                     xpath_use,
2377                                     num_bases, basenames, rset_nmem,
2378                                     rset, kc);
2379     }
2380     else if (!strcmp(search_type, "and-list"))
2381     {
2382         res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2383                                       reg_id, complete_flag, rank_type,
2384                                       xpath_use,
2385                                       num_bases, basenames, rset_nmem,
2386                                       rset, kc);
2387     }
2388     else if (!strcmp(search_type, "or-list"))
2389     {
2390         res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2391                                      reg_id, complete_flag, rank_type,
2392                                      xpath_use,
2393                                      num_bases, basenames, rset_nmem,
2394                                      rset, kc);
2395     }
2396     else if (!strcmp(search_type, "local"))
2397     {
2398         res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2399                                    rank_type, rset_nmem, rset, kc);
2400     }
2401     else if (!strcmp(search_type, "numeric"))
2402     {
2403         res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2404                                      reg_id, complete_flag, rank_type,
2405                                      xpath_use,
2406                                      num_bases, basenames, rset_nmem,
2407                                      rset, kc);
2408     }
2409     else
2410     {
2411         zebra_setError(zh, YAZ_BIB1_UNSUPP_STRUCTURE_ATTRIBUTE, 0);
2412         res = ZEBRA_FAIL;
2413     }
2414     if (res != ZEBRA_OK)
2415         return res;
2416     if (!*rset)
2417         return ZEBRA_FAIL;
2418     return rpn_search_xpath(zh, attributeSet, num_bases, basenames,
2419                             stream, rank_type, *rset, 
2420                             xpath_len, xpath, rset_nmem, rset, kc);
2421 }
2422
2423 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2424                                       oid_value attributeSet, 
2425                                       NMEM stream, NMEM rset_nmem,
2426                                       Z_SortKeySpecList *sort_sequence,
2427                                       int num_bases, char **basenames,
2428                                       RSET **result_sets, int *num_result_sets,
2429                                       Z_Operator *parent_op,
2430                                       struct rset_key_control *kc);
2431
2432 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2433                          oid_value attributeSet, 
2434                          NMEM stream, NMEM rset_nmem,
2435                          Z_SortKeySpecList *sort_sequence,
2436                          int num_bases, char **basenames,
2437                          RSET *result_set)
2438 {
2439     RSET *result_sets = 0;
2440     int num_result_sets = 0;
2441     ZEBRA_RES res;
2442     struct rset_key_control *kc = zebra_key_control_create(zh);
2443
2444     res = rpn_search_structure(zh, zs, attributeSet,
2445                                stream, rset_nmem,
2446                                sort_sequence, 
2447                                num_bases, basenames,
2448                                &result_sets, &num_result_sets,
2449                                0 /* no parent op */,
2450                                kc);
2451     if (res != ZEBRA_OK)
2452     {
2453         int i;
2454         for (i = 0; i<num_result_sets; i++)
2455             rset_delete(result_sets[i]);
2456         *result_set = 0;
2457     }
2458     else
2459     {
2460         assert(num_result_sets == 1);
2461         assert(result_sets);
2462         assert(*result_sets);
2463         *result_set = *result_sets;
2464     }
2465     (*kc->dec)(kc);
2466     return res;
2467 }
2468
2469 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2470                                oid_value attributeSet, 
2471                                NMEM stream, NMEM rset_nmem,
2472                                Z_SortKeySpecList *sort_sequence,
2473                                int num_bases, char **basenames,
2474                                RSET **result_sets, int *num_result_sets,
2475                                Z_Operator *parent_op,
2476                                struct rset_key_control *kc)
2477 {
2478     *num_result_sets = 0;
2479     if (zs->which == Z_RPNStructure_complex)
2480     {
2481         ZEBRA_RES res;
2482         Z_Operator *zop = zs->u.complex->roperator;
2483         RSET *result_sets_l = 0;
2484         int num_result_sets_l = 0;
2485         RSET *result_sets_r = 0;
2486         int num_result_sets_r = 0;
2487
2488         res = rpn_search_structure(zh, zs->u.complex->s1,
2489                                    attributeSet, stream, rset_nmem,
2490                                    sort_sequence,
2491                                    num_bases, basenames,
2492                                    &result_sets_l, &num_result_sets_l,
2493                                    zop, kc);
2494         if (res != ZEBRA_OK)
2495         {
2496             int i;
2497             for (i = 0; i<num_result_sets_l; i++)
2498                 rset_delete(result_sets_l[i]);
2499             return res;
2500         }
2501         res = rpn_search_structure(zh, zs->u.complex->s2,
2502                                    attributeSet, stream, rset_nmem,
2503                                    sort_sequence,
2504                                    num_bases, basenames,
2505                                    &result_sets_r, &num_result_sets_r,
2506                                    zop, kc);
2507         if (res != ZEBRA_OK)
2508         {
2509             int i;
2510             for (i = 0; i<num_result_sets_l; i++)
2511                 rset_delete(result_sets_l[i]);
2512             for (i = 0; i<num_result_sets_r; i++)
2513                 rset_delete(result_sets_r[i]);
2514             return res;
2515         }
2516
2517         /* make a new list of result for all children */
2518         *num_result_sets = num_result_sets_l + num_result_sets_r;
2519         *result_sets = nmem_malloc(stream, *num_result_sets * 
2520                                    sizeof(**result_sets));
2521         memcpy(*result_sets, result_sets_l, 
2522                num_result_sets_l * sizeof(**result_sets));
2523         memcpy(*result_sets + num_result_sets_l, result_sets_r, 
2524                num_result_sets_r * sizeof(**result_sets));
2525
2526         if (!parent_op || parent_op->which != zop->which
2527             || (zop->which != Z_Operator_and &&
2528                 zop->which != Z_Operator_or))
2529         {
2530             /* parent node different from this one (or non-present) */
2531             /* we must combine result sets now */
2532             RSET rset;
2533             switch (zop->which)
2534             {
2535             case Z_Operator_and:
2536                 rset = rsmulti_and_create(rset_nmem, kc,
2537                                           kc->scope,
2538                                           *num_result_sets, *result_sets);
2539                 break;
2540             case Z_Operator_or:
2541                 rset = rsmulti_or_create(rset_nmem, kc,
2542                                          kc->scope, 0, /* termid */
2543                                          *num_result_sets, *result_sets);
2544                 break;
2545             case Z_Operator_and_not:
2546                 rset = rsbool_create_not(rset_nmem, kc,
2547                                          kc->scope,
2548                                          (*result_sets)[0],
2549                                          (*result_sets)[1]);
2550                 break;
2551             case Z_Operator_prox:
2552                 if (zop->u.prox->which != Z_ProximityOperator_known)
2553                 {
2554                     zebra_setError(zh, 
2555                                    YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2556                                    0);
2557                     return ZEBRA_FAIL;
2558                 }
2559                 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2560                 {
2561                     zebra_setError_zint(zh,
2562                                         YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2563                                         *zop->u.prox->u.known);
2564                     return ZEBRA_FAIL;
2565                 }
2566                 else
2567                 {
2568                     rset = rsprox_create(rset_nmem, kc,
2569                                          kc->scope,
2570                                          *num_result_sets, *result_sets, 
2571                                          *zop->u.prox->ordered,
2572                                          (!zop->u.prox->exclusion ? 
2573                                           0 : *zop->u.prox->exclusion),
2574                                          *zop->u.prox->relationType,
2575                                          *zop->u.prox->distance );
2576                 }
2577                 break;
2578             default:
2579                 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
2580                 return ZEBRA_FAIL;
2581             }
2582             *num_result_sets = 1;
2583             *result_sets = nmem_malloc(stream, *num_result_sets * 
2584                                        sizeof(**result_sets));
2585             (*result_sets)[0] = rset;
2586         }
2587     }
2588     else if (zs->which == Z_RPNStructure_simple)
2589     {
2590         RSET rset;
2591         ZEBRA_RES res;
2592
2593         if (zs->u.simple->which == Z_Operand_APT)
2594         {
2595             yaz_log(YLOG_DEBUG, "rpn_search_APT");
2596             res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2597                                  attributeSet, stream, sort_sequence,
2598                                  num_bases, basenames, rset_nmem, &rset,
2599                                  kc);
2600             if (res != ZEBRA_OK)
2601                 return res;
2602         }
2603         else if (zs->u.simple->which == Z_Operand_resultSetId)
2604         {
2605             yaz_log(YLOG_DEBUG, "rpn_search_ref");
2606             rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2607             if (!rset)
2608             {
2609                 zebra_setError(zh, 
2610                                YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2611                                zs->u.simple->u.resultSetId);
2612                 return ZEBRA_FAIL;
2613             }
2614             rset_dup(rset);
2615         }
2616         else
2617         {
2618             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2619             return ZEBRA_FAIL;
2620         }
2621         *num_result_sets = 1;
2622         *result_sets = nmem_malloc(stream, *num_result_sets * 
2623                                    sizeof(**result_sets));
2624         (*result_sets)[0] = rset;
2625     }
2626     else
2627     {
2628         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2629         return ZEBRA_FAIL;
2630     }
2631     return ZEBRA_OK;
2632 }
2633
2634 struct scan_info_entry {
2635     char *term;
2636     ISAM_P isam_p;
2637 };
2638
2639 struct scan_info {
2640     struct scan_info_entry *list;
2641     ODR odr;
2642     int before, after;
2643     char prefix[20];
2644 };
2645
2646 static int scan_handle (char *name, const char *info, int pos, void *client)
2647 {
2648     int len_prefix, idx;
2649     struct scan_info *scan_info = (struct scan_info *) client;
2650
2651     len_prefix = strlen(scan_info->prefix);
2652     if (memcmp (name, scan_info->prefix, len_prefix))
2653         return 1;
2654     if (pos > 0)
2655         idx = scan_info->after - pos + scan_info->before;
2656     else
2657         idx = - pos - 1;
2658
2659     if (idx < 0)
2660         return 0;
2661     scan_info->list[idx].term = (char *)
2662         odr_malloc(scan_info->odr, strlen(name + len_prefix)+1);
2663     strcpy(scan_info->list[idx].term, name + len_prefix);
2664     assert (*info == sizeof(ISAM_P));
2665     memcpy (&scan_info->list[idx].isam_p, info+1, sizeof(ISAM_P));
2666     return 0;
2667 }
2668
2669 void zebra_term_untrans_iconv(ZebraHandle zh, NMEM stream, int reg_type,
2670                               char **dst, const char *src)
2671 {
2672     char term_src[IT_MAX_WORD];
2673     char term_dst[IT_MAX_WORD];
2674     
2675     zebra_term_untrans (zh, reg_type, term_src, src);
2676
2677     if (zh->iconv_from_utf8 != 0)
2678     {
2679         int len;
2680         char *inbuf = term_src;
2681         size_t inleft = strlen(term_src);
2682         char *outbuf = term_dst;
2683         size_t outleft = sizeof(term_dst)-1;
2684         size_t ret;
2685         
2686         ret = yaz_iconv (zh->iconv_from_utf8, &inbuf, &inleft,
2687                          &outbuf, &outleft);
2688         if (ret == (size_t)(-1))
2689             len = 0;
2690         else
2691             len = outbuf - term_dst;
2692         *dst = nmem_malloc(stream, len + 1);
2693         if (len > 0)
2694             memcpy (*dst, term_dst, len);
2695         (*dst)[len] = '\0';
2696     }
2697     else
2698         *dst = nmem_strdup(stream, term_src);
2699 }
2700
2701 static void count_set(ZebraHandle zh, RSET rset, zint *count)
2702 {
2703     zint psysno = 0;
2704     struct it_key key;
2705     RSFD rfd;
2706
2707     yaz_log(YLOG_DEBUG, "count_set");
2708
2709     rset->hits_limit = zh->approx_limit;
2710
2711     *count = 0;
2712     rfd = rset_open(rset, RSETF_READ);
2713     while (rset_read(rfd, &key,0 /* never mind terms */))
2714     {
2715         if (key.mem[0] != psysno)
2716         {
2717             psysno = key.mem[0];
2718             if (rfd->counted_items >= rset->hits_limit)
2719                 break;
2720         }
2721     }
2722     rset_close (rfd);
2723     *count = rset->hits_count;
2724 }
2725
2726 ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
2727                    oid_value attributeset,
2728                    int num_bases, char **basenames,
2729                    int *position, int *num_entries, ZebraScanEntry **list,
2730                    int *is_partial, RSET limit_set, int return_zero)
2731 {
2732     int i;
2733     int pos = *position;
2734     int num = *num_entries;
2735     int before;
2736     int after;
2737     int base_no;
2738     char termz[IT_MAX_WORD+20];
2739     AttrType use;
2740     int use_value;
2741     const char *use_string = 0;
2742     struct scan_info *scan_info_array;
2743     ZebraScanEntry *glist;
2744     int ords[32], ord_no = 0;
2745     int ptr[32];
2746
2747     int bases_ok = 0;     /* no of databases with OK attribute */
2748     int errCode = 0;      /* err code (if any is not OK) */
2749     char *errString = 0;  /* addinfo */
2750
2751     unsigned reg_id;
2752     char *search_type = NULL;
2753     char rank_type[128];
2754     int complete_flag;
2755     int sort_flag;
2756     NMEM rset_nmem = NULL; 
2757     struct rset_key_control *kc = 0;
2758
2759     *list = 0;
2760     *is_partial = 0;
2761
2762     if (attributeset == VAL_NONE)
2763         attributeset = VAL_BIB1;
2764
2765     if (!limit_set)
2766     {
2767         AttrType termset;
2768         int termset_value_numeric;
2769         const char *termset_value_string;
2770         attr_init(&termset, zapt, 8);
2771         termset_value_numeric =
2772             attr_find_ex(&termset, NULL, &termset_value_string);
2773         if (termset_value_numeric != -1)
2774         {
2775             char resname[32];
2776             const char *termset_name = 0;
2777             
2778             if (termset_value_numeric != -2)
2779             {
2780                 
2781                 sprintf(resname, "%d", termset_value_numeric);
2782                 termset_name = resname;
2783             }
2784             else
2785                 termset_name = termset_value_string;
2786             
2787             limit_set = resultSetRef (zh, termset_name);
2788         }
2789     }
2790         
2791     yaz_log(YLOG_DEBUG, "position = %d, num = %d set=%d",
2792             pos, num, attributeset);
2793         
2794     attr_init(&use, zapt, 1);
2795     use_value = attr_find_ex(&use, &attributeset, &use_string);
2796
2797     if (zebra_maps_attr(zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2798                         rank_type, &complete_flag, &sort_flag))
2799     {
2800         *num_entries = 0;
2801         zebra_setError(zh, YAZ_BIB1_UNSUPP_ATTRIBUTE_TYPE, 0);
2802         return ZEBRA_FAIL;
2803     }
2804     yaz_log(YLOG_DEBUG, "use_value = %d", use_value);
2805
2806     if (use_value == -1)
2807         use_value = 1016;
2808     for (base_no = 0; base_no < num_bases && ord_no < 32; base_no++)
2809     {
2810         data1_local_attribute *local_attr;
2811         attent attp;
2812         int ord;
2813
2814         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2815         {
2816             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2817                            basenames[base_no]);
2818             *num_entries = 0;
2819             return ZEBRA_FAIL;
2820         }
2821
2822         if (use_string &&
2823             (ord = zebraExplain_lookup_attr_str(zh->reg->zei,
2824                                                 use_string)) >= 0)
2825         {
2826             /* we have a match for a raw string attribute */
2827             if (ord > 0)
2828                 ords[ord_no++] = ord;
2829             attp.local_attributes = 0;  /* no more attributes */
2830         }
2831         else
2832         {
2833             int r;
2834             
2835             if ((r = att_getentbyatt (zh, &attp, attributeset, use_value,
2836                                       use_string)))
2837             {
2838                 yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d",
2839                         attributeset, use_value);
2840                 if (r == -1)
2841                 {
2842                     errCode = YAZ_BIB1_UNSUPP_USE_ATTRIBUTE;
2843                     if (use_string)
2844                         zebra_setError(zh, YAZ_BIB1_UNSUPP_USE_ATTRIBUTE,
2845                                        use_string);
2846                     else
2847                         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_USE_ATTRIBUTE,
2848                                             use_value);
2849                 }   
2850                 else
2851                 {
2852                     zebra_setError(zh, YAZ_BIB1_UNSUPP_ATTRIBUTE_SET, 0);
2853                 }
2854                 continue;
2855             }
2856         }
2857         bases_ok++;
2858         for (local_attr = attp.local_attributes; local_attr && ord_no < 32;
2859              local_attr = local_attr->next)
2860         {
2861             ord = zebraExplain_lookup_attr_su(zh->reg->zei,
2862                                               attp.attset_ordinal,
2863                                               local_attr->local);
2864             if (ord > 0)
2865                 ords[ord_no++] = ord;
2866         }
2867     }
2868     if (!bases_ok && errCode)
2869     {
2870         zebra_setError(zh, errCode, errString);
2871         *num_entries = 0;
2872         return ZEBRA_FAIL;
2873     }
2874     if (ord_no == 0)
2875     {
2876         *num_entries = 0;
2877         return ZEBRA_OK;
2878     }
2879     /* prepare dictionary scanning */
2880     if (num < 1)
2881     {
2882         *num_entries = 0;
2883         return ZEBRA_OK;
2884     }
2885     before = pos-1;
2886     if (before < 0)
2887         before = 0;
2888     after = 1+num-pos;
2889     if (after < 0)
2890         after = 0;
2891     yaz_log(YLOG_DEBUG, "rpn_scan pos=%d num=%d before=%d "
2892             "after=%d before+after=%d",
2893             pos, num, before, after, before+after);
2894     scan_info_array = (struct scan_info *)
2895         odr_malloc(stream, ord_no * sizeof(*scan_info_array));
2896     for (i = 0; i < ord_no; i++)
2897     {
2898         int j, prefix_len = 0;
2899         int before_tmp = before, after_tmp = after;
2900         struct scan_info *scan_info = scan_info_array + i;
2901         struct rpn_char_map_info rcmi;
2902
2903         rpn_char_map_prepare (zh->reg, reg_id, &rcmi);
2904
2905         scan_info->before = before;
2906         scan_info->after = after;
2907         scan_info->odr = stream;
2908
2909         scan_info->list = (struct scan_info_entry *)
2910             odr_malloc(stream, (before+after) * sizeof(*scan_info->list));
2911         for (j = 0; j<before+after; j++)
2912             scan_info->list[j].term = NULL;
2913
2914         prefix_len += key_SU_encode (ords[i], termz + prefix_len);
2915         termz[prefix_len++] = reg_id;
2916         termz[prefix_len] = 0;
2917         strcpy(scan_info->prefix, termz);
2918
2919         if (trans_scan_term(zh, zapt, termz+prefix_len, reg_id) == ZEBRA_FAIL)
2920             return ZEBRA_FAIL;
2921         
2922         dict_scan(zh->reg->dict, termz, &before_tmp, &after_tmp,
2923                   scan_info, scan_handle);
2924     }
2925     glist = (ZebraScanEntry *)
2926         odr_malloc(stream, (before+after)*sizeof(*glist));
2927
2928     rset_nmem = nmem_create();
2929     kc = zebra_key_control_create(zh);
2930
2931     /* consider terms after main term */
2932     for (i = 0; i < ord_no; i++)
2933         ptr[i] = before;
2934     
2935     *is_partial = 0;
2936     for (i = 0; i<after; i++)
2937     {
2938         int j, j0 = -1;
2939         const char *mterm = NULL;
2940         const char *tst;
2941         RSET rset = 0;
2942         int lo = i + pos-1; /* offset in result list */
2943
2944         /* find: j0 is the first of the minimal values */
2945         for (j = 0; j < ord_no; j++)
2946         {
2947             if (ptr[j] < before+after && ptr[j] >= 0 &&
2948                 (tst = scan_info_array[j].list[ptr[j]].term) &&
2949                 (!mterm || strcmp (tst, mterm) < 0))
2950             {
2951                 j0 = j;
2952                 mterm = tst;
2953             }
2954         }
2955         if (j0 == -1)
2956             break;  /* no value found, stop */
2957
2958         /* get result set for first one , but only if it's within bounds */
2959         if (lo >= 0)
2960         {
2961             /* get result set for first term */
2962             zebra_term_untrans_iconv(zh, stream->mem, reg_id,
2963                                      &glist[lo].term, mterm);
2964             rset = rset_trunc(zh, &scan_info_array[j0].list[ptr[j0]].isam_p, 1,
2965                               glist[lo].term, strlen(glist[lo].term),
2966                               NULL, 0, zapt->term->which, rset_nmem, 
2967                               kc, kc->scope, 0, reg_id);
2968         }
2969         ptr[j0]++; /* move index for this set .. */
2970         /* get result set for remaining scan terms */
2971         for (j = j0+1; j<ord_no; j++)
2972         {
2973             if (ptr[j] < before+after && ptr[j] >= 0 &&
2974                 (tst = scan_info_array[j].list[ptr[j]].term) &&
2975                 !strcmp (tst, mterm))
2976             {
2977                 if (lo >= 0)
2978                 {
2979                     RSET rsets[2];
2980                     
2981                     rsets[0] = rset;
2982                     rsets[1] =
2983                         rset_trunc(
2984                             zh, &scan_info_array[j].list[ptr[j]].isam_p, 1,
2985                             glist[lo].term,
2986                             strlen(glist[lo].term), NULL, 0,
2987                             zapt->term->which,rset_nmem,
2988                             kc, kc->scope, 0, reg_id);
2989                     rset = rsmulti_or_create(rset_nmem, kc,
2990                                              kc->scope, 0 /* termid */,
2991                                              2, rsets);
2992                 }
2993                 ptr[j]++;
2994             }
2995         }
2996         if (lo >= 0)
2997         {
2998             zint count;
2999             /* merge with limit_set if given */
3000             if (limit_set)
3001             {
3002                 RSET rsets[2];
3003                 rsets[0] = rset;
3004                 rsets[1] = rset_dup(limit_set);
3005                 
3006                 rset = rsmulti_and_create(rset_nmem, kc,
3007                                           kc->scope,
3008                                           2, rsets);
3009             }
3010             /* count it */
3011             count_set(zh, rset, &count);
3012             glist[lo].occurrences = count;
3013             rset_delete(rset);
3014         }
3015     }
3016     if (i < after)
3017     {
3018         *num_entries -= (after-i);
3019         *is_partial = 1;
3020         if (*num_entries < 0)
3021         {
3022             (*kc->dec)(kc);
3023             nmem_destroy(rset_nmem);
3024             *num_entries = 0;
3025             return ZEBRA_OK;
3026         }
3027     }
3028     /* consider terms before main term */
3029     for (i = 0; i<ord_no; i++)
3030         ptr[i] = 0;
3031     
3032     for (i = 0; i<before; i++)
3033     {
3034         int j, j0 = -1;
3035         const char *mterm = NULL;
3036         const char *tst;
3037         RSET rset;
3038         int lo = before-1-i; /* offset in result list */
3039         zint count;
3040         
3041         for (j = 0; j <ord_no; j++)
3042         {
3043             if (ptr[j] < before && ptr[j] >= 0 &&
3044                 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
3045                 (!mterm || strcmp (tst, mterm) > 0))
3046             {
3047                 j0 = j;
3048                     mterm = tst;
3049             }
3050         }
3051         if (j0 == -1)
3052             break;
3053         
3054         zebra_term_untrans_iconv(zh, stream->mem, reg_id,
3055                                  &glist[lo].term, mterm);
3056         
3057         rset = rset_trunc
3058             (zh, &scan_info_array[j0].list[before-1-ptr[j0]].isam_p, 1,
3059              glist[lo].term, strlen(glist[lo].term),
3060              NULL, 0, zapt->term->which, rset_nmem,
3061              kc, kc->scope, 0, reg_id);
3062         
3063         ptr[j0]++;
3064         
3065         for (j = j0+1; j<ord_no; j++)
3066         {
3067             if (ptr[j] < before && ptr[j] >= 0 &&
3068                 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
3069                 !strcmp (tst, mterm))
3070             {
3071                 RSET rsets[2];
3072                 
3073                 rsets[0] = rset;
3074                 rsets[1] = rset_trunc(
3075                     zh,
3076                     &scan_info_array[j].list[before-1-ptr[j]].isam_p, 1,
3077                     glist[lo].term,
3078                     strlen(glist[lo].term), NULL, 0,
3079                     zapt->term->which, rset_nmem,
3080                     kc, kc->scope, 0, reg_id);
3081                 rset = rsmulti_or_create(rset_nmem, kc,
3082                                          kc->scope, 0 /* termid */, 2, rsets);
3083                 
3084                 ptr[j]++;
3085             }
3086         }
3087         if (limit_set)
3088         {
3089             RSET rsets[2];
3090             rsets[0] = rset;
3091             rsets[1] = rset_dup(limit_set);
3092             
3093             rset = rsmulti_and_create(rset_nmem, kc,
3094                                       kc->scope, 2, rsets);
3095         }
3096         count_set(zh, rset, &count);
3097         glist[lo].occurrences = count;
3098         rset_delete (rset);
3099     }
3100     (*kc->dec)(kc);
3101     nmem_destroy(rset_nmem);
3102     i = before-i;
3103     if (i)
3104     {
3105         *is_partial = 1;
3106         *position -= i;
3107         *num_entries -= i;
3108         if (*num_entries <= 0)
3109         {
3110             *num_entries = 0;
3111             return ZEBRA_OK;
3112         }
3113     }
3114     
3115     *list = glist + i;               /* list is set to first 'real' entry */
3116     
3117     yaz_log(YLOG_DEBUG, "position = %d, num_entries = %d",
3118             *position, *num_entries);
3119     return ZEBRA_OK;
3120 }
3121