Register type (w,p) + set-use/string attribute combined in register
[idzebra-moved-to-github.git] / index / zrpn.c
1 /* $Id: zrpn.c,v 1.202 2005-06-23 06:45:46 adam Exp $
2    Copyright (C) 1995-2005
3    Index Data ApS
4
5 This file is part of the Zebra server.
6
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra.  If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA.
21 */
22
23 #include <stdio.h>
24 #include <assert.h>
25 #ifdef WIN32
26 #include <io.h>
27 #endif
28 #if HAVE_UNISTD_H
29 #include <unistd.h>
30 #endif
31 #include <ctype.h>
32
33 #include <yaz/diagbib1.h>
34 #include "index.h"
35 #include <zebra_xpath.h>
36
37 #include <charmap.h>
38 #include <rset.h>
39
/* Context handed to dict_grep_cmap() (see rpn_char_map_prepare) and read
   back by rpn_char_map_handler() to map input characters. */
40 struct rpn_char_map_info
41 {
42     ZebraMaps zm;       /* character maps, copied from reg->zebra_maps */
43     int reg_type;       /* register type passed to zebra_maps_input() */
44 };
45
/* Cursor used by attr_init()/attr_find_ex() to iterate over the attributes
   of one Z_AttributesPlusTerm that have a given attribute type. */
46 typedef struct
47 {
48     int type;           /* attribute type to look for */
49     int major;          /* index of next element in zapt->attributes */
50     int minor;          /* index of next item within a complex element */
51     Z_AttributesPlusTerm *zapt;   /* attribute list being scanned */
52 } AttrType;
53
/* Lazily initialised log level for the "rpn" module: add_isam_p() resolves
   log_level_rpn via yaz_log_module_level() the first time it runs and then
   sets log_level_set to 1. */
54 static int log_level_set = 0;
55 static int log_level_rpn = 0;
56
57 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
58 {
59     struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
60     const char **out = zebra_maps_input(p->zm, p->reg_type, from, len, 0);
61 #if 0
62     if (out && *out)
63     {
64         const char *outp = *out;
65         yaz_log(YLOG_LOG, "---");
66         while (*outp)
67         {
68             yaz_log(YLOG_LOG, "%02X", *outp);
69             outp++;
70         }
71     }
72 #endif
73     return out;
74 }
75
76 static void rpn_char_map_prepare(struct zebra_register *reg, int reg_type,
77                                   struct rpn_char_map_info *map_info)
78 {
79     map_info->zm = reg->zebra_maps;
80     map_info->reg_type = reg_type;
81     dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
82 }
83
84 static int attr_find_ex(AttrType *src, oid_value *attributeSetP,
85                          const char **string_value)
86 {
87     int num_attributes;
88
89     num_attributes = src->zapt->attributes->num_attributes;
90     while (src->major < num_attributes)
91     {
92         Z_AttributeElement *element;
93
94         element = src->zapt->attributes->attributes[src->major];
95         if (src->type == *element->attributeType)
96         {
97             switch (element->which) 
98             {
99             case Z_AttributeValue_numeric:
100                 ++(src->major);
101                 if (element->attributeSet && attributeSetP)
102                 {
103                     oident *attrset;
104
105                     attrset = oid_getentbyoid(element->attributeSet);
106                     *attributeSetP = attrset->value;
107                 }
108                 return *element->value.numeric;
109                 break;
110             case Z_AttributeValue_complex:
111                 if (src->minor >= element->value.complex->num_list)
112                     break;
113                 if (element->attributeSet && attributeSetP)
114                 {
115                     oident *attrset;
116                     
117                     attrset = oid_getentbyoid(element->attributeSet);
118                     *attributeSetP = attrset->value;
119                 }
120                 if (element->value.complex->list[src->minor]->which ==  
121                     Z_StringOrNumeric_numeric)
122                 {
123                     ++(src->minor);
124                     return
125                         *element->value.complex->list[src->minor-1]->u.numeric;
126                 }
127                 else if (element->value.complex->list[src->minor]->which ==  
128                          Z_StringOrNumeric_string)
129                 {
130                     if (!string_value)
131                         break;
132                     ++(src->minor);
133                     *string_value = 
134                         element->value.complex->list[src->minor-1]->u.string;
135                     return -2;
136                 }
137                 else
138                     break;
139             default:
140                 assert(0);
141             }
142         }
143         ++(src->major);
144     }
145     return -1;
146 }
147
148 static int attr_find(AttrType *src, oid_value *attributeSetP)
149 {
150     return attr_find_ex(src, attributeSetP, 0);
151 }
152
153 static void attr_init(AttrType *src, Z_AttributesPlusTerm *zapt,
154                        int type)
155 {
156     src->zapt = zapt;
157     src->type = type;
158     src->major = 0;
159     src->minor = 0;
160 }
161
/* TERM_COUNT enables the term_no array kept in parallel with isam_p_buf. */
162 #define TERM_COUNT        
163        
/* Accumulator for dictionary grep hits; filled by add_isam_p(). */
164 struct grep_info {        
165 #ifdef TERM_COUNT        
166     int *term_no;        /* grown together with isam_p_buf in add_isam_p() */
167 #endif        
168     ISAM_P *isam_p_buf;  /* collected ISAM positions */
169     int isam_p_size;        /* allocated number of entries */
170     int isam_p_indx;     /* number of entries in use */
171     ZebraHandle zh;      /* handle used for term untranslation/lookups */
172     int reg_type;        /* register type passed to zebra_term_untrans() */
173     ZebraSet termset;    /* if non-NULL, each hit is added as a term here */
174 };        
175
176 void zebra_term_untrans(ZebraHandle zh, int reg_type,
177                         char *dst, const char *src)
178 {
179     int len = 0;
180     while (*src)
181     {
182         const char *cp = zebra_maps_output(zh->reg->zebra_maps,
183                                            reg_type, &src);
184         if (!cp && len < IT_MAX_WORD-1)
185             dst[len++] = *src++;
186         else
187             while (*cp && len < IT_MAX_WORD-1)
188                 dst[len++] = *cp++;
189     }
190     dst[len] = '\0';
191 }
192
193 static void add_isam_p(const char *name, const char *info,
194                        struct grep_info *p)
195 {
196     if (!log_level_set)
197     {
198         log_level_rpn = yaz_log_module_level("rpn");
199         log_level_set = 1;
200     }
201     if (p->isam_p_indx == p->isam_p_size)
202     {
203         ISAM_P *new_isam_p_buf;
204 #ifdef TERM_COUNT        
205         int *new_term_no;        
206 #endif
207         p->isam_p_size = 2*p->isam_p_size + 100;
208         new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
209                                             p->isam_p_size);
210         if (p->isam_p_buf)
211         {
212             memcpy(new_isam_p_buf, p->isam_p_buf,
213                     p->isam_p_indx * sizeof(*p->isam_p_buf));
214             xfree(p->isam_p_buf);
215         }
216         p->isam_p_buf = new_isam_p_buf;
217
218 #ifdef TERM_COUNT
219         new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
220         if (p->term_no)
221         {
222             memcpy(new_term_no, p->isam_p_buf,
223                     p->isam_p_indx * sizeof(*p->term_no));
224             xfree(p->term_no);
225         }
226         p->term_no = new_term_no;
227 #endif
228     }
229     assert(*info == sizeof(*p->isam_p_buf));
230     memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
231
232 #if 1
233     if (p->termset)
234     {
235         const char *db;
236         int set, use;
237         char term_tmp[IT_MAX_WORD];
238         int su_code = 0;
239         int len = key_SU_decode (&su_code, name);
240         
241         zebra_term_untrans  (p->zh, p->reg_type, term_tmp, name+len+1);
242         yaz_log(log_level_rpn, "grep: %d %c %s", su_code, name[len], term_tmp);
243         zebraExplain_lookup_ord (p->zh->reg->zei,
244                                  su_code, &db, &set, &use);
245         yaz_log(log_level_rpn, "grep:  set=%d use=%d db=%s", set, use, db);
246         
247         resultSetAddTerm(p->zh, p->termset, name[len], db,
248                          set, use, term_tmp);
249     }
250 #endif
251     (p->isam_p_indx)++;
252 }
253
/* grep_handle: dictionary grep callback. p is the struct grep_info
   accumulator; the hit is stored via add_isam_p(). Always returns 0. */
static int grep_handle(char *name, const char *info, void *p)
{
    struct grep_info *accum = p;

    add_isam_p(name, info, accum);
    return 0;
}
259
260 static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src,
261                     const char *ct1, const char *ct2, int first)
262 {
263     const char *s1, *s0 = *src;
264     const char **map;
265
266     /* skip white space */
267     while (*s0)
268     {
269         if (ct1 && strchr(ct1, *s0))
270             break;
271         if (ct2 && strchr(ct2, *s0))
272             break;
273         s1 = s0;
274         map = zebra_maps_input(zebra_maps, reg_type, &s1, strlen(s1), first);
275         if (**map != *CHR_SPACE)
276             break;
277         s0 = s1;
278     }
279     *src = s0;
280     return *s0;
281 }
282
283
/* esc_str: format a byte buffer as a readable hex/character dump.
 *
 *  out_buf:   destination, NUL-terminated on return
 *  out_size:  size of out_buf; must be greater than 20
 *  in_buf:    bytes to dump
 *  in_size:   number of bytes in in_buf
 *
 * Each byte becomes "HH:c  " where c is the byte itself if printable ASCII
 * (32..126) and '?' otherwise. When the output gets within 20 bytes of
 * out_size, ".." is appended and the remaining input is dropped.
 */
static void esc_str(char *out_buf, int out_size,
                    const char *in_buf, int in_size)
{
    int used = 0;   /* characters written so far (excluding NUL) */
    int k;

    assert(out_buf);
    assert(in_buf);
    assert(out_size > 20);
    *out_buf = '\0';
    for (k = 0; k < in_size; k++)
    {
        int c = in_buf[k] & 0xff;
        int pc = (c < 32 || c > 126) ? '?' : c;

        /* track the offset instead of re-scanning with strlen() */
        used += sprintf(out_buf + used, "%02X:%c  ", c, pc);
        if (used > out_size - 20)
        {
            strcpy(out_buf + used, "..");
            break;
        }
    }
}
309
/* Characters that the term_1xx functions prefix with a backslash when they
   copy unmapped input into the dictionary regular expression. */
310 #define REGEX_CHARS " []()|.*+?!"
311
312 /* term_100: handle term, where trunc = none(no operators at all) */
313 static int term_100(ZebraMaps zebra_maps, int reg_type,
314                     const char **src, char *dst, int space_split,
315                     char *dst_term)
316 {
317     const char *s0;
318     const char **map;
319     int i = 0;
320     int j = 0;
321
322     const char *space_start = 0;
323     const char *space_end = 0;
324
325     if (!term_pre(zebra_maps, reg_type, src, NULL, NULL, !space_split))
326         return 0;
327     s0 = *src;
328     while (*s0)
329     {
330         const char *s1 = s0;
331         int q_map_match = 0;
332         map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
333                                 &q_map_match);
334         if (space_split)
335         {
336             if (**map == *CHR_SPACE)
337                 break;
338         }
339         else  /* complete subfield only. */
340         {
341             if (**map == *CHR_SPACE)
342             {   /* save space mapping for later  .. */
343                 space_start = s1;
344                 space_end = s0;
345                 continue;
346             }
347             else if (space_start)
348             {   /* reload last space */
349                 while (space_start < space_end)
350                 {
351                     if (strchr(REGEX_CHARS, *space_start))
352                         dst[i++] = '\\';
353                     dst_term[j++] = *space_start;
354                     dst[i++] = *space_start++;
355                 }
356                 /* and reset */
357                 space_start = space_end = 0;
358             }
359         }
360         /* add non-space char */
361         memcpy(dst_term+j, s1, s0 - s1);
362         j += (s0 - s1);
363         if (!q_map_match)
364         {
365             while (s1 < s0)
366             {
367                 if (strchr(REGEX_CHARS, *s1))
368                     dst[i++] = '\\';
369                 dst[i++] = *s1++;
370             }
371         }
372         else
373         {
374             char tmpbuf[80];
375             esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
376             
377             strcpy(dst + i, map[0]);
378             i += strlen(map[0]);
379         }
380     }
381     dst[i] = '\0';
382     dst_term[j] = '\0';
383     *src = s0;
384     return i;
385 }
386
387 /* term_101: handle term, where trunc = Process # */
388 static int term_101(ZebraMaps zebra_maps, int reg_type,
389                     const char **src, char *dst, int space_split,
390                     char *dst_term)
391 {
392     const char *s0;
393     const char **map;
394     int i = 0;
395     int j = 0;
396
397     if (!term_pre(zebra_maps, reg_type, src, "#", "#", !space_split))
398         return 0;
399     s0 = *src;
400     while (*s0)
401     {
402         if (*s0 == '#')
403         {
404             dst[i++] = '.';
405             dst[i++] = '*';
406             dst_term[j++] = *s0++;
407         }
408         else
409         {
410             const char *s1 = s0;
411             int q_map_match = 0;
412             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
413                                     &q_map_match);
414             if (space_split && **map == *CHR_SPACE)
415                 break;
416
417             /* add non-space char */
418             memcpy(dst_term+j, s1, s0 - s1);
419             j += (s0 - s1);
420             if (!q_map_match)
421             {
422                 while (s1 < s0)
423                 {
424                     if (strchr(REGEX_CHARS, *s1))
425                         dst[i++] = '\\';
426                     dst[i++] = *s1++;
427                 }
428             }
429             else
430             {
431                 char tmpbuf[80];
432                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
433                 
434                 strcpy(dst + i, map[0]);
435                 i += strlen(map[0]);
436             }
437         }
438     }
439     dst[i] = '\0';
440     dst_term[j++] = '\0';
441     *src = s0;
442     return i;
443 }
444
445 /* term_103: handle term, where trunc = re-2 (regular expressions) */
446 static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src,
447                     char *dst, int *errors, int space_split,
448                     char *dst_term)
449 {
450     int i = 0;
451     int j = 0;
452     const char *s0;
453     const char **map;
454
455     if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split))
456         return 0;
457     s0 = *src;
458     if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
459         isdigit(((const unsigned char *)s0)[1]))
460     {
461         *errors = s0[1] - '0';
462         s0 += 3;
463         if (*errors > 3)
464             *errors = 3;
465     }
466     while (*s0)
467     {
468         if (strchr("^\\()[].*+?|-", *s0))
469         {
470             dst_term[j++] = *s0;
471             dst[i++] = *s0++;
472         }
473         else
474         {
475             const char *s1 = s0;
476             int q_map_match = 0;
477             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
478                                     &q_map_match);
479             if (space_split && **map == *CHR_SPACE)
480                 break;
481
482             /* add non-space char */
483             memcpy(dst_term+j, s1, s0 - s1);
484             j += (s0 - s1);
485             if (!q_map_match)
486             {
487                 while (s1 < s0)
488                 {
489                     if (strchr(REGEX_CHARS, *s1))
490                         dst[i++] = '\\';
491                     dst[i++] = *s1++;
492                 }
493             }
494             else
495             {
496                 char tmpbuf[80];
497                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
498                 
499                 strcpy(dst + i, map[0]);
500                 i += strlen(map[0]);
501             }
502         }
503     }
504     dst[i] = '\0';
505     dst_term[j] = '\0';
506     *src = s0;
507     
508     return i;
509 }
510
511 /* term_102: handle term, where trunc = re-1 (regular expressions) */
512 static int term_102(ZebraMaps zebra_maps, int reg_type, const char **src,
513                     char *dst, int space_split, char *dst_term)
514 {
515     return term_103(zebra_maps, reg_type, src, dst, NULL, space_split,
516                     dst_term);
517 }
518
519
520 /* term_104: handle term, where trunc = Process # and ! */
521 static int term_104(ZebraMaps zebra_maps, int reg_type,
522                     const char **src, char *dst, int space_split,
523                     char *dst_term)
524 {
525     const char *s0;
526     const char **map;
527     int i = 0;
528     int j = 0;
529
530     if (!term_pre(zebra_maps, reg_type, src, "?*#", "?*#", !space_split))
531         return 0;
532     s0 = *src;
533     while (*s0)
534     {
535         if (*s0 == '?')
536         {
537             dst_term[j++] = *s0++;
538             if (*s0 >= '0' && *s0 <= '9')
539             {
540                 int limit = 0;
541                 while (*s0 >= '0' && *s0 <= '9')
542                 {
543                     limit = limit * 10 + (*s0 - '0');
544                     dst_term[j++] = *s0++;
545                 }
546                 if (limit > 20)
547                     limit = 20;
548                 while (--limit >= 0)
549                 {
550                     dst[i++] = '.';
551                     dst[i++] = '?';
552                 }
553             }
554             else
555             {
556                 dst[i++] = '.';
557                 dst[i++] = '*';
558             }
559         }
560         else if (*s0 == '*')
561         {
562             dst[i++] = '.';
563             dst[i++] = '*';
564             dst_term[j++] = *s0++;
565         }
566         else if (*s0 == '#')
567         {
568             dst[i++] = '.';
569             dst_term[j++] = *s0++;
570         }
571         else
572         {
573             const char *s1 = s0;
574             int q_map_match = 0;
575             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
576                                     &q_map_match);
577             if (space_split && **map == *CHR_SPACE)
578                 break;
579
580             /* add non-space char */
581             memcpy(dst_term+j, s1, s0 - s1);
582             j += (s0 - s1);
583             if (!q_map_match)
584             {
585                 while (s1 < s0)
586                 {
587                     if (strchr(REGEX_CHARS, *s1))
588                         dst[i++] = '\\';
589                     dst[i++] = *s1++;
590                 }
591             }
592             else
593             {
594                 char tmpbuf[80];
595                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
596                 
597                 strcpy(dst + i, map[0]);
598                 i += strlen(map[0]);
599             }
600         }
601     }
602     dst[i] = '\0';
603     dst_term[j++] = '\0';
604     *src = s0;
605     return i;
606 }
607
608 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
609 static int term_105(ZebraMaps zebra_maps, int reg_type,
610                     const char **src, char *dst, int space_split,
611                     char *dst_term, int right_truncate)
612 {
613     const char *s0;
614     const char **map;
615     int i = 0;
616     int j = 0;
617
618     if (!term_pre(zebra_maps, reg_type, src, "*!", "*!", !space_split))
619         return 0;
620     s0 = *src;
621     while (*s0)
622     {
623         if (*s0 == '*')
624         {
625             dst[i++] = '.';
626             dst[i++] = '*';
627             dst_term[j++] = *s0++;
628         }
629         else if (*s0 == '!')
630         {
631             dst[i++] = '.';
632             dst_term[j++] = *s0++;
633         }
634         else
635         {
636             const char *s1 = s0;
637             int q_map_match = 0;
638             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
639                                     &q_map_match);
640             if (space_split && **map == *CHR_SPACE)
641                 break;
642
643             /* add non-space char */
644             memcpy(dst_term+j, s1, s0 - s1);
645             j += (s0 - s1);
646             if (!q_map_match)
647             {
648                 while (s1 < s0)
649                 {
650                     if (strchr(REGEX_CHARS, *s1))
651                         dst[i++] = '\\';
652                     dst[i++] = *s1++;
653                 }
654             }
655             else
656             {
657                 char tmpbuf[80];
658                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
659                 
660                 strcpy(dst + i, map[0]);
661                 i += strlen(map[0]);
662             }
663         }
664     }
665     if (right_truncate)
666     {
667         dst[i++] = '.';
668         dst[i++] = '*';
669     }
670     dst[i] = '\0';
671     
672     dst_term[j++] = '\0';
673     *src = s0;
674     return i;
675 }
676
677
678 /* gen_regular_rel - generate regular expression from relation
679  *  val:     border value (inclusive)
680  *  islt:    1 if <=; 0 if >=.
       *
       *  dst receives a parenthesised regular expression over decimal
       *  integers (optionally with a leading '-'). No size is passed, so the
       *  caller must supply a buffer large enough for the result.
681  */
682 static void gen_regular_rel(char *dst, int val, int islt)
683 {
684     int dst_p;
685     int w, d, i;
686     int pos = 0;
687     char numstr[20];
688
689     yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
690     if (val >= 0)
691     {
692         if (islt)
693             strcpy(dst, "(-[0-9]+|(");   /* <= val: any negative, or ... */
694         else
695             strcpy(dst, "((");
696     } 
697     else
698     {
            /* negative border: work on the magnitude with the relation
               reversed */
699         if (!islt)
700         {
701             strcpy(dst, "([0-9]+|-(");   /* >= val: any non-negative, or ... */
702             dst_p = strlen(dst);         /* redundant: recomputed below */
703             islt = 1;
704         }
705         else
706         {
707             strcpy(dst, "(-(");
708             islt = 0;
709         }
710         val = -val;
711     }
712     dst_p = strlen(dst);
713     sprintf(numstr, "%d", val);
        /* walk the digits from least to most significant; pos counts the
           digits to the right of the current one */
714     for (w = strlen(numstr); --w >= 0; pos++)
715     {
716         d = numstr[w];
717         if (pos > 0)
718         {
                /* for all but the last digit the alternative must differ
                   from the border digit; skip when no such digit exists */
719             if (islt)
720             {
721                 if (d == '0')
722                     continue;
723                 d--;
724             } 
725             else
726             {
727                 if (d == '9')
728                     continue;
729                 d++;
730             }
731         }
732         
            /* append the whole number, then step back to the current digit
               and overwrite from there */
733         strcpy(dst + dst_p, numstr);
734         dst_p = strlen(dst) - pos - 1;
735
736         if (islt)
737         {
738             if (d != '0')
739             {
740                 dst[dst_p++] = '[';
741                 dst[dst_p++] = '0';
742                 dst[dst_p++] = '-';
743                 dst[dst_p++] = d;
744                 dst[dst_p++] = ']';
745             }
746             else
747                 dst[dst_p++] = d;
748         }
749         else
750         {
751             if (d != '9')
752             { 
753                 dst[dst_p++] = '[';
754                 dst[dst_p++] = d;
755                 dst[dst_p++] = '-';
756                 dst[dst_p++] = '9';
757                 dst[dst_p++] = ']';
758             }
759             else
760                 dst[dst_p++] = d;
761         }
            /* the remaining pos digits may be anything */
762         for (i = 0; i<pos; i++)
763         {
764             dst[dst_p++] = '[';
765             dst[dst_p++] = '0';
766             dst[dst_p++] = '-';
767             dst[dst_p++] = '9';
768             dst[dst_p++] = ']';
769         }
770         dst[dst_p++] = '|';
771     }
772     dst[dst_p] = '\0';
773     if (islt)
774     {
775         /* match everything less than 10^(pos-1) */
776         strcat(dst, "0*");
777         for (i = 1; i<pos; i++)
778             strcat(dst, "[0-9]?");
779     }
780     else
781     {
782         /* match everything greater than 10^pos */
783         for (i = 0; i <= pos; i++)
784             strcat(dst, "[0-9]");
785         strcat(dst, "[0-9]*");
786     }
787     strcat(dst, "))");
788 }
789
/* string_rel_add_char: copy one (possibly backslash-escaped) character
 * from src to the output.
 *
 *  term_p:  in/out write pointer; advanced past the copied bytes
 *  src:     NUL-terminated regular expression fragment
 *  indx:    in/out read index into src; advanced past the copied bytes
 *
 * A backslash followed by another character is copied as a two-byte unit.
 * A lone backslash at the very end of src is copied as a single byte, so
 * the index never moves beyond the terminating NUL.
 */
void string_rel_add_char(char **term_p, const char *src, int *indx)
{
    if (src[*indx] == '\\' && src[*indx + 1] != '\0')
        *(*term_p)++ = src[(*indx)++];
    *(*term_p)++ = src[(*indx)++];
}
796
797 /*
798  *   >  abc     ([b-].*|a[c-].*|ab[d-].*|abc.+)
799  *              ([^-a].*|a[^-b].*|ab[^-c].*|abc.+)
800  *   >= abc     ([b-].*|a[c-].*|ab[c-].*)
801  *              ([^-a].*|a[^-b].*|ab[c-].*)
802  *   <  abc     ([-0].*|a[-a].*|ab[-b].*)
803  *              ([^a-].*|a[^b-].*|ab[^c-].*)
804  *   <= abc     ([-0].*|a[-a].*|ab[-b].*|abc)
805  *              ([^a-].*|a[^b-].*|ab[^c-].*|abc)
806  */
/* string_relation: build a dictionary regular expression for a string term
 * according to the relation attribute (type 2) of zapt.
 *
 *  term_sub:    in/out position in the query term (consumed via term_100)
 *  term_dict:   NUL-terminated buffer; the expression is appended to it
 *  attributeSet: not referenced in this function
 *  term_dst:    receives the term as written in the query
 *  error_code:  set to 0, or to YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE for an
 *               unsupported relation value
 *
 * Relation values handled: 1 (<), 2 (<=), 3 (=), 4 (>=), 5 (>); 102 and a
 * missing relation attribute (-1) are treated like 3.
 *
 * Returns 1 on success, 0 if term_100 found no term or the relation is
 * unsupported.
 */
807 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
808                            const char **term_sub, char *term_dict,
809                            oid_value attributeSet,
810                            int reg_type, int space_split, char *term_dst,
811                            int *error_code)
812 {
813     AttrType relation;
814     int relation_value;
815     int i;
816     char *term_tmp = term_dict + strlen(term_dict);   /* append position */
817     char term_component[2*IT_MAX_WORD+20];
818
819     attr_init(&relation, zapt, 2);
820     relation_value = attr_find(&relation, NULL);
821
822     *error_code = 0;
823     yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
824     switch (relation_value)
825     {
826     case 1:
827         if (!term_100(zh->reg->zebra_maps, reg_type,
828                       term_sub, term_component,
829                       space_split, term_dst))
830             return 0;
831         yaz_log(log_level_rpn, "Relation <");
832         
            /* one alternative per character: prefix[^X-].* */
833         *term_tmp++ = '(';
834         for (i = 0; term_component[i]; )
835         {
836             int j = 0;
837
838             if (i)
839                 *term_tmp++ = '|';
840             while (j < i)
841                 string_rel_add_char(&term_tmp, term_component, &j);
842
843             *term_tmp++ = '[';
844
845             *term_tmp++ = '^';
846             string_rel_add_char(&term_tmp, term_component, &i);
847             *term_tmp++ = '-';
848
849             *term_tmp++ = ']';
850             *term_tmp++ = '.';
851             *term_tmp++ = '*';
852
                /* stop adding alternatives once the pattern is too long */
853             if ((term_tmp - term_dict) > IT_MAX_WORD)
854                 break;
855         }
856         *term_tmp++ = ')';
857         *term_tmp = '\0';
858         break;
859     case 2:
860         if (!term_100(zh->reg->zebra_maps, reg_type,
861                       term_sub, term_component,
862                       space_split, term_dst))
863             return 0;
864         yaz_log(log_level_rpn, "Relation <=");
865
            /* as for '<', followed by the term itself as last alternative */
866         *term_tmp++ = '(';
867         for (i = 0; term_component[i]; )
868         {
869             int j = 0;
870
871             while (j < i)
872                 string_rel_add_char(&term_tmp, term_component, &j);
873             *term_tmp++ = '[';
874
875             *term_tmp++ = '^';
876             string_rel_add_char(&term_tmp, term_component, &i);
877             *term_tmp++ = '-';
878
879             *term_tmp++ = ']';
880             *term_tmp++ = '.';
881             *term_tmp++ = '*';
882
883             *term_tmp++ = '|';
884
885             if ((term_tmp - term_dict) > IT_MAX_WORD)
886                 break;
887         }
888         for (i = 0; term_component[i]; )
889             string_rel_add_char(&term_tmp, term_component, &i);
890         *term_tmp++ = ')';
891         *term_tmp = '\0';
892         break;
893     case 5:
894         if (!term_100 (zh->reg->zebra_maps, reg_type,
895                        term_sub, term_component, space_split, term_dst))
896             return 0;
897         yaz_log(log_level_rpn, "Relation >");
898
            /* prefix[^-X].* per character, then term followed by ".+" */
899         *term_tmp++ = '(';
900         for (i = 0; term_component[i];)
901         {
902             int j = 0;
903
904             while (j < i)
905                 string_rel_add_char(&term_tmp, term_component, &j);
906             *term_tmp++ = '[';
907             
908             *term_tmp++ = '^';
909             *term_tmp++ = '-';
910             string_rel_add_char(&term_tmp, term_component, &i);
911
912             *term_tmp++ = ']';
913             *term_tmp++ = '.';
914             *term_tmp++ = '*';
915
916             *term_tmp++ = '|';
917
918             if ((term_tmp - term_dict) > IT_MAX_WORD)
919                 break;
920         }
921         for (i = 0; term_component[i];)
922             string_rel_add_char(&term_tmp, term_component, &i);
923         *term_tmp++ = '.';
924         *term_tmp++ = '+';
925         *term_tmp++ = ')';
926         *term_tmp = '\0';
927         break;
928     case 4:
929         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
930                       term_component, space_split, term_dst))
931             return 0;
932         yaz_log(log_level_rpn, "Relation >=");
933
934         *term_tmp++ = '(';
935         for (i = 0; term_component[i];)
936         {
937             int j = 0;
938
939             if (i)
940                 *term_tmp++ = '|';
941             while (j < i)
942                 string_rel_add_char(&term_tmp, term_component, &j);
943             *term_tmp++ = '[';
944
                /* when the byte after position i is NUL the class is
                   inclusive ([X-]); otherwise exclusive ([^-X]) */
945             if (term_component[i+1])
946             {
947                 *term_tmp++ = '^';
948                 *term_tmp++ = '-';
949                 string_rel_add_char(&term_tmp, term_component, &i);
950             }
951             else
952             {
953                 string_rel_add_char(&term_tmp, term_component, &i);
954                 *term_tmp++ = '-';
955             }
956             *term_tmp++ = ']';
957             *term_tmp++ = '.';
958             *term_tmp++ = '*';
959
960             if ((term_tmp - term_dict) > IT_MAX_WORD)
961                 break;
962         }
963         *term_tmp++ = ')';
964         *term_tmp = '\0';
965         break;
966     case 3:
967     case 102:
968     case -1:
969         yaz_log(log_level_rpn, "Relation =");
970         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
971                       term_component, space_split, term_dst))
972             return 0;
            /* exact match: the term in parentheses */
973         strcat(term_tmp, "(");
974         strcat(term_tmp, term_component);
975         strcat(term_tmp, ")");
976         break;
977     default:
978         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
979         return 0;
980     }
981     return 1;
982 }
983
/* Forward declaration: string_term() is called by term_trunc() below before
   its definition appears. */
984 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
985                              const char **term_sub, 
986                              oid_value attributeSet, NMEM stream,
987                              struct grep_info *grep_info,
988                              int reg_type, int complete_flag,
989                              int num_bases, char **basenames,
990                              char *term_dst, int xpath_use,
991                              struct ord_list **ol);
992
993 static ZEBRA_RES term_limits_APT(ZebraHandle zh,
994                                  Z_AttributesPlusTerm *zapt,
995                                  zint *hits_limit_value,
996                                  const char **term_ref_id_str)
997 {
998     AttrType term_ref_id_attr;
999     AttrType hits_limit_attr;
1000  
1001     attr_init(&hits_limit_attr, zapt, 9);
1002     *hits_limit_value  = attr_find(&hits_limit_attr, NULL);
1003
1004     attr_init(&term_ref_id_attr, zapt, 10);
1005     attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
1006
1007     /* no limit given ? */
1008     if (*hits_limit_value == -1)
1009         if (*term_ref_id_str)
1010         {
1011             /* use global if term_ref is present */
1012             *hits_limit_value = zh->approx_limit;
1013         }
1014         else
1015         {
1016             /* no counting if term_ref is not present */
1017             *hits_limit_value = 0;
1018         }
1019     else if (*hits_limit_value == 0)
1020     {
1021         /* 0 is the same as global limit */
1022         *hits_limit_value = zh->approx_limit;
1023     }
1024     yaz_log(YLOG_LOG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
1025             *term_ref_id_str ? *term_ref_id_str : "none",
1026             *hits_limit_value);
1027     return ZEBRA_OK;
1028 }
1029
1030 static ZEBRA_RES term_trunc(ZebraHandle zh,
1031                             Z_AttributesPlusTerm *zapt,
1032                             const char **term_sub, 
1033                             oid_value attributeSet, NMEM stream,
1034                             struct grep_info *grep_info,
1035                             int reg_type, int complete_flag,
1036                             int num_bases, char **basenames,
1037                             char *term_dst,
1038                             const char *rank_type, int xpath_use,
1039                             NMEM rset_nmem,
1040                             RSET *rset,
1041                             struct rset_key_control *kc)
1042 {
1043     ZEBRA_RES res;
1044     struct ord_list *ol;
1045     zint hits_limit_value;
1046     const char *term_ref_id_str = 0;
1047     *rset = 0;
1048
1049     term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str);
1050     grep_info->isam_p_indx = 0;
1051     res = string_term(zh, zapt, term_sub, attributeSet, stream, grep_info,
1052                       reg_type, complete_flag, num_bases, basenames,
1053                       term_dst, xpath_use, &ol);
1054     if (res != ZEBRA_OK)
1055         return res;
1056     if (!*term_sub)  /* no more terms ? */
1057         return res;
1058     yaz_log(log_level_rpn, "term: %s", term_dst);
1059     *rset = rset_trunc(zh, grep_info->isam_p_buf,
1060                        grep_info->isam_p_indx, term_dst,
1061                        strlen(term_dst), rank_type, 1 /* preserve pos */,
1062                        zapt->term->which, rset_nmem,
1063                        kc, kc->scope, ol, reg_type, hits_limit_value,
1064                        term_ref_id_str);
1065     if (!*rset)
1066         return ZEBRA_FAIL;
1067     return ZEBRA_OK;
1068 }
1069
1070 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1071                              const char **term_sub, 
1072                              oid_value attributeSet, NMEM stream,
1073                              struct grep_info *grep_info,
1074                              int reg_type, int complete_flag,
1075                              int num_bases, char **basenames,
1076                              char *term_dst, int xpath_use,
1077                              struct ord_list **ol)
1078 {
1079     char term_dict[2*IT_MAX_WORD+4000];
1080     int j, r, base_no;
1081     AttrType truncation;
1082     int truncation_value;
1083     AttrType use;
1084     int use_value;
1085     const char *use_string = 0;
1086     oid_value curAttributeSet = attributeSet;
1087     const char *termp;
1088     struct rpn_char_map_info rcmi;
1089     int space_split = complete_flag ? 0 : 1;
1090
1091     int bases_ok = 0;     /* no of databases with OK attribute */
1092
1093     *ol = ord_list_create(stream);
1094
1095     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1096     attr_init(&use, zapt, 1);
1097     use_value = attr_find_ex(&use, &curAttributeSet, &use_string);
1098     yaz_log(log_level_rpn, "string_term, use value %d", use_value);
1099     attr_init(&truncation, zapt, 5);
1100     truncation_value = attr_find(&truncation, NULL);
1101     yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1102
1103     if (use_value == -1)    /* no attribute - assumy "any" */
1104         use_value = 1016;
1105     for (base_no = 0; base_no < num_bases; base_no++)
1106     {
1107         int ord = -1;
1108         int attr_ok = 0;
1109         int regex_range = 0;
1110         int init_pos = 0;
1111         attent attp;
1112         data1_local_attribute id_xpath_attr;
1113         data1_local_attribute *local_attr;
1114         int max_pos, prefix_len = 0;
1115         int relation_error;
1116
1117         termp = *term_sub;
1118
1119         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1120         {
1121             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1122                            basenames[base_no]);
1123             return ZEBRA_FAIL;
1124         }
1125         if (xpath_use > 0 && use_value == -2) 
1126         {
1127             /* xpath mode and we have a string attribute */
1128             attp.local_attributes = &id_xpath_attr;
1129             attp.attset_ordinal = VAL_IDXPATH;
1130             id_xpath_attr.next = 0;
1131
1132             use_value = xpath_use;  /* xpath_use as use-attribute now */
1133             id_xpath_attr.local = use_value;
1134         }
1135         else if (curAttributeSet == VAL_IDXPATH && use_value >= 0)
1136         {
1137             /* X-Path attribute, use numeric value directly */
1138             attp.local_attributes = &id_xpath_attr;
1139             attp.attset_ordinal = VAL_IDXPATH;
1140             id_xpath_attr.next = 0;
1141             id_xpath_attr.local = use_value;
1142         }
1143         else if (use_string &&
1144                  (ord = zebraExplain_lookup_attr_str(zh->reg->zei,
1145                                                      reg_type,
1146                                                      use_string)) >= 0)
1147         {
1148             /* we have a match for a raw string attribute */
1149             char ord_buf[32];
1150             int i, ord_len;
1151
1152             if (prefix_len)
1153                 term_dict[prefix_len++] = '|';
1154             else
1155                 term_dict[prefix_len++] = '(';
1156             
1157             ord_len = key_SU_encode (ord, ord_buf);
1158             for (i = 0; i<ord_len; i++)
1159             {
1160                 term_dict[prefix_len++] = 1;
1161                 term_dict[prefix_len++] = ord_buf[i];
1162             }
1163             attp.local_attributes = 0;  /* no more attributes */
1164             *ol = ord_list_append(stream, *ol, ord);
1165         }
1166         else 
1167         {
1168             /* lookup in the .att files . Allow string as well */
1169             if ((r = att_getentbyatt (zh, &attp, curAttributeSet, use_value,
1170                                       use_string)))
1171             {
1172                 yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
1173                         curAttributeSet, use_value, r);
1174                 if (r == -1)
1175                 {
1176                     /* set was found, but value wasn't defined */
1177                     if (use_string)
1178                         zebra_setError(zh, 
1179                                        YAZ_BIB1_UNSUPP_USE_ATTRIBUTE,
1180                                        use_string);
1181                     else
1182                         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_USE_ATTRIBUTE, 
1183                                             use_value);
1184                 }
1185                 else
1186                 {
1187                     int oid[OID_SIZE];
1188                     struct oident oident;
1189                     
1190                     oident.proto = PROTO_Z3950;
1191                     oident.oclass = CLASS_ATTSET;
1192                     oident.value = curAttributeSet;
1193                     oid_ent_to_oid (&oident, oid);
1194                     
1195                     zebra_setError(zh, 
1196                                    YAZ_BIB1_UNSUPP_ATTRIBUTE_SET,
1197                                    oident.desc);
1198                     
1199                 }
1200                 continue;
1201             }
1202         }
1203         for (local_attr = attp.local_attributes; local_attr;
1204              local_attr = local_attr->next)
1205         {
1206             char ord_buf[32];
1207             int i, ord_len;
1208             
1209             ord = zebraExplain_lookup_attr_su(zh->reg->zei,
1210                                               reg_type,
1211                                               attp.attset_ordinal,
1212                                               local_attr->local);
1213             if (ord < 0)
1214                 continue;
1215             *ol = ord_list_append(stream, *ol, ord);
1216             if (prefix_len)
1217                 term_dict[prefix_len++] = '|';
1218             else
1219                 term_dict[prefix_len++] = '(';
1220             
1221             ord_len = key_SU_encode (ord, ord_buf);
1222             for (i = 0; i<ord_len; i++)
1223             {
1224                 term_dict[prefix_len++] = 1;
1225                 term_dict[prefix_len++] = ord_buf[i];
1226             }
1227         }
1228         bases_ok++;
1229         if (prefix_len)
1230             attr_ok = 1;
1231
1232         term_dict[prefix_len++] = ')';
1233 #if REG_TYPE_PREFIX
1234         term_dict[prefix_len++] = 1;
1235         term_dict[prefix_len++] = reg_type;
1236         yaz_log(log_level_rpn, "reg_type = %d", term_dict[prefix_len-1]);
1237 #endif
1238         term_dict[prefix_len] = '\0';
1239         j = prefix_len;
1240         switch (truncation_value)
1241         {
1242         case -1:         /* not specified */
1243         case 100:        /* do not truncate */
1244             if (!string_relation (zh, zapt, &termp, term_dict,
1245                                   attributeSet,
1246                                   reg_type, space_split, term_dst,
1247                                   &relation_error))
1248             {
1249                 if (relation_error)
1250                 {
1251                     zebra_setError(zh, relation_error, 0);
1252                     return ZEBRA_FAIL;
1253                 }
1254                 *term_sub = 0;
1255                 return ZEBRA_OK;
1256             }
1257             break;
1258         case 1:          /* right truncation */
1259             term_dict[j++] = '(';
1260             if (!term_100(zh->reg->zebra_maps, reg_type,
1261                           &termp, term_dict + j, space_split, term_dst))
1262             {
1263                 *term_sub = 0;
1264                 return ZEBRA_OK;
1265             }
1266             strcat(term_dict, ".*)");
1267             break;
1268         case 2:          /* keft truncation */
1269             term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1270             if (!term_100(zh->reg->zebra_maps, reg_type,
1271                           &termp, term_dict + j, space_split, term_dst))
1272             {
1273                 *term_sub = 0;
1274                 return ZEBRA_OK;
1275             }
1276             strcat(term_dict, ")");
1277             break;
1278         case 3:          /* left&right truncation */
1279             term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1280             if (!term_100(zh->reg->zebra_maps, reg_type,
1281                           &termp, term_dict + j, space_split, term_dst))
1282             {
1283                 *term_sub = 0;
1284                 return ZEBRA_OK;
1285             }
1286             strcat(term_dict, ".*)");
1287             break;
1288         case 101:        /* process # in term */
1289             term_dict[j++] = '(';
1290             if (!term_101(zh->reg->zebra_maps, reg_type,
1291                           &termp, term_dict + j, space_split, term_dst))
1292             {
1293                 *term_sub = 0;
1294                 return ZEBRA_OK;
1295             }
1296             strcat(term_dict, ")");
1297             break;
1298         case 102:        /* Regexp-1 */
1299             term_dict[j++] = '(';
1300             if (!term_102(zh->reg->zebra_maps, reg_type,
1301                           &termp, term_dict + j, space_split, term_dst))
1302             {
1303                 *term_sub = 0;
1304                 return ZEBRA_OK;
1305             }
1306             strcat(term_dict, ")");
1307             break;
1308         case 103:       /* Regexp-2 */
1309             regex_range = 1;
1310             term_dict[j++] = '(';
1311             init_pos = 2;
1312             if (!term_103(zh->reg->zebra_maps, reg_type,
1313                           &termp, term_dict + j, &regex_range,
1314                           space_split, term_dst))
1315             {
1316                 *term_sub = 0;
1317                 return ZEBRA_OK;
1318             }
1319             strcat(term_dict, ")");
1320             break;
1321         case 104:        /* process # and ! in term */
1322             term_dict[j++] = '(';
1323             if (!term_104(zh->reg->zebra_maps, reg_type,
1324                           &termp, term_dict + j, space_split, term_dst))
1325             {
1326                 *term_sub = 0;
1327                 return ZEBRA_OK;
1328             }
1329             strcat(term_dict, ")");
1330             break;
1331         case 105:        /* process * and ! in term */
1332             term_dict[j++] = '(';
1333             if (!term_105(zh->reg->zebra_maps, reg_type,
1334                           &termp, term_dict + j, space_split, term_dst, 1))
1335             {
1336                 *term_sub = 0;
1337                 return ZEBRA_OK;
1338             }
1339             strcat(term_dict, ")");
1340             break;
1341         case 106:        /* process * and ! in term */
1342             term_dict[j++] = '(';
1343             if (!term_105(zh->reg->zebra_maps, reg_type,
1344                           &termp, term_dict + j, space_split, term_dst, 0))
1345             {
1346                 *term_sub = 0;
1347                 return ZEBRA_OK;
1348             }
1349             strcat(term_dict, ")");
1350             break;
1351         default:
1352             zebra_setError_zint(zh,
1353                                 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1354                                 truncation_value);
1355             return ZEBRA_FAIL;
1356         }
1357         if (attr_ok)
1358         {
1359             char buf[80];
1360             const char *input = term_dict + prefix_len;
1361             esc_str(buf, sizeof(buf), input, strlen(input));
1362         }
1363         if (attr_ok)
1364         {
1365             yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_dict+prefix_len);
1366             r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
1367                                  grep_info, &max_pos, init_pos,
1368                                  grep_handle);
1369             if (r)
1370                 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1371         }
1372     }
1373     if (!bases_ok)
1374         return ZEBRA_FAIL;
1375     *term_sub = termp;
1376     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1377     return ZEBRA_OK;
1378 }
1379
1380
1381 /* convert APT search term to UTF8 */
1382 static ZEBRA_RES zapt_term_to_utf8(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1383                                    char *termz)
1384 {
1385     size_t sizez;
1386     Z_Term *term = zapt->term;
1387
1388     switch (term->which)
1389     {
1390     case Z_Term_general:
1391         if (zh->iconv_to_utf8 != 0)
1392         {
1393             char *inbuf = term->u.general->buf;
1394             size_t inleft = term->u.general->len;
1395             char *outbuf = termz;
1396             size_t outleft = IT_MAX_WORD-1;
1397             size_t ret;
1398
1399             ret = yaz_iconv(zh->iconv_to_utf8, &inbuf, &inleft,
1400                         &outbuf, &outleft);
1401             if (ret == (size_t)(-1))
1402             {
1403                 ret = yaz_iconv(zh->iconv_to_utf8, 0, 0, 0, 0);
1404                 zebra_setError(
1405                     zh, 
1406                     YAZ_BIB1_QUERY_TERM_INCLUDES_CHARS_THAT_DO_NOT_TRANSLATE_INTO_,
1407                     0);
1408                 return ZEBRA_FAIL;
1409             }
1410             *outbuf = 0;
1411         }
1412         else
1413         {
1414             sizez = term->u.general->len;
1415             if (sizez > IT_MAX_WORD-1)
1416                 sizez = IT_MAX_WORD-1;
1417             memcpy (termz, term->u.general->buf, sizez);
1418             termz[sizez] = '\0';
1419         }
1420         break;
1421     case Z_Term_characterString:
1422         sizez = strlen(term->u.characterString);
1423         if (sizez > IT_MAX_WORD-1)
1424             sizez = IT_MAX_WORD-1;
1425         memcpy (termz, term->u.characterString, sizez);
1426         termz[sizez] = '\0';
1427         break;
1428     default:
1429         zebra_setError(zh, YAZ_BIB1_UNSUPP_CODED_VALUE_FOR_TERM, 0);
1430         return ZEBRA_FAIL;
1431     }
1432     return ZEBRA_OK;
1433 }
1434
1435 /* convert APT SCAN term to internal cmap */
1436 static ZEBRA_RES trans_scan_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1437                                  char *termz, int reg_type)
1438 {
1439     char termz0[IT_MAX_WORD];
1440
1441     if (zapt_term_to_utf8(zh, zapt, termz0) == ZEBRA_FAIL)
1442         return ZEBRA_FAIL;    /* error */
1443     else
1444     {
1445         const char **map;
1446         const char *cp = (const char *) termz0;
1447         const char *cp_end = cp + strlen(cp);
1448         const char *src;
1449         int i = 0;
1450         const char *space_map = NULL;
1451         int len;
1452             
1453         while ((len = (cp_end - cp)) > 0)
1454         {
1455             map = zebra_maps_input(zh->reg->zebra_maps, reg_type, &cp, len, 0);
1456             if (**map == *CHR_SPACE)
1457                 space_map = *map;
1458             else
1459             {
1460                 if (i && space_map)
1461                     for (src = space_map; *src; src++)
1462                         termz[i++] = *src;
1463                 space_map = NULL;
1464                 for (src = *map; *src; src++)
1465                     termz[i++] = *src;
1466             }
1467         }
1468         termz[i] = '\0';
1469     }
1470     return ZEBRA_OK;
1471 }
1472
1473 static void grep_info_delete(struct grep_info *grep_info)
1474 {
1475 #ifdef TERM_COUNT
1476     xfree(grep_info->term_no);
1477 #endif
1478     xfree(grep_info->isam_p_buf);
1479 }
1480
1481 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1482                                    Z_AttributesPlusTerm *zapt,
1483                                    struct grep_info *grep_info,
1484                                    int reg_type)
1485 {
1486     AttrType termset;
1487     int termset_value_numeric;
1488     const char *termset_value_string;
1489
1490 #ifdef TERM_COUNT
1491     grep_info->term_no = 0;
1492 #endif
1493     grep_info->isam_p_size = 0;
1494     grep_info->isam_p_buf = NULL;
1495     grep_info->zh = zh;
1496     grep_info->reg_type = reg_type;
1497     grep_info->termset = 0;
1498
1499     if (!zapt)
1500         return ZEBRA_OK;
1501     attr_init(&termset, zapt, 8);
1502     termset_value_numeric =
1503         attr_find_ex(&termset, NULL, &termset_value_string);
1504     if (termset_value_numeric != -1)
1505     {
1506         char resname[32];
1507         const char *termset_name = 0;
1508         if (termset_value_numeric != -2)
1509         {
1510     
1511             sprintf(resname, "%d", termset_value_numeric);
1512             termset_name = resname;
1513         }
1514         else
1515             termset_name = termset_value_string;
1516         yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1517         grep_info->termset = resultSetAdd(zh, termset_name, 1);
1518         if (!grep_info->termset)
1519         {
1520             zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1521             return ZEBRA_FAIL;
1522         }
1523     }
1524     return ZEBRA_OK;
1525 }
1526                                
1527 /**
1528   \brief Create result set(s) for list of terms
1529   \param zh Zebra Handle
1530   \param termz term as used in query but converted to UTF-8
1531   \param attributeSet default attribute set
1532   \param stream memory for result
1533   \param reg_type register type ('w', 'p',..)
1534   \param complete_flag whether it's phrases or not
1535   \param rank_type term flags for ranking
1536   \param xpath_use use attribute for X-Path (-1 for no X-path)
1537   \param num_bases number of databases
1538   \param basenames array of databases
1539   \param rset_mem memory for result sets
1540   \param result_sets output result set for each term in list (output)
1541   \param number number of output result sets
1542   \param kc rset key control to be used for created result sets
1543 */
1544 static ZEBRA_RES term_list_trunc(ZebraHandle zh,
1545                                  Z_AttributesPlusTerm *zapt,
1546                                  const char *termz,
1547                                  oid_value attributeSet,
1548                                  NMEM stream,
1549                                  int reg_type, int complete_flag,
1550                                  const char *rank_type, int xpath_use,
1551                                  int num_bases, char **basenames, 
1552                                  NMEM rset_nmem,
1553                                  RSET **result_sets, int *num_result_sets,
1554                                  struct rset_key_control *kc)
1555 {
1556     char term_dst[IT_MAX_WORD+1];
1557     struct grep_info grep_info;
1558     const char *termp = termz;
1559     int alloc_sets = 0;
1560
1561     *num_result_sets = 0;
1562     *term_dst = 0;
1563     if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1564         return ZEBRA_FAIL;
1565     while(1)
1566     { 
1567         ZEBRA_RES res;
1568
1569         if (alloc_sets == *num_result_sets)
1570         {
1571             int add = 10;
1572             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1573                                               sizeof(*rnew));
1574             if (alloc_sets)
1575                 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1576             alloc_sets = alloc_sets + add;
1577             *result_sets = rnew;
1578         }
1579         res = term_trunc(zh, zapt, &termp, attributeSet,
1580                          stream, &grep_info,
1581                          reg_type, complete_flag,
1582                          num_bases, basenames,
1583                          term_dst, rank_type,
1584                          xpath_use, rset_nmem,
1585                          &(*result_sets)[*num_result_sets],
1586                          kc);
1587         if (res != ZEBRA_OK)
1588         {
1589             int i;
1590             for (i = 0; i < *num_result_sets; i++)
1591                 rset_delete((*result_sets)[i]);
1592             grep_info_delete (&grep_info);
1593             return res;
1594         }
1595         if ((*result_sets)[*num_result_sets] == 0)
1596             break;
1597         (*num_result_sets)++;
1598     }
1599     grep_info_delete(&grep_info);
1600     return ZEBRA_OK;
1601 }
1602
1603 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1604                                        Z_AttributesPlusTerm *zapt,
1605                                        const char *termz_org,
1606                                        oid_value attributeSet,
1607                                        NMEM stream,
1608                                        int reg_type, int complete_flag,
1609                                        const char *rank_type, int xpath_use,
1610                                        int num_bases, char **basenames, 
1611                                        NMEM rset_nmem,
1612                                        RSET *rset,
1613                                        struct rset_key_control *kc)
1614 {
1615     RSET *result_sets = 0;
1616     int num_result_sets = 0;
1617     ZEBRA_RES res =
1618         term_list_trunc(zh, zapt, termz_org, attributeSet,
1619                         stream, reg_type, complete_flag,
1620                         rank_type, xpath_use,
1621                         num_bases, basenames,
1622                         rset_nmem,
1623                         &result_sets, &num_result_sets, kc);
1624     if (res != ZEBRA_OK)
1625         return res;
1626     if (num_result_sets == 0)
1627         *rset = rsnull_create (rset_nmem, kc, 0); 
1628     else if (num_result_sets == 1)
1629         *rset = result_sets[0];
1630     else
1631         *rset = rsprox_create(rset_nmem, kc, kc->scope,
1632                               num_result_sets, result_sets,
1633                               1 /* ordered */, 0 /* exclusion */,
1634                               3 /* relation */, 1 /* distance */);
1635     if (!*rset)
1636         return ZEBRA_FAIL;
1637     return ZEBRA_OK;
1638 }
1639
1640 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1641                                         Z_AttributesPlusTerm *zapt,
1642                                         const char *termz_org,
1643                                         oid_value attributeSet,
1644                                         NMEM stream,
1645                                         int reg_type, int complete_flag,
1646                                         const char *rank_type,
1647                                         int xpath_use,
1648                                         int num_bases, char **basenames,
1649                                         NMEM rset_nmem,
1650                                         RSET *rset,
1651                                         struct rset_key_control *kc)
1652 {
1653     RSET *result_sets = 0;
1654     int num_result_sets = 0;
1655     ZEBRA_RES res =
1656         term_list_trunc(zh, zapt, termz_org, attributeSet,
1657                         stream, reg_type, complete_flag,
1658                         rank_type, xpath_use,
1659                         num_bases, basenames,
1660                         rset_nmem,
1661                         &result_sets, &num_result_sets, kc);
1662     if (res != ZEBRA_OK)
1663         return res;
1664     if (num_result_sets == 0)
1665         *rset = rsnull_create (rset_nmem, kc, 0); 
1666     else if (num_result_sets == 1)
1667         *rset = result_sets[0];
1668     else
1669         *rset = rsmulti_or_create(rset_nmem, kc, kc->scope, 0 /* termid */,
1670                                   num_result_sets, result_sets);
1671     if (!*rset)
1672         return ZEBRA_FAIL;
1673     return ZEBRA_OK;
1674 }
1675
1676 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1677                                          Z_AttributesPlusTerm *zapt,
1678                                          const char *termz_org,
1679                                          oid_value attributeSet,
1680                                          NMEM stream,
1681                                          int reg_type, int complete_flag,
1682                                          const char *rank_type, 
1683                                          int xpath_use,
1684                                          int num_bases, char **basenames,
1685                                          NMEM rset_nmem,
1686                                          RSET *rset,
1687                                          struct rset_key_control *kc)
1688 {
1689     RSET *result_sets = 0;
1690     int num_result_sets = 0;
1691     ZEBRA_RES res =
1692         term_list_trunc(zh, zapt, termz_org, attributeSet,
1693                         stream, reg_type, complete_flag,
1694                         rank_type, xpath_use,
1695                         num_bases, basenames,
1696                         rset_nmem,
1697                         &result_sets, &num_result_sets,
1698                         kc);
1699     if (res != ZEBRA_OK)
1700         return res;
1701     if (num_result_sets == 0)
1702         *rset = rsnull_create (rset_nmem, kc, 0); 
1703     else if (num_result_sets == 1)
1704         *rset = result_sets[0];
1705     else
1706         *rset = rsmulti_and_create(rset_nmem, kc, kc->scope,
1707                                    num_result_sets, result_sets);
1708     if (!*rset)
1709         return ZEBRA_FAIL;
1710     return ZEBRA_OK;
1711 }
1712
1713 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1714                             const char **term_sub,
1715                             char *term_dict,
1716                             oid_value attributeSet,
1717                             struct grep_info *grep_info,
1718                             int *max_pos,
1719                             int reg_type,
1720                             char *term_dst,
1721                             int *error_code)
1722 {
1723     AttrType relation;
1724     int relation_value;
1725     int term_value;
1726     int r;
1727     char *term_tmp = term_dict + strlen(term_dict);
1728
1729     *error_code = 0;
1730     attr_init(&relation, zapt, 2);
1731     relation_value = attr_find(&relation, NULL);
1732
1733     yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1734
1735     if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1736                   term_dst))
1737         return 0;
1738     term_value = atoi (term_tmp);
1739     switch (relation_value)
1740     {
1741     case 1:
1742         yaz_log(log_level_rpn, "Relation <");
1743         gen_regular_rel(term_tmp, term_value-1, 1);
1744         break;
1745     case 2:
1746         yaz_log(log_level_rpn, "Relation <=");
1747         gen_regular_rel(term_tmp, term_value, 1);
1748         break;
1749     case 4:
1750         yaz_log(log_level_rpn, "Relation >=");
1751         gen_regular_rel(term_tmp, term_value, 0);
1752         break;
1753     case 5:
1754         yaz_log(log_level_rpn, "Relation >");
1755         gen_regular_rel(term_tmp, term_value+1, 0);
1756         break;
1757     case -1:
1758     case 3:
1759         yaz_log(log_level_rpn, "Relation =");
1760         sprintf(term_tmp, "(0*%d)", term_value);
1761         break;
1762     default:
1763         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1764         return 0;
1765     }
1766     yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_tmp);
1767     r = dict_lookup_grep(zh->reg->dict, term_dict, 0, grep_info, max_pos,
1768                           0, grep_handle);
1769     if (r)
1770         yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1771     yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1772     return 1;
1773 }
1774
1775 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1776                               const char **term_sub, 
1777                               oid_value attributeSet,
1778                               struct grep_info *grep_info,
1779                               int reg_type, int complete_flag,
1780                               int num_bases, char **basenames,
1781                               char *term_dst, int xpath_use, NMEM stream)
1782 {
1783     char term_dict[2*IT_MAX_WORD+2];
1784     int r, base_no;
1785     AttrType use;
1786     int use_value;
1787     const char *use_string = 0;
1788     oid_value curAttributeSet = attributeSet;
1789     const char *termp;
1790     struct rpn_char_map_info rcmi;
1791
1792     int bases_ok = 0;     /* no of databases with OK attribute */
1793
1794     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1795     attr_init(&use, zapt, 1);
1796     use_value = attr_find_ex(&use, &curAttributeSet, &use_string);
1797
1798     if (use_value == -1)
1799         use_value = 1016;
1800
1801     for (base_no = 0; base_no < num_bases; base_no++)
1802     {
1803         attent attp;
1804         data1_local_attribute id_xpath_attr;
1805         data1_local_attribute *local_attr;
1806         int max_pos, prefix_len = 0;
1807         int relation_error = 0;
1808
1809         termp = *term_sub;
1810         if (use_value == -2)  /* string attribute (assume IDXPATH/any) */
1811         {
1812             use_value = xpath_use;
1813             attp.local_attributes = &id_xpath_attr;
1814             attp.attset_ordinal = VAL_IDXPATH;
1815             id_xpath_attr.next = 0;
1816             id_xpath_attr.local = use_value;
1817         }
1818         else if (curAttributeSet == VAL_IDXPATH)
1819         {
1820             attp.local_attributes = &id_xpath_attr;
1821             attp.attset_ordinal = VAL_IDXPATH;
1822             id_xpath_attr.next = 0;
1823             id_xpath_attr.local = use_value;
1824         }
1825         else
1826         {
1827             if ((r = att_getentbyatt (zh, &attp, curAttributeSet, use_value,
1828                                       use_string)))
1829             {
1830                 yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
1831                       curAttributeSet, use_value, r);
1832                 if (r == -1)
1833                 {
1834                     if (use_string)
1835                         zebra_setError(zh, 
1836                                        YAZ_BIB1_UNSUPP_USE_ATTRIBUTE,
1837                                        use_string);
1838                     else
1839                         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_USE_ATTRIBUTE, 
1840                                             use_value);
1841                 }
1842                 else
1843                     zebra_setError(zh, YAZ_BIB1_UNSUPP_ATTRIBUTE_SET, 0);
1844                 continue;
1845             }
1846         }
1847         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1848         {
1849             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1850                            basenames[base_no]);
1851             return ZEBRA_FAIL;
1852         }
1853         for (local_attr = attp.local_attributes; local_attr;
1854              local_attr = local_attr->next)
1855         {
1856             int ord;
1857             char ord_buf[32];
1858             int i, ord_len;
1859
1860             ord = zebraExplain_lookup_attr_su(zh->reg->zei,
1861                                               reg_type,
1862                                               attp.attset_ordinal,
1863                                               local_attr->local);
1864             if (ord < 0)
1865                 continue;
1866             if (prefix_len)
1867                 term_dict[prefix_len++] = '|';
1868             else
1869                 term_dict[prefix_len++] = '(';
1870
1871             ord_len = key_SU_encode (ord, ord_buf);
1872             for (i = 0; i<ord_len; i++)
1873             {
1874                 term_dict[prefix_len++] = 1;
1875                 term_dict[prefix_len++] = ord_buf[i];
1876             }
1877         }
1878         if (!prefix_len)
1879         {
1880             zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_USE_ATTRIBUTE, use_value);
1881             continue;
1882         }
1883         bases_ok++;
1884         term_dict[prefix_len++] = ')';
1885 #if REG_TYPE_PREFIX    
1886         term_dict[prefix_len++] = 1;
1887         term_dict[prefix_len++] = reg_type;
1888         yaz_log(YLOG_DEBUG, "reg_type = %d", term_dict[prefix_len-1]);
1889 #endif
1890         term_dict[prefix_len] = '\0';
1891         if (!numeric_relation(zh, zapt, &termp, term_dict,
1892                               attributeSet, grep_info, &max_pos, reg_type,
1893                               term_dst, &relation_error))
1894         {
1895             if (relation_error)
1896             {
1897                 zebra_setError(zh, relation_error, 0);
1898                 return ZEBRA_FAIL;
1899             }
1900             *term_sub = 0;
1901             return ZEBRA_OK;
1902         }
1903     }
1904     if (!bases_ok)
1905         return ZEBRA_FAIL;
1906     *term_sub = termp;
1907     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1908     return ZEBRA_OK;
1909 }
1910
1911                                  
1912 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1913                                         Z_AttributesPlusTerm *zapt,
1914                                         const char *termz,
1915                                         oid_value attributeSet,
1916                                         NMEM stream,
1917                                         int reg_type, int complete_flag,
1918                                         const char *rank_type, int xpath_use,
1919                                         int num_bases, char **basenames,
1920                                         NMEM rset_nmem,
1921                                         RSET *rset,
1922                                         struct rset_key_control *kc)
1923 {
1924     char term_dst[IT_MAX_WORD+1];
1925     const char *termp = termz;
1926     RSET *result_sets = 0;
1927     int num_result_sets = 0;
1928     ZEBRA_RES res;
1929     struct grep_info grep_info;
1930     int alloc_sets = 0;
1931     zint hits_limit_value;
1932     const char *term_ref_id_str = 0;
1933
1934     term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str);
1935
1936     yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1937     if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1938         return ZEBRA_FAIL;
1939     while (1)
1940     { 
1941         if (alloc_sets == num_result_sets)
1942         {
1943             int add = 10;
1944             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1945                                               sizeof(*rnew));
1946             if (alloc_sets)
1947                 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1948             alloc_sets = alloc_sets + add;
1949             result_sets = rnew;
1950         }
1951         yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1952         grep_info.isam_p_indx = 0;
1953         res = numeric_term(zh, zapt, &termp, attributeSet, &grep_info,
1954                            reg_type, complete_flag, num_bases, basenames,
1955                            term_dst, xpath_use,
1956                            stream);
1957         if (res == ZEBRA_FAIL || termp == 0)
1958             break;
1959         yaz_log(YLOG_DEBUG, "term: %s", term_dst);
1960         result_sets[num_result_sets] =
1961             rset_trunc(zh, grep_info.isam_p_buf,
1962                        grep_info.isam_p_indx, term_dst,
1963                        strlen(term_dst), rank_type,
1964                        0 /* preserve position */,
1965                        zapt->term->which, rset_nmem, 
1966                        kc, kc->scope, 0, reg_type,
1967                        hits_limit_value,
1968                        term_ref_id_str);
1969         if (!result_sets[num_result_sets])
1970             break;
1971         num_result_sets++;
1972     }
1973     grep_info_delete(&grep_info);
1974     if (termp)
1975     {
1976         int i;
1977         for (i = 0; i<num_result_sets; i++)
1978             rset_delete(result_sets[i]);
1979         return ZEBRA_FAIL;
1980     }
1981     if (num_result_sets == 0)
1982         *rset = rsnull_create(rset_nmem, kc, 0);
1983     if (num_result_sets == 1)
1984         *rset = result_sets[0];
1985     else
1986         *rset = rsmulti_and_create(rset_nmem, kc, kc->scope,
1987                                    num_result_sets, result_sets);
1988     if (!*rset)
1989         return ZEBRA_FAIL;
1990     return ZEBRA_OK;
1991 }
1992
1993 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1994                                       Z_AttributesPlusTerm *zapt,
1995                                       const char *termz,
1996                                       oid_value attributeSet,
1997                                       NMEM stream,
1998                                       const char *rank_type, NMEM rset_nmem,
1999                                       RSET *rset,
2000                                       struct rset_key_control *kc)
2001 {
2002     RSFD rsfd;
2003     struct it_key key;
2004     int sys;
2005     *rset = rstemp_create(rset_nmem, kc, kc->scope,
2006                           res_get (zh->res, "setTmpDir"),0 );
2007     rsfd = rset_open(*rset, RSETF_WRITE);
2008     
2009     sys = atoi(termz);
2010     if (sys <= 0)
2011         sys = 1;
2012     key.mem[0] = sys;
2013     key.mem[1] = 1;
2014     key.len = 2;
2015     rset_write (rsfd, &key);
2016     rset_close (rsfd);
2017     return ZEBRA_OK;
2018 }
2019
2020 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2021                                oid_value attributeSet, NMEM stream,
2022                                Z_SortKeySpecList *sort_sequence,
2023                                const char *rank_type,
2024                                NMEM rset_nmem,
2025                                RSET *rset,
2026                                struct rset_key_control *kc)
2027 {
2028     int i;
2029     int sort_relation_value;
2030     AttrType sort_relation_type;
2031     Z_SortKeySpec *sks;
2032     Z_SortKey *sk;
2033     int oid[OID_SIZE];
2034     oident oe;
2035     char termz[20];
2036     
2037     attr_init(&sort_relation_type, zapt, 7);
2038     sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
2039
2040     if (!sort_sequence->specs)
2041     {
2042         sort_sequence->num_specs = 10;
2043         sort_sequence->specs = (Z_SortKeySpec **)
2044             nmem_malloc(stream, sort_sequence->num_specs *
2045                          sizeof(*sort_sequence->specs));
2046         for (i = 0; i<sort_sequence->num_specs; i++)
2047             sort_sequence->specs[i] = 0;
2048     }
2049     if (zapt->term->which != Z_Term_general)
2050         i = 0;
2051     else
2052         i = atoi_n ((char *) zapt->term->u.general->buf,
2053                     zapt->term->u.general->len);
2054     if (i >= sort_sequence->num_specs)
2055         i = 0;
2056     sprintf(termz, "%d", i);
2057
2058     oe.proto = PROTO_Z3950;
2059     oe.oclass = CLASS_ATTSET;
2060     oe.value = attributeSet;
2061     if (!oid_ent_to_oid (&oe, oid))
2062         return ZEBRA_FAIL;
2063
2064     sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
2065     sks->sortElement = (Z_SortElement *)
2066         nmem_malloc(stream, sizeof(*sks->sortElement));
2067     sks->sortElement->which = Z_SortElement_generic;
2068     sk = sks->sortElement->u.generic = (Z_SortKey *)
2069         nmem_malloc(stream, sizeof(*sk));
2070     sk->which = Z_SortKey_sortAttributes;
2071     sk->u.sortAttributes = (Z_SortAttributes *)
2072         nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
2073
2074     sk->u.sortAttributes->id = oid;
2075     sk->u.sortAttributes->list = zapt->attributes;
2076
2077     sks->sortRelation = (int *)
2078         nmem_malloc(stream, sizeof(*sks->sortRelation));
2079     if (sort_relation_value == 1)
2080         *sks->sortRelation = Z_SortKeySpec_ascending;
2081     else if (sort_relation_value == 2)
2082         *sks->sortRelation = Z_SortKeySpec_descending;
2083     else 
2084         *sks->sortRelation = Z_SortKeySpec_ascending;
2085
2086     sks->caseSensitivity = (int *)
2087         nmem_malloc(stream, sizeof(*sks->caseSensitivity));
2088     *sks->caseSensitivity = 0;
2089
2090     sks->which = Z_SortKeySpec_null;
2091     sks->u.null = odr_nullval ();
2092     sort_sequence->specs[i] = sks;
2093     *rset = rsnull_create (rset_nmem, kc, 0);
2094     return ZEBRA_OK;
2095 }
2096
2097
2098 static int parse_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2099                        oid_value attributeSet,
2100                        struct xpath_location_step *xpath, int max, NMEM mem)
2101 {
2102     oid_value curAttributeSet = attributeSet;
2103     AttrType use;
2104     const char *use_string = 0;
2105     
2106     attr_init(&use, zapt, 1);
2107     attr_find_ex(&use, &curAttributeSet, &use_string);
2108
2109     if (!use_string || *use_string != '/')
2110         return -1;
2111
2112     return zebra_parse_xpath_str(use_string, xpath, max, mem);
2113 }
2114  
2115                
2116
2117 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
2118                         int reg_type, const char *term, int use,
2119                         oid_value curAttributeSet, NMEM rset_nmem,
2120                         struct rset_key_control *kc)
2121 {
2122     RSET rset;
2123     struct grep_info grep_info;
2124     char term_dict[2048];
2125     char ord_buf[32];
2126     int prefix_len = 0;
2127     int ord = zebraExplain_lookup_attr_su(zh->reg->zei, reg_type,
2128                                           curAttributeSet, use);
2129     int ord_len, i, r, max_pos;
2130     int term_type = Z_Term_characterString;
2131     const char *flags = "void";
2132
2133     if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, '0') == ZEBRA_FAIL)
2134         return rsnull_create(rset_nmem, kc, 0);
2135     
2136     if (ord < 0)
2137         return rsnull_create(rset_nmem, kc, 0);
2138     if (prefix_len)
2139         term_dict[prefix_len++] = '|';
2140     else
2141         term_dict[prefix_len++] = '(';
2142     
2143     ord_len = key_SU_encode (ord, ord_buf);
2144     for (i = 0; i<ord_len; i++)
2145     {
2146         term_dict[prefix_len++] = 1;
2147         term_dict[prefix_len++] = ord_buf[i];
2148     }
2149     term_dict[prefix_len++] = ')';
2150 #if REG_TYPE_PREFIX
2151     term_dict[prefix_len++] = 1;
2152     term_dict[prefix_len++] = reg_type;
2153 #endif
2154     strcpy(term_dict+prefix_len, term);
2155     
2156     grep_info.isam_p_indx = 0;
2157     r = dict_lookup_grep(zh->reg->dict, term_dict, 0,
2158                           &grep_info, &max_pos, 0, grep_handle);
2159     yaz_log(YLOG_DEBUG, "%s %d positions", term,
2160              grep_info.isam_p_indx);
2161     rset = rset_trunc(zh, grep_info.isam_p_buf,
2162                       grep_info.isam_p_indx, term, strlen(term),
2163                       flags, 1, term_type,rset_nmem,
2164                       kc, kc->scope, 0, reg_type, 0 /* hits_limit */,
2165                       0 /* term_ref_id_str */);
2166     grep_info_delete(&grep_info);
2167     return rset;
2168 }
2169
2170 static
2171 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2172                            oid_value attributeSet,
2173                            int num_bases, char **basenames,
2174                            NMEM stream, const char *rank_type, RSET rset,
2175                            int xpath_len, struct xpath_location_step *xpath,
2176                            NMEM rset_nmem,
2177                            RSET *rset_out,
2178                            struct rset_key_control *kc)
2179 {
2180     oid_value curAttributeSet = attributeSet;
2181     int base_no;
2182     int i;
2183
2184     if (xpath_len < 0)
2185     {
2186         *rset_out = rset;
2187         return ZEBRA_OK;
2188     }
2189
2190     yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2191     for (i = 0; i<xpath_len; i++)
2192     {
2193         yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2194
2195     }
2196
2197     curAttributeSet = VAL_IDXPATH;
2198
2199     /*
2200       //a    ->    a/.*
2201       //a/b  ->    b/a/.*
2202       /a     ->    a/
2203       /a/b   ->    b/a/
2204
2205       /      ->    none
2206
2207    a[@attr = value]/b[@other = othervalue]
2208
2209  /e/@a val      range(e/,range(@a,freetext(w,1015,val),@a),e/)
2210  /a/b val       range(b/a/,freetext(w,1016,val),b/a/)
2211  /a/b/@c val    range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2212  /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2213  /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2214  /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
2215       
2216     */
2217
2218     dict_grep_cmap (zh->reg->dict, 0, 0);
2219
2220     for (base_no = 0; base_no < num_bases; base_no++)
2221     {
2222         int level = xpath_len;
2223         int first_path = 1;
2224         
2225         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2226         {
2227             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2228                            basenames[base_no]);
2229             *rset_out = rset;
2230             return ZEBRA_FAIL;
2231         }
2232         while (--level >= 0)
2233         {
2234             char xpath_rev[128];
2235             int i, len;
2236             RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
2237
2238             *xpath_rev = 0;
2239             len = 0;
2240             for (i = level; i >= 1; --i)
2241             {
2242                 const char *cp = xpath[i].part;
2243                 if (*cp)
2244                 {
2245                     for (;*cp; cp++)
2246                         if (*cp == '*')
2247                         {
2248                             memcpy (xpath_rev + len, "[^/]*", 5);
2249                             len += 5;
2250                         }
2251                         else if (*cp == ' ')
2252                         {
2253
2254                             xpath_rev[len++] = 1;
2255                             xpath_rev[len++] = ' ';
2256                         }
2257
2258                         else
2259                             xpath_rev[len++] = *cp;
2260                     xpath_rev[len++] = '/';
2261                 }
2262                 else if (i == 1)  /* // case */
2263                 {
2264                     xpath_rev[len++] = '.';
2265                     xpath_rev[len++] = '*';
2266                 }
2267             }
2268             xpath_rev[len] = 0;
2269
2270             if (xpath[level].predicate &&
2271                 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2272                 xpath[level].predicate->u.relation.name[0])
2273             {
2274                 WRBUF wbuf = wrbuf_alloc();
2275                 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2276                 if (xpath[level].predicate->u.relation.value)
2277                 {
2278                     const char *cp = xpath[level].predicate->u.relation.value;
2279                     wrbuf_putc(wbuf, '=');
2280                     
2281                     while (*cp)
2282                     {
2283                         if (strchr(REGEX_CHARS, *cp))
2284                             wrbuf_putc(wbuf, '\\');
2285                         wrbuf_putc(wbuf, *cp);
2286                         cp++;
2287                     }
2288                 }
2289                 wrbuf_puts(wbuf, "");
2290                 rset_attr = xpath_trunc(
2291                     zh, stream, '0', wrbuf_buf(wbuf), 3, 
2292                     curAttributeSet, rset_nmem, kc);
2293                 wrbuf_free(wbuf, 1);
2294             } 
2295             else 
2296             {
2297                 if (!first_path)
2298                     continue;
2299             }
2300             yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level, xpath_rev);
2301             if (strlen(xpath_rev))
2302             {
2303                 rset_start_tag = xpath_trunc(zh, stream, '0', 
2304                         xpath_rev, 1, curAttributeSet, rset_nmem, kc);
2305             
2306                 rset_end_tag = xpath_trunc(zh, stream, '0', 
2307                         xpath_rev, 2, curAttributeSet, rset_nmem, kc);
2308
2309                 rset = rsbetween_create(rset_nmem, kc, kc->scope,
2310                                         rset_start_tag, rset,
2311                                         rset_end_tag, rset_attr);
2312             }
2313             first_path = 0;
2314         }
2315     }
2316     *rset_out = rset;
2317     return ZEBRA_OK;
2318 }
2319
2320 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2321                                 oid_value attributeSet, NMEM stream,
2322                                 Z_SortKeySpecList *sort_sequence,
2323                                 int num_bases, char **basenames, 
2324                                 NMEM rset_nmem,
2325                                 RSET *rset,
2326                                 struct rset_key_control *kc)
2327 {
2328     ZEBRA_RES res = ZEBRA_OK;
2329     unsigned reg_id;
2330     char *search_type = NULL;
2331     char rank_type[128];
2332     int complete_flag;
2333     int sort_flag;
2334     char termz[IT_MAX_WORD+1];
2335     int xpath_len;
2336     int xpath_use = 0;
2337     struct xpath_location_step xpath[10];
2338
2339     if (!log_level_set)
2340     {
2341         log_level_rpn = yaz_log_module_level("rpn");
2342         log_level_set = 1;
2343     }
2344     zebra_maps_attr(zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2345                     rank_type, &complete_flag, &sort_flag);
2346     
2347     yaz_log(YLOG_DEBUG, "reg_id=%c", reg_id);
2348     yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2349     yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2350     yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2351
2352     if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2353         return ZEBRA_FAIL;
2354
2355     if (sort_flag)
2356         return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2357                              rank_type, rset_nmem, rset, kc);
2358     /* consider if an X-Path query is used */
2359     xpath_len = parse_xpath(zh, zapt, attributeSet, xpath, 10, stream);
2360     if (xpath_len >= 0)
2361     {
2362         xpath_use = 1016;  /* searching for element by default */
2363         if (xpath[xpath_len-1].part[0] == '@') 
2364             xpath_use = 1015;  /* last step an attribute .. */
2365     }
2366
2367     /* search using one of the various search type strategies
2368        termz is our UTF-8 search term
2369        attributeSet is top-level default attribute set 
2370        stream is ODR for search
2371        reg_id is the register type
2372        complete_flag is 1 for complete subfield, 0 for incomplete
2373        xpath_use is use-attribute to be used for X-Path search, 0 for none
2374     */
2375     if (!strcmp(search_type, "phrase"))
2376     {
2377         res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2378                                     reg_id, complete_flag, rank_type,
2379                                     xpath_use,
2380                                     num_bases, basenames, rset_nmem,
2381                                     rset, kc);
2382     }
2383     else if (!strcmp(search_type, "and-list"))
2384     {
2385         res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2386                                       reg_id, complete_flag, rank_type,
2387                                       xpath_use,
2388                                       num_bases, basenames, rset_nmem,
2389                                       rset, kc);
2390     }
2391     else if (!strcmp(search_type, "or-list"))
2392     {
2393         res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2394                                      reg_id, complete_flag, rank_type,
2395                                      xpath_use,
2396                                      num_bases, basenames, rset_nmem,
2397                                      rset, kc);
2398     }
2399     else if (!strcmp(search_type, "local"))
2400     {
2401         res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2402                                    rank_type, rset_nmem, rset, kc);
2403     }
2404     else if (!strcmp(search_type, "numeric"))
2405     {
2406         res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2407                                      reg_id, complete_flag, rank_type,
2408                                      xpath_use,
2409                                      num_bases, basenames, rset_nmem,
2410                                      rset, kc);
2411     }
2412     else
2413     {
2414         zebra_setError(zh, YAZ_BIB1_UNSUPP_STRUCTURE_ATTRIBUTE, 0);
2415         res = ZEBRA_FAIL;
2416     }
2417     if (res != ZEBRA_OK)
2418         return res;
2419     if (!*rset)
2420         return ZEBRA_FAIL;
2421     return rpn_search_xpath(zh, attributeSet, num_bases, basenames,
2422                             stream, rank_type, *rset, 
2423                             xpath_len, xpath, rset_nmem, rset, kc);
2424 }
2425
2426 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2427                                       oid_value attributeSet, 
2428                                       NMEM stream, NMEM rset_nmem,
2429                                       Z_SortKeySpecList *sort_sequence,
2430                                       int num_bases, char **basenames,
2431                                       RSET **result_sets, int *num_result_sets,
2432                                       Z_Operator *parent_op,
2433                                       struct rset_key_control *kc);
2434
2435 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2436                          oid_value attributeSet, 
2437                          NMEM stream, NMEM rset_nmem,
2438                          Z_SortKeySpecList *sort_sequence,
2439                          int num_bases, char **basenames,
2440                          RSET *result_set)
2441 {
2442     RSET *result_sets = 0;
2443     int num_result_sets = 0;
2444     ZEBRA_RES res;
2445     struct rset_key_control *kc = zebra_key_control_create(zh);
2446
2447     res = rpn_search_structure(zh, zs, attributeSet,
2448                                stream, rset_nmem,
2449                                sort_sequence, 
2450                                num_bases, basenames,
2451                                &result_sets, &num_result_sets,
2452                                0 /* no parent op */,
2453                                kc);
2454     if (res != ZEBRA_OK)
2455     {
2456         int i;
2457         for (i = 0; i<num_result_sets; i++)
2458             rset_delete(result_sets[i]);
2459         *result_set = 0;
2460     }
2461     else
2462     {
2463         assert(num_result_sets == 1);
2464         assert(result_sets);
2465         assert(*result_sets);
2466         *result_set = *result_sets;
2467     }
2468     (*kc->dec)(kc);
2469     return res;
2470 }
2471
2472 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2473                                oid_value attributeSet, 
2474                                NMEM stream, NMEM rset_nmem,
2475                                Z_SortKeySpecList *sort_sequence,
2476                                int num_bases, char **basenames,
2477                                RSET **result_sets, int *num_result_sets,
2478                                Z_Operator *parent_op,
2479                                struct rset_key_control *kc)
2480 {
2481     *num_result_sets = 0;
2482     if (zs->which == Z_RPNStructure_complex)
2483     {
2484         ZEBRA_RES res;
2485         Z_Operator *zop = zs->u.complex->roperator;
2486         RSET *result_sets_l = 0;
2487         int num_result_sets_l = 0;
2488         RSET *result_sets_r = 0;
2489         int num_result_sets_r = 0;
2490
2491         res = rpn_search_structure(zh, zs->u.complex->s1,
2492                                    attributeSet, stream, rset_nmem,
2493                                    sort_sequence,
2494                                    num_bases, basenames,
2495                                    &result_sets_l, &num_result_sets_l,
2496                                    zop, kc);
2497         if (res != ZEBRA_OK)
2498         {
2499             int i;
2500             for (i = 0; i<num_result_sets_l; i++)
2501                 rset_delete(result_sets_l[i]);
2502             return res;
2503         }
2504         res = rpn_search_structure(zh, zs->u.complex->s2,
2505                                    attributeSet, stream, rset_nmem,
2506                                    sort_sequence,
2507                                    num_bases, basenames,
2508                                    &result_sets_r, &num_result_sets_r,
2509                                    zop, kc);
2510         if (res != ZEBRA_OK)
2511         {
2512             int i;
2513             for (i = 0; i<num_result_sets_l; i++)
2514                 rset_delete(result_sets_l[i]);
2515             for (i = 0; i<num_result_sets_r; i++)
2516                 rset_delete(result_sets_r[i]);
2517             return res;
2518         }
2519
2520         /* make a new list of result for all children */
2521         *num_result_sets = num_result_sets_l + num_result_sets_r;
2522         *result_sets = nmem_malloc(stream, *num_result_sets * 
2523                                    sizeof(**result_sets));
2524         memcpy(*result_sets, result_sets_l, 
2525                num_result_sets_l * sizeof(**result_sets));
2526         memcpy(*result_sets + num_result_sets_l, result_sets_r, 
2527                num_result_sets_r * sizeof(**result_sets));
2528
2529         if (!parent_op || parent_op->which != zop->which
2530             || (zop->which != Z_Operator_and &&
2531                 zop->which != Z_Operator_or))
2532         {
2533             /* parent node different from this one (or non-present) */
2534             /* we must combine result sets now */
2535             RSET rset;
2536             switch (zop->which)
2537             {
2538             case Z_Operator_and:
2539                 rset = rsmulti_and_create(rset_nmem, kc,
2540                                           kc->scope,
2541                                           *num_result_sets, *result_sets);
2542                 break;
2543             case Z_Operator_or:
2544                 rset = rsmulti_or_create(rset_nmem, kc,
2545                                          kc->scope, 0, /* termid */
2546                                          *num_result_sets, *result_sets);
2547                 break;
2548             case Z_Operator_and_not:
2549                 rset = rsbool_create_not(rset_nmem, kc,
2550                                          kc->scope,
2551                                          (*result_sets)[0],
2552                                          (*result_sets)[1]);
2553                 break;
2554             case Z_Operator_prox:
2555                 if (zop->u.prox->which != Z_ProximityOperator_known)
2556                 {
2557                     zebra_setError(zh, 
2558                                    YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2559                                    0);
2560                     return ZEBRA_FAIL;
2561                 }
2562                 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2563                 {
2564                     zebra_setError_zint(zh,
2565                                         YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2566                                         *zop->u.prox->u.known);
2567                     return ZEBRA_FAIL;
2568                 }
2569                 else
2570                 {
2571                     rset = rsprox_create(rset_nmem, kc,
2572                                          kc->scope,
2573                                          *num_result_sets, *result_sets, 
2574                                          *zop->u.prox->ordered,
2575                                          (!zop->u.prox->exclusion ? 
2576                                           0 : *zop->u.prox->exclusion),
2577                                          *zop->u.prox->relationType,
2578                                          *zop->u.prox->distance );
2579                 }
2580                 break;
2581             default:
2582                 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
2583                 return ZEBRA_FAIL;
2584             }
2585             *num_result_sets = 1;
2586             *result_sets = nmem_malloc(stream, *num_result_sets * 
2587                                        sizeof(**result_sets));
2588             (*result_sets)[0] = rset;
2589         }
2590     }
2591     else if (zs->which == Z_RPNStructure_simple)
2592     {
2593         RSET rset;
2594         ZEBRA_RES res;
2595
2596         if (zs->u.simple->which == Z_Operand_APT)
2597         {
2598             yaz_log(YLOG_DEBUG, "rpn_search_APT");
2599             res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2600                                  attributeSet, stream, sort_sequence,
2601                                  num_bases, basenames, rset_nmem, &rset,
2602                                  kc);
2603             if (res != ZEBRA_OK)
2604                 return res;
2605         }
2606         else if (zs->u.simple->which == Z_Operand_resultSetId)
2607         {
2608             yaz_log(YLOG_DEBUG, "rpn_search_ref");
2609             rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2610             if (!rset)
2611             {
2612                 zebra_setError(zh, 
2613                                YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2614                                zs->u.simple->u.resultSetId);
2615                 return ZEBRA_FAIL;
2616             }
2617             rset_dup(rset);
2618         }
2619         else
2620         {
2621             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2622             return ZEBRA_FAIL;
2623         }
2624         *num_result_sets = 1;
2625         *result_sets = nmem_malloc(stream, *num_result_sets * 
2626                                    sizeof(**result_sets));
2627         (*result_sets)[0] = rset;
2628     }
2629     else
2630     {
2631         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2632         return ZEBRA_FAIL;
2633     }
2634     return ZEBRA_OK;
2635 }
2636
2637 struct scan_info_entry {
2638     char *term;
2639     ISAM_P isam_p;
2640 };
2641
2642 struct scan_info {
2643     struct scan_info_entry *list;
2644     ODR odr;
2645     int before, after;
2646     char prefix[20];
2647 };
2648
2649 static int scan_handle (char *name, const char *info, int pos, void *client)
2650 {
2651     int len_prefix, idx;
2652     struct scan_info *scan_info = (struct scan_info *) client;
2653
2654     len_prefix = strlen(scan_info->prefix);
2655     if (memcmp (name, scan_info->prefix, len_prefix))
2656         return 1;
2657     if (pos > 0)
2658         idx = scan_info->after - pos + scan_info->before;
2659     else
2660         idx = - pos - 1;
2661
2662     if (idx < 0)
2663         return 0;
2664     scan_info->list[idx].term = (char *)
2665         odr_malloc(scan_info->odr, strlen(name + len_prefix)+1);
2666     strcpy(scan_info->list[idx].term, name + len_prefix);
2667     assert (*info == sizeof(ISAM_P));
2668     memcpy (&scan_info->list[idx].isam_p, info+1, sizeof(ISAM_P));
2669     return 0;
2670 }
2671
2672 void zebra_term_untrans_iconv(ZebraHandle zh, NMEM stream, int reg_type,
2673                               char **dst, const char *src)
2674 {
2675     char term_src[IT_MAX_WORD];
2676     char term_dst[IT_MAX_WORD];
2677     
2678     zebra_term_untrans (zh, reg_type, term_src, src);
2679
2680     if (zh->iconv_from_utf8 != 0)
2681     {
2682         int len;
2683         char *inbuf = term_src;
2684         size_t inleft = strlen(term_src);
2685         char *outbuf = term_dst;
2686         size_t outleft = sizeof(term_dst)-1;
2687         size_t ret;
2688         
2689         ret = yaz_iconv (zh->iconv_from_utf8, &inbuf, &inleft,
2690                          &outbuf, &outleft);
2691         if (ret == (size_t)(-1))
2692             len = 0;
2693         else
2694             len = outbuf - term_dst;
2695         *dst = nmem_malloc(stream, len + 1);
2696         if (len > 0)
2697             memcpy (*dst, term_dst, len);
2698         (*dst)[len] = '\0';
2699     }
2700     else
2701         *dst = nmem_strdup(stream, term_src);
2702 }
2703
2704 static void count_set(ZebraHandle zh, RSET rset, zint *count)
2705 {
2706     zint psysno = 0;
2707     struct it_key key;
2708     RSFD rfd;
2709
2710     yaz_log(YLOG_DEBUG, "count_set");
2711
2712     rset->hits_limit = zh->approx_limit;
2713
2714     *count = 0;
2715     rfd = rset_open(rset, RSETF_READ);
2716     while (rset_read(rfd, &key,0 /* never mind terms */))
2717     {
2718         if (key.mem[0] != psysno)
2719         {
2720             psysno = key.mem[0];
2721             if (rfd->counted_items >= rset->hits_limit)
2722                 break;
2723         }
2724     }
2725     rset_close (rfd);
2726     *count = rset->hits_count;
2727 }
2728
2729 ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
2730                    oid_value attributeset,
2731                    int num_bases, char **basenames,
2732                    int *position, int *num_entries, ZebraScanEntry **list,
2733                    int *is_partial, RSET limit_set, int return_zero)
2734 {
2735     int i;
2736     int pos = *position;
2737     int num = *num_entries;
2738     int before;
2739     int after;
2740     int base_no;
2741     char termz[IT_MAX_WORD+20];
2742     AttrType use;
2743     int use_value;
2744     const char *use_string = 0;
2745     struct scan_info *scan_info_array;
2746     ZebraScanEntry *glist;
2747     int ords[32], ord_no = 0;
2748     int ptr[32];
2749
2750     int bases_ok = 0;     /* no of databases with OK attribute */
2751     int errCode = 0;      /* err code (if any is not OK) */
2752     char *errString = 0;  /* addinfo */
2753
2754     unsigned reg_id;
2755     char *search_type = NULL;
2756     char rank_type[128];
2757     int complete_flag;
2758     int sort_flag;
2759     NMEM rset_nmem = NULL; 
2760     struct rset_key_control *kc = 0;
2761
2762     *list = 0;
2763     *is_partial = 0;
2764
2765     if (attributeset == VAL_NONE)
2766         attributeset = VAL_BIB1;
2767
2768     if (!limit_set)
2769     {
2770         AttrType termset;
2771         int termset_value_numeric;
2772         const char *termset_value_string;
2773         attr_init(&termset, zapt, 8);
2774         termset_value_numeric =
2775             attr_find_ex(&termset, NULL, &termset_value_string);
2776         if (termset_value_numeric != -1)
2777         {
2778             char resname[32];
2779             const char *termset_name = 0;
2780             
2781             if (termset_value_numeric != -2)
2782             {
2783                 
2784                 sprintf(resname, "%d", termset_value_numeric);
2785                 termset_name = resname;
2786             }
2787             else
2788                 termset_name = termset_value_string;
2789             
2790             limit_set = resultSetRef (zh, termset_name);
2791         }
2792     }
2793         
2794     yaz_log(YLOG_DEBUG, "position = %d, num = %d set=%d",
2795             pos, num, attributeset);
2796         
2797     attr_init(&use, zapt, 1);
2798     use_value = attr_find_ex(&use, &attributeset, &use_string);
2799
2800     if (zebra_maps_attr(zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2801                         rank_type, &complete_flag, &sort_flag))
2802     {
2803         *num_entries = 0;
2804         zebra_setError(zh, YAZ_BIB1_UNSUPP_ATTRIBUTE_TYPE, 0);
2805         return ZEBRA_FAIL;
2806     }
2807     yaz_log(YLOG_DEBUG, "use_value = %d", use_value);
2808
2809     if (use_value == -1)
2810         use_value = 1016;
2811     for (base_no = 0; base_no < num_bases && ord_no < 32; base_no++)
2812     {
2813         data1_local_attribute *local_attr;
2814         attent attp;
2815         int ord;
2816
2817         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2818         {
2819             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2820                            basenames[base_no]);
2821             *num_entries = 0;
2822             return ZEBRA_FAIL;
2823         }
2824
2825         if (use_string &&
2826             (ord = zebraExplain_lookup_attr_str(zh->reg->zei, reg_id,
2827                                                 use_string)) >= 0)
2828         {
2829             /* we have a match for a raw string attribute */
2830             if (ord > 0)
2831                 ords[ord_no++] = ord;
2832             attp.local_attributes = 0;  /* no more attributes */
2833         }
2834         else
2835         {
2836             int r;
2837             
2838             if ((r = att_getentbyatt (zh, &attp, attributeset, use_value,
2839                                       use_string)))
2840             {
2841                 yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d",
2842                         attributeset, use_value);
2843                 if (r == -1)
2844                 {
2845                     errCode = YAZ_BIB1_UNSUPP_USE_ATTRIBUTE;
2846                     if (use_string)
2847                         zebra_setError(zh, YAZ_BIB1_UNSUPP_USE_ATTRIBUTE,
2848                                        use_string);
2849                     else
2850                         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_USE_ATTRIBUTE,
2851                                             use_value);
2852                 }   
2853                 else
2854                 {
2855                     zebra_setError(zh, YAZ_BIB1_UNSUPP_ATTRIBUTE_SET, 0);
2856                 }
2857                 continue;
2858             }
2859         }
2860         bases_ok++;
2861         for (local_attr = attp.local_attributes; local_attr && ord_no < 32;
2862              local_attr = local_attr->next)
2863         {
2864             ord = zebraExplain_lookup_attr_su(zh->reg->zei, reg_id,
2865                                               attp.attset_ordinal,
2866                                               local_attr->local);
2867             if (ord > 0)
2868                 ords[ord_no++] = ord;
2869         }
2870     }
2871     if (!bases_ok && errCode)
2872     {
2873         zebra_setError(zh, errCode, errString);
2874         *num_entries = 0;
2875         return ZEBRA_FAIL;
2876     }
2877     if (ord_no == 0)
2878     {
2879         *num_entries = 0;
2880         return ZEBRA_OK;
2881     }
2882     /* prepare dictionary scanning */
2883     if (num < 1)
2884     {
2885         *num_entries = 0;
2886         return ZEBRA_OK;
2887     }
2888     before = pos-1;
2889     if (before < 0)
2890         before = 0;
2891     after = 1+num-pos;
2892     if (after < 0)
2893         after = 0;
2894     yaz_log(YLOG_DEBUG, "rpn_scan pos=%d num=%d before=%d "
2895             "after=%d before+after=%d",
2896             pos, num, before, after, before+after);
2897     scan_info_array = (struct scan_info *)
2898         odr_malloc(stream, ord_no * sizeof(*scan_info_array));
2899     for (i = 0; i < ord_no; i++)
2900     {
2901         int j, prefix_len = 0;
2902         int before_tmp = before, after_tmp = after;
2903         struct scan_info *scan_info = scan_info_array + i;
2904         struct rpn_char_map_info rcmi;
2905
2906         rpn_char_map_prepare (zh->reg, reg_id, &rcmi);
2907
2908         scan_info->before = before;
2909         scan_info->after = after;
2910         scan_info->odr = stream;
2911
2912         scan_info->list = (struct scan_info_entry *)
2913             odr_malloc(stream, (before+after) * sizeof(*scan_info->list));
2914         for (j = 0; j<before+after; j++)
2915             scan_info->list[j].term = NULL;
2916
2917         prefix_len += key_SU_encode (ords[i], termz + prefix_len);
2918 #if REG_TYPE_PREFIX
2919         termz[prefix_len++] = reg_id;
2920 #endif
2921         termz[prefix_len] = 0;
2922         strcpy(scan_info->prefix, termz);
2923
2924         if (trans_scan_term(zh, zapt, termz+prefix_len, reg_id) == ZEBRA_FAIL)
2925             return ZEBRA_FAIL;
2926         
2927         dict_scan(zh->reg->dict, termz, &before_tmp, &after_tmp,
2928                   scan_info, scan_handle);
2929     }
2930     glist = (ZebraScanEntry *)
2931         odr_malloc(stream, (before+after)*sizeof(*glist));
2932
2933     rset_nmem = nmem_create();
2934     kc = zebra_key_control_create(zh);
2935
2936     /* consider terms after main term */
2937     for (i = 0; i < ord_no; i++)
2938         ptr[i] = before;
2939     
2940     *is_partial = 0;
2941     for (i = 0; i<after; i++)
2942     {
2943         int j, j0 = -1;
2944         const char *mterm = NULL;
2945         const char *tst;
2946         RSET rset = 0;
2947         int lo = i + pos-1; /* offset in result list */
2948
2949         /* find: j0 is the first of the minimal values */
2950         for (j = 0; j < ord_no; j++)
2951         {
2952             if (ptr[j] < before+after && ptr[j] >= 0 &&
2953                 (tst = scan_info_array[j].list[ptr[j]].term) &&
2954                 (!mterm || strcmp (tst, mterm) < 0))
2955             {
2956                 j0 = j;
2957                 mterm = tst;
2958             }
2959         }
2960         if (j0 == -1)
2961             break;  /* no value found, stop */
2962
2963         /* get result set for first one , but only if it's within bounds */
2964         if (lo >= 0)
2965         {
2966             /* get result set for first term */
2967             zebra_term_untrans_iconv(zh, stream->mem, reg_id,
2968                                      &glist[lo].term, mterm);
2969             rset = rset_trunc(zh, &scan_info_array[j0].list[ptr[j0]].isam_p, 1,
2970                               glist[lo].term, strlen(glist[lo].term),
2971                               NULL, 0, zapt->term->which, rset_nmem, 
2972                               kc, kc->scope, 0, reg_id, 0 /* hits_limit */,
2973                               0 /* term_ref_id_str */);
2974         }
2975         ptr[j0]++; /* move index for this set .. */
2976         /* get result set for remaining scan terms */
2977         for (j = j0+1; j<ord_no; j++)
2978         {
2979             if (ptr[j] < before+after && ptr[j] >= 0 &&
2980                 (tst = scan_info_array[j].list[ptr[j]].term) &&
2981                 !strcmp (tst, mterm))
2982             {
2983                 if (lo >= 0)
2984                 {
2985                     RSET rsets[2];
2986                     
2987                     rsets[0] = rset;
2988                     rsets[1] =
2989                         rset_trunc(
2990                             zh, &scan_info_array[j].list[ptr[j]].isam_p, 1,
2991                             glist[lo].term,
2992                             strlen(glist[lo].term), NULL, 0,
2993                             zapt->term->which,rset_nmem,
2994                             kc, kc->scope, 0, reg_id, 0 /* hits_limit */,
2995                             0 /* term_ref_id_str */ );
2996                     rset = rsmulti_or_create(rset_nmem, kc,
2997                                              kc->scope, 0 /* termid */,
2998                                              2, rsets);
2999                 }
3000                 ptr[j]++;
3001             }
3002         }
3003         if (lo >= 0)
3004         {
3005             zint count;
3006             /* merge with limit_set if given */
3007             if (limit_set)
3008             {
3009                 RSET rsets[2];
3010                 rsets[0] = rset;
3011                 rsets[1] = rset_dup(limit_set);
3012                 
3013                 rset = rsmulti_and_create(rset_nmem, kc,
3014                                           kc->scope,
3015                                           2, rsets);
3016             }
3017             /* count it */
3018             count_set(zh, rset, &count);
3019             glist[lo].occurrences = count;
3020             rset_delete(rset);
3021         }
3022     }
3023     if (i < after)
3024     {
3025         *num_entries -= (after-i);
3026         *is_partial = 1;
3027         if (*num_entries < 0)
3028         {
3029             (*kc->dec)(kc);
3030             nmem_destroy(rset_nmem);
3031             *num_entries = 0;
3032             return ZEBRA_OK;
3033         }
3034     }
3035     /* consider terms before main term */
3036     for (i = 0; i<ord_no; i++)
3037         ptr[i] = 0;
3038     
3039     for (i = 0; i<before; i++)
3040     {
3041         int j, j0 = -1;
3042         const char *mterm = NULL;
3043         const char *tst;
3044         RSET rset;
3045         int lo = before-1-i; /* offset in result list */
3046         zint count;
3047         
3048         for (j = 0; j <ord_no; j++)
3049         {
3050             if (ptr[j] < before && ptr[j] >= 0 &&
3051                 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
3052                 (!mterm || strcmp (tst, mterm) > 0))
3053             {
3054                 j0 = j;
3055                     mterm = tst;
3056             }
3057         }
3058         if (j0 == -1)
3059             break;
3060         
3061         zebra_term_untrans_iconv(zh, stream->mem, reg_id,
3062                                  &glist[lo].term, mterm);
3063         
3064         rset = rset_trunc
3065             (zh, &scan_info_array[j0].list[before-1-ptr[j0]].isam_p, 1,
3066              glist[lo].term, strlen(glist[lo].term),
3067              NULL, 0, zapt->term->which, rset_nmem,
3068              kc, kc->scope, 0, reg_id, 0 /* hits_limit */,
3069              0 /* term_ref_id_str */);
3070         
3071         ptr[j0]++;
3072         
3073         for (j = j0+1; j<ord_no; j++)
3074         {
3075             if (ptr[j] < before && ptr[j] >= 0 &&
3076                 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
3077                 !strcmp (tst, mterm))
3078             {
3079                 RSET rsets[2];
3080                 
3081                 rsets[0] = rset;
3082                 rsets[1] = rset_trunc(
3083                     zh,
3084                     &scan_info_array[j].list[before-1-ptr[j]].isam_p, 1,
3085                     glist[lo].term,
3086                     strlen(glist[lo].term), NULL, 0,
3087                     zapt->term->which, rset_nmem,
3088                     kc, kc->scope, 0, reg_id, 0 /* hits_limit */,
3089                     0 /* term_ref_id_str */);
3090                 rset = rsmulti_or_create(rset_nmem, kc,
3091                                          kc->scope, 0 /* termid */, 2, rsets);
3092                 
3093                 ptr[j]++;
3094             }
3095         }
3096         if (limit_set)
3097         {
3098             RSET rsets[2];
3099             rsets[0] = rset;
3100             rsets[1] = rset_dup(limit_set);
3101             
3102             rset = rsmulti_and_create(rset_nmem, kc,
3103                                       kc->scope, 2, rsets);
3104         }
3105         count_set(zh, rset, &count);
3106         glist[lo].occurrences = count;
3107         rset_delete (rset);
3108     }
3109     (*kc->dec)(kc);
3110     nmem_destroy(rset_nmem);
3111     i = before-i;
3112     if (i)
3113     {
3114         *is_partial = 1;
3115         *position -= i;
3116         *num_entries -= i;
3117         if (*num_entries <= 0)
3118         {
3119             *num_entries = 0;
3120             return ZEBRA_OK;
3121         }
3122     }
3123     
3124     *list = glist + i;               /* list is set to first 'real' entry */
3125     
3126     yaz_log(YLOG_DEBUG, "position = %d, num_entries = %d",
3127             *position, *num_entries);
3128     return ZEBRA_OK;
3129 }
3130