a928fa08c029f0f83f76598d16fb4a19cb0602da
[idzebra-moved-to-github.git] / index / zrpn.c
1 /* $Id: zrpn.c,v 1.199 2005-06-14 12:42:48 adam Exp $
2    Copyright (C) 1995-2005
3    Index Data ApS
4
5 This file is part of the Zebra server.
6
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra.  If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA.
21 */
22
23 #include <stdio.h>
24 #include <assert.h>
25 #ifdef WIN32
26 #include <io.h>
27 #else
28 #include <unistd.h>
29 #endif
30 #include <ctype.h>
31
32 #include <yaz/diagbib1.h>
33 #include "index.h"
34 #include <zebra_xpath.h>
35
36 #include <charmap.h>
37 #include <rset.h>
38
39 struct rpn_char_map_info
40 {
41     ZebraMaps zm;
42     int reg_type;
43 };
44
45 typedef struct
46 {
47     int type;
48     int major;
49     int minor;
50     Z_AttributesPlusTerm *zapt;
51 } AttrType;
52
/* Logging state: log_level_rpn receives the level of the "rpn" log module
   the first time add_isam_p runs; log_level_set records that this happened. */
static int log_level_set = 0;
static int log_level_rpn = 0;
55
56 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
57 {
58     struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
59     const char **out = zebra_maps_input(p->zm, p->reg_type, from, len, 0);
60 #if 0
61     if (out && *out)
62     {
63         const char *outp = *out;
64         yaz_log(YLOG_LOG, "---");
65         while (*outp)
66         {
67             yaz_log(YLOG_LOG, "%02X", *outp);
68             outp++;
69         }
70     }
71 #endif
72     return out;
73 }
74
75 static void rpn_char_map_prepare(struct zebra_register *reg, int reg_type,
76                                   struct rpn_char_map_info *map_info)
77 {
78     map_info->zm = reg->zebra_maps;
79     map_info->reg_type = reg_type;
80     dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
81 }
82
83 static int attr_find_ex(AttrType *src, oid_value *attributeSetP,
84                          const char **string_value)
85 {
86     int num_attributes;
87
88     num_attributes = src->zapt->attributes->num_attributes;
89     while (src->major < num_attributes)
90     {
91         Z_AttributeElement *element;
92
93         element = src->zapt->attributes->attributes[src->major];
94         if (src->type == *element->attributeType)
95         {
96             switch (element->which) 
97             {
98             case Z_AttributeValue_numeric:
99                 ++(src->major);
100                 if (element->attributeSet && attributeSetP)
101                 {
102                     oident *attrset;
103
104                     attrset = oid_getentbyoid(element->attributeSet);
105                     *attributeSetP = attrset->value;
106                 }
107                 return *element->value.numeric;
108                 break;
109             case Z_AttributeValue_complex:
110                 if (src->minor >= element->value.complex->num_list)
111                     break;
112                 if (element->attributeSet && attributeSetP)
113                 {
114                     oident *attrset;
115                     
116                     attrset = oid_getentbyoid(element->attributeSet);
117                     *attributeSetP = attrset->value;
118                 }
119                 if (element->value.complex->list[src->minor]->which ==  
120                     Z_StringOrNumeric_numeric)
121                 {
122                     ++(src->minor);
123                     return
124                         *element->value.complex->list[src->minor-1]->u.numeric;
125                 }
126                 else if (element->value.complex->list[src->minor]->which ==  
127                          Z_StringOrNumeric_string)
128                 {
129                     if (!string_value)
130                         break;
131                     ++(src->minor);
132                     *string_value = 
133                         element->value.complex->list[src->minor-1]->u.string;
134                     return -2;
135                 }
136                 else
137                     break;
138             default:
139                 assert(0);
140             }
141         }
142         ++(src->major);
143     }
144     return -1;
145 }
146
147 static int attr_find(AttrType *src, oid_value *attributeSetP)
148 {
149     return attr_find_ex(src, attributeSetP, 0);
150 }
151
152 static void attr_init(AttrType *src, Z_AttributesPlusTerm *zapt,
153                        int type)
154 {
155     src->zapt = zapt;
156     src->type = type;
157     src->major = 0;
158     src->minor = 0;
159 }
160
161 #define TERM_COUNT        
162        
163 struct grep_info {        
164 #ifdef TERM_COUNT        
165     int *term_no;        
166 #endif        
167     ISAM_P *isam_p_buf;
168     int isam_p_size;        
169     int isam_p_indx;
170     ZebraHandle zh;
171     int reg_type;
172     ZebraSet termset;
173 };        
174
175 void zebra_term_untrans(ZebraHandle zh, int reg_type,
176                         char *dst, const char *src)
177 {
178     int len = 0;
179     while (*src)
180     {
181         const char *cp = zebra_maps_output(zh->reg->zebra_maps,
182                                            reg_type, &src);
183         if (!cp && len < IT_MAX_WORD-1)
184             dst[len++] = *src++;
185         else
186             while (*cp && len < IT_MAX_WORD-1)
187                 dst[len++] = *cp++;
188     }
189     dst[len] = '\0';
190 }
191
192 static void add_isam_p(const char *name, const char *info,
193                        struct grep_info *p)
194 {
195     if (!log_level_set)
196     {
197         log_level_rpn = yaz_log_module_level("rpn");
198         log_level_set = 1;
199     }
200     if (p->isam_p_indx == p->isam_p_size)
201     {
202         ISAM_P *new_isam_p_buf;
203 #ifdef TERM_COUNT        
204         int *new_term_no;        
205 #endif
206         p->isam_p_size = 2*p->isam_p_size + 100;
207         new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
208                                             p->isam_p_size);
209         if (p->isam_p_buf)
210         {
211             memcpy(new_isam_p_buf, p->isam_p_buf,
212                     p->isam_p_indx * sizeof(*p->isam_p_buf));
213             xfree(p->isam_p_buf);
214         }
215         p->isam_p_buf = new_isam_p_buf;
216
217 #ifdef TERM_COUNT
218         new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
219         if (p->term_no)
220         {
221             memcpy(new_term_no, p->isam_p_buf,
222                     p->isam_p_indx * sizeof(*p->term_no));
223             xfree(p->term_no);
224         }
225         p->term_no = new_term_no;
226 #endif
227     }
228     assert(*info == sizeof(*p->isam_p_buf));
229     memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
230
231 #if 1
232     if (p->termset)
233     {
234         const char *db;
235         int set, use;
236         char term_tmp[IT_MAX_WORD];
237         int su_code = 0;
238         int len = key_SU_decode (&su_code, name);
239         
240         zebra_term_untrans  (p->zh, p->reg_type, term_tmp, name+len+1);
241         yaz_log(log_level_rpn, "grep: %d %c %s", su_code, name[len], term_tmp);
242         zebraExplain_lookup_ord (p->zh->reg->zei,
243                                  su_code, &db, &set, &use);
244         yaz_log(log_level_rpn, "grep:  set=%d use=%d db=%s", set, use, db);
245         
246         resultSetAddTerm(p->zh, p->termset, name[len], db,
247                          set, use, term_tmp);
248     }
249 #endif
250     (p->isam_p_indx)++;
251 }
252
/* grep_handle: callback adapter around add_isam_p; p is the
 * struct grep_info to fill.  Always returns 0. */
static int grep_handle(char *name, const char *info, void *p)
{
    struct grep_info *gi = (struct grep_info *) p;

    add_isam_p(name, info, gi);
    return 0;
}
258
259 static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src,
260                     const char *ct1, const char *ct2, int first)
261 {
262     const char *s1, *s0 = *src;
263     const char **map;
264
265     /* skip white space */
266     while (*s0)
267     {
268         if (ct1 && strchr(ct1, *s0))
269             break;
270         if (ct2 && strchr(ct2, *s0))
271             break;
272         s1 = s0;
273         map = zebra_maps_input(zebra_maps, reg_type, &s1, strlen(s1), first);
274         if (**map != *CHR_SPACE)
275             break;
276         s0 = s1;
277     }
278     *src = s0;
279     return *s0;
280 }
281
282
/* esc_str: render a byte buffer as readable text for debugging.
 *  out_buf, out_size:  destination; out_size must exceed 20
 *  in_buf, in_size:    bytes to render
 * Each byte becomes "HH:c  " (hex value, then the character itself or
 * '?' when outside 32..126).  Once the output grows beyond
 * out_size-20 characters, ".." is appended and rendering stops.
 */
static void esc_str(char *out_buf, int out_size,
                    const char *in_buf, int in_size)
{
    size_t used = 0;    /* characters written so far */
    int k;

    assert(out_buf);
    assert(in_buf);
    assert(out_size > 20);
    out_buf[0] = '\0';
    for (k = 0; k < in_size; k++)
    {
        int c = in_buf[k] & 0xff;
        int pc = (c >= 32 && c <= 126) ? c : '?';

        used += sprintf(out_buf + used, "%02X:%c  ", c, pc);
        if (used > (size_t) (out_size - 20))
        {
            strcpy(out_buf + used, "..");
            break;
        }
    }
}
308
/* Characters that the term_1xx functions prefix with a backslash when
   copying literal input into a dictionary regular expression. */
#define REGEX_CHARS " []()|.*+?!"
310
311 /* term_100: handle term, where trunc = none(no operators at all) */
312 static int term_100(ZebraMaps zebra_maps, int reg_type,
313                     const char **src, char *dst, int space_split,
314                     char *dst_term)
315 {
316     const char *s0;
317     const char **map;
318     int i = 0;
319     int j = 0;
320
321     const char *space_start = 0;
322     const char *space_end = 0;
323
324     if (!term_pre(zebra_maps, reg_type, src, NULL, NULL, !space_split))
325         return 0;
326     s0 = *src;
327     while (*s0)
328     {
329         const char *s1 = s0;
330         int q_map_match = 0;
331         map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
332                                 &q_map_match);
333         if (space_split)
334         {
335             if (**map == *CHR_SPACE)
336                 break;
337         }
338         else  /* complete subfield only. */
339         {
340             if (**map == *CHR_SPACE)
341             {   /* save space mapping for later  .. */
342                 space_start = s1;
343                 space_end = s0;
344                 continue;
345             }
346             else if (space_start)
347             {   /* reload last space */
348                 while (space_start < space_end)
349                 {
350                     if (strchr(REGEX_CHARS, *space_start))
351                         dst[i++] = '\\';
352                     dst_term[j++] = *space_start;
353                     dst[i++] = *space_start++;
354                 }
355                 /* and reset */
356                 space_start = space_end = 0;
357             }
358         }
359         /* add non-space char */
360         memcpy(dst_term+j, s1, s0 - s1);
361         j += (s0 - s1);
362         if (!q_map_match)
363         {
364             while (s1 < s0)
365             {
366                 if (strchr(REGEX_CHARS, *s1))
367                     dst[i++] = '\\';
368                 dst[i++] = *s1++;
369             }
370         }
371         else
372         {
373             char tmpbuf[80];
374             esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
375             
376             strcpy(dst + i, map[0]);
377             i += strlen(map[0]);
378         }
379     }
380     dst[i] = '\0';
381     dst_term[j] = '\0';
382     *src = s0;
383     return i;
384 }
385
386 /* term_101: handle term, where trunc = Process # */
387 static int term_101(ZebraMaps zebra_maps, int reg_type,
388                     const char **src, char *dst, int space_split,
389                     char *dst_term)
390 {
391     const char *s0;
392     const char **map;
393     int i = 0;
394     int j = 0;
395
396     if (!term_pre(zebra_maps, reg_type, src, "#", "#", !space_split))
397         return 0;
398     s0 = *src;
399     while (*s0)
400     {
401         if (*s0 == '#')
402         {
403             dst[i++] = '.';
404             dst[i++] = '*';
405             dst_term[j++] = *s0++;
406         }
407         else
408         {
409             const char *s1 = s0;
410             int q_map_match = 0;
411             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
412                                     &q_map_match);
413             if (space_split && **map == *CHR_SPACE)
414                 break;
415
416             /* add non-space char */
417             memcpy(dst_term+j, s1, s0 - s1);
418             j += (s0 - s1);
419             if (!q_map_match)
420             {
421                 while (s1 < s0)
422                 {
423                     if (strchr(REGEX_CHARS, *s1))
424                         dst[i++] = '\\';
425                     dst[i++] = *s1++;
426                 }
427             }
428             else
429             {
430                 char tmpbuf[80];
431                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
432                 
433                 strcpy(dst + i, map[0]);
434                 i += strlen(map[0]);
435             }
436         }
437     }
438     dst[i] = '\0';
439     dst_term[j++] = '\0';
440     *src = s0;
441     return i;
442 }
443
444 /* term_103: handle term, where trunc = re-2 (regular expressions) */
445 static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src,
446                     char *dst, int *errors, int space_split,
447                     char *dst_term)
448 {
449     int i = 0;
450     int j = 0;
451     const char *s0;
452     const char **map;
453
454     if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split))
455         return 0;
456     s0 = *src;
457     if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
458         isdigit(((const unsigned char *)s0)[1]))
459     {
460         *errors = s0[1] - '0';
461         s0 += 3;
462         if (*errors > 3)
463             *errors = 3;
464     }
465     while (*s0)
466     {
467         if (strchr("^\\()[].*+?|-", *s0))
468         {
469             dst_term[j++] = *s0;
470             dst[i++] = *s0++;
471         }
472         else
473         {
474             const char *s1 = s0;
475             int q_map_match = 0;
476             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
477                                     &q_map_match);
478             if (space_split && **map == *CHR_SPACE)
479                 break;
480
481             /* add non-space char */
482             memcpy(dst_term+j, s1, s0 - s1);
483             j += (s0 - s1);
484             if (!q_map_match)
485             {
486                 while (s1 < s0)
487                 {
488                     if (strchr(REGEX_CHARS, *s1))
489                         dst[i++] = '\\';
490                     dst[i++] = *s1++;
491                 }
492             }
493             else
494             {
495                 char tmpbuf[80];
496                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
497                 
498                 strcpy(dst + i, map[0]);
499                 i += strlen(map[0]);
500             }
501         }
502     }
503     dst[i] = '\0';
504     dst_term[j] = '\0';
505     *src = s0;
506     
507     return i;
508 }
509
510 /* term_103: handle term, where trunc = re-1 (regular expressions) */
511 static int term_102(ZebraMaps zebra_maps, int reg_type, const char **src,
512                     char *dst, int space_split, char *dst_term)
513 {
514     return term_103(zebra_maps, reg_type, src, dst, NULL, space_split,
515                     dst_term);
516 }
517
518
519 /* term_104: handle term, where trunc = Process # and ! */
520 static int term_104(ZebraMaps zebra_maps, int reg_type,
521                     const char **src, char *dst, int space_split,
522                     char *dst_term)
523 {
524     const char *s0;
525     const char **map;
526     int i = 0;
527     int j = 0;
528
529     if (!term_pre(zebra_maps, reg_type, src, "?*#", "?*#", !space_split))
530         return 0;
531     s0 = *src;
532     while (*s0)
533     {
534         if (*s0 == '?')
535         {
536             dst_term[j++] = *s0++;
537             if (*s0 >= '0' && *s0 <= '9')
538             {
539                 int limit = 0;
540                 while (*s0 >= '0' && *s0 <= '9')
541                 {
542                     limit = limit * 10 + (*s0 - '0');
543                     dst_term[j++] = *s0++;
544                 }
545                 if (limit > 20)
546                     limit = 20;
547                 while (--limit >= 0)
548                 {
549                     dst[i++] = '.';
550                     dst[i++] = '?';
551                 }
552             }
553             else
554             {
555                 dst[i++] = '.';
556                 dst[i++] = '*';
557             }
558         }
559         else if (*s0 == '*')
560         {
561             dst[i++] = '.';
562             dst[i++] = '*';
563             dst_term[j++] = *s0++;
564         }
565         else if (*s0 == '#')
566         {
567             dst[i++] = '.';
568             dst_term[j++] = *s0++;
569         }
570         else
571         {
572             const char *s1 = s0;
573             int q_map_match = 0;
574             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
575                                     &q_map_match);
576             if (space_split && **map == *CHR_SPACE)
577                 break;
578
579             /* add non-space char */
580             memcpy(dst_term+j, s1, s0 - s1);
581             j += (s0 - s1);
582             if (!q_map_match)
583             {
584                 while (s1 < s0)
585                 {
586                     if (strchr(REGEX_CHARS, *s1))
587                         dst[i++] = '\\';
588                     dst[i++] = *s1++;
589                 }
590             }
591             else
592             {
593                 char tmpbuf[80];
594                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
595                 
596                 strcpy(dst + i, map[0]);
597                 i += strlen(map[0]);
598             }
599         }
600     }
601     dst[i] = '\0';
602     dst_term[j++] = '\0';
603     *src = s0;
604     return i;
605 }
606
607 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
608 static int term_105(ZebraMaps zebra_maps, int reg_type,
609                     const char **src, char *dst, int space_split,
610                     char *dst_term, int right_truncate)
611 {
612     const char *s0;
613     const char **map;
614     int i = 0;
615     int j = 0;
616
617     if (!term_pre(zebra_maps, reg_type, src, "*!", "*!", !space_split))
618         return 0;
619     s0 = *src;
620     while (*s0)
621     {
622         if (*s0 == '*')
623         {
624             dst[i++] = '.';
625             dst[i++] = '*';
626             dst_term[j++] = *s0++;
627         }
628         else if (*s0 == '!')
629         {
630             dst[i++] = '.';
631             dst_term[j++] = *s0++;
632         }
633         else
634         {
635             const char *s1 = s0;
636             int q_map_match = 0;
637             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
638                                     &q_map_match);
639             if (space_split && **map == *CHR_SPACE)
640                 break;
641
642             /* add non-space char */
643             memcpy(dst_term+j, s1, s0 - s1);
644             j += (s0 - s1);
645             if (!q_map_match)
646             {
647                 while (s1 < s0)
648                 {
649                     if (strchr(REGEX_CHARS, *s1))
650                         dst[i++] = '\\';
651                     dst[i++] = *s1++;
652                 }
653             }
654             else
655             {
656                 char tmpbuf[80];
657                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
658                 
659                 strcpy(dst + i, map[0]);
660                 i += strlen(map[0]);
661             }
662         }
663     }
664     if (right_truncate)
665     {
666         dst[i++] = '.';
667         dst[i++] = '*';
668     }
669     dst[i] = '\0';
670     
671     dst_term[j++] = '\0';
672     *src = s0;
673     return i;
674 }
675
676
677 /* gen_regular_rel - generate regular expression from relation
678  *  val:     border value (inclusive)
679  *  islt:    1 if <=; 0 if >=.
680  */
681 static void gen_regular_rel(char *dst, int val, int islt)
682 {
683     int dst_p;
684     int w, d, i;
685     int pos = 0;
686     char numstr[20];
687
688     yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
689     if (val >= 0)
690     {
691         if (islt)
692             strcpy(dst, "(-[0-9]+|(");
693         else
694             strcpy(dst, "((");
695     } 
696     else
697     {
698         if (!islt)
699         {
700             strcpy(dst, "([0-9]+|-(");
701             dst_p = strlen(dst);
702             islt = 1;
703         }
704         else
705         {
706             strcpy(dst, "(-(");
707             islt = 0;
708         }
709         val = -val;
710     }
711     dst_p = strlen(dst);
712     sprintf(numstr, "%d", val);
713     for (w = strlen(numstr); --w >= 0; pos++)
714     {
715         d = numstr[w];
716         if (pos > 0)
717         {
718             if (islt)
719             {
720                 if (d == '0')
721                     continue;
722                 d--;
723             } 
724             else
725             {
726                 if (d == '9')
727                     continue;
728                 d++;
729             }
730         }
731         
732         strcpy(dst + dst_p, numstr);
733         dst_p = strlen(dst) - pos - 1;
734
735         if (islt)
736         {
737             if (d != '0')
738             {
739                 dst[dst_p++] = '[';
740                 dst[dst_p++] = '0';
741                 dst[dst_p++] = '-';
742                 dst[dst_p++] = d;
743                 dst[dst_p++] = ']';
744             }
745             else
746                 dst[dst_p++] = d;
747         }
748         else
749         {
750             if (d != '9')
751             { 
752                 dst[dst_p++] = '[';
753                 dst[dst_p++] = d;
754                 dst[dst_p++] = '-';
755                 dst[dst_p++] = '9';
756                 dst[dst_p++] = ']';
757             }
758             else
759                 dst[dst_p++] = d;
760         }
761         for (i = 0; i<pos; i++)
762         {
763             dst[dst_p++] = '[';
764             dst[dst_p++] = '0';
765             dst[dst_p++] = '-';
766             dst[dst_p++] = '9';
767             dst[dst_p++] = ']';
768         }
769         dst[dst_p++] = '|';
770     }
771     dst[dst_p] = '\0';
772     if (islt)
773     {
774         /* match everything less than 10^(pos-1) */
775         strcat(dst, "0*");
776         for (i = 1; i<pos; i++)
777             strcat(dst, "[0-9]?");
778     }
779     else
780     {
781         /* match everything greater than 10^pos */
782         for (i = 0; i <= pos; i++)
783             strcat(dst, "[0-9]");
784         strcat(dst, "[0-9]*");
785     }
786     strcat(dst, "))");
787 }
788
/* string_rel_add_char: copy one pattern character from src[*indx] to
 * *term_p, advancing both.  A backslash is copied together with the
 * character that follows it, so an escaped character counts as one. */
void string_rel_add_char(char **term_p, const char *src, int *indx)
{
    char *out = *term_p;
    int at = *indx;

    if (src[at] == '\\')
        *out++ = src[at++];
    *out++ = src[at++];

    *term_p = out;
    *indx = at;
}
795
796 /*
797  *   >  abc     ([b-].*|a[c-].*|ab[d-].*|abc.+)
798  *              ([^-a].*|a[^-b].*ab[^-c].*|abc.+)
799  *   >= abc     ([b-].*|a[c-].*|ab[c-].*)
800  *              ([^-a].*|a[^-b].*|ab[c-].*)
801  *   <  abc     ([-0].*|a[-a].*|ab[-b].*)
802  *              ([^a-].*|a[^b-].*|ab[^c-].*)
803  *   <= abc     ([-0].*|a[-a].*|ab[-b].*|abc)
804  *              ([^a-].*|a[^b-].*|ab[^c-].*|abc)
805  */
806 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
807                            const char **term_sub, char *term_dict,
808                            oid_value attributeSet,
809                            int reg_type, int space_split, char *term_dst,
810                            int *error_code)
811 {
812     AttrType relation;
813     int relation_value;
814     int i;
815     char *term_tmp = term_dict + strlen(term_dict);
816     char term_component[2*IT_MAX_WORD+20];
817
818     attr_init(&relation, zapt, 2);
819     relation_value = attr_find(&relation, NULL);
820
821     *error_code = 0;
822     yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
823     switch (relation_value)
824     {
825     case 1:
826         if (!term_100(zh->reg->zebra_maps, reg_type,
827                       term_sub, term_component,
828                       space_split, term_dst))
829             return 0;
830         yaz_log(log_level_rpn, "Relation <");
831         
832         *term_tmp++ = '(';
833         for (i = 0; term_component[i]; )
834         {
835             int j = 0;
836
837             if (i)
838                 *term_tmp++ = '|';
839             while (j < i)
840                 string_rel_add_char(&term_tmp, term_component, &j);
841
842             *term_tmp++ = '[';
843
844             *term_tmp++ = '^';
845             string_rel_add_char(&term_tmp, term_component, &i);
846             *term_tmp++ = '-';
847
848             *term_tmp++ = ']';
849             *term_tmp++ = '.';
850             *term_tmp++ = '*';
851
852             if ((term_tmp - term_dict) > IT_MAX_WORD)
853                 break;
854         }
855         *term_tmp++ = ')';
856         *term_tmp = '\0';
857         break;
858     case 2:
859         if (!term_100(zh->reg->zebra_maps, reg_type,
860                       term_sub, term_component,
861                       space_split, term_dst))
862             return 0;
863         yaz_log(log_level_rpn, "Relation <=");
864
865         *term_tmp++ = '(';
866         for (i = 0; term_component[i]; )
867         {
868             int j = 0;
869
870             while (j < i)
871                 string_rel_add_char(&term_tmp, term_component, &j);
872             *term_tmp++ = '[';
873
874             *term_tmp++ = '^';
875             string_rel_add_char(&term_tmp, term_component, &i);
876             *term_tmp++ = '-';
877
878             *term_tmp++ = ']';
879             *term_tmp++ = '.';
880             *term_tmp++ = '*';
881
882             *term_tmp++ = '|';
883
884             if ((term_tmp - term_dict) > IT_MAX_WORD)
885                 break;
886         }
887         for (i = 0; term_component[i]; )
888             string_rel_add_char(&term_tmp, term_component, &i);
889         *term_tmp++ = ')';
890         *term_tmp = '\0';
891         break;
892     case 5:
893         if (!term_100 (zh->reg->zebra_maps, reg_type,
894                        term_sub, term_component, space_split, term_dst))
895             return 0;
896         yaz_log(log_level_rpn, "Relation >");
897
898         *term_tmp++ = '(';
899         for (i = 0; term_component[i];)
900         {
901             int j = 0;
902
903             while (j < i)
904                 string_rel_add_char(&term_tmp, term_component, &j);
905             *term_tmp++ = '[';
906             
907             *term_tmp++ = '^';
908             *term_tmp++ = '-';
909             string_rel_add_char(&term_tmp, term_component, &i);
910
911             *term_tmp++ = ']';
912             *term_tmp++ = '.';
913             *term_tmp++ = '*';
914
915             *term_tmp++ = '|';
916
917             if ((term_tmp - term_dict) > IT_MAX_WORD)
918                 break;
919         }
920         for (i = 0; term_component[i];)
921             string_rel_add_char(&term_tmp, term_component, &i);
922         *term_tmp++ = '.';
923         *term_tmp++ = '+';
924         *term_tmp++ = ')';
925         *term_tmp = '\0';
926         break;
927     case 4:
928         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
929                       term_component, space_split, term_dst))
930             return 0;
931         yaz_log(log_level_rpn, "Relation >=");
932
933         *term_tmp++ = '(';
934         for (i = 0; term_component[i];)
935         {
936             int j = 0;
937
938             if (i)
939                 *term_tmp++ = '|';
940             while (j < i)
941                 string_rel_add_char(&term_tmp, term_component, &j);
942             *term_tmp++ = '[';
943
944             if (term_component[i+1])
945             {
946                 *term_tmp++ = '^';
947                 *term_tmp++ = '-';
948                 string_rel_add_char(&term_tmp, term_component, &i);
949             }
950             else
951             {
952                 string_rel_add_char(&term_tmp, term_component, &i);
953                 *term_tmp++ = '-';
954             }
955             *term_tmp++ = ']';
956             *term_tmp++ = '.';
957             *term_tmp++ = '*';
958
959             if ((term_tmp - term_dict) > IT_MAX_WORD)
960                 break;
961         }
962         *term_tmp++ = ')';
963         *term_tmp = '\0';
964         break;
965     case 3:
966     case 102:
967     case -1:
968         yaz_log(log_level_rpn, "Relation =");
969         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
970                       term_component, space_split, term_dst))
971             return 0;
972         strcat(term_tmp, "(");
973         strcat(term_tmp, term_component);
974         strcat(term_tmp, ")");
975         break;
976     default:
977         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
978         return 0;
979     }
980     return 1;
981 }
982
983 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
984                              const char **term_sub, 
985                              oid_value attributeSet, NMEM stream,
986                              struct grep_info *grep_info,
987                              int reg_type, int complete_flag,
988                              int num_bases, char **basenames,
989                              char *term_dst, int xpath_use,
990                              struct ord_list **ol);
991
992 static ZEBRA_RES term_trunc(ZebraHandle zh,
993                             Z_AttributesPlusTerm *zapt,
994                             const char **term_sub, 
995                             oid_value attributeSet, NMEM stream,
996                             struct grep_info *grep_info,
997                             int reg_type, int complete_flag,
998                             int num_bases, char **basenames,
999                             char *term_dst,
1000                             const char *rank_type, int xpath_use,
1001                             NMEM rset_nmem,
1002                             RSET *rset,
1003                             struct rset_key_control *kc)
1004 {
1005     ZEBRA_RES res;
1006     struct ord_list *ol;
1007     *rset = 0;
1008     grep_info->isam_p_indx = 0;
1009     res = string_term(zh, zapt, term_sub, attributeSet, stream, grep_info,
1010                       reg_type, complete_flag, num_bases, basenames,
1011                       term_dst, xpath_use, &ol);
1012     if (res != ZEBRA_OK)
1013         return res;
1014     if (!*term_sub)  /* no more terms ? */
1015         return res;
1016     yaz_log(log_level_rpn, "term: %s", term_dst);
1017     *rset = rset_trunc(zh, grep_info->isam_p_buf,
1018                        grep_info->isam_p_indx, term_dst,
1019                        strlen(term_dst), rank_type, 1 /* preserve pos */,
1020                        zapt->term->which, rset_nmem,
1021                        kc, kc->scope, ol, reg_type);
1022     if (!*rset)
1023         return ZEBRA_FAIL;
1024     return ZEBRA_OK;
1025 }
1026
1027 static char *nmem_strdup_i(NMEM nmem, int v)
1028 {
1029     char val_str[64];
1030     sprintf(val_str, "%d", v);
1031     return nmem_strdup(nmem, val_str);
1032 }
1033
1034 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1035                              const char **term_sub, 
1036                              oid_value attributeSet, NMEM stream,
1037                              struct grep_info *grep_info,
1038                              int reg_type, int complete_flag,
1039                              int num_bases, char **basenames,
1040                              char *term_dst, int xpath_use,
1041                              struct ord_list **ol)
1042 {
1043     char term_dict[2*IT_MAX_WORD+4000];
1044     int j, r, base_no;
1045     AttrType truncation;
1046     int truncation_value;
1047     AttrType use;
1048     int use_value;
1049     const char *use_string = 0;
1050     oid_value curAttributeSet = attributeSet;
1051     const char *termp;
1052     struct rpn_char_map_info rcmi;
1053     int space_split = complete_flag ? 0 : 1;
1054
1055     int bases_ok = 0;     /* no of databases with OK attribute */
1056     int errCode = 0;      /* err code (if any is not OK) */
1057     char *errString = 0;  /* addinfo */
1058
1059
1060     *ol = ord_list_create(stream);
1061
1062     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1063     attr_init(&use, zapt, 1);
1064     use_value = attr_find_ex(&use, &curAttributeSet, &use_string);
1065     yaz_log(log_level_rpn, "string_term, use value %d", use_value);
1066     attr_init(&truncation, zapt, 5);
1067     truncation_value = attr_find(&truncation, NULL);
1068     yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1069
1070     if (use_value == -1)    /* no attribute - assumy "any" */
1071         use_value = 1016;
1072     for (base_no = 0; base_no < num_bases; base_no++)
1073     {
1074         int ord = -1;
1075         int attr_ok = 0;
1076         int regex_range = 0;
1077         int init_pos = 0;
1078         attent attp;
1079         data1_local_attribute id_xpath_attr;
1080         data1_local_attribute *local_attr;
1081         int max_pos, prefix_len = 0;
1082         int relation_error;
1083
1084         termp = *term_sub;
1085
1086         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1087         {
1088             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1089                            basenames[base_no]);
1090             return ZEBRA_FAIL;
1091         }
1092         if (xpath_use > 0 && use_value == -2) 
1093         {
1094             /* xpath mode and we have a string attribute */
1095             attp.local_attributes = &id_xpath_attr;
1096             attp.attset_ordinal = VAL_IDXPATH;
1097             id_xpath_attr.next = 0;
1098
1099             use_value = xpath_use;  /* xpath_use as use-attribute now */
1100             id_xpath_attr.local = use_value;
1101         }
1102         else if (curAttributeSet == VAL_IDXPATH && use_value >= 0)
1103         {
1104             /* X-Path attribute, use numeric value directly */
1105             attp.local_attributes = &id_xpath_attr;
1106             attp.attset_ordinal = VAL_IDXPATH;
1107             id_xpath_attr.next = 0;
1108             id_xpath_attr.local = use_value;
1109         }
1110         else if (use_string &&
1111                  (ord = zebraExplain_lookup_attr_str(zh->reg->zei,
1112                                                      use_string)) >= 0)
1113         {
1114             /* we have a match for a raw string attribute */
1115             char ord_buf[32];
1116             int i, ord_len;
1117
1118             if (prefix_len)
1119                 term_dict[prefix_len++] = '|';
1120             else
1121                 term_dict[prefix_len++] = '(';
1122             
1123             ord_len = key_SU_encode (ord, ord_buf);
1124             for (i = 0; i<ord_len; i++)
1125             {
1126                 term_dict[prefix_len++] = 1;
1127                 term_dict[prefix_len++] = ord_buf[i];
1128             }
1129             attp.local_attributes = 0;  /* no more attributes */
1130             *ol = ord_list_append(stream, *ol, ord);
1131         }
1132         else 
1133         {
1134             /* lookup in the .att files . Allow string as well */
1135             if ((r = att_getentbyatt (zh, &attp, curAttributeSet, use_value,
1136                                       use_string)))
1137             {
1138                 yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
1139                         curAttributeSet, use_value, r);
1140                 if (r == -1)
1141                 {
1142                     /* set was found, but value wasn't defined */
1143                     errCode = YAZ_BIB1_UNSUPP_USE_ATTRIBUTE;
1144                     if (use_string)
1145                         errString = nmem_strdup(stream, use_string);
1146                     else
1147                         errString = nmem_strdup_i (stream, use_value);
1148                 }
1149                 else
1150                 {
1151                     int oid[OID_SIZE];
1152                     struct oident oident;
1153                     
1154                     oident.proto = PROTO_Z3950;
1155                     oident.oclass = CLASS_ATTSET;
1156                     oident.value = curAttributeSet;
1157                     oid_ent_to_oid (&oident, oid);
1158                     
1159                     errCode = YAZ_BIB1_UNSUPP_ATTRIBUTE_SET;
1160                     errString = nmem_strdup(stream, oident.desc);
1161                 }
1162                 continue;
1163             }
1164         }
1165         for (local_attr = attp.local_attributes; local_attr;
1166              local_attr = local_attr->next)
1167         {
1168             char ord_buf[32];
1169             int i, ord_len;
1170             
1171             ord = zebraExplain_lookup_attr_su(zh->reg->zei,
1172                                               attp.attset_ordinal,
1173                                               local_attr->local);
1174             if (ord < 0)
1175                 continue;
1176             *ol = ord_list_append(stream, *ol, ord);
1177             if (prefix_len)
1178                 term_dict[prefix_len++] = '|';
1179             else
1180                 term_dict[prefix_len++] = '(';
1181             
1182             ord_len = key_SU_encode (ord, ord_buf);
1183             for (i = 0; i<ord_len; i++)
1184             {
1185                 term_dict[prefix_len++] = 1;
1186                 term_dict[prefix_len++] = ord_buf[i];
1187             }
1188         }
1189         bases_ok++;
1190         if (prefix_len)
1191             attr_ok = 1;
1192
1193         term_dict[prefix_len++] = ')';
1194         term_dict[prefix_len++] = 1;
1195         term_dict[prefix_len++] = reg_type;
1196         yaz_log(log_level_rpn, "reg_type = %d", term_dict[prefix_len-1]);
1197         term_dict[prefix_len] = '\0';
1198         j = prefix_len;
1199         switch (truncation_value)
1200         {
1201         case -1:         /* not specified */
1202         case 100:        /* do not truncate */
1203             if (!string_relation (zh, zapt, &termp, term_dict,
1204                                   attributeSet,
1205                                   reg_type, space_split, term_dst,
1206                                   &relation_error))
1207             {
1208                 if (relation_error)
1209                 {
1210                     zebra_setError(zh, relation_error, 0);
1211                     return ZEBRA_FAIL;
1212                 }
1213                 *term_sub = 0;
1214                 return ZEBRA_OK;
1215             }
1216             break;
1217         case 1:          /* right truncation */
1218             term_dict[j++] = '(';
1219             if (!term_100(zh->reg->zebra_maps, reg_type,
1220                           &termp, term_dict + j, space_split, term_dst))
1221             {
1222                 *term_sub = 0;
1223                 return ZEBRA_OK;
1224             }
1225             strcat(term_dict, ".*)");
1226             break;
1227         case 2:          /* keft truncation */
1228             term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1229             if (!term_100(zh->reg->zebra_maps, reg_type,
1230                           &termp, term_dict + j, space_split, term_dst))
1231             {
1232                 *term_sub = 0;
1233                 return ZEBRA_OK;
1234             }
1235             strcat(term_dict, ")");
1236             break;
1237         case 3:          /* left&right truncation */
1238             term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1239             if (!term_100(zh->reg->zebra_maps, reg_type,
1240                           &termp, term_dict + j, space_split, term_dst))
1241             {
1242                 *term_sub = 0;
1243                 return ZEBRA_OK;
1244             }
1245             strcat(term_dict, ".*)");
1246             break;
1247         case 101:        /* process # in term */
1248             term_dict[j++] = '(';
1249             if (!term_101(zh->reg->zebra_maps, reg_type,
1250                           &termp, term_dict + j, space_split, term_dst))
1251             {
1252                 *term_sub = 0;
1253                 return ZEBRA_OK;
1254             }
1255             strcat(term_dict, ")");
1256             break;
1257         case 102:        /* Regexp-1 */
1258             term_dict[j++] = '(';
1259             if (!term_102(zh->reg->zebra_maps, reg_type,
1260                           &termp, term_dict + j, space_split, term_dst))
1261             {
1262                 *term_sub = 0;
1263                 return ZEBRA_OK;
1264             }
1265             strcat(term_dict, ")");
1266             break;
1267         case 103:       /* Regexp-2 */
1268             regex_range = 1;
1269             term_dict[j++] = '(';
1270             init_pos = 2;
1271             if (!term_103(zh->reg->zebra_maps, reg_type,
1272                           &termp, term_dict + j, &regex_range,
1273                           space_split, term_dst))
1274             {
1275                 *term_sub = 0;
1276                 return ZEBRA_OK;
1277             }
1278             strcat(term_dict, ")");
1279             break;
1280         case 104:        /* process # and ! in term */
1281             term_dict[j++] = '(';
1282             if (!term_104(zh->reg->zebra_maps, reg_type,
1283                           &termp, term_dict + j, space_split, term_dst))
1284             {
1285                 *term_sub = 0;
1286                 return ZEBRA_OK;
1287             }
1288             strcat(term_dict, ")");
1289             break;
1290         case 105:        /* process * and ! in term */
1291             term_dict[j++] = '(';
1292             if (!term_105(zh->reg->zebra_maps, reg_type,
1293                           &termp, term_dict + j, space_split, term_dst, 1))
1294             {
1295                 *term_sub = 0;
1296                 return ZEBRA_OK;
1297             }
1298             strcat(term_dict, ")");
1299             break;
1300         case 106:        /* process * and ! in term */
1301             term_dict[j++] = '(';
1302             if (!term_105(zh->reg->zebra_maps, reg_type,
1303                           &termp, term_dict + j, space_split, term_dst, 0))
1304             {
1305                 *term_sub = 0;
1306                 return ZEBRA_OK;
1307             }
1308             strcat(term_dict, ")");
1309             break;
1310         default:
1311             zebra_setError_zint(zh,
1312                                 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1313                                 truncation_value);
1314             return ZEBRA_FAIL;
1315         }
1316         if (attr_ok)
1317         {
1318             char buf[80];
1319             const char *input = term_dict + prefix_len;
1320             esc_str(buf, sizeof(buf), input, strlen(input));
1321         }
1322         if (attr_ok)
1323         {
1324             yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_dict+prefix_len);
1325             r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
1326                                  grep_info, &max_pos, init_pos,
1327                                  grep_handle);
1328             if (r)
1329                 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1330         }
1331     }
1332     if (!bases_ok)
1333     {
1334         zebra_setError(zh, errCode, errString);
1335         return ZEBRA_FAIL;
1336     }
1337     *term_sub = termp;
1338     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1339     return ZEBRA_OK;
1340 }
1341
1342
1343 /* convert APT search term to UTF8 */
1344 static ZEBRA_RES zapt_term_to_utf8(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1345                                    char *termz)
1346 {
1347     size_t sizez;
1348     Z_Term *term = zapt->term;
1349
1350     switch (term->which)
1351     {
1352     case Z_Term_general:
1353         if (zh->iconv_to_utf8 != 0)
1354         {
1355             char *inbuf = term->u.general->buf;
1356             size_t inleft = term->u.general->len;
1357             char *outbuf = termz;
1358             size_t outleft = IT_MAX_WORD-1;
1359             size_t ret;
1360
1361             ret = yaz_iconv(zh->iconv_to_utf8, &inbuf, &inleft,
1362                         &outbuf, &outleft);
1363             if (ret == (size_t)(-1))
1364             {
1365                 ret = yaz_iconv(zh->iconv_to_utf8, 0, 0, 0, 0);
1366                 zebra_setError(
1367                     zh, 
1368                     YAZ_BIB1_QUERY_TERM_INCLUDES_CHARS_THAT_DO_NOT_TRANSLATE_INTO_,
1369                     0);
1370                 return ZEBRA_FAIL;
1371             }
1372             *outbuf = 0;
1373         }
1374         else
1375         {
1376             sizez = term->u.general->len;
1377             if (sizez > IT_MAX_WORD-1)
1378                 sizez = IT_MAX_WORD-1;
1379             memcpy (termz, term->u.general->buf, sizez);
1380             termz[sizez] = '\0';
1381         }
1382         break;
1383     case Z_Term_characterString:
1384         sizez = strlen(term->u.characterString);
1385         if (sizez > IT_MAX_WORD-1)
1386             sizez = IT_MAX_WORD-1;
1387         memcpy (termz, term->u.characterString, sizez);
1388         termz[sizez] = '\0';
1389         break;
1390     default:
1391         zebra_setError(zh, YAZ_BIB1_UNSUPP_CODED_VALUE_FOR_TERM, 0);
1392         return ZEBRA_FAIL;
1393     }
1394     return ZEBRA_OK;
1395 }
1396
1397 /* convert APT SCAN term to internal cmap */
1398 static ZEBRA_RES trans_scan_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1399                                  char *termz, int reg_type)
1400 {
1401     char termz0[IT_MAX_WORD];
1402
1403     if (zapt_term_to_utf8(zh, zapt, termz0) == ZEBRA_FAIL)
1404         return ZEBRA_FAIL;    /* error */
1405     else
1406     {
1407         const char **map;
1408         const char *cp = (const char *) termz0;
1409         const char *cp_end = cp + strlen(cp);
1410         const char *src;
1411         int i = 0;
1412         const char *space_map = NULL;
1413         int len;
1414             
1415         while ((len = (cp_end - cp)) > 0)
1416         {
1417             map = zebra_maps_input(zh->reg->zebra_maps, reg_type, &cp, len, 0);
1418             if (**map == *CHR_SPACE)
1419                 space_map = *map;
1420             else
1421             {
1422                 if (i && space_map)
1423                     for (src = space_map; *src; src++)
1424                         termz[i++] = *src;
1425                 space_map = NULL;
1426                 for (src = *map; *src; src++)
1427                     termz[i++] = *src;
1428             }
1429         }
1430         termz[i] = '\0';
1431     }
1432     return ZEBRA_OK;
1433 }
1434
1435 static void grep_info_delete(struct grep_info *grep_info)
1436 {
1437 #ifdef TERM_COUNT
1438     xfree(grep_info->term_no);
1439 #endif
1440     xfree(grep_info->isam_p_buf);
1441 }
1442
1443 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1444                                    Z_AttributesPlusTerm *zapt,
1445                                    struct grep_info *grep_info,
1446                                    int reg_type)
1447 {
1448     AttrType termset;
1449     int termset_value_numeric;
1450     const char *termset_value_string;
1451
1452 #ifdef TERM_COUNT
1453     grep_info->term_no = 0;
1454 #endif
1455     grep_info->isam_p_size = 0;
1456     grep_info->isam_p_buf = NULL;
1457     grep_info->zh = zh;
1458     grep_info->reg_type = reg_type;
1459     grep_info->termset = 0;
1460
1461     if (!zapt)
1462         return ZEBRA_OK;
1463     attr_init(&termset, zapt, 8);
1464     termset_value_numeric =
1465         attr_find_ex(&termset, NULL, &termset_value_string);
1466     if (termset_value_numeric != -1)
1467     {
1468         char resname[32];
1469         const char *termset_name = 0;
1470         if (termset_value_numeric != -2)
1471         {
1472     
1473             sprintf(resname, "%d", termset_value_numeric);
1474             termset_name = resname;
1475         }
1476         else
1477             termset_name = termset_value_string;
1478         yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1479         grep_info->termset = resultSetAdd(zh, termset_name, 1);
1480         if (!grep_info->termset)
1481         {
1482             zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1483             return ZEBRA_FAIL;
1484         }
1485     }
1486     return ZEBRA_OK;
1487 }
1488                                
1489 /**
1490   \brief Create result set(s) for list of terms
1491   \param zh Zebra Handle
1492   \param termz term as used in query but converted to UTF-8
1493   \param attributeSet default attribute set
1494   \param stream memory for result
1495   \param reg_type register type ('w', 'p',..)
1496   \param complete_flag whether it's phrases or not
1497   \param rank_type term flags for ranking
1498   \param xpath_use use attribute for X-Path (-1 for no X-path)
1499   \param num_bases number of databases
1500   \param basenames array of databases
1501   \param rset_mem memory for result sets
1502   \param result_sets output result set for each term in list (output)
1503   \param number number of output result sets
1504   \param kc rset key control to be used for created result sets
1505 */
1506 static ZEBRA_RES term_list_trunc(ZebraHandle zh,
1507                                  Z_AttributesPlusTerm *zapt,
1508                                  const char *termz,
1509                                  oid_value attributeSet,
1510                                  NMEM stream,
1511                                  int reg_type, int complete_flag,
1512                                  const char *rank_type, int xpath_use,
1513                                  int num_bases, char **basenames, 
1514                                  NMEM rset_nmem,
1515                                  RSET **result_sets, int *num_result_sets,
1516                                  struct rset_key_control *kc)
1517 {
1518     char term_dst[IT_MAX_WORD+1];
1519     struct grep_info grep_info;
1520     const char *termp = termz;
1521     int alloc_sets = 0;
1522
1523     *num_result_sets = 0;
1524     *term_dst = 0;
1525     if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1526         return ZEBRA_FAIL;
1527     while(1)
1528     { 
1529         ZEBRA_RES res;
1530
1531         if (alloc_sets == *num_result_sets)
1532         {
1533             int add = 10;
1534             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1535                                               sizeof(*rnew));
1536             if (alloc_sets)
1537                 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1538             alloc_sets = alloc_sets + add;
1539             *result_sets = rnew;
1540         }
1541         res = term_trunc(zh, zapt, &termp, attributeSet,
1542                          stream, &grep_info,
1543                          reg_type, complete_flag,
1544                          num_bases, basenames,
1545                          term_dst, rank_type,
1546                          xpath_use, rset_nmem,
1547                          &(*result_sets)[*num_result_sets],
1548                          kc);
1549         if (res != ZEBRA_OK)
1550         {
1551             int i;
1552             for (i = 0; i < *num_result_sets; i++)
1553                 rset_delete((*result_sets)[i]);
1554             grep_info_delete (&grep_info);
1555             return res;
1556         }
1557         if ((*result_sets)[*num_result_sets] == 0)
1558             break;
1559         (*num_result_sets)++;
1560     }
1561     grep_info_delete(&grep_info);
1562     return ZEBRA_OK;
1563 }
1564
1565 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1566                                        Z_AttributesPlusTerm *zapt,
1567                                        const char *termz_org,
1568                                        oid_value attributeSet,
1569                                        NMEM stream,
1570                                        int reg_type, int complete_flag,
1571                                        const char *rank_type, int xpath_use,
1572                                        int num_bases, char **basenames, 
1573                                        NMEM rset_nmem,
1574                                        RSET *rset,
1575                                        struct rset_key_control *kc)
1576 {
1577     RSET *result_sets = 0;
1578     int num_result_sets = 0;
1579     ZEBRA_RES res =
1580         term_list_trunc(zh, zapt, termz_org, attributeSet,
1581                         stream, reg_type, complete_flag,
1582                         rank_type, xpath_use,
1583                         num_bases, basenames,
1584                         rset_nmem,
1585                         &result_sets, &num_result_sets, kc);
1586     if (res != ZEBRA_OK)
1587         return res;
1588     if (num_result_sets == 0)
1589         *rset = rsnull_create (rset_nmem, kc, 0); 
1590     else if (num_result_sets == 1)
1591         *rset = result_sets[0];
1592     else
1593         *rset = rsprox_create(rset_nmem, kc, kc->scope,
1594                               num_result_sets, result_sets,
1595                               1 /* ordered */, 0 /* exclusion */,
1596                               3 /* relation */, 1 /* distance */);
1597     if (!*rset)
1598         return ZEBRA_FAIL;
1599     return ZEBRA_OK;
1600 }
1601
1602 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1603                                         Z_AttributesPlusTerm *zapt,
1604                                         const char *termz_org,
1605                                         oid_value attributeSet,
1606                                         NMEM stream,
1607                                         int reg_type, int complete_flag,
1608                                         const char *rank_type,
1609                                         int xpath_use,
1610                                         int num_bases, char **basenames,
1611                                         NMEM rset_nmem,
1612                                         RSET *rset,
1613                                         struct rset_key_control *kc)
1614 {
1615     RSET *result_sets = 0;
1616     int num_result_sets = 0;
1617     ZEBRA_RES res =
1618         term_list_trunc(zh, zapt, termz_org, attributeSet,
1619                         stream, reg_type, complete_flag,
1620                         rank_type, xpath_use,
1621                         num_bases, basenames,
1622                         rset_nmem,
1623                         &result_sets, &num_result_sets, kc);
1624     if (res != ZEBRA_OK)
1625         return res;
1626     if (num_result_sets == 0)
1627         *rset = rsnull_create (rset_nmem, kc, 0); 
1628     else if (num_result_sets == 1)
1629         *rset = result_sets[0];
1630     else
1631         *rset = rsmulti_or_create(rset_nmem, kc, kc->scope, 0 /* termid */,
1632                                   num_result_sets, result_sets);
1633     if (!*rset)
1634         return ZEBRA_FAIL;
1635     return ZEBRA_OK;
1636 }
1637
1638 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1639                                          Z_AttributesPlusTerm *zapt,
1640                                          const char *termz_org,
1641                                          oid_value attributeSet,
1642                                          NMEM stream,
1643                                          int reg_type, int complete_flag,
1644                                          const char *rank_type, 
1645                                          int xpath_use,
1646                                          int num_bases, char **basenames,
1647                                          NMEM rset_nmem,
1648                                          RSET *rset,
1649                                          struct rset_key_control *kc)
1650 {
1651     RSET *result_sets = 0;
1652     int num_result_sets = 0;
1653     ZEBRA_RES res =
1654         term_list_trunc(zh, zapt, termz_org, attributeSet,
1655                         stream, reg_type, complete_flag,
1656                         rank_type, xpath_use,
1657                         num_bases, basenames,
1658                         rset_nmem,
1659                         &result_sets, &num_result_sets,
1660                         kc);
1661     if (res != ZEBRA_OK)
1662         return res;
1663     if (num_result_sets == 0)
1664         *rset = rsnull_create (rset_nmem, kc, 0); 
1665     else if (num_result_sets == 1)
1666         *rset = result_sets[0];
1667     else
1668         *rset = rsmulti_and_create(rset_nmem, kc, kc->scope,
1669                                    num_result_sets, result_sets);
1670     if (!*rset)
1671         return ZEBRA_FAIL;
1672     return ZEBRA_OK;
1673 }
1674
1675 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1676                             const char **term_sub,
1677                             char *term_dict,
1678                             oid_value attributeSet,
1679                             struct grep_info *grep_info,
1680                             int *max_pos,
1681                             int reg_type,
1682                             char *term_dst,
1683                             int *error_code)
1684 {
1685     AttrType relation;
1686     int relation_value;
1687     int term_value;
1688     int r;
1689     char *term_tmp = term_dict + strlen(term_dict);
1690
1691     *error_code = 0;
1692     attr_init(&relation, zapt, 2);
1693     relation_value = attr_find(&relation, NULL);
1694
1695     yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1696
1697     if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1698                   term_dst))
1699         return 0;
1700     term_value = atoi (term_tmp);
1701     switch (relation_value)
1702     {
1703     case 1:
1704         yaz_log(log_level_rpn, "Relation <");
1705         gen_regular_rel(term_tmp, term_value-1, 1);
1706         break;
1707     case 2:
1708         yaz_log(log_level_rpn, "Relation <=");
1709         gen_regular_rel(term_tmp, term_value, 1);
1710         break;
1711     case 4:
1712         yaz_log(log_level_rpn, "Relation >=");
1713         gen_regular_rel(term_tmp, term_value, 0);
1714         break;
1715     case 5:
1716         yaz_log(log_level_rpn, "Relation >");
1717         gen_regular_rel(term_tmp, term_value+1, 0);
1718         break;
1719     case -1:
1720     case 3:
1721         yaz_log(log_level_rpn, "Relation =");
1722         sprintf(term_tmp, "(0*%d)", term_value);
1723         break;
1724     default:
1725         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1726         return 0;
1727     }
1728     yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_tmp);
1729     r = dict_lookup_grep(zh->reg->dict, term_dict, 0, grep_info, max_pos,
1730                           0, grep_handle);
1731     if (r)
1732         yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1733     yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1734     return 1;
1735 }
1736
1737 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1738                               const char **term_sub, 
1739                               oid_value attributeSet,
1740                               struct grep_info *grep_info,
1741                               int reg_type, int complete_flag,
1742                               int num_bases, char **basenames,
1743                               char *term_dst, int xpath_use, NMEM stream)
1744 {
1745     char term_dict[2*IT_MAX_WORD+2];
1746     int r, base_no;
1747     AttrType use;
1748     int use_value;
1749     const char *use_string = 0;
1750     oid_value curAttributeSet = attributeSet;
1751     const char *termp;
1752     struct rpn_char_map_info rcmi;
1753
1754     int bases_ok = 0;     /* no of databases with OK attribute */
1755     int errCode = 0;      /* err code (if any is not OK) */
1756     char *errString = 0;  /* addinfo */
1757
1758     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1759     attr_init(&use, zapt, 1);
1760     use_value = attr_find_ex(&use, &curAttributeSet, &use_string);
1761
1762     if (use_value == -1)
1763         use_value = 1016;
1764
1765     for (base_no = 0; base_no < num_bases; base_no++)
1766     {
1767         attent attp;
1768         data1_local_attribute id_xpath_attr;
1769         data1_local_attribute *local_attr;
1770         int max_pos, prefix_len = 0;
1771         int relation_error = 0;
1772
1773         termp = *term_sub;
1774         if (use_value == -2)  /* string attribute (assume IDXPATH/any) */
1775         {
1776             use_value = xpath_use;
1777             attp.local_attributes = &id_xpath_attr;
1778             attp.attset_ordinal = VAL_IDXPATH;
1779             id_xpath_attr.next = 0;
1780             id_xpath_attr.local = use_value;
1781         }
1782         else if (curAttributeSet == VAL_IDXPATH)
1783         {
1784             attp.local_attributes = &id_xpath_attr;
1785             attp.attset_ordinal = VAL_IDXPATH;
1786             id_xpath_attr.next = 0;
1787             id_xpath_attr.local = use_value;
1788         }
1789         else
1790         {
1791             if ((r = att_getentbyatt (zh, &attp, curAttributeSet, use_value,
1792                                             use_string)))
1793             {
1794                 yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
1795                       curAttributeSet, use_value, r);
1796                 if (r == -1)
1797                 {
1798                     errCode = YAZ_BIB1_UNSUPP_USE_ATTRIBUTE;
1799                     if (use_string)
1800                         errString = nmem_strdup(stream, use_string);
1801                     else
1802                         errString = nmem_strdup_i (stream, use_value);
1803                 }
1804                 else
1805                     errCode = YAZ_BIB1_UNSUPP_ATTRIBUTE_SET;
1806                 continue;
1807             }
1808         }
1809         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1810         {
1811             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1812                            basenames[base_no]);
1813             return ZEBRA_FAIL;
1814         }
1815         for (local_attr = attp.local_attributes; local_attr;
1816              local_attr = local_attr->next)
1817         {
1818             int ord;
1819             char ord_buf[32];
1820             int i, ord_len;
1821
1822             ord = zebraExplain_lookup_attr_su(zh->reg->zei,
1823                                               attp.attset_ordinal,
1824                                               local_attr->local);
1825             if (ord < 0)
1826                 continue;
1827             if (prefix_len)
1828                 term_dict[prefix_len++] = '|';
1829             else
1830                 term_dict[prefix_len++] = '(';
1831
1832             ord_len = key_SU_encode (ord, ord_buf);
1833             for (i = 0; i<ord_len; i++)
1834             {
1835                 term_dict[prefix_len++] = 1;
1836                 term_dict[prefix_len++] = ord_buf[i];
1837             }
1838         }
1839         if (!prefix_len)
1840         {
1841             zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_USE_ATTRIBUTE, use_value);
1842             continue;
1843         }
1844         bases_ok++;
1845         term_dict[prefix_len++] = ')';        
1846         term_dict[prefix_len++] = 1;
1847         term_dict[prefix_len++] = reg_type;
1848         yaz_log(YLOG_DEBUG, "reg_type = %d", term_dict[prefix_len-1]);
1849         term_dict[prefix_len] = '\0';
1850         if (!numeric_relation(zh, zapt, &termp, term_dict,
1851                               attributeSet, grep_info, &max_pos, reg_type,
1852                               term_dst, &relation_error))
1853         {
1854             if (relation_error)
1855             {
1856                 zebra_setError(zh, relation_error, 0);
1857                 return ZEBRA_FAIL;
1858             }
1859             *term_sub = 0;
1860             return ZEBRA_OK;
1861         }
1862     }
1863     if (!bases_ok)
1864     {
1865         zebra_setError(zh, errCode, errString);
1866         return ZEBRA_FAIL;
1867     }
1868     *term_sub = termp;
1869     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1870     return ZEBRA_OK;
1871 }
1872
1873 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1874                                         Z_AttributesPlusTerm *zapt,
1875                                         const char *termz,
1876                                         oid_value attributeSet,
1877                                         NMEM stream,
1878                                         int reg_type, int complete_flag,
1879                                         const char *rank_type, int xpath_use,
1880                                         int num_bases, char **basenames,
1881                                         NMEM rset_nmem,
1882                                         RSET *rset,
1883                                         struct rset_key_control *kc)
1884 {
1885     char term_dst[IT_MAX_WORD+1];
1886     const char *termp = termz;
1887     RSET *result_sets = 0;
1888     int num_result_sets = 0;
1889     ZEBRA_RES res;
1890     struct grep_info grep_info;
1891     int alloc_sets = 0;
1892
1893     yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1894     if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1895         return ZEBRA_FAIL;
1896     while (1)
1897     { 
1898         if (alloc_sets == num_result_sets)
1899         {
1900             int add = 10;
1901             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1902                                               sizeof(*rnew));
1903             if (alloc_sets)
1904                 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1905             alloc_sets = alloc_sets + add;
1906             result_sets = rnew;
1907         }
1908         yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1909         grep_info.isam_p_indx = 0;
1910         res = numeric_term(zh, zapt, &termp, attributeSet, &grep_info,
1911                            reg_type, complete_flag, num_bases, basenames,
1912                            term_dst, xpath_use,
1913                            stream);
1914         if (res == ZEBRA_FAIL || termp == 0)
1915             break;
1916         yaz_log(YLOG_DEBUG, "term: %s", term_dst);
1917         result_sets[num_result_sets] =
1918             rset_trunc(zh, grep_info.isam_p_buf,
1919                        grep_info.isam_p_indx, term_dst,
1920                        strlen(term_dst), rank_type,
1921                        0 /* preserve position */,
1922                        zapt->term->which, rset_nmem, 
1923                        kc, kc->scope, 0, reg_type);
1924         if (!result_sets[num_result_sets])
1925             break;
1926         num_result_sets++;
1927     }
1928     grep_info_delete(&grep_info);
1929     if (termp)
1930     {
1931         int i;
1932         for (i = 0; i<num_result_sets; i++)
1933             rset_delete(result_sets[i]);
1934         return ZEBRA_FAIL;
1935     }
1936     if (num_result_sets == 0)
1937         *rset = rsnull_create(rset_nmem, kc, 0);
1938     if (num_result_sets == 1)
1939         *rset = result_sets[0];
1940     else
1941         *rset = rsmulti_and_create(rset_nmem, kc, kc->scope,
1942                                    num_result_sets, result_sets);
1943     if (!*rset)
1944         return ZEBRA_FAIL;
1945     return ZEBRA_OK;
1946 }
1947
1948 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1949                                       Z_AttributesPlusTerm *zapt,
1950                                       const char *termz,
1951                                       oid_value attributeSet,
1952                                       NMEM stream,
1953                                       const char *rank_type, NMEM rset_nmem,
1954                                       RSET *rset,
1955                                       struct rset_key_control *kc)
1956 {
1957     RSFD rsfd;
1958     struct it_key key;
1959     int sys;
1960     *rset = rstemp_create(rset_nmem, kc, kc->scope,
1961                           res_get (zh->res, "setTmpDir"),0 );
1962     rsfd = rset_open(*rset, RSETF_WRITE);
1963     
1964     sys = atoi(termz);
1965     if (sys <= 0)
1966         sys = 1;
1967     key.mem[0] = sys;
1968     key.mem[1] = 1;
1969     key.len = 2;
1970     rset_write (rsfd, &key);
1971     rset_close (rsfd);
1972     return ZEBRA_OK;
1973 }
1974
1975 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1976                                oid_value attributeSet, NMEM stream,
1977                                Z_SortKeySpecList *sort_sequence,
1978                                const char *rank_type,
1979                                NMEM rset_nmem,
1980                                RSET *rset,
1981                                struct rset_key_control *kc)
1982 {
1983     int i;
1984     int sort_relation_value;
1985     AttrType sort_relation_type;
1986     Z_SortKeySpec *sks;
1987     Z_SortKey *sk;
1988     int oid[OID_SIZE];
1989     oident oe;
1990     char termz[20];
1991     
1992     attr_init(&sort_relation_type, zapt, 7);
1993     sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
1994
1995     if (!sort_sequence->specs)
1996     {
1997         sort_sequence->num_specs = 10;
1998         sort_sequence->specs = (Z_SortKeySpec **)
1999             nmem_malloc(stream, sort_sequence->num_specs *
2000                          sizeof(*sort_sequence->specs));
2001         for (i = 0; i<sort_sequence->num_specs; i++)
2002             sort_sequence->specs[i] = 0;
2003     }
2004     if (zapt->term->which != Z_Term_general)
2005         i = 0;
2006     else
2007         i = atoi_n ((char *) zapt->term->u.general->buf,
2008                     zapt->term->u.general->len);
2009     if (i >= sort_sequence->num_specs)
2010         i = 0;
2011     sprintf(termz, "%d", i);
2012
2013     oe.proto = PROTO_Z3950;
2014     oe.oclass = CLASS_ATTSET;
2015     oe.value = attributeSet;
2016     if (!oid_ent_to_oid (&oe, oid))
2017         return ZEBRA_FAIL;
2018
2019     sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
2020     sks->sortElement = (Z_SortElement *)
2021         nmem_malloc(stream, sizeof(*sks->sortElement));
2022     sks->sortElement->which = Z_SortElement_generic;
2023     sk = sks->sortElement->u.generic = (Z_SortKey *)
2024         nmem_malloc(stream, sizeof(*sk));
2025     sk->which = Z_SortKey_sortAttributes;
2026     sk->u.sortAttributes = (Z_SortAttributes *)
2027         nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
2028
2029     sk->u.sortAttributes->id = oid;
2030     sk->u.sortAttributes->list = zapt->attributes;
2031
2032     sks->sortRelation = (int *)
2033         nmem_malloc(stream, sizeof(*sks->sortRelation));
2034     if (sort_relation_value == 1)
2035         *sks->sortRelation = Z_SortKeySpec_ascending;
2036     else if (sort_relation_value == 2)
2037         *sks->sortRelation = Z_SortKeySpec_descending;
2038     else 
2039         *sks->sortRelation = Z_SortKeySpec_ascending;
2040
2041     sks->caseSensitivity = (int *)
2042         nmem_malloc(stream, sizeof(*sks->caseSensitivity));
2043     *sks->caseSensitivity = 0;
2044
2045     sks->which = Z_SortKeySpec_null;
2046     sks->u.null = odr_nullval ();
2047     sort_sequence->specs[i] = sks;
2048     *rset = rsnull_create (rset_nmem, kc, 0);
2049     return ZEBRA_OK;
2050 }
2051
2052
2053 static int parse_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2054                        oid_value attributeSet,
2055                        struct xpath_location_step *xpath, int max, NMEM mem)
2056 {
2057     oid_value curAttributeSet = attributeSet;
2058     AttrType use;
2059     const char *use_string = 0;
2060     
2061     attr_init(&use, zapt, 1);
2062     attr_find_ex(&use, &curAttributeSet, &use_string);
2063
2064     if (!use_string || *use_string != '/')
2065         return -1;
2066
2067     return zebra_parse_xpath_str(use_string, xpath, max, mem);
2068 }
2069  
2070                
2071
2072 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
2073                         int reg_type, const char *term, int use,
2074                         oid_value curAttributeSet, NMEM rset_nmem,
2075                         struct rset_key_control *kc)
2076 {
2077     RSET rset;
2078     struct grep_info grep_info;
2079     char term_dict[2048];
2080     char ord_buf[32];
2081     int prefix_len = 0;
2082     int ord = zebraExplain_lookup_attr_su(zh->reg->zei, curAttributeSet, use);
2083     int ord_len, i, r, max_pos;
2084     int term_type = Z_Term_characterString;
2085     const char *flags = "void";
2086
2087     if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, '0') == ZEBRA_FAIL)
2088         return rsnull_create(rset_nmem, kc, 0);
2089     
2090     if (ord < 0)
2091         return rsnull_create(rset_nmem, kc, 0);
2092     if (prefix_len)
2093         term_dict[prefix_len++] = '|';
2094     else
2095         term_dict[prefix_len++] = '(';
2096     
2097     ord_len = key_SU_encode (ord, ord_buf);
2098     for (i = 0; i<ord_len; i++)
2099     {
2100         term_dict[prefix_len++] = 1;
2101         term_dict[prefix_len++] = ord_buf[i];
2102     }
2103     term_dict[prefix_len++] = ')';
2104     term_dict[prefix_len++] = 1;
2105     term_dict[prefix_len++] = reg_type;
2106     
2107     strcpy(term_dict+prefix_len, term);
2108     
2109     grep_info.isam_p_indx = 0;
2110     r = dict_lookup_grep(zh->reg->dict, term_dict, 0,
2111                           &grep_info, &max_pos, 0, grep_handle);
2112     yaz_log(YLOG_DEBUG, "%s %d positions", term,
2113              grep_info.isam_p_indx);
2114     rset = rset_trunc(zh, grep_info.isam_p_buf,
2115                       grep_info.isam_p_indx, term, strlen(term),
2116                       flags, 1, term_type,rset_nmem,
2117                       kc, kc->scope, 0, reg_type);
2118     grep_info_delete(&grep_info);
2119     return rset;
2120 }
2121
2122 static
2123 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2124                            oid_value attributeSet,
2125                            int num_bases, char **basenames,
2126                            NMEM stream, const char *rank_type, RSET rset,
2127                            int xpath_len, struct xpath_location_step *xpath,
2128                            NMEM rset_nmem,
2129                            RSET *rset_out,
2130                            struct rset_key_control *kc)
2131 {
2132     oid_value curAttributeSet = attributeSet;
2133     int base_no;
2134     int i;
2135
2136     if (xpath_len < 0)
2137     {
2138         *rset_out = rset;
2139         return ZEBRA_OK;
2140     }
2141
2142     yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2143     for (i = 0; i<xpath_len; i++)
2144     {
2145         yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2146
2147     }
2148
2149     curAttributeSet = VAL_IDXPATH;
2150
2151     /*
2152       //a    ->    a/.*
2153       //a/b  ->    b/a/.*
2154       /a     ->    a/
2155       /a/b   ->    b/a/
2156
2157       /      ->    none
2158
2159    a[@attr = value]/b[@other = othervalue]
2160
2161  /e/@a val      range(e/,range(@a,freetext(w,1015,val),@a),e/)
2162  /a/b val       range(b/a/,freetext(w,1016,val),b/a/)
2163  /a/b/@c val    range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2164  /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2165  /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2166  /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
2167       
2168     */
2169
2170     dict_grep_cmap (zh->reg->dict, 0, 0);
2171
2172     for (base_no = 0; base_no < num_bases; base_no++)
2173     {
2174         int level = xpath_len;
2175         int first_path = 1;
2176         
2177         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2178         {
2179             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2180                            basenames[base_no]);
2181             *rset_out = rset;
2182             return ZEBRA_FAIL;
2183         }
2184         while (--level >= 0)
2185         {
2186             char xpath_rev[128];
2187             int i, len;
2188             RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
2189
2190             *xpath_rev = 0;
2191             len = 0;
2192             for (i = level; i >= 1; --i)
2193             {
2194                 const char *cp = xpath[i].part;
2195                 if (*cp)
2196                 {
2197                     for (;*cp; cp++)
2198                         if (*cp == '*')
2199                         {
2200                             memcpy (xpath_rev + len, "[^/]*", 5);
2201                             len += 5;
2202                         }
2203                         else if (*cp == ' ')
2204                         {
2205
2206                             xpath_rev[len++] = 1;
2207                             xpath_rev[len++] = ' ';
2208                         }
2209
2210                         else
2211                             xpath_rev[len++] = *cp;
2212                     xpath_rev[len++] = '/';
2213                 }
2214                 else if (i == 1)  /* // case */
2215                 {
2216                     xpath_rev[len++] = '.';
2217                     xpath_rev[len++] = '*';
2218                 }
2219             }
2220             xpath_rev[len] = 0;
2221
2222             if (xpath[level].predicate &&
2223                 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2224                 xpath[level].predicate->u.relation.name[0])
2225             {
2226                 WRBUF wbuf = wrbuf_alloc();
2227                 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2228                 if (xpath[level].predicate->u.relation.value)
2229                 {
2230                     const char *cp = xpath[level].predicate->u.relation.value;
2231                     wrbuf_putc(wbuf, '=');
2232                     
2233                     while (*cp)
2234                     {
2235                         if (strchr(REGEX_CHARS, *cp))
2236                             wrbuf_putc(wbuf, '\\');
2237                         wrbuf_putc(wbuf, *cp);
2238                         cp++;
2239                     }
2240                 }
2241                 wrbuf_puts(wbuf, "");
2242                 rset_attr = xpath_trunc(
2243                     zh, stream, '0', wrbuf_buf(wbuf), 3, 
2244                     curAttributeSet, rset_nmem, kc);
2245                 wrbuf_free(wbuf, 1);
2246             } 
2247             else 
2248             {
2249                 if (!first_path)
2250                     continue;
2251             }
2252             yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level, xpath_rev);
2253             if (strlen(xpath_rev))
2254             {
2255                 rset_start_tag = xpath_trunc(zh, stream, '0', 
2256                         xpath_rev, 1, curAttributeSet, rset_nmem, kc);
2257             
2258                 rset_end_tag = xpath_trunc(zh, stream, '0', 
2259                         xpath_rev, 2, curAttributeSet, rset_nmem, kc);
2260
2261                 rset = rsbetween_create(rset_nmem, kc, kc->scope,
2262                                         rset_start_tag, rset,
2263                                         rset_end_tag, rset_attr);
2264             }
2265             first_path = 0;
2266         }
2267     }
2268     *rset_out = rset;
2269     return ZEBRA_OK;
2270 }
2271
2272 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2273                                 oid_value attributeSet, NMEM stream,
2274                                 Z_SortKeySpecList *sort_sequence,
2275                                 int num_bases, char **basenames, 
2276                                 NMEM rset_nmem,
2277                                 RSET *rset,
2278                                 struct rset_key_control *kc)
2279 {
2280     ZEBRA_RES res = ZEBRA_OK;
2281     unsigned reg_id;
2282     char *search_type = NULL;
2283     char rank_type[128];
2284     int complete_flag;
2285     int sort_flag;
2286     char termz[IT_MAX_WORD+1];
2287     int xpath_len;
2288     int xpath_use = 0;
2289     struct xpath_location_step xpath[10];
2290
2291     if (!log_level_set)
2292     {
2293         log_level_rpn = yaz_log_module_level("rpn");
2294         log_level_set = 1;
2295     }
2296     zebra_maps_attr(zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2297                     rank_type, &complete_flag, &sort_flag);
2298     
2299     yaz_log(YLOG_DEBUG, "reg_id=%c", reg_id);
2300     yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2301     yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2302     yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2303
2304     if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2305         return ZEBRA_FAIL;
2306
2307     if (sort_flag)
2308         return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2309                              rank_type, rset_nmem, rset, kc);
2310     /* consider if an X-Path query is used */
2311     xpath_len = parse_xpath(zh, zapt, attributeSet, xpath, 10, stream);
2312     if (xpath_len >= 0)
2313     {
2314         xpath_use = 1016;  /* searching for element by default */
2315         if (xpath[xpath_len-1].part[0] == '@') 
2316             xpath_use = 1015;  /* last step an attribute .. */
2317     }
2318
2319     /* search using one of the various search type strategies
2320        termz is our UTF-8 search term
2321        attributeSet is top-level default attribute set 
2322        stream is ODR for search
2323        reg_id is the register type
2324        complete_flag is 1 for complete subfield, 0 for incomplete
2325        xpath_use is use-attribute to be used for X-Path search, 0 for none
2326     */
2327     if (!strcmp(search_type, "phrase"))
2328     {
2329         res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2330                                     reg_id, complete_flag, rank_type,
2331                                     xpath_use,
2332                                     num_bases, basenames, rset_nmem,
2333                                     rset, kc);
2334     }
2335     else if (!strcmp(search_type, "and-list"))
2336     {
2337         res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2338                                       reg_id, complete_flag, rank_type,
2339                                       xpath_use,
2340                                       num_bases, basenames, rset_nmem,
2341                                       rset, kc);
2342     }
2343     else if (!strcmp(search_type, "or-list"))
2344     {
2345         res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2346                                      reg_id, complete_flag, rank_type,
2347                                      xpath_use,
2348                                      num_bases, basenames, rset_nmem,
2349                                      rset, kc);
2350     }
2351     else if (!strcmp(search_type, "local"))
2352     {
2353         res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2354                                    rank_type, rset_nmem, rset, kc);
2355     }
2356     else if (!strcmp(search_type, "numeric"))
2357     {
2358         res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2359                                      reg_id, complete_flag, rank_type,
2360                                      xpath_use,
2361                                      num_bases, basenames, rset_nmem,
2362                                      rset, kc);
2363     }
2364     else
2365     {
2366         zebra_setError(zh, YAZ_BIB1_UNSUPP_STRUCTURE_ATTRIBUTE, 0);
2367         res = ZEBRA_FAIL;
2368     }
2369     if (res != ZEBRA_OK)
2370         return res;
2371     if (!*rset)
2372         return ZEBRA_FAIL;
2373     return rpn_search_xpath(zh, attributeSet, num_bases, basenames,
2374                             stream, rank_type, *rset, 
2375                             xpath_len, xpath, rset_nmem, rset, kc);
2376 }
2377
2378 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2379                                       oid_value attributeSet, 
2380                                       NMEM stream, NMEM rset_nmem,
2381                                       Z_SortKeySpecList *sort_sequence,
2382                                       int num_bases, char **basenames,
2383                                       RSET **result_sets, int *num_result_sets,
2384                                       Z_Operator *parent_op,
2385                                       struct rset_key_control *kc);
2386
2387 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2388                          oid_value attributeSet, 
2389                          NMEM stream, NMEM rset_nmem,
2390                          Z_SortKeySpecList *sort_sequence,
2391                          int num_bases, char **basenames,
2392                          RSET *result_set)
2393 {
2394     RSET *result_sets = 0;
2395     int num_result_sets = 0;
2396     ZEBRA_RES res;
2397     struct rset_key_control *kc = zebra_key_control_create(zh);
2398
2399     res = rpn_search_structure(zh, zs, attributeSet,
2400                                stream, rset_nmem,
2401                                sort_sequence, 
2402                                num_bases, basenames,
2403                                &result_sets, &num_result_sets,
2404                                0 /* no parent op */,
2405                                kc);
2406     if (res != ZEBRA_OK)
2407     {
2408         int i;
2409         for (i = 0; i<num_result_sets; i++)
2410             rset_delete(result_sets[i]);
2411         *result_set = 0;
2412     }
2413     else
2414     {
2415         assert(num_result_sets == 1);
2416         assert(result_sets);
2417         assert(*result_sets);
2418         *result_set = *result_sets;
2419     }
2420     (*kc->dec)(kc);
2421     return res;
2422 }
2423
2424 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2425                                oid_value attributeSet, 
2426                                NMEM stream, NMEM rset_nmem,
2427                                Z_SortKeySpecList *sort_sequence,
2428                                int num_bases, char **basenames,
2429                                RSET **result_sets, int *num_result_sets,
2430                                Z_Operator *parent_op,
2431                                struct rset_key_control *kc)
2432 {
2433     *num_result_sets = 0;
2434     if (zs->which == Z_RPNStructure_complex)
2435     {
2436         ZEBRA_RES res;
2437         Z_Operator *zop = zs->u.complex->roperator;
2438         RSET *result_sets_l = 0;
2439         int num_result_sets_l = 0;
2440         RSET *result_sets_r = 0;
2441         int num_result_sets_r = 0;
2442
2443         res = rpn_search_structure(zh, zs->u.complex->s1,
2444                                    attributeSet, stream, rset_nmem,
2445                                    sort_sequence,
2446                                    num_bases, basenames,
2447                                    &result_sets_l, &num_result_sets_l,
2448                                    zop, kc);
2449         if (res != ZEBRA_OK)
2450         {
2451             int i;
2452             for (i = 0; i<num_result_sets_l; i++)
2453                 rset_delete(result_sets_l[i]);
2454             return res;
2455         }
2456         res = rpn_search_structure(zh, zs->u.complex->s2,
2457                                    attributeSet, stream, rset_nmem,
2458                                    sort_sequence,
2459                                    num_bases, basenames,
2460                                    &result_sets_r, &num_result_sets_r,
2461                                    zop, kc);
2462         if (res != ZEBRA_OK)
2463         {
2464             int i;
2465             for (i = 0; i<num_result_sets_l; i++)
2466                 rset_delete(result_sets_l[i]);
2467             for (i = 0; i<num_result_sets_r; i++)
2468                 rset_delete(result_sets_r[i]);
2469             return res;
2470         }
2471
2472         /* make a new list of result for all children */
2473         *num_result_sets = num_result_sets_l + num_result_sets_r;
2474         *result_sets = nmem_malloc(stream, *num_result_sets * 
2475                                    sizeof(**result_sets));
2476         memcpy(*result_sets, result_sets_l, 
2477                num_result_sets_l * sizeof(**result_sets));
2478         memcpy(*result_sets + num_result_sets_l, result_sets_r, 
2479                num_result_sets_r * sizeof(**result_sets));
2480
2481         if (!parent_op || parent_op->which != zop->which
2482             || (zop->which != Z_Operator_and &&
2483                 zop->which != Z_Operator_or))
2484         {
2485             /* parent node different from this one (or non-present) */
2486             /* we must combine result sets now */
2487             RSET rset;
2488             switch (zop->which)
2489             {
2490             case Z_Operator_and:
2491                 rset = rsmulti_and_create(rset_nmem, kc,
2492                                           kc->scope,
2493                                           *num_result_sets, *result_sets);
2494                 break;
2495             case Z_Operator_or:
2496                 rset = rsmulti_or_create(rset_nmem, kc,
2497                                          kc->scope, 0, /* termid */
2498                                          *num_result_sets, *result_sets);
2499                 break;
2500             case Z_Operator_and_not:
2501                 rset = rsbool_create_not(rset_nmem, kc,
2502                                          kc->scope,
2503                                          (*result_sets)[0],
2504                                          (*result_sets)[1]);
2505                 break;
2506             case Z_Operator_prox:
2507                 if (zop->u.prox->which != Z_ProximityOperator_known)
2508                 {
2509                     zebra_setError(zh, 
2510                                    YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2511                                    0);
2512                     return ZEBRA_FAIL;
2513                 }
2514                 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2515                 {
2516                     zebra_setError_zint(zh,
2517                                         YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2518                                         *zop->u.prox->u.known);
2519                     return ZEBRA_FAIL;
2520                 }
2521                 else
2522                 {
2523                     rset = rsprox_create(rset_nmem, kc,
2524                                          kc->scope,
2525                                          *num_result_sets, *result_sets, 
2526                                          *zop->u.prox->ordered,
2527                                          (!zop->u.prox->exclusion ? 
2528                                           0 : *zop->u.prox->exclusion),
2529                                          *zop->u.prox->relationType,
2530                                          *zop->u.prox->distance );
2531                 }
2532                 break;
2533             default:
2534                 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
2535                 return ZEBRA_FAIL;
2536             }
2537             *num_result_sets = 1;
2538             *result_sets = nmem_malloc(stream, *num_result_sets * 
2539                                        sizeof(**result_sets));
2540             (*result_sets)[0] = rset;
2541         }
2542     }
2543     else if (zs->which == Z_RPNStructure_simple)
2544     {
2545         RSET rset;
2546         ZEBRA_RES res;
2547
2548         if (zs->u.simple->which == Z_Operand_APT)
2549         {
2550             yaz_log(YLOG_DEBUG, "rpn_search_APT");
2551             res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2552                                  attributeSet, stream, sort_sequence,
2553                                  num_bases, basenames, rset_nmem, &rset,
2554                                  kc);
2555             if (res != ZEBRA_OK)
2556                 return res;
2557         }
2558         else if (zs->u.simple->which == Z_Operand_resultSetId)
2559         {
2560             yaz_log(YLOG_DEBUG, "rpn_search_ref");
2561             rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2562             if (!rset)
2563             {
2564                 zebra_setError(zh, 
2565                                YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2566                                zs->u.simple->u.resultSetId);
2567                 return ZEBRA_FAIL;
2568             }
2569             rset_dup(rset);
2570         }
2571         else
2572         {
2573             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2574             return ZEBRA_FAIL;
2575         }
2576         *num_result_sets = 1;
2577         *result_sets = nmem_malloc(stream, *num_result_sets * 
2578                                    sizeof(**result_sets));
2579         (*result_sets)[0] = rset;
2580     }
2581     else
2582     {
2583         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2584         return ZEBRA_FAIL;
2585     }
2586     return ZEBRA_OK;
2587 }
2588
2589 struct scan_info_entry {
2590     char *term;
2591     ISAM_P isam_p;
2592 };
2593
2594 struct scan_info {
2595     struct scan_info_entry *list;
2596     ODR odr;
2597     int before, after;
2598     char prefix[20];
2599 };
2600
2601 static int scan_handle (char *name, const char *info, int pos, void *client)
2602 {
2603     int len_prefix, idx;
2604     struct scan_info *scan_info = (struct scan_info *) client;
2605
2606     len_prefix = strlen(scan_info->prefix);
2607     if (memcmp (name, scan_info->prefix, len_prefix))
2608         return 1;
2609     if (pos > 0)
2610         idx = scan_info->after - pos + scan_info->before;
2611     else
2612         idx = - pos - 1;
2613
2614     if (idx < 0)
2615         return 0;
2616     scan_info->list[idx].term = (char *)
2617         odr_malloc(scan_info->odr, strlen(name + len_prefix)+1);
2618     strcpy(scan_info->list[idx].term, name + len_prefix);
2619     assert (*info == sizeof(ISAM_P));
2620     memcpy (&scan_info->list[idx].isam_p, info+1, sizeof(ISAM_P));
2621     return 0;
2622 }
2623
2624 void zebra_term_untrans_iconv(ZebraHandle zh, NMEM stream, int reg_type,
2625                               char **dst, const char *src)
2626 {
2627     char term_src[IT_MAX_WORD];
2628     char term_dst[IT_MAX_WORD];
2629     
2630     zebra_term_untrans (zh, reg_type, term_src, src);
2631
2632     if (zh->iconv_from_utf8 != 0)
2633     {
2634         int len;
2635         char *inbuf = term_src;
2636         size_t inleft = strlen(term_src);
2637         char *outbuf = term_dst;
2638         size_t outleft = sizeof(term_dst)-1;
2639         size_t ret;
2640         
2641         ret = yaz_iconv (zh->iconv_from_utf8, &inbuf, &inleft,
2642                          &outbuf, &outleft);
2643         if (ret == (size_t)(-1))
2644             len = 0;
2645         else
2646             len = outbuf - term_dst;
2647         *dst = nmem_malloc(stream, len + 1);
2648         if (len > 0)
2649             memcpy (*dst, term_dst, len);
2650         (*dst)[len] = '\0';
2651     }
2652     else
2653         *dst = nmem_strdup(stream, term_src);
2654 }
2655
2656 static void count_set(ZebraHandle zh, RSET rset, zint *count)
2657 {
2658     zint psysno = 0;
2659     struct it_key key;
2660     RSFD rfd;
2661
2662     yaz_log(YLOG_DEBUG, "count_set");
2663
2664     rset->hits_limit = zh->approx_limit;
2665
2666     *count = 0;
2667     rfd = rset_open(rset, RSETF_READ);
2668     while (rset_read(rfd, &key,0 /* never mind terms */))
2669     {
2670         if (key.mem[0] != psysno)
2671         {
2672             psysno = key.mem[0];
2673             if (rfd->counted_items >= rset->hits_limit)
2674                 break;
2675         }
2676     }
2677     rset_close (rfd);
2678     *count = rset->hits_count;
2679 }
2680
2681 ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
2682                    oid_value attributeset,
2683                    int num_bases, char **basenames,
2684                    int *position, int *num_entries, ZebraScanEntry **list,
2685                    int *is_partial, RSET limit_set, int return_zero)
2686 {
2687     int i;
2688     int pos = *position;
2689     int num = *num_entries;
2690     int before;
2691     int after;
2692     int base_no;
2693     char termz[IT_MAX_WORD+20];
2694     AttrType use;
2695     int use_value;
2696     const char *use_string = 0;
2697     struct scan_info *scan_info_array;
2698     ZebraScanEntry *glist;
2699     int ords[32], ord_no = 0;
2700     int ptr[32];
2701
2702     int bases_ok = 0;     /* no of databases with OK attribute */
2703     int errCode = 0;      /* err code (if any is not OK) */
2704     char *errString = 0;  /* addinfo */
2705
2706     unsigned reg_id;
2707     char *search_type = NULL;
2708     char rank_type[128];
2709     int complete_flag;
2710     int sort_flag;
2711     NMEM rset_nmem = NULL; 
2712     struct rset_key_control *kc = 0;
2713
2714     *list = 0;
2715     *is_partial = 0;
2716
2717     if (attributeset == VAL_NONE)
2718         attributeset = VAL_BIB1;
2719
2720     if (!limit_set)
2721     {
2722         AttrType termset;
2723         int termset_value_numeric;
2724         const char *termset_value_string;
2725         attr_init(&termset, zapt, 8);
2726         termset_value_numeric =
2727             attr_find_ex(&termset, NULL, &termset_value_string);
2728         if (termset_value_numeric != -1)
2729         {
2730             char resname[32];
2731             const char *termset_name = 0;
2732             
2733             if (termset_value_numeric != -2)
2734             {
2735                 
2736                 sprintf(resname, "%d", termset_value_numeric);
2737                 termset_name = resname;
2738             }
2739             else
2740                 termset_name = termset_value_string;
2741             
2742             limit_set = resultSetRef (zh, termset_name);
2743         }
2744     }
2745         
2746     yaz_log(YLOG_DEBUG, "position = %d, num = %d set=%d",
2747             pos, num, attributeset);
2748         
2749     attr_init(&use, zapt, 1);
2750     use_value = attr_find_ex(&use, &attributeset, &use_string);
2751
2752     if (zebra_maps_attr(zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2753                         rank_type, &complete_flag, &sort_flag))
2754     {
2755         *num_entries = 0;
2756         zebra_setError(zh, YAZ_BIB1_UNSUPP_ATTRIBUTE_TYPE, 0);
2757         return ZEBRA_FAIL;
2758     }
2759     yaz_log(YLOG_DEBUG, "use_value = %d", use_value);
2760
2761     if (use_value == -1)
2762         use_value = 1016;
2763     for (base_no = 0; base_no < num_bases && ord_no < 32; base_no++)
2764     {
2765         data1_local_attribute *local_attr;
2766         attent attp;
2767         int ord;
2768
2769         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2770         {
2771             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2772                            basenames[base_no]);
2773             *num_entries = 0;
2774             return ZEBRA_FAIL;
2775         }
2776
2777         if (use_string &&
2778             (ord = zebraExplain_lookup_attr_str(zh->reg->zei,
2779                                                 use_string)) >= 0)
2780         {
2781             /* we have a match for a raw string attribute */
2782             if (ord > 0)
2783                 ords[ord_no++] = ord;
2784             attp.local_attributes = 0;  /* no more attributes */
2785         }
2786         else
2787         {
2788             int r;
2789             
2790             if ((r = att_getentbyatt (zh, &attp, attributeset, use_value,
2791                                       use_string)))
2792             {
2793                 yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d",
2794                         attributeset, use_value);
2795                 if (r == -1)
2796                 {
2797                     errCode = YAZ_BIB1_UNSUPP_USE_ATTRIBUTE;
2798                     if (use_string)
2799                         zebra_setError(zh, YAZ_BIB1_UNSUPP_USE_ATTRIBUTE,
2800                                        use_string);
2801                     else
2802                         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_USE_ATTRIBUTE,
2803                                             use_value);
2804                 }   
2805                 else
2806                 {
2807                     zebra_setError(zh, YAZ_BIB1_UNSUPP_ATTRIBUTE_SET, 0);
2808                 }
2809                 continue;
2810             }
2811         }
2812         bases_ok++;
2813         for (local_attr = attp.local_attributes; local_attr && ord_no < 32;
2814              local_attr = local_attr->next)
2815         {
2816             ord = zebraExplain_lookup_attr_su(zh->reg->zei,
2817                                               attp.attset_ordinal,
2818                                               local_attr->local);
2819             if (ord > 0)
2820                 ords[ord_no++] = ord;
2821         }
2822     }
2823     if (!bases_ok && errCode)
2824     {
2825         zebra_setError(zh, errCode, errString);
2826         *num_entries = 0;
2827         return ZEBRA_FAIL;
2828     }
2829     if (ord_no == 0)
2830     {
2831         *num_entries = 0;
2832         return ZEBRA_OK;
2833     }
2834     /* prepare dictionary scanning */
2835     if (num < 1)
2836     {
2837         *num_entries = 0;
2838         return ZEBRA_OK;
2839     }
2840     before = pos-1;
2841     if (before < 0)
2842         before = 0;
2843     after = 1+num-pos;
2844     if (after < 0)
2845         after = 0;
2846     yaz_log(YLOG_DEBUG, "rpn_scan pos=%d num=%d before=%d "
2847             "after=%d before+after=%d",
2848             pos, num, before, after, before+after);
2849     scan_info_array = (struct scan_info *)
2850         odr_malloc(stream, ord_no * sizeof(*scan_info_array));
2851     for (i = 0; i < ord_no; i++)
2852     {
2853         int j, prefix_len = 0;
2854         int before_tmp = before, after_tmp = after;
2855         struct scan_info *scan_info = scan_info_array + i;
2856         struct rpn_char_map_info rcmi;
2857
2858         rpn_char_map_prepare (zh->reg, reg_id, &rcmi);
2859
2860         scan_info->before = before;
2861         scan_info->after = after;
2862         scan_info->odr = stream;
2863
2864         scan_info->list = (struct scan_info_entry *)
2865             odr_malloc(stream, (before+after) * sizeof(*scan_info->list));
2866         for (j = 0; j<before+after; j++)
2867             scan_info->list[j].term = NULL;
2868
2869         prefix_len += key_SU_encode (ords[i], termz + prefix_len);
2870         termz[prefix_len++] = reg_id;
2871         termz[prefix_len] = 0;
2872         strcpy(scan_info->prefix, termz);
2873
2874         if (trans_scan_term(zh, zapt, termz+prefix_len, reg_id) == ZEBRA_FAIL)
2875             return ZEBRA_FAIL;
2876         
2877         dict_scan(zh->reg->dict, termz, &before_tmp, &after_tmp,
2878                   scan_info, scan_handle);
2879     }
2880     glist = (ZebraScanEntry *)
2881         odr_malloc(stream, (before+after)*sizeof(*glist));
2882
2883     rset_nmem = nmem_create();
2884     kc = zebra_key_control_create(zh);
2885
2886     /* consider terms after main term */
2887     for (i = 0; i < ord_no; i++)
2888         ptr[i] = before;
2889     
2890     *is_partial = 0;
2891     for (i = 0; i<after; i++)
2892     {
2893         int j, j0 = -1;
2894         const char *mterm = NULL;
2895         const char *tst;
2896         RSET rset = 0;
2897         int lo = i + pos-1; /* offset in result list */
2898
2899         /* find: j0 is the first of the minimal values */
2900         for (j = 0; j < ord_no; j++)
2901         {
2902             if (ptr[j] < before+after && ptr[j] >= 0 &&
2903                 (tst = scan_info_array[j].list[ptr[j]].term) &&
2904                 (!mterm || strcmp (tst, mterm) < 0))
2905             {
2906                 j0 = j;
2907                 mterm = tst;
2908             }
2909         }
2910         if (j0 == -1)
2911             break;  /* no value found, stop */
2912
2913         /* get result set for first one , but only if it's within bounds */
2914         if (lo >= 0)
2915         {
2916             /* get result set for first term */
2917             zebra_term_untrans_iconv(zh, stream->mem, reg_id,
2918                                      &glist[lo].term, mterm);
2919             rset = rset_trunc(zh, &scan_info_array[j0].list[ptr[j0]].isam_p, 1,
2920                               glist[lo].term, strlen(glist[lo].term),
2921                               NULL, 0, zapt->term->which, rset_nmem, 
2922                               kc, kc->scope, 0, reg_id);
2923         }
2924         ptr[j0]++; /* move index for this set .. */
2925         /* get result set for remaining scan terms */
2926         for (j = j0+1; j<ord_no; j++)
2927         {
2928             if (ptr[j] < before+after && ptr[j] >= 0 &&
2929                 (tst = scan_info_array[j].list[ptr[j]].term) &&
2930                 !strcmp (tst, mterm))
2931             {
2932                 if (lo >= 0)
2933                 {
2934                     RSET rsets[2];
2935                     
2936                     rsets[0] = rset;
2937                     rsets[1] =
2938                         rset_trunc(
2939                             zh, &scan_info_array[j].list[ptr[j]].isam_p, 1,
2940                             glist[lo].term,
2941                             strlen(glist[lo].term), NULL, 0,
2942                             zapt->term->which,rset_nmem,
2943                             kc, kc->scope, 0, reg_id);
2944                     rset = rsmulti_or_create(rset_nmem, kc,
2945                                              kc->scope, 0 /* termid */,
2946                                              2, rsets);
2947                 }
2948                 ptr[j]++;
2949             }
2950         }
2951         if (lo >= 0)
2952         {
2953             zint count;
2954             /* merge with limit_set if given */
2955             if (limit_set)
2956             {
2957                 RSET rsets[2];
2958                 rsets[0] = rset;
2959                 rsets[1] = rset_dup(limit_set);
2960                 
2961                 rset = rsmulti_and_create(rset_nmem, kc,
2962                                           kc->scope,
2963                                           2, rsets);
2964             }
2965             /* count it */
2966             count_set(zh, rset, &count);
2967             glist[lo].occurrences = count;
2968             rset_delete(rset);
2969         }
2970     }
2971     if (i < after)
2972     {
2973         *num_entries -= (after-i);
2974         *is_partial = 1;
2975         if (*num_entries < 0)
2976         {
2977             (*kc->dec)(kc);
2978             nmem_destroy(rset_nmem);
2979             *num_entries = 0;
2980             return ZEBRA_OK;
2981         }
2982     }
2983     /* consider terms before main term */
2984     for (i = 0; i<ord_no; i++)
2985         ptr[i] = 0;
2986     
2987     for (i = 0; i<before; i++)
2988     {
2989         int j, j0 = -1;
2990         const char *mterm = NULL;
2991         const char *tst;
2992         RSET rset;
2993         int lo = before-1-i; /* offset in result list */
2994         zint count;
2995         
2996         for (j = 0; j <ord_no; j++)
2997         {
2998             if (ptr[j] < before && ptr[j] >= 0 &&
2999                 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
3000                 (!mterm || strcmp (tst, mterm) > 0))
3001             {
3002                 j0 = j;
3003                     mterm = tst;
3004             }
3005         }
3006         if (j0 == -1)
3007             break;
3008         
3009         zebra_term_untrans_iconv(zh, stream->mem, reg_id,
3010                                  &glist[lo].term, mterm);
3011         
3012         rset = rset_trunc
3013             (zh, &scan_info_array[j0].list[before-1-ptr[j0]].isam_p, 1,
3014              glist[lo].term, strlen(glist[lo].term),
3015              NULL, 0, zapt->term->which, rset_nmem,
3016              kc, kc->scope, 0, reg_id);
3017         
3018         ptr[j0]++;
3019         
3020         for (j = j0+1; j<ord_no; j++)
3021         {
3022             if (ptr[j] < before && ptr[j] >= 0 &&
3023                 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
3024                 !strcmp (tst, mterm))
3025             {
3026                 RSET rsets[2];
3027                 
3028                 rsets[0] = rset;
3029                 rsets[1] = rset_trunc(
3030                     zh,
3031                     &scan_info_array[j].list[before-1-ptr[j]].isam_p, 1,
3032                     glist[lo].term,
3033                     strlen(glist[lo].term), NULL, 0,
3034                     zapt->term->which, rset_nmem,
3035                     kc, kc->scope, 0, reg_id);
3036                 rset = rsmulti_or_create(rset_nmem, kc,
3037                                          kc->scope, 0 /* termid */, 2, rsets);
3038                 
3039                 ptr[j]++;
3040             }
3041         }
3042         if (limit_set)
3043         {
3044             RSET rsets[2];
3045             rsets[0] = rset;
3046             rsets[1] = rset_dup(limit_set);
3047             
3048             rset = rsmulti_and_create(rset_nmem, kc,
3049                                       kc->scope, 2, rsets);
3050         }
3051         count_set(zh, rset, &count);
3052         glist[lo].occurrences = count;
3053         rset_delete (rset);
3054     }
3055     (*kc->dec)(kc);
3056     nmem_destroy(rset_nmem);
3057     i = before-i;
3058     if (i)
3059     {
3060         *is_partial = 1;
3061         *position -= i;
3062         *num_entries -= i;
3063         if (*num_entries <= 0)
3064         {
3065             *num_entries = 0;
3066             return ZEBRA_OK;
3067         }
3068     }
3069     
3070     *list = glist + i;               /* list is set to first 'real' entry */
3071     
3072     yaz_log(YLOG_DEBUG, "position = %d, num_entries = %d",
3073             *position, *num_entries);
3074     return ZEBRA_OK;
3075 }
3076