Use HAVE_UNISTD_H when including unistd.h.
[idzebra-moved-to-github.git] / index / zrpn.c
1 /* $Id: zrpn.c,v 1.200 2005-06-14 20:28:54 adam Exp $
2    Copyright (C) 1995-2005
3    Index Data ApS
4
5 This file is part of the Zebra server.
6
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra.  If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA.
21 */
22
23 #include <stdio.h>
24 #include <assert.h>
25 #ifdef WIN32
26 #include <io.h>
27 #endif
28 #if HAVE_UNISTD_H
29 #include <unistd.h>
30 #endif
31 #include <ctype.h>
32
33 #include <yaz/diagbib1.h>
34 #include "index.h"
35 #include <zebra_xpath.h>
36
37 #include <charmap.h>
38 #include <rset.h>
39
40 struct rpn_char_map_info
41 {
42     ZebraMaps zm;
43     int reg_type;
44 };
45
46 typedef struct
47 {
48     int type;
49     int major;
50     int minor;
51     Z_AttributesPlusTerm *zapt;
52 } AttrType;
53
/* Log level of the "rpn" yaz_log module.  It is looked up lazily (see
   add_isam_p); log_level_set records that the lookup has been done. */
static int log_level_rpn = 0;
static int log_level_set = 0;
56
57 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
58 {
59     struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
60     const char **out = zebra_maps_input(p->zm, p->reg_type, from, len, 0);
61 #if 0
62     if (out && *out)
63     {
64         const char *outp = *out;
65         yaz_log(YLOG_LOG, "---");
66         while (*outp)
67         {
68             yaz_log(YLOG_LOG, "%02X", *outp);
69             outp++;
70         }
71     }
72 #endif
73     return out;
74 }
75
76 static void rpn_char_map_prepare(struct zebra_register *reg, int reg_type,
77                                   struct rpn_char_map_info *map_info)
78 {
79     map_info->zm = reg->zebra_maps;
80     map_info->reg_type = reg_type;
81     dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
82 }
83
84 static int attr_find_ex(AttrType *src, oid_value *attributeSetP,
85                          const char **string_value)
86 {
87     int num_attributes;
88
89     num_attributes = src->zapt->attributes->num_attributes;
90     while (src->major < num_attributes)
91     {
92         Z_AttributeElement *element;
93
94         element = src->zapt->attributes->attributes[src->major];
95         if (src->type == *element->attributeType)
96         {
97             switch (element->which) 
98             {
99             case Z_AttributeValue_numeric:
100                 ++(src->major);
101                 if (element->attributeSet && attributeSetP)
102                 {
103                     oident *attrset;
104
105                     attrset = oid_getentbyoid(element->attributeSet);
106                     *attributeSetP = attrset->value;
107                 }
108                 return *element->value.numeric;
109                 break;
110             case Z_AttributeValue_complex:
111                 if (src->minor >= element->value.complex->num_list)
112                     break;
113                 if (element->attributeSet && attributeSetP)
114                 {
115                     oident *attrset;
116                     
117                     attrset = oid_getentbyoid(element->attributeSet);
118                     *attributeSetP = attrset->value;
119                 }
120                 if (element->value.complex->list[src->minor]->which ==  
121                     Z_StringOrNumeric_numeric)
122                 {
123                     ++(src->minor);
124                     return
125                         *element->value.complex->list[src->minor-1]->u.numeric;
126                 }
127                 else if (element->value.complex->list[src->minor]->which ==  
128                          Z_StringOrNumeric_string)
129                 {
130                     if (!string_value)
131                         break;
132                     ++(src->minor);
133                     *string_value = 
134                         element->value.complex->list[src->minor-1]->u.string;
135                     return -2;
136                 }
137                 else
138                     break;
139             default:
140                 assert(0);
141             }
142         }
143         ++(src->major);
144     }
145     return -1;
146 }
147
148 static int attr_find(AttrType *src, oid_value *attributeSetP)
149 {
150     return attr_find_ex(src, attributeSetP, 0);
151 }
152
153 static void attr_init(AttrType *src, Z_AttributesPlusTerm *zapt,
154                        int type)
155 {
156     src->zapt = zapt;
157     src->type = type;
158     src->major = 0;
159     src->minor = 0;
160 }
161
162 #define TERM_COUNT        
163        
164 struct grep_info {        
165 #ifdef TERM_COUNT        
166     int *term_no;        
167 #endif        
168     ISAM_P *isam_p_buf;
169     int isam_p_size;        
170     int isam_p_indx;
171     ZebraHandle zh;
172     int reg_type;
173     ZebraSet termset;
174 };        
175
176 void zebra_term_untrans(ZebraHandle zh, int reg_type,
177                         char *dst, const char *src)
178 {
179     int len = 0;
180     while (*src)
181     {
182         const char *cp = zebra_maps_output(zh->reg->zebra_maps,
183                                            reg_type, &src);
184         if (!cp && len < IT_MAX_WORD-1)
185             dst[len++] = *src++;
186         else
187             while (*cp && len < IT_MAX_WORD-1)
188                 dst[len++] = *cp++;
189     }
190     dst[len] = '\0';
191 }
192
193 static void add_isam_p(const char *name, const char *info,
194                        struct grep_info *p)
195 {
196     if (!log_level_set)
197     {
198         log_level_rpn = yaz_log_module_level("rpn");
199         log_level_set = 1;
200     }
201     if (p->isam_p_indx == p->isam_p_size)
202     {
203         ISAM_P *new_isam_p_buf;
204 #ifdef TERM_COUNT        
205         int *new_term_no;        
206 #endif
207         p->isam_p_size = 2*p->isam_p_size + 100;
208         new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
209                                             p->isam_p_size);
210         if (p->isam_p_buf)
211         {
212             memcpy(new_isam_p_buf, p->isam_p_buf,
213                     p->isam_p_indx * sizeof(*p->isam_p_buf));
214             xfree(p->isam_p_buf);
215         }
216         p->isam_p_buf = new_isam_p_buf;
217
218 #ifdef TERM_COUNT
219         new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
220         if (p->term_no)
221         {
222             memcpy(new_term_no, p->isam_p_buf,
223                     p->isam_p_indx * sizeof(*p->term_no));
224             xfree(p->term_no);
225         }
226         p->term_no = new_term_no;
227 #endif
228     }
229     assert(*info == sizeof(*p->isam_p_buf));
230     memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
231
232 #if 1
233     if (p->termset)
234     {
235         const char *db;
236         int set, use;
237         char term_tmp[IT_MAX_WORD];
238         int su_code = 0;
239         int len = key_SU_decode (&su_code, name);
240         
241         zebra_term_untrans  (p->zh, p->reg_type, term_tmp, name+len+1);
242         yaz_log(log_level_rpn, "grep: %d %c %s", su_code, name[len], term_tmp);
243         zebraExplain_lookup_ord (p->zh->reg->zei,
244                                  su_code, &db, &set, &use);
245         yaz_log(log_level_rpn, "grep:  set=%d use=%d db=%s", set, use, db);
246         
247         resultSetAddTerm(p->zh, p->termset, name[len], db,
248                          set, use, term_tmp);
249     }
250 #endif
251     (p->isam_p_indx)++;
252 }
253
/* grep_handle: hit callback; p is the struct grep_info that the hit is
   added to.  Always returns 0. */
static int grep_handle(char *name, const char *info, void *p)
{
    struct grep_info *grep_info = p;

    add_isam_p(name, info, grep_info);
    return 0;
}
259
260 static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src,
261                     const char *ct1, const char *ct2, int first)
262 {
263     const char *s1, *s0 = *src;
264     const char **map;
265
266     /* skip white space */
267     while (*s0)
268     {
269         if (ct1 && strchr(ct1, *s0))
270             break;
271         if (ct2 && strchr(ct2, *s0))
272             break;
273         s1 = s0;
274         map = zebra_maps_input(zebra_maps, reg_type, &s1, strlen(s1), first);
275         if (**map != *CHR_SPACE)
276             break;
277         s0 = s1;
278     }
279     *src = s0;
280     return *s0;
281 }
282
283
/* esc_str: render bytes as a printable dump for diagnostics.
 *
 *  out_buf, out_size:  destination and its capacity; out_size must be
 *                      larger than 20 (asserted).
 *  in_buf, in_size:    bytes to dump.
 *
 * Each byte becomes "XX:c  " -- two hex digits, a colon, the character
 * itself ('?' when outside 32..126) and two spaces.  Once fewer than 20
 * bytes of room remain, ".." is appended and the rest of the input is
 * skipped.  The result is always NUL-terminated.
 */
static void esc_str(char *out_buf, int out_size,
                    const char *in_buf, int in_size)
{
    int used = 0;       /* bytes written so far, excluding the NUL */
    int k;

    assert(out_buf);
    assert(in_buf);
    assert(out_size > 20);
    *out_buf = '\0';
    for (k = 0; k < in_size; k++)
    {
        int c = in_buf[k] & 0xff;
        int pc = (c < 32 || c > 126) ? '?' : c;

        /* keep the write offset instead of re-scanning the output with
           strlen for every byte */
        used += snprintf(out_buf + used, (size_t) (out_size - used),
                         "%02X:%c  ", c, pc);
        if (used > out_size - 20)
        {
            strcpy(out_buf + used, "..");
            break;
        }
    }
}
309
310 #define REGEX_CHARS " []()|.*+?!"
311
312 /* term_100: handle term, where trunc = none(no operators at all) */
313 static int term_100(ZebraMaps zebra_maps, int reg_type,
314                     const char **src, char *dst, int space_split,
315                     char *dst_term)
316 {
317     const char *s0;
318     const char **map;
319     int i = 0;
320     int j = 0;
321
322     const char *space_start = 0;
323     const char *space_end = 0;
324
325     if (!term_pre(zebra_maps, reg_type, src, NULL, NULL, !space_split))
326         return 0;
327     s0 = *src;
328     while (*s0)
329     {
330         const char *s1 = s0;
331         int q_map_match = 0;
332         map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
333                                 &q_map_match);
334         if (space_split)
335         {
336             if (**map == *CHR_SPACE)
337                 break;
338         }
339         else  /* complete subfield only. */
340         {
341             if (**map == *CHR_SPACE)
342             {   /* save space mapping for later  .. */
343                 space_start = s1;
344                 space_end = s0;
345                 continue;
346             }
347             else if (space_start)
348             {   /* reload last space */
349                 while (space_start < space_end)
350                 {
351                     if (strchr(REGEX_CHARS, *space_start))
352                         dst[i++] = '\\';
353                     dst_term[j++] = *space_start;
354                     dst[i++] = *space_start++;
355                 }
356                 /* and reset */
357                 space_start = space_end = 0;
358             }
359         }
360         /* add non-space char */
361         memcpy(dst_term+j, s1, s0 - s1);
362         j += (s0 - s1);
363         if (!q_map_match)
364         {
365             while (s1 < s0)
366             {
367                 if (strchr(REGEX_CHARS, *s1))
368                     dst[i++] = '\\';
369                 dst[i++] = *s1++;
370             }
371         }
372         else
373         {
374             char tmpbuf[80];
375             esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
376             
377             strcpy(dst + i, map[0]);
378             i += strlen(map[0]);
379         }
380     }
381     dst[i] = '\0';
382     dst_term[j] = '\0';
383     *src = s0;
384     return i;
385 }
386
387 /* term_101: handle term, where trunc = Process # */
388 static int term_101(ZebraMaps zebra_maps, int reg_type,
389                     const char **src, char *dst, int space_split,
390                     char *dst_term)
391 {
392     const char *s0;
393     const char **map;
394     int i = 0;
395     int j = 0;
396
397     if (!term_pre(zebra_maps, reg_type, src, "#", "#", !space_split))
398         return 0;
399     s0 = *src;
400     while (*s0)
401     {
402         if (*s0 == '#')
403         {
404             dst[i++] = '.';
405             dst[i++] = '*';
406             dst_term[j++] = *s0++;
407         }
408         else
409         {
410             const char *s1 = s0;
411             int q_map_match = 0;
412             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
413                                     &q_map_match);
414             if (space_split && **map == *CHR_SPACE)
415                 break;
416
417             /* add non-space char */
418             memcpy(dst_term+j, s1, s0 - s1);
419             j += (s0 - s1);
420             if (!q_map_match)
421             {
422                 while (s1 < s0)
423                 {
424                     if (strchr(REGEX_CHARS, *s1))
425                         dst[i++] = '\\';
426                     dst[i++] = *s1++;
427                 }
428             }
429             else
430             {
431                 char tmpbuf[80];
432                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
433                 
434                 strcpy(dst + i, map[0]);
435                 i += strlen(map[0]);
436             }
437         }
438     }
439     dst[i] = '\0';
440     dst_term[j++] = '\0';
441     *src = s0;
442     return i;
443 }
444
445 /* term_103: handle term, where trunc = re-2 (regular expressions) */
446 static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src,
447                     char *dst, int *errors, int space_split,
448                     char *dst_term)
449 {
450     int i = 0;
451     int j = 0;
452     const char *s0;
453     const char **map;
454
455     if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split))
456         return 0;
457     s0 = *src;
458     if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
459         isdigit(((const unsigned char *)s0)[1]))
460     {
461         *errors = s0[1] - '0';
462         s0 += 3;
463         if (*errors > 3)
464             *errors = 3;
465     }
466     while (*s0)
467     {
468         if (strchr("^\\()[].*+?|-", *s0))
469         {
470             dst_term[j++] = *s0;
471             dst[i++] = *s0++;
472         }
473         else
474         {
475             const char *s1 = s0;
476             int q_map_match = 0;
477             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
478                                     &q_map_match);
479             if (space_split && **map == *CHR_SPACE)
480                 break;
481
482             /* add non-space char */
483             memcpy(dst_term+j, s1, s0 - s1);
484             j += (s0 - s1);
485             if (!q_map_match)
486             {
487                 while (s1 < s0)
488                 {
489                     if (strchr(REGEX_CHARS, *s1))
490                         dst[i++] = '\\';
491                     dst[i++] = *s1++;
492                 }
493             }
494             else
495             {
496                 char tmpbuf[80];
497                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
498                 
499                 strcpy(dst + i, map[0]);
500                 i += strlen(map[0]);
501             }
502         }
503     }
504     dst[i] = '\0';
505     dst_term[j] = '\0';
506     *src = s0;
507     
508     return i;
509 }
510
511 /* term_103: handle term, where trunc = re-1 (regular expressions) */
512 static int term_102(ZebraMaps zebra_maps, int reg_type, const char **src,
513                     char *dst, int space_split, char *dst_term)
514 {
515     return term_103(zebra_maps, reg_type, src, dst, NULL, space_split,
516                     dst_term);
517 }
518
519
520 /* term_104: handle term, where trunc = Process # and ! */
521 static int term_104(ZebraMaps zebra_maps, int reg_type,
522                     const char **src, char *dst, int space_split,
523                     char *dst_term)
524 {
525     const char *s0;
526     const char **map;
527     int i = 0;
528     int j = 0;
529
530     if (!term_pre(zebra_maps, reg_type, src, "?*#", "?*#", !space_split))
531         return 0;
532     s0 = *src;
533     while (*s0)
534     {
535         if (*s0 == '?')
536         {
537             dst_term[j++] = *s0++;
538             if (*s0 >= '0' && *s0 <= '9')
539             {
540                 int limit = 0;
541                 while (*s0 >= '0' && *s0 <= '9')
542                 {
543                     limit = limit * 10 + (*s0 - '0');
544                     dst_term[j++] = *s0++;
545                 }
546                 if (limit > 20)
547                     limit = 20;
548                 while (--limit >= 0)
549                 {
550                     dst[i++] = '.';
551                     dst[i++] = '?';
552                 }
553             }
554             else
555             {
556                 dst[i++] = '.';
557                 dst[i++] = '*';
558             }
559         }
560         else if (*s0 == '*')
561         {
562             dst[i++] = '.';
563             dst[i++] = '*';
564             dst_term[j++] = *s0++;
565         }
566         else if (*s0 == '#')
567         {
568             dst[i++] = '.';
569             dst_term[j++] = *s0++;
570         }
571         else
572         {
573             const char *s1 = s0;
574             int q_map_match = 0;
575             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
576                                     &q_map_match);
577             if (space_split && **map == *CHR_SPACE)
578                 break;
579
580             /* add non-space char */
581             memcpy(dst_term+j, s1, s0 - s1);
582             j += (s0 - s1);
583             if (!q_map_match)
584             {
585                 while (s1 < s0)
586                 {
587                     if (strchr(REGEX_CHARS, *s1))
588                         dst[i++] = '\\';
589                     dst[i++] = *s1++;
590                 }
591             }
592             else
593             {
594                 char tmpbuf[80];
595                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
596                 
597                 strcpy(dst + i, map[0]);
598                 i += strlen(map[0]);
599             }
600         }
601     }
602     dst[i] = '\0';
603     dst_term[j++] = '\0';
604     *src = s0;
605     return i;
606 }
607
608 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
609 static int term_105(ZebraMaps zebra_maps, int reg_type,
610                     const char **src, char *dst, int space_split,
611                     char *dst_term, int right_truncate)
612 {
613     const char *s0;
614     const char **map;
615     int i = 0;
616     int j = 0;
617
618     if (!term_pre(zebra_maps, reg_type, src, "*!", "*!", !space_split))
619         return 0;
620     s0 = *src;
621     while (*s0)
622     {
623         if (*s0 == '*')
624         {
625             dst[i++] = '.';
626             dst[i++] = '*';
627             dst_term[j++] = *s0++;
628         }
629         else if (*s0 == '!')
630         {
631             dst[i++] = '.';
632             dst_term[j++] = *s0++;
633         }
634         else
635         {
636             const char *s1 = s0;
637             int q_map_match = 0;
638             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
639                                     &q_map_match);
640             if (space_split && **map == *CHR_SPACE)
641                 break;
642
643             /* add non-space char */
644             memcpy(dst_term+j, s1, s0 - s1);
645             j += (s0 - s1);
646             if (!q_map_match)
647             {
648                 while (s1 < s0)
649                 {
650                     if (strchr(REGEX_CHARS, *s1))
651                         dst[i++] = '\\';
652                     dst[i++] = *s1++;
653                 }
654             }
655             else
656             {
657                 char tmpbuf[80];
658                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
659                 
660                 strcpy(dst + i, map[0]);
661                 i += strlen(map[0]);
662             }
663         }
664     }
665     if (right_truncate)
666     {
667         dst[i++] = '.';
668         dst[i++] = '*';
669     }
670     dst[i] = '\0';
671     
672     dst_term[j++] = '\0';
673     *src = s0;
674     return i;
675 }
676
677
678 /* gen_regular_rel - generate regular expression from relation
679  *  val:     border value (inclusive)
680  *  islt:    1 if <=; 0 if >=.
681  */
682 static void gen_regular_rel(char *dst, int val, int islt)
683 {
684     int dst_p;
685     int w, d, i;
686     int pos = 0;
687     char numstr[20];
688
689     yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
690     if (val >= 0)
691     {
692         if (islt)
693             strcpy(dst, "(-[0-9]+|(");
694         else
695             strcpy(dst, "((");
696     } 
697     else
698     {
699         if (!islt)
700         {
701             strcpy(dst, "([0-9]+|-(");
702             dst_p = strlen(dst);
703             islt = 1;
704         }
705         else
706         {
707             strcpy(dst, "(-(");
708             islt = 0;
709         }
710         val = -val;
711     }
712     dst_p = strlen(dst);
713     sprintf(numstr, "%d", val);
714     for (w = strlen(numstr); --w >= 0; pos++)
715     {
716         d = numstr[w];
717         if (pos > 0)
718         {
719             if (islt)
720             {
721                 if (d == '0')
722                     continue;
723                 d--;
724             } 
725             else
726             {
727                 if (d == '9')
728                     continue;
729                 d++;
730             }
731         }
732         
733         strcpy(dst + dst_p, numstr);
734         dst_p = strlen(dst) - pos - 1;
735
736         if (islt)
737         {
738             if (d != '0')
739             {
740                 dst[dst_p++] = '[';
741                 dst[dst_p++] = '0';
742                 dst[dst_p++] = '-';
743                 dst[dst_p++] = d;
744                 dst[dst_p++] = ']';
745             }
746             else
747                 dst[dst_p++] = d;
748         }
749         else
750         {
751             if (d != '9')
752             { 
753                 dst[dst_p++] = '[';
754                 dst[dst_p++] = d;
755                 dst[dst_p++] = '-';
756                 dst[dst_p++] = '9';
757                 dst[dst_p++] = ']';
758             }
759             else
760                 dst[dst_p++] = d;
761         }
762         for (i = 0; i<pos; i++)
763         {
764             dst[dst_p++] = '[';
765             dst[dst_p++] = '0';
766             dst[dst_p++] = '-';
767             dst[dst_p++] = '9';
768             dst[dst_p++] = ']';
769         }
770         dst[dst_p++] = '|';
771     }
772     dst[dst_p] = '\0';
773     if (islt)
774     {
775         /* match everything less than 10^(pos-1) */
776         strcat(dst, "0*");
777         for (i = 1; i<pos; i++)
778             strcat(dst, "[0-9]?");
779     }
780     else
781     {
782         /* match everything greater than 10^pos */
783         for (i = 0; i <= pos; i++)
784             strcat(dst, "[0-9]");
785         strcat(dst, "[0-9]*");
786     }
787     strcat(dst, "))");
788 }
789
/* string_rel_add_char: copy one pattern character from src to *term_p.
 *
 *  term_p:  in/out write position; advanced past what was written.
 *  src:     NUL-terminated pattern, possibly with backslash escapes.
 *  indx:    in/out read index into src; advanced past what was read.
 *
 * A backslash is copied together with the character it escapes, so an
 * escape sequence is never split.  A backslash that is the last
 * character of src is copied alone: taking the terminator along as the
 * "escaped" character would move *indx past the end of the string and
 * make callers that loop on src[*indx] read out of bounds.
 */
void string_rel_add_char(char **term_p, const char *src, int *indx)
{
    if (src[*indx] == '\\' && src[*indx + 1] != '\0')
        *(*term_p)++ = src[(*indx)++];
    *(*term_p)++ = src[(*indx)++];
}
796
797 /*
798  *   >  abc     ([b-].*|a[c-].*|ab[d-].*|abc.+)
799  *              ([^-a].*|a[^-b].*ab[^-c].*|abc.+)
800  *   >= abc     ([b-].*|a[c-].*|ab[c-].*)
801  *              ([^-a].*|a[^-b].*|ab[c-].*)
802  *   <  abc     ([-0].*|a[-a].*|ab[-b].*)
803  *              ([^a-].*|a[^b-].*|ab[^c-].*)
804  *   <= abc     ([-0].*|a[-a].*|ab[-b].*|abc)
805  *              ([^a-].*|a[^b-].*|ab[^c-].*|abc)
806  */
807 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
808                            const char **term_sub, char *term_dict,
809                            oid_value attributeSet,
810                            int reg_type, int space_split, char *term_dst,
811                            int *error_code)
812 {
813     AttrType relation;
814     int relation_value;
815     int i;
816     char *term_tmp = term_dict + strlen(term_dict);
817     char term_component[2*IT_MAX_WORD+20];
818
819     attr_init(&relation, zapt, 2);
820     relation_value = attr_find(&relation, NULL);
821
822     *error_code = 0;
823     yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
824     switch (relation_value)
825     {
826     case 1:
827         if (!term_100(zh->reg->zebra_maps, reg_type,
828                       term_sub, term_component,
829                       space_split, term_dst))
830             return 0;
831         yaz_log(log_level_rpn, "Relation <");
832         
833         *term_tmp++ = '(';
834         for (i = 0; term_component[i]; )
835         {
836             int j = 0;
837
838             if (i)
839                 *term_tmp++ = '|';
840             while (j < i)
841                 string_rel_add_char(&term_tmp, term_component, &j);
842
843             *term_tmp++ = '[';
844
845             *term_tmp++ = '^';
846             string_rel_add_char(&term_tmp, term_component, &i);
847             *term_tmp++ = '-';
848
849             *term_tmp++ = ']';
850             *term_tmp++ = '.';
851             *term_tmp++ = '*';
852
853             if ((term_tmp - term_dict) > IT_MAX_WORD)
854                 break;
855         }
856         *term_tmp++ = ')';
857         *term_tmp = '\0';
858         break;
859     case 2:
860         if (!term_100(zh->reg->zebra_maps, reg_type,
861                       term_sub, term_component,
862                       space_split, term_dst))
863             return 0;
864         yaz_log(log_level_rpn, "Relation <=");
865
866         *term_tmp++ = '(';
867         for (i = 0; term_component[i]; )
868         {
869             int j = 0;
870
871             while (j < i)
872                 string_rel_add_char(&term_tmp, term_component, &j);
873             *term_tmp++ = '[';
874
875             *term_tmp++ = '^';
876             string_rel_add_char(&term_tmp, term_component, &i);
877             *term_tmp++ = '-';
878
879             *term_tmp++ = ']';
880             *term_tmp++ = '.';
881             *term_tmp++ = '*';
882
883             *term_tmp++ = '|';
884
885             if ((term_tmp - term_dict) > IT_MAX_WORD)
886                 break;
887         }
888         for (i = 0; term_component[i]; )
889             string_rel_add_char(&term_tmp, term_component, &i);
890         *term_tmp++ = ')';
891         *term_tmp = '\0';
892         break;
893     case 5:
894         if (!term_100 (zh->reg->zebra_maps, reg_type,
895                        term_sub, term_component, space_split, term_dst))
896             return 0;
897         yaz_log(log_level_rpn, "Relation >");
898
899         *term_tmp++ = '(';
900         for (i = 0; term_component[i];)
901         {
902             int j = 0;
903
904             while (j < i)
905                 string_rel_add_char(&term_tmp, term_component, &j);
906             *term_tmp++ = '[';
907             
908             *term_tmp++ = '^';
909             *term_tmp++ = '-';
910             string_rel_add_char(&term_tmp, term_component, &i);
911
912             *term_tmp++ = ']';
913             *term_tmp++ = '.';
914             *term_tmp++ = '*';
915
916             *term_tmp++ = '|';
917
918             if ((term_tmp - term_dict) > IT_MAX_WORD)
919                 break;
920         }
921         for (i = 0; term_component[i];)
922             string_rel_add_char(&term_tmp, term_component, &i);
923         *term_tmp++ = '.';
924         *term_tmp++ = '+';
925         *term_tmp++ = ')';
926         *term_tmp = '\0';
927         break;
928     case 4:
929         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
930                       term_component, space_split, term_dst))
931             return 0;
932         yaz_log(log_level_rpn, "Relation >=");
933
934         *term_tmp++ = '(';
935         for (i = 0; term_component[i];)
936         {
937             int j = 0;
938
939             if (i)
940                 *term_tmp++ = '|';
941             while (j < i)
942                 string_rel_add_char(&term_tmp, term_component, &j);
943             *term_tmp++ = '[';
944
945             if (term_component[i+1])
946             {
947                 *term_tmp++ = '^';
948                 *term_tmp++ = '-';
949                 string_rel_add_char(&term_tmp, term_component, &i);
950             }
951             else
952             {
953                 string_rel_add_char(&term_tmp, term_component, &i);
954                 *term_tmp++ = '-';
955             }
956             *term_tmp++ = ']';
957             *term_tmp++ = '.';
958             *term_tmp++ = '*';
959
960             if ((term_tmp - term_dict) > IT_MAX_WORD)
961                 break;
962         }
963         *term_tmp++ = ')';
964         *term_tmp = '\0';
965         break;
966     case 3:
967     case 102:
968     case -1:
969         yaz_log(log_level_rpn, "Relation =");
970         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
971                       term_component, space_split, term_dst))
972             return 0;
973         strcat(term_tmp, "(");
974         strcat(term_tmp, term_component);
975         strcat(term_tmp, ")");
976         break;
977     default:
978         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
979         return 0;
980     }
981     return 1;
982 }
983
984 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
985                              const char **term_sub, 
986                              oid_value attributeSet, NMEM stream,
987                              struct grep_info *grep_info,
988                              int reg_type, int complete_flag,
989                              int num_bases, char **basenames,
990                              char *term_dst, int xpath_use,
991                              struct ord_list **ol);
992
993 static ZEBRA_RES term_trunc(ZebraHandle zh,
994                             Z_AttributesPlusTerm *zapt,
995                             const char **term_sub, 
996                             oid_value attributeSet, NMEM stream,
997                             struct grep_info *grep_info,
998                             int reg_type, int complete_flag,
999                             int num_bases, char **basenames,
1000                             char *term_dst,
1001                             const char *rank_type, int xpath_use,
1002                             NMEM rset_nmem,
1003                             RSET *rset,
1004                             struct rset_key_control *kc)
1005 {
1006     ZEBRA_RES res;
1007     struct ord_list *ol;
1008     *rset = 0;
1009     grep_info->isam_p_indx = 0;
1010     res = string_term(zh, zapt, term_sub, attributeSet, stream, grep_info,
1011                       reg_type, complete_flag, num_bases, basenames,
1012                       term_dst, xpath_use, &ol);
1013     if (res != ZEBRA_OK)
1014         return res;
1015     if (!*term_sub)  /* no more terms ? */
1016         return res;
1017     yaz_log(log_level_rpn, "term: %s", term_dst);
1018     *rset = rset_trunc(zh, grep_info->isam_p_buf,
1019                        grep_info->isam_p_indx, term_dst,
1020                        strlen(term_dst), rank_type, 1 /* preserve pos */,
1021                        zapt->term->which, rset_nmem,
1022                        kc, kc->scope, ol, reg_type);
1023     if (!*rset)
1024         return ZEBRA_FAIL;
1025     return ZEBRA_OK;
1026 }
1027
1028 static char *nmem_strdup_i(NMEM nmem, int v)
1029 {
1030     char val_str[64];
1031     sprintf(val_str, "%d", v);
1032     return nmem_strdup(nmem, val_str);
1033 }
1034
1035 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1036                              const char **term_sub, 
1037                              oid_value attributeSet, NMEM stream,
1038                              struct grep_info *grep_info,
1039                              int reg_type, int complete_flag,
1040                              int num_bases, char **basenames,
1041                              char *term_dst, int xpath_use,
1042                              struct ord_list **ol)
1043 {
1044     char term_dict[2*IT_MAX_WORD+4000];
1045     int j, r, base_no;
1046     AttrType truncation;
1047     int truncation_value;
1048     AttrType use;
1049     int use_value;
1050     const char *use_string = 0;
1051     oid_value curAttributeSet = attributeSet;
1052     const char *termp;
1053     struct rpn_char_map_info rcmi;
1054     int space_split = complete_flag ? 0 : 1;
1055
1056     int bases_ok = 0;     /* no of databases with OK attribute */
1057     int errCode = 0;      /* err code (if any is not OK) */
1058     char *errString = 0;  /* addinfo */
1059
1060
1061     *ol = ord_list_create(stream);
1062
1063     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1064     attr_init(&use, zapt, 1);
1065     use_value = attr_find_ex(&use, &curAttributeSet, &use_string);
1066     yaz_log(log_level_rpn, "string_term, use value %d", use_value);
1067     attr_init(&truncation, zapt, 5);
1068     truncation_value = attr_find(&truncation, NULL);
1069     yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1070
1071     if (use_value == -1)    /* no attribute - assumy "any" */
1072         use_value = 1016;
1073     for (base_no = 0; base_no < num_bases; base_no++)
1074     {
1075         int ord = -1;
1076         int attr_ok = 0;
1077         int regex_range = 0;
1078         int init_pos = 0;
1079         attent attp;
1080         data1_local_attribute id_xpath_attr;
1081         data1_local_attribute *local_attr;
1082         int max_pos, prefix_len = 0;
1083         int relation_error;
1084
1085         termp = *term_sub;
1086
1087         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1088         {
1089             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1090                            basenames[base_no]);
1091             return ZEBRA_FAIL;
1092         }
1093         if (xpath_use > 0 && use_value == -2) 
1094         {
1095             /* xpath mode and we have a string attribute */
1096             attp.local_attributes = &id_xpath_attr;
1097             attp.attset_ordinal = VAL_IDXPATH;
1098             id_xpath_attr.next = 0;
1099
1100             use_value = xpath_use;  /* xpath_use as use-attribute now */
1101             id_xpath_attr.local = use_value;
1102         }
1103         else if (curAttributeSet == VAL_IDXPATH && use_value >= 0)
1104         {
1105             /* X-Path attribute, use numeric value directly */
1106             attp.local_attributes = &id_xpath_attr;
1107             attp.attset_ordinal = VAL_IDXPATH;
1108             id_xpath_attr.next = 0;
1109             id_xpath_attr.local = use_value;
1110         }
1111         else if (use_string &&
1112                  (ord = zebraExplain_lookup_attr_str(zh->reg->zei,
1113                                                      use_string)) >= 0)
1114         {
1115             /* we have a match for a raw string attribute */
1116             char ord_buf[32];
1117             int i, ord_len;
1118
1119             if (prefix_len)
1120                 term_dict[prefix_len++] = '|';
1121             else
1122                 term_dict[prefix_len++] = '(';
1123             
1124             ord_len = key_SU_encode (ord, ord_buf);
1125             for (i = 0; i<ord_len; i++)
1126             {
1127                 term_dict[prefix_len++] = 1;
1128                 term_dict[prefix_len++] = ord_buf[i];
1129             }
1130             attp.local_attributes = 0;  /* no more attributes */
1131             *ol = ord_list_append(stream, *ol, ord);
1132         }
1133         else 
1134         {
1135             /* lookup in the .att files . Allow string as well */
1136             if ((r = att_getentbyatt (zh, &attp, curAttributeSet, use_value,
1137                                       use_string)))
1138             {
1139                 yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
1140                         curAttributeSet, use_value, r);
1141                 if (r == -1)
1142                 {
1143                     /* set was found, but value wasn't defined */
1144                     errCode = YAZ_BIB1_UNSUPP_USE_ATTRIBUTE;
1145                     if (use_string)
1146                         errString = nmem_strdup(stream, use_string);
1147                     else
1148                         errString = nmem_strdup_i (stream, use_value);
1149                 }
1150                 else
1151                 {
1152                     int oid[OID_SIZE];
1153                     struct oident oident;
1154                     
1155                     oident.proto = PROTO_Z3950;
1156                     oident.oclass = CLASS_ATTSET;
1157                     oident.value = curAttributeSet;
1158                     oid_ent_to_oid (&oident, oid);
1159                     
1160                     errCode = YAZ_BIB1_UNSUPP_ATTRIBUTE_SET;
1161                     errString = nmem_strdup(stream, oident.desc);
1162                 }
1163                 continue;
1164             }
1165         }
1166         for (local_attr = attp.local_attributes; local_attr;
1167              local_attr = local_attr->next)
1168         {
1169             char ord_buf[32];
1170             int i, ord_len;
1171             
1172             ord = zebraExplain_lookup_attr_su(zh->reg->zei,
1173                                               attp.attset_ordinal,
1174                                               local_attr->local);
1175             if (ord < 0)
1176                 continue;
1177             *ol = ord_list_append(stream, *ol, ord);
1178             if (prefix_len)
1179                 term_dict[prefix_len++] = '|';
1180             else
1181                 term_dict[prefix_len++] = '(';
1182             
1183             ord_len = key_SU_encode (ord, ord_buf);
1184             for (i = 0; i<ord_len; i++)
1185             {
1186                 term_dict[prefix_len++] = 1;
1187                 term_dict[prefix_len++] = ord_buf[i];
1188             }
1189         }
1190         bases_ok++;
1191         if (prefix_len)
1192             attr_ok = 1;
1193
1194         term_dict[prefix_len++] = ')';
1195         term_dict[prefix_len++] = 1;
1196         term_dict[prefix_len++] = reg_type;
1197         yaz_log(log_level_rpn, "reg_type = %d", term_dict[prefix_len-1]);
1198         term_dict[prefix_len] = '\0';
1199         j = prefix_len;
1200         switch (truncation_value)
1201         {
1202         case -1:         /* not specified */
1203         case 100:        /* do not truncate */
1204             if (!string_relation (zh, zapt, &termp, term_dict,
1205                                   attributeSet,
1206                                   reg_type, space_split, term_dst,
1207                                   &relation_error))
1208             {
1209                 if (relation_error)
1210                 {
1211                     zebra_setError(zh, relation_error, 0);
1212                     return ZEBRA_FAIL;
1213                 }
1214                 *term_sub = 0;
1215                 return ZEBRA_OK;
1216             }
1217             break;
1218         case 1:          /* right truncation */
1219             term_dict[j++] = '(';
1220             if (!term_100(zh->reg->zebra_maps, reg_type,
1221                           &termp, term_dict + j, space_split, term_dst))
1222             {
1223                 *term_sub = 0;
1224                 return ZEBRA_OK;
1225             }
1226             strcat(term_dict, ".*)");
1227             break;
1228         case 2:          /* keft truncation */
1229             term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1230             if (!term_100(zh->reg->zebra_maps, reg_type,
1231                           &termp, term_dict + j, space_split, term_dst))
1232             {
1233                 *term_sub = 0;
1234                 return ZEBRA_OK;
1235             }
1236             strcat(term_dict, ")");
1237             break;
1238         case 3:          /* left&right truncation */
1239             term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1240             if (!term_100(zh->reg->zebra_maps, reg_type,
1241                           &termp, term_dict + j, space_split, term_dst))
1242             {
1243                 *term_sub = 0;
1244                 return ZEBRA_OK;
1245             }
1246             strcat(term_dict, ".*)");
1247             break;
1248         case 101:        /* process # in term */
1249             term_dict[j++] = '(';
1250             if (!term_101(zh->reg->zebra_maps, reg_type,
1251                           &termp, term_dict + j, space_split, term_dst))
1252             {
1253                 *term_sub = 0;
1254                 return ZEBRA_OK;
1255             }
1256             strcat(term_dict, ")");
1257             break;
1258         case 102:        /* Regexp-1 */
1259             term_dict[j++] = '(';
1260             if (!term_102(zh->reg->zebra_maps, reg_type,
1261                           &termp, term_dict + j, space_split, term_dst))
1262             {
1263                 *term_sub = 0;
1264                 return ZEBRA_OK;
1265             }
1266             strcat(term_dict, ")");
1267             break;
1268         case 103:       /* Regexp-2 */
1269             regex_range = 1;
1270             term_dict[j++] = '(';
1271             init_pos = 2;
1272             if (!term_103(zh->reg->zebra_maps, reg_type,
1273                           &termp, term_dict + j, &regex_range,
1274                           space_split, term_dst))
1275             {
1276                 *term_sub = 0;
1277                 return ZEBRA_OK;
1278             }
1279             strcat(term_dict, ")");
1280             break;
1281         case 104:        /* process # and ! in term */
1282             term_dict[j++] = '(';
1283             if (!term_104(zh->reg->zebra_maps, reg_type,
1284                           &termp, term_dict + j, space_split, term_dst))
1285             {
1286                 *term_sub = 0;
1287                 return ZEBRA_OK;
1288             }
1289             strcat(term_dict, ")");
1290             break;
1291         case 105:        /* process * and ! in term */
1292             term_dict[j++] = '(';
1293             if (!term_105(zh->reg->zebra_maps, reg_type,
1294                           &termp, term_dict + j, space_split, term_dst, 1))
1295             {
1296                 *term_sub = 0;
1297                 return ZEBRA_OK;
1298             }
1299             strcat(term_dict, ")");
1300             break;
1301         case 106:        /* process * and ! in term */
1302             term_dict[j++] = '(';
1303             if (!term_105(zh->reg->zebra_maps, reg_type,
1304                           &termp, term_dict + j, space_split, term_dst, 0))
1305             {
1306                 *term_sub = 0;
1307                 return ZEBRA_OK;
1308             }
1309             strcat(term_dict, ")");
1310             break;
1311         default:
1312             zebra_setError_zint(zh,
1313                                 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1314                                 truncation_value);
1315             return ZEBRA_FAIL;
1316         }
1317         if (attr_ok)
1318         {
1319             char buf[80];
1320             const char *input = term_dict + prefix_len;
1321             esc_str(buf, sizeof(buf), input, strlen(input));
1322         }
1323         if (attr_ok)
1324         {
1325             yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_dict+prefix_len);
1326             r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
1327                                  grep_info, &max_pos, init_pos,
1328                                  grep_handle);
1329             if (r)
1330                 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1331         }
1332     }
1333     if (!bases_ok)
1334     {
1335         zebra_setError(zh, errCode, errString);
1336         return ZEBRA_FAIL;
1337     }
1338     *term_sub = termp;
1339     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1340     return ZEBRA_OK;
1341 }
1342
1343
1344 /* convert APT search term to UTF8 */
1345 static ZEBRA_RES zapt_term_to_utf8(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1346                                    char *termz)
1347 {
1348     size_t sizez;
1349     Z_Term *term = zapt->term;
1350
1351     switch (term->which)
1352     {
1353     case Z_Term_general:
1354         if (zh->iconv_to_utf8 != 0)
1355         {
1356             char *inbuf = term->u.general->buf;
1357             size_t inleft = term->u.general->len;
1358             char *outbuf = termz;
1359             size_t outleft = IT_MAX_WORD-1;
1360             size_t ret;
1361
1362             ret = yaz_iconv(zh->iconv_to_utf8, &inbuf, &inleft,
1363                         &outbuf, &outleft);
1364             if (ret == (size_t)(-1))
1365             {
1366                 ret = yaz_iconv(zh->iconv_to_utf8, 0, 0, 0, 0);
1367                 zebra_setError(
1368                     zh, 
1369                     YAZ_BIB1_QUERY_TERM_INCLUDES_CHARS_THAT_DO_NOT_TRANSLATE_INTO_,
1370                     0);
1371                 return ZEBRA_FAIL;
1372             }
1373             *outbuf = 0;
1374         }
1375         else
1376         {
1377             sizez = term->u.general->len;
1378             if (sizez > IT_MAX_WORD-1)
1379                 sizez = IT_MAX_WORD-1;
1380             memcpy (termz, term->u.general->buf, sizez);
1381             termz[sizez] = '\0';
1382         }
1383         break;
1384     case Z_Term_characterString:
1385         sizez = strlen(term->u.characterString);
1386         if (sizez > IT_MAX_WORD-1)
1387             sizez = IT_MAX_WORD-1;
1388         memcpy (termz, term->u.characterString, sizez);
1389         termz[sizez] = '\0';
1390         break;
1391     default:
1392         zebra_setError(zh, YAZ_BIB1_UNSUPP_CODED_VALUE_FOR_TERM, 0);
1393         return ZEBRA_FAIL;
1394     }
1395     return ZEBRA_OK;
1396 }
1397
1398 /* convert APT SCAN term to internal cmap */
1399 static ZEBRA_RES trans_scan_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1400                                  char *termz, int reg_type)
1401 {
1402     char termz0[IT_MAX_WORD];
1403
1404     if (zapt_term_to_utf8(zh, zapt, termz0) == ZEBRA_FAIL)
1405         return ZEBRA_FAIL;    /* error */
1406     else
1407     {
1408         const char **map;
1409         const char *cp = (const char *) termz0;
1410         const char *cp_end = cp + strlen(cp);
1411         const char *src;
1412         int i = 0;
1413         const char *space_map = NULL;
1414         int len;
1415             
1416         while ((len = (cp_end - cp)) > 0)
1417         {
1418             map = zebra_maps_input(zh->reg->zebra_maps, reg_type, &cp, len, 0);
1419             if (**map == *CHR_SPACE)
1420                 space_map = *map;
1421             else
1422             {
1423                 if (i && space_map)
1424                     for (src = space_map; *src; src++)
1425                         termz[i++] = *src;
1426                 space_map = NULL;
1427                 for (src = *map; *src; src++)
1428                     termz[i++] = *src;
1429             }
1430         }
1431         termz[i] = '\0';
1432     }
1433     return ZEBRA_OK;
1434 }
1435
1436 static void grep_info_delete(struct grep_info *grep_info)
1437 {
1438 #ifdef TERM_COUNT
1439     xfree(grep_info->term_no);
1440 #endif
1441     xfree(grep_info->isam_p_buf);
1442 }
1443
1444 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1445                                    Z_AttributesPlusTerm *zapt,
1446                                    struct grep_info *grep_info,
1447                                    int reg_type)
1448 {
1449     AttrType termset;
1450     int termset_value_numeric;
1451     const char *termset_value_string;
1452
1453 #ifdef TERM_COUNT
1454     grep_info->term_no = 0;
1455 #endif
1456     grep_info->isam_p_size = 0;
1457     grep_info->isam_p_buf = NULL;
1458     grep_info->zh = zh;
1459     grep_info->reg_type = reg_type;
1460     grep_info->termset = 0;
1461
1462     if (!zapt)
1463         return ZEBRA_OK;
1464     attr_init(&termset, zapt, 8);
1465     termset_value_numeric =
1466         attr_find_ex(&termset, NULL, &termset_value_string);
1467     if (termset_value_numeric != -1)
1468     {
1469         char resname[32];
1470         const char *termset_name = 0;
1471         if (termset_value_numeric != -2)
1472         {
1473     
1474             sprintf(resname, "%d", termset_value_numeric);
1475             termset_name = resname;
1476         }
1477         else
1478             termset_name = termset_value_string;
1479         yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1480         grep_info->termset = resultSetAdd(zh, termset_name, 1);
1481         if (!grep_info->termset)
1482         {
1483             zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1484             return ZEBRA_FAIL;
1485         }
1486     }
1487     return ZEBRA_OK;
1488 }
1489                                
1490 /**
1491   \brief Create result set(s) for list of terms
1492   \param zh Zebra Handle
1493   \param termz term as used in query but converted to UTF-8
1494   \param attributeSet default attribute set
1495   \param stream memory for result
1496   \param reg_type register type ('w', 'p',..)
1497   \param complete_flag whether it's phrases or not
1498   \param rank_type term flags for ranking
1499   \param xpath_use use attribute for X-Path (-1 for no X-path)
1500   \param num_bases number of databases
1501   \param basenames array of databases
1502   \param rset_mem memory for result sets
1503   \param result_sets output result set for each term in list (output)
1504   \param number number of output result sets
1505   \param kc rset key control to be used for created result sets
1506 */
1507 static ZEBRA_RES term_list_trunc(ZebraHandle zh,
1508                                  Z_AttributesPlusTerm *zapt,
1509                                  const char *termz,
1510                                  oid_value attributeSet,
1511                                  NMEM stream,
1512                                  int reg_type, int complete_flag,
1513                                  const char *rank_type, int xpath_use,
1514                                  int num_bases, char **basenames, 
1515                                  NMEM rset_nmem,
1516                                  RSET **result_sets, int *num_result_sets,
1517                                  struct rset_key_control *kc)
1518 {
1519     char term_dst[IT_MAX_WORD+1];
1520     struct grep_info grep_info;
1521     const char *termp = termz;
1522     int alloc_sets = 0;
1523
1524     *num_result_sets = 0;
1525     *term_dst = 0;
1526     if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1527         return ZEBRA_FAIL;
1528     while(1)
1529     { 
1530         ZEBRA_RES res;
1531
1532         if (alloc_sets == *num_result_sets)
1533         {
1534             int add = 10;
1535             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1536                                               sizeof(*rnew));
1537             if (alloc_sets)
1538                 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1539             alloc_sets = alloc_sets + add;
1540             *result_sets = rnew;
1541         }
1542         res = term_trunc(zh, zapt, &termp, attributeSet,
1543                          stream, &grep_info,
1544                          reg_type, complete_flag,
1545                          num_bases, basenames,
1546                          term_dst, rank_type,
1547                          xpath_use, rset_nmem,
1548                          &(*result_sets)[*num_result_sets],
1549                          kc);
1550         if (res != ZEBRA_OK)
1551         {
1552             int i;
1553             for (i = 0; i < *num_result_sets; i++)
1554                 rset_delete((*result_sets)[i]);
1555             grep_info_delete (&grep_info);
1556             return res;
1557         }
1558         if ((*result_sets)[*num_result_sets] == 0)
1559             break;
1560         (*num_result_sets)++;
1561     }
1562     grep_info_delete(&grep_info);
1563     return ZEBRA_OK;
1564 }
1565
1566 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1567                                        Z_AttributesPlusTerm *zapt,
1568                                        const char *termz_org,
1569                                        oid_value attributeSet,
1570                                        NMEM stream,
1571                                        int reg_type, int complete_flag,
1572                                        const char *rank_type, int xpath_use,
1573                                        int num_bases, char **basenames, 
1574                                        NMEM rset_nmem,
1575                                        RSET *rset,
1576                                        struct rset_key_control *kc)
1577 {
1578     RSET *result_sets = 0;
1579     int num_result_sets = 0;
1580     ZEBRA_RES res =
1581         term_list_trunc(zh, zapt, termz_org, attributeSet,
1582                         stream, reg_type, complete_flag,
1583                         rank_type, xpath_use,
1584                         num_bases, basenames,
1585                         rset_nmem,
1586                         &result_sets, &num_result_sets, kc);
1587     if (res != ZEBRA_OK)
1588         return res;
1589     if (num_result_sets == 0)
1590         *rset = rsnull_create (rset_nmem, kc, 0); 
1591     else if (num_result_sets == 1)
1592         *rset = result_sets[0];
1593     else
1594         *rset = rsprox_create(rset_nmem, kc, kc->scope,
1595                               num_result_sets, result_sets,
1596                               1 /* ordered */, 0 /* exclusion */,
1597                               3 /* relation */, 1 /* distance */);
1598     if (!*rset)
1599         return ZEBRA_FAIL;
1600     return ZEBRA_OK;
1601 }
1602
1603 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1604                                         Z_AttributesPlusTerm *zapt,
1605                                         const char *termz_org,
1606                                         oid_value attributeSet,
1607                                         NMEM stream,
1608                                         int reg_type, int complete_flag,
1609                                         const char *rank_type,
1610                                         int xpath_use,
1611                                         int num_bases, char **basenames,
1612                                         NMEM rset_nmem,
1613                                         RSET *rset,
1614                                         struct rset_key_control *kc)
1615 {
1616     RSET *result_sets = 0;
1617     int num_result_sets = 0;
1618     ZEBRA_RES res =
1619         term_list_trunc(zh, zapt, termz_org, attributeSet,
1620                         stream, reg_type, complete_flag,
1621                         rank_type, xpath_use,
1622                         num_bases, basenames,
1623                         rset_nmem,
1624                         &result_sets, &num_result_sets, kc);
1625     if (res != ZEBRA_OK)
1626         return res;
1627     if (num_result_sets == 0)
1628         *rset = rsnull_create (rset_nmem, kc, 0); 
1629     else if (num_result_sets == 1)
1630         *rset = result_sets[0];
1631     else
1632         *rset = rsmulti_or_create(rset_nmem, kc, kc->scope, 0 /* termid */,
1633                                   num_result_sets, result_sets);
1634     if (!*rset)
1635         return ZEBRA_FAIL;
1636     return ZEBRA_OK;
1637 }
1638
1639 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1640                                          Z_AttributesPlusTerm *zapt,
1641                                          const char *termz_org,
1642                                          oid_value attributeSet,
1643                                          NMEM stream,
1644                                          int reg_type, int complete_flag,
1645                                          const char *rank_type, 
1646                                          int xpath_use,
1647                                          int num_bases, char **basenames,
1648                                          NMEM rset_nmem,
1649                                          RSET *rset,
1650                                          struct rset_key_control *kc)
1651 {
1652     RSET *result_sets = 0;
1653     int num_result_sets = 0;
1654     ZEBRA_RES res =
1655         term_list_trunc(zh, zapt, termz_org, attributeSet,
1656                         stream, reg_type, complete_flag,
1657                         rank_type, xpath_use,
1658                         num_bases, basenames,
1659                         rset_nmem,
1660                         &result_sets, &num_result_sets,
1661                         kc);
1662     if (res != ZEBRA_OK)
1663         return res;
1664     if (num_result_sets == 0)
1665         *rset = rsnull_create (rset_nmem, kc, 0); 
1666     else if (num_result_sets == 1)
1667         *rset = result_sets[0];
1668     else
1669         *rset = rsmulti_and_create(rset_nmem, kc, kc->scope,
1670                                    num_result_sets, result_sets);
1671     if (!*rset)
1672         return ZEBRA_FAIL;
1673     return ZEBRA_OK;
1674 }
1675
1676 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1677                             const char **term_sub,
1678                             char *term_dict,
1679                             oid_value attributeSet,
1680                             struct grep_info *grep_info,
1681                             int *max_pos,
1682                             int reg_type,
1683                             char *term_dst,
1684                             int *error_code)
1685 {
1686     AttrType relation;
1687     int relation_value;
1688     int term_value;
1689     int r;
1690     char *term_tmp = term_dict + strlen(term_dict);
1691
1692     *error_code = 0;
1693     attr_init(&relation, zapt, 2);
1694     relation_value = attr_find(&relation, NULL);
1695
1696     yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1697
1698     if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1699                   term_dst))
1700         return 0;
1701     term_value = atoi (term_tmp);
1702     switch (relation_value)
1703     {
1704     case 1:
1705         yaz_log(log_level_rpn, "Relation <");
1706         gen_regular_rel(term_tmp, term_value-1, 1);
1707         break;
1708     case 2:
1709         yaz_log(log_level_rpn, "Relation <=");
1710         gen_regular_rel(term_tmp, term_value, 1);
1711         break;
1712     case 4:
1713         yaz_log(log_level_rpn, "Relation >=");
1714         gen_regular_rel(term_tmp, term_value, 0);
1715         break;
1716     case 5:
1717         yaz_log(log_level_rpn, "Relation >");
1718         gen_regular_rel(term_tmp, term_value+1, 0);
1719         break;
1720     case -1:
1721     case 3:
1722         yaz_log(log_level_rpn, "Relation =");
1723         sprintf(term_tmp, "(0*%d)", term_value);
1724         break;
1725     default:
1726         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1727         return 0;
1728     }
1729     yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_tmp);
1730     r = dict_lookup_grep(zh->reg->dict, term_dict, 0, grep_info, max_pos,
1731                           0, grep_handle);
1732     if (r)
1733         yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1734     yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1735     return 1;
1736 }
1737
1738 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1739                               const char **term_sub, 
1740                               oid_value attributeSet,
1741                               struct grep_info *grep_info,
1742                               int reg_type, int complete_flag,
1743                               int num_bases, char **basenames,
1744                               char *term_dst, int xpath_use, NMEM stream)
1745 {
1746     char term_dict[2*IT_MAX_WORD+2];
1747     int r, base_no;
1748     AttrType use;
1749     int use_value;
1750     const char *use_string = 0;
1751     oid_value curAttributeSet = attributeSet;
1752     const char *termp;
1753     struct rpn_char_map_info rcmi;
1754
1755     int bases_ok = 0;     /* no of databases with OK attribute */
1756     int errCode = 0;      /* err code (if any is not OK) */
1757     char *errString = 0;  /* addinfo */
1758
1759     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1760     attr_init(&use, zapt, 1);
1761     use_value = attr_find_ex(&use, &curAttributeSet, &use_string);
1762
1763     if (use_value == -1)
1764         use_value = 1016;
1765
1766     for (base_no = 0; base_no < num_bases; base_no++)
1767     {
1768         attent attp;
1769         data1_local_attribute id_xpath_attr;
1770         data1_local_attribute *local_attr;
1771         int max_pos, prefix_len = 0;
1772         int relation_error = 0;
1773
1774         termp = *term_sub;
1775         if (use_value == -2)  /* string attribute (assume IDXPATH/any) */
1776         {
1777             use_value = xpath_use;
1778             attp.local_attributes = &id_xpath_attr;
1779             attp.attset_ordinal = VAL_IDXPATH;
1780             id_xpath_attr.next = 0;
1781             id_xpath_attr.local = use_value;
1782         }
1783         else if (curAttributeSet == VAL_IDXPATH)
1784         {
1785             attp.local_attributes = &id_xpath_attr;
1786             attp.attset_ordinal = VAL_IDXPATH;
1787             id_xpath_attr.next = 0;
1788             id_xpath_attr.local = use_value;
1789         }
1790         else
1791         {
1792             if ((r = att_getentbyatt (zh, &attp, curAttributeSet, use_value,
1793                                             use_string)))
1794             {
1795                 yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
1796                       curAttributeSet, use_value, r);
1797                 if (r == -1)
1798                 {
1799                     errCode = YAZ_BIB1_UNSUPP_USE_ATTRIBUTE;
1800                     if (use_string)
1801                         errString = nmem_strdup(stream, use_string);
1802                     else
1803                         errString = nmem_strdup_i (stream, use_value);
1804                 }
1805                 else
1806                     errCode = YAZ_BIB1_UNSUPP_ATTRIBUTE_SET;
1807                 continue;
1808             }
1809         }
1810         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1811         {
1812             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1813                            basenames[base_no]);
1814             return ZEBRA_FAIL;
1815         }
1816         for (local_attr = attp.local_attributes; local_attr;
1817              local_attr = local_attr->next)
1818         {
1819             int ord;
1820             char ord_buf[32];
1821             int i, ord_len;
1822
1823             ord = zebraExplain_lookup_attr_su(zh->reg->zei,
1824                                               attp.attset_ordinal,
1825                                               local_attr->local);
1826             if (ord < 0)
1827                 continue;
1828             if (prefix_len)
1829                 term_dict[prefix_len++] = '|';
1830             else
1831                 term_dict[prefix_len++] = '(';
1832
1833             ord_len = key_SU_encode (ord, ord_buf);
1834             for (i = 0; i<ord_len; i++)
1835             {
1836                 term_dict[prefix_len++] = 1;
1837                 term_dict[prefix_len++] = ord_buf[i];
1838             }
1839         }
1840         if (!prefix_len)
1841         {
1842             zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_USE_ATTRIBUTE, use_value);
1843             continue;
1844         }
1845         bases_ok++;
1846         term_dict[prefix_len++] = ')';        
1847         term_dict[prefix_len++] = 1;
1848         term_dict[prefix_len++] = reg_type;
1849         yaz_log(YLOG_DEBUG, "reg_type = %d", term_dict[prefix_len-1]);
1850         term_dict[prefix_len] = '\0';
1851         if (!numeric_relation(zh, zapt, &termp, term_dict,
1852                               attributeSet, grep_info, &max_pos, reg_type,
1853                               term_dst, &relation_error))
1854         {
1855             if (relation_error)
1856             {
1857                 zebra_setError(zh, relation_error, 0);
1858                 return ZEBRA_FAIL;
1859             }
1860             *term_sub = 0;
1861             return ZEBRA_OK;
1862         }
1863     }
1864     if (!bases_ok)
1865     {
1866         zebra_setError(zh, errCode, errString);
1867         return ZEBRA_FAIL;
1868     }
1869     *term_sub = termp;
1870     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1871     return ZEBRA_OK;
1872 }
1873
1874 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1875                                         Z_AttributesPlusTerm *zapt,
1876                                         const char *termz,
1877                                         oid_value attributeSet,
1878                                         NMEM stream,
1879                                         int reg_type, int complete_flag,
1880                                         const char *rank_type, int xpath_use,
1881                                         int num_bases, char **basenames,
1882                                         NMEM rset_nmem,
1883                                         RSET *rset,
1884                                         struct rset_key_control *kc)
1885 {
1886     char term_dst[IT_MAX_WORD+1];
1887     const char *termp = termz;
1888     RSET *result_sets = 0;
1889     int num_result_sets = 0;
1890     ZEBRA_RES res;
1891     struct grep_info grep_info;
1892     int alloc_sets = 0;
1893
1894     yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1895     if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1896         return ZEBRA_FAIL;
1897     while (1)
1898     { 
1899         if (alloc_sets == num_result_sets)
1900         {
1901             int add = 10;
1902             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1903                                               sizeof(*rnew));
1904             if (alloc_sets)
1905                 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1906             alloc_sets = alloc_sets + add;
1907             result_sets = rnew;
1908         }
1909         yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1910         grep_info.isam_p_indx = 0;
1911         res = numeric_term(zh, zapt, &termp, attributeSet, &grep_info,
1912                            reg_type, complete_flag, num_bases, basenames,
1913                            term_dst, xpath_use,
1914                            stream);
1915         if (res == ZEBRA_FAIL || termp == 0)
1916             break;
1917         yaz_log(YLOG_DEBUG, "term: %s", term_dst);
1918         result_sets[num_result_sets] =
1919             rset_trunc(zh, grep_info.isam_p_buf,
1920                        grep_info.isam_p_indx, term_dst,
1921                        strlen(term_dst), rank_type,
1922                        0 /* preserve position */,
1923                        zapt->term->which, rset_nmem, 
1924                        kc, kc->scope, 0, reg_type);
1925         if (!result_sets[num_result_sets])
1926             break;
1927         num_result_sets++;
1928     }
1929     grep_info_delete(&grep_info);
1930     if (termp)
1931     {
1932         int i;
1933         for (i = 0; i<num_result_sets; i++)
1934             rset_delete(result_sets[i]);
1935         return ZEBRA_FAIL;
1936     }
1937     if (num_result_sets == 0)
1938         *rset = rsnull_create(rset_nmem, kc, 0);
1939     if (num_result_sets == 1)
1940         *rset = result_sets[0];
1941     else
1942         *rset = rsmulti_and_create(rset_nmem, kc, kc->scope,
1943                                    num_result_sets, result_sets);
1944     if (!*rset)
1945         return ZEBRA_FAIL;
1946     return ZEBRA_OK;
1947 }
1948
1949 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1950                                       Z_AttributesPlusTerm *zapt,
1951                                       const char *termz,
1952                                       oid_value attributeSet,
1953                                       NMEM stream,
1954                                       const char *rank_type, NMEM rset_nmem,
1955                                       RSET *rset,
1956                                       struct rset_key_control *kc)
1957 {
1958     RSFD rsfd;
1959     struct it_key key;
1960     int sys;
1961     *rset = rstemp_create(rset_nmem, kc, kc->scope,
1962                           res_get (zh->res, "setTmpDir"),0 );
1963     rsfd = rset_open(*rset, RSETF_WRITE);
1964     
1965     sys = atoi(termz);
1966     if (sys <= 0)
1967         sys = 1;
1968     key.mem[0] = sys;
1969     key.mem[1] = 1;
1970     key.len = 2;
1971     rset_write (rsfd, &key);
1972     rset_close (rsfd);
1973     return ZEBRA_OK;
1974 }
1975
1976 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1977                                oid_value attributeSet, NMEM stream,
1978                                Z_SortKeySpecList *sort_sequence,
1979                                const char *rank_type,
1980                                NMEM rset_nmem,
1981                                RSET *rset,
1982                                struct rset_key_control *kc)
1983 {
1984     int i;
1985     int sort_relation_value;
1986     AttrType sort_relation_type;
1987     Z_SortKeySpec *sks;
1988     Z_SortKey *sk;
1989     int oid[OID_SIZE];
1990     oident oe;
1991     char termz[20];
1992     
1993     attr_init(&sort_relation_type, zapt, 7);
1994     sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
1995
1996     if (!sort_sequence->specs)
1997     {
1998         sort_sequence->num_specs = 10;
1999         sort_sequence->specs = (Z_SortKeySpec **)
2000             nmem_malloc(stream, sort_sequence->num_specs *
2001                          sizeof(*sort_sequence->specs));
2002         for (i = 0; i<sort_sequence->num_specs; i++)
2003             sort_sequence->specs[i] = 0;
2004     }
2005     if (zapt->term->which != Z_Term_general)
2006         i = 0;
2007     else
2008         i = atoi_n ((char *) zapt->term->u.general->buf,
2009                     zapt->term->u.general->len);
2010     if (i >= sort_sequence->num_specs)
2011         i = 0;
2012     sprintf(termz, "%d", i);
2013
2014     oe.proto = PROTO_Z3950;
2015     oe.oclass = CLASS_ATTSET;
2016     oe.value = attributeSet;
2017     if (!oid_ent_to_oid (&oe, oid))
2018         return ZEBRA_FAIL;
2019
2020     sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
2021     sks->sortElement = (Z_SortElement *)
2022         nmem_malloc(stream, sizeof(*sks->sortElement));
2023     sks->sortElement->which = Z_SortElement_generic;
2024     sk = sks->sortElement->u.generic = (Z_SortKey *)
2025         nmem_malloc(stream, sizeof(*sk));
2026     sk->which = Z_SortKey_sortAttributes;
2027     sk->u.sortAttributes = (Z_SortAttributes *)
2028         nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
2029
2030     sk->u.sortAttributes->id = oid;
2031     sk->u.sortAttributes->list = zapt->attributes;
2032
2033     sks->sortRelation = (int *)
2034         nmem_malloc(stream, sizeof(*sks->sortRelation));
2035     if (sort_relation_value == 1)
2036         *sks->sortRelation = Z_SortKeySpec_ascending;
2037     else if (sort_relation_value == 2)
2038         *sks->sortRelation = Z_SortKeySpec_descending;
2039     else 
2040         *sks->sortRelation = Z_SortKeySpec_ascending;
2041
2042     sks->caseSensitivity = (int *)
2043         nmem_malloc(stream, sizeof(*sks->caseSensitivity));
2044     *sks->caseSensitivity = 0;
2045
2046     sks->which = Z_SortKeySpec_null;
2047     sks->u.null = odr_nullval ();
2048     sort_sequence->specs[i] = sks;
2049     *rset = rsnull_create (rset_nmem, kc, 0);
2050     return ZEBRA_OK;
2051 }
2052
2053
2054 static int parse_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2055                        oid_value attributeSet,
2056                        struct xpath_location_step *xpath, int max, NMEM mem)
2057 {
2058     oid_value curAttributeSet = attributeSet;
2059     AttrType use;
2060     const char *use_string = 0;
2061     
2062     attr_init(&use, zapt, 1);
2063     attr_find_ex(&use, &curAttributeSet, &use_string);
2064
2065     if (!use_string || *use_string != '/')
2066         return -1;
2067
2068     return zebra_parse_xpath_str(use_string, xpath, max, mem);
2069 }
2070  
2071                
2072
2073 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
2074                         int reg_type, const char *term, int use,
2075                         oid_value curAttributeSet, NMEM rset_nmem,
2076                         struct rset_key_control *kc)
2077 {
2078     RSET rset;
2079     struct grep_info grep_info;
2080     char term_dict[2048];
2081     char ord_buf[32];
2082     int prefix_len = 0;
2083     int ord = zebraExplain_lookup_attr_su(zh->reg->zei, curAttributeSet, use);
2084     int ord_len, i, r, max_pos;
2085     int term_type = Z_Term_characterString;
2086     const char *flags = "void";
2087
2088     if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, '0') == ZEBRA_FAIL)
2089         return rsnull_create(rset_nmem, kc, 0);
2090     
2091     if (ord < 0)
2092         return rsnull_create(rset_nmem, kc, 0);
2093     if (prefix_len)
2094         term_dict[prefix_len++] = '|';
2095     else
2096         term_dict[prefix_len++] = '(';
2097     
2098     ord_len = key_SU_encode (ord, ord_buf);
2099     for (i = 0; i<ord_len; i++)
2100     {
2101         term_dict[prefix_len++] = 1;
2102         term_dict[prefix_len++] = ord_buf[i];
2103     }
2104     term_dict[prefix_len++] = ')';
2105     term_dict[prefix_len++] = 1;
2106     term_dict[prefix_len++] = reg_type;
2107     
2108     strcpy(term_dict+prefix_len, term);
2109     
2110     grep_info.isam_p_indx = 0;
2111     r = dict_lookup_grep(zh->reg->dict, term_dict, 0,
2112                           &grep_info, &max_pos, 0, grep_handle);
2113     yaz_log(YLOG_DEBUG, "%s %d positions", term,
2114              grep_info.isam_p_indx);
2115     rset = rset_trunc(zh, grep_info.isam_p_buf,
2116                       grep_info.isam_p_indx, term, strlen(term),
2117                       flags, 1, term_type,rset_nmem,
2118                       kc, kc->scope, 0, reg_type);
2119     grep_info_delete(&grep_info);
2120     return rset;
2121 }
2122
2123 static
2124 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2125                            oid_value attributeSet,
2126                            int num_bases, char **basenames,
2127                            NMEM stream, const char *rank_type, RSET rset,
2128                            int xpath_len, struct xpath_location_step *xpath,
2129                            NMEM rset_nmem,
2130                            RSET *rset_out,
2131                            struct rset_key_control *kc)
2132 {
2133     oid_value curAttributeSet = attributeSet;
2134     int base_no;
2135     int i;
2136
2137     if (xpath_len < 0)
2138     {
2139         *rset_out = rset;
2140         return ZEBRA_OK;
2141     }
2142
2143     yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2144     for (i = 0; i<xpath_len; i++)
2145     {
2146         yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2147
2148     }
2149
2150     curAttributeSet = VAL_IDXPATH;
2151
2152     /*
2153       //a    ->    a/.*
2154       //a/b  ->    b/a/.*
2155       /a     ->    a/
2156       /a/b   ->    b/a/
2157
2158       /      ->    none
2159
2160    a[@attr = value]/b[@other = othervalue]
2161
2162  /e/@a val      range(e/,range(@a,freetext(w,1015,val),@a),e/)
2163  /a/b val       range(b/a/,freetext(w,1016,val),b/a/)
2164  /a/b/@c val    range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2165  /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2166  /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2167  /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
2168       
2169     */
2170
2171     dict_grep_cmap (zh->reg->dict, 0, 0);
2172
2173     for (base_no = 0; base_no < num_bases; base_no++)
2174     {
2175         int level = xpath_len;
2176         int first_path = 1;
2177         
2178         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2179         {
2180             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2181                            basenames[base_no]);
2182             *rset_out = rset;
2183             return ZEBRA_FAIL;
2184         }
2185         while (--level >= 0)
2186         {
2187             char xpath_rev[128];
2188             int i, len;
2189             RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
2190
2191             *xpath_rev = 0;
2192             len = 0;
2193             for (i = level; i >= 1; --i)
2194             {
2195                 const char *cp = xpath[i].part;
2196                 if (*cp)
2197                 {
2198                     for (;*cp; cp++)
2199                         if (*cp == '*')
2200                         {
2201                             memcpy (xpath_rev + len, "[^/]*", 5);
2202                             len += 5;
2203                         }
2204                         else if (*cp == ' ')
2205                         {
2206
2207                             xpath_rev[len++] = 1;
2208                             xpath_rev[len++] = ' ';
2209                         }
2210
2211                         else
2212                             xpath_rev[len++] = *cp;
2213                     xpath_rev[len++] = '/';
2214                 }
2215                 else if (i == 1)  /* // case */
2216                 {
2217                     xpath_rev[len++] = '.';
2218                     xpath_rev[len++] = '*';
2219                 }
2220             }
2221             xpath_rev[len] = 0;
2222
2223             if (xpath[level].predicate &&
2224                 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2225                 xpath[level].predicate->u.relation.name[0])
2226             {
2227                 WRBUF wbuf = wrbuf_alloc();
2228                 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2229                 if (xpath[level].predicate->u.relation.value)
2230                 {
2231                     const char *cp = xpath[level].predicate->u.relation.value;
2232                     wrbuf_putc(wbuf, '=');
2233                     
2234                     while (*cp)
2235                     {
2236                         if (strchr(REGEX_CHARS, *cp))
2237                             wrbuf_putc(wbuf, '\\');
2238                         wrbuf_putc(wbuf, *cp);
2239                         cp++;
2240                     }
2241                 }
2242                 wrbuf_puts(wbuf, "");
2243                 rset_attr = xpath_trunc(
2244                     zh, stream, '0', wrbuf_buf(wbuf), 3, 
2245                     curAttributeSet, rset_nmem, kc);
2246                 wrbuf_free(wbuf, 1);
2247             } 
2248             else 
2249             {
2250                 if (!first_path)
2251                     continue;
2252             }
2253             yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level, xpath_rev);
2254             if (strlen(xpath_rev))
2255             {
2256                 rset_start_tag = xpath_trunc(zh, stream, '0', 
2257                         xpath_rev, 1, curAttributeSet, rset_nmem, kc);
2258             
2259                 rset_end_tag = xpath_trunc(zh, stream, '0', 
2260                         xpath_rev, 2, curAttributeSet, rset_nmem, kc);
2261
2262                 rset = rsbetween_create(rset_nmem, kc, kc->scope,
2263                                         rset_start_tag, rset,
2264                                         rset_end_tag, rset_attr);
2265             }
2266             first_path = 0;
2267         }
2268     }
2269     *rset_out = rset;
2270     return ZEBRA_OK;
2271 }
2272
2273 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2274                                 oid_value attributeSet, NMEM stream,
2275                                 Z_SortKeySpecList *sort_sequence,
2276                                 int num_bases, char **basenames, 
2277                                 NMEM rset_nmem,
2278                                 RSET *rset,
2279                                 struct rset_key_control *kc)
2280 {
2281     ZEBRA_RES res = ZEBRA_OK;
2282     unsigned reg_id;
2283     char *search_type = NULL;
2284     char rank_type[128];
2285     int complete_flag;
2286     int sort_flag;
2287     char termz[IT_MAX_WORD+1];
2288     int xpath_len;
2289     int xpath_use = 0;
2290     struct xpath_location_step xpath[10];
2291
2292     if (!log_level_set)
2293     {
2294         log_level_rpn = yaz_log_module_level("rpn");
2295         log_level_set = 1;
2296     }
2297     zebra_maps_attr(zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2298                     rank_type, &complete_flag, &sort_flag);
2299     
2300     yaz_log(YLOG_DEBUG, "reg_id=%c", reg_id);
2301     yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2302     yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2303     yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2304
2305     if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2306         return ZEBRA_FAIL;
2307
2308     if (sort_flag)
2309         return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2310                              rank_type, rset_nmem, rset, kc);
2311     /* consider if an X-Path query is used */
2312     xpath_len = parse_xpath(zh, zapt, attributeSet, xpath, 10, stream);
2313     if (xpath_len >= 0)
2314     {
2315         xpath_use = 1016;  /* searching for element by default */
2316         if (xpath[xpath_len-1].part[0] == '@') 
2317             xpath_use = 1015;  /* last step an attribute .. */
2318     }
2319
2320     /* search using one of the various search type strategies
2321        termz is our UTF-8 search term
2322        attributeSet is top-level default attribute set 
2323        stream is ODR for search
2324        reg_id is the register type
2325        complete_flag is 1 for complete subfield, 0 for incomplete
2326        xpath_use is use-attribute to be used for X-Path search, 0 for none
2327     */
2328     if (!strcmp(search_type, "phrase"))
2329     {
2330         res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2331                                     reg_id, complete_flag, rank_type,
2332                                     xpath_use,
2333                                     num_bases, basenames, rset_nmem,
2334                                     rset, kc);
2335     }
2336     else if (!strcmp(search_type, "and-list"))
2337     {
2338         res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2339                                       reg_id, complete_flag, rank_type,
2340                                       xpath_use,
2341                                       num_bases, basenames, rset_nmem,
2342                                       rset, kc);
2343     }
2344     else if (!strcmp(search_type, "or-list"))
2345     {
2346         res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2347                                      reg_id, complete_flag, rank_type,
2348                                      xpath_use,
2349                                      num_bases, basenames, rset_nmem,
2350                                      rset, kc);
2351     }
2352     else if (!strcmp(search_type, "local"))
2353     {
2354         res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2355                                    rank_type, rset_nmem, rset, kc);
2356     }
2357     else if (!strcmp(search_type, "numeric"))
2358     {
2359         res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2360                                      reg_id, complete_flag, rank_type,
2361                                      xpath_use,
2362                                      num_bases, basenames, rset_nmem,
2363                                      rset, kc);
2364     }
2365     else
2366     {
2367         zebra_setError(zh, YAZ_BIB1_UNSUPP_STRUCTURE_ATTRIBUTE, 0);
2368         res = ZEBRA_FAIL;
2369     }
2370     if (res != ZEBRA_OK)
2371         return res;
2372     if (!*rset)
2373         return ZEBRA_FAIL;
2374     return rpn_search_xpath(zh, attributeSet, num_bases, basenames,
2375                             stream, rank_type, *rset, 
2376                             xpath_len, xpath, rset_nmem, rset, kc);
2377 }
2378
2379 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2380                                       oid_value attributeSet, 
2381                                       NMEM stream, NMEM rset_nmem,
2382                                       Z_SortKeySpecList *sort_sequence,
2383                                       int num_bases, char **basenames,
2384                                       RSET **result_sets, int *num_result_sets,
2385                                       Z_Operator *parent_op,
2386                                       struct rset_key_control *kc);
2387
2388 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2389                          oid_value attributeSet, 
2390                          NMEM stream, NMEM rset_nmem,
2391                          Z_SortKeySpecList *sort_sequence,
2392                          int num_bases, char **basenames,
2393                          RSET *result_set)
2394 {
2395     RSET *result_sets = 0;
2396     int num_result_sets = 0;
2397     ZEBRA_RES res;
2398     struct rset_key_control *kc = zebra_key_control_create(zh);
2399
2400     res = rpn_search_structure(zh, zs, attributeSet,
2401                                stream, rset_nmem,
2402                                sort_sequence, 
2403                                num_bases, basenames,
2404                                &result_sets, &num_result_sets,
2405                                0 /* no parent op */,
2406                                kc);
2407     if (res != ZEBRA_OK)
2408     {
2409         int i;
2410         for (i = 0; i<num_result_sets; i++)
2411             rset_delete(result_sets[i]);
2412         *result_set = 0;
2413     }
2414     else
2415     {
2416         assert(num_result_sets == 1);
2417         assert(result_sets);
2418         assert(*result_sets);
2419         *result_set = *result_sets;
2420     }
2421     (*kc->dec)(kc);
2422     return res;
2423 }
2424
2425 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2426                                oid_value attributeSet, 
2427                                NMEM stream, NMEM rset_nmem,
2428                                Z_SortKeySpecList *sort_sequence,
2429                                int num_bases, char **basenames,
2430                                RSET **result_sets, int *num_result_sets,
2431                                Z_Operator *parent_op,
2432                                struct rset_key_control *kc)
2433 {
2434     *num_result_sets = 0;
2435     if (zs->which == Z_RPNStructure_complex)
2436     {
2437         ZEBRA_RES res;
2438         Z_Operator *zop = zs->u.complex->roperator;
2439         RSET *result_sets_l = 0;
2440         int num_result_sets_l = 0;
2441         RSET *result_sets_r = 0;
2442         int num_result_sets_r = 0;
2443
2444         res = rpn_search_structure(zh, zs->u.complex->s1,
2445                                    attributeSet, stream, rset_nmem,
2446                                    sort_sequence,
2447                                    num_bases, basenames,
2448                                    &result_sets_l, &num_result_sets_l,
2449                                    zop, kc);
2450         if (res != ZEBRA_OK)
2451         {
2452             int i;
2453             for (i = 0; i<num_result_sets_l; i++)
2454                 rset_delete(result_sets_l[i]);
2455             return res;
2456         }
2457         res = rpn_search_structure(zh, zs->u.complex->s2,
2458                                    attributeSet, stream, rset_nmem,
2459                                    sort_sequence,
2460                                    num_bases, basenames,
2461                                    &result_sets_r, &num_result_sets_r,
2462                                    zop, kc);
2463         if (res != ZEBRA_OK)
2464         {
2465             int i;
2466             for (i = 0; i<num_result_sets_l; i++)
2467                 rset_delete(result_sets_l[i]);
2468             for (i = 0; i<num_result_sets_r; i++)
2469                 rset_delete(result_sets_r[i]);
2470             return res;
2471         }
2472
2473         /* make a new list of result for all children */
2474         *num_result_sets = num_result_sets_l + num_result_sets_r;
2475         *result_sets = nmem_malloc(stream, *num_result_sets * 
2476                                    sizeof(**result_sets));
2477         memcpy(*result_sets, result_sets_l, 
2478                num_result_sets_l * sizeof(**result_sets));
2479         memcpy(*result_sets + num_result_sets_l, result_sets_r, 
2480                num_result_sets_r * sizeof(**result_sets));
2481
2482         if (!parent_op || parent_op->which != zop->which
2483             || (zop->which != Z_Operator_and &&
2484                 zop->which != Z_Operator_or))
2485         {
2486             /* parent node different from this one (or non-present) */
2487             /* we must combine result sets now */
2488             RSET rset;
2489             switch (zop->which)
2490             {
2491             case Z_Operator_and:
2492                 rset = rsmulti_and_create(rset_nmem, kc,
2493                                           kc->scope,
2494                                           *num_result_sets, *result_sets);
2495                 break;
2496             case Z_Operator_or:
2497                 rset = rsmulti_or_create(rset_nmem, kc,
2498                                          kc->scope, 0, /* termid */
2499                                          *num_result_sets, *result_sets);
2500                 break;
2501             case Z_Operator_and_not:
2502                 rset = rsbool_create_not(rset_nmem, kc,
2503                                          kc->scope,
2504                                          (*result_sets)[0],
2505                                          (*result_sets)[1]);
2506                 break;
2507             case Z_Operator_prox:
2508                 if (zop->u.prox->which != Z_ProximityOperator_known)
2509                 {
2510                     zebra_setError(zh, 
2511                                    YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2512                                    0);
2513                     return ZEBRA_FAIL;
2514                 }
2515                 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2516                 {
2517                     zebra_setError_zint(zh,
2518                                         YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2519                                         *zop->u.prox->u.known);
2520                     return ZEBRA_FAIL;
2521                 }
2522                 else
2523                 {
2524                     rset = rsprox_create(rset_nmem, kc,
2525                                          kc->scope,
2526                                          *num_result_sets, *result_sets, 
2527                                          *zop->u.prox->ordered,
2528                                          (!zop->u.prox->exclusion ? 
2529                                           0 : *zop->u.prox->exclusion),
2530                                          *zop->u.prox->relationType,
2531                                          *zop->u.prox->distance );
2532                 }
2533                 break;
2534             default:
2535                 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
2536                 return ZEBRA_FAIL;
2537             }
2538             *num_result_sets = 1;
2539             *result_sets = nmem_malloc(stream, *num_result_sets * 
2540                                        sizeof(**result_sets));
2541             (*result_sets)[0] = rset;
2542         }
2543     }
2544     else if (zs->which == Z_RPNStructure_simple)
2545     {
2546         RSET rset;
2547         ZEBRA_RES res;
2548
2549         if (zs->u.simple->which == Z_Operand_APT)
2550         {
2551             yaz_log(YLOG_DEBUG, "rpn_search_APT");
2552             res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2553                                  attributeSet, stream, sort_sequence,
2554                                  num_bases, basenames, rset_nmem, &rset,
2555                                  kc);
2556             if (res != ZEBRA_OK)
2557                 return res;
2558         }
2559         else if (zs->u.simple->which == Z_Operand_resultSetId)
2560         {
2561             yaz_log(YLOG_DEBUG, "rpn_search_ref");
2562             rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2563             if (!rset)
2564             {
2565                 zebra_setError(zh, 
2566                                YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2567                                zs->u.simple->u.resultSetId);
2568                 return ZEBRA_FAIL;
2569             }
2570             rset_dup(rset);
2571         }
2572         else
2573         {
2574             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2575             return ZEBRA_FAIL;
2576         }
2577         *num_result_sets = 1;
2578         *result_sets = nmem_malloc(stream, *num_result_sets * 
2579                                    sizeof(**result_sets));
2580         (*result_sets)[0] = rset;
2581     }
2582     else
2583     {
2584         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2585         return ZEBRA_FAIL;
2586     }
2587     return ZEBRA_OK;
2588 }
2589
2590 struct scan_info_entry {
2591     char *term;
2592     ISAM_P isam_p;
2593 };
2594
2595 struct scan_info {
2596     struct scan_info_entry *list;
2597     ODR odr;
2598     int before, after;
2599     char prefix[20];
2600 };
2601
2602 static int scan_handle (char *name, const char *info, int pos, void *client)
2603 {
2604     int len_prefix, idx;
2605     struct scan_info *scan_info = (struct scan_info *) client;
2606
2607     len_prefix = strlen(scan_info->prefix);
2608     if (memcmp (name, scan_info->prefix, len_prefix))
2609         return 1;
2610     if (pos > 0)
2611         idx = scan_info->after - pos + scan_info->before;
2612     else
2613         idx = - pos - 1;
2614
2615     if (idx < 0)
2616         return 0;
2617     scan_info->list[idx].term = (char *)
2618         odr_malloc(scan_info->odr, strlen(name + len_prefix)+1);
2619     strcpy(scan_info->list[idx].term, name + len_prefix);
2620     assert (*info == sizeof(ISAM_P));
2621     memcpy (&scan_info->list[idx].isam_p, info+1, sizeof(ISAM_P));
2622     return 0;
2623 }
2624
2625 void zebra_term_untrans_iconv(ZebraHandle zh, NMEM stream, int reg_type,
2626                               char **dst, const char *src)
2627 {
2628     char term_src[IT_MAX_WORD];
2629     char term_dst[IT_MAX_WORD];
2630     
2631     zebra_term_untrans (zh, reg_type, term_src, src);
2632
2633     if (zh->iconv_from_utf8 != 0)
2634     {
2635         int len;
2636         char *inbuf = term_src;
2637         size_t inleft = strlen(term_src);
2638         char *outbuf = term_dst;
2639         size_t outleft = sizeof(term_dst)-1;
2640         size_t ret;
2641         
2642         ret = yaz_iconv (zh->iconv_from_utf8, &inbuf, &inleft,
2643                          &outbuf, &outleft);
2644         if (ret == (size_t)(-1))
2645             len = 0;
2646         else
2647             len = outbuf - term_dst;
2648         *dst = nmem_malloc(stream, len + 1);
2649         if (len > 0)
2650             memcpy (*dst, term_dst, len);
2651         (*dst)[len] = '\0';
2652     }
2653     else
2654         *dst = nmem_strdup(stream, term_src);
2655 }
2656
2657 static void count_set(ZebraHandle zh, RSET rset, zint *count)
2658 {
2659     zint psysno = 0;
2660     struct it_key key;
2661     RSFD rfd;
2662
2663     yaz_log(YLOG_DEBUG, "count_set");
2664
2665     rset->hits_limit = zh->approx_limit;
2666
2667     *count = 0;
2668     rfd = rset_open(rset, RSETF_READ);
2669     while (rset_read(rfd, &key,0 /* never mind terms */))
2670     {
2671         if (key.mem[0] != psysno)
2672         {
2673             psysno = key.mem[0];
2674             if (rfd->counted_items >= rset->hits_limit)
2675                 break;
2676         }
2677     }
2678     rset_close (rfd);
2679     *count = rset->hits_count;
2680 }
2681
2682 ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
2683                    oid_value attributeset,
2684                    int num_bases, char **basenames,
2685                    int *position, int *num_entries, ZebraScanEntry **list,
2686                    int *is_partial, RSET limit_set, int return_zero)
2687 {
2688     int i;
2689     int pos = *position;
2690     int num = *num_entries;
2691     int before;
2692     int after;
2693     int base_no;
2694     char termz[IT_MAX_WORD+20];
2695     AttrType use;
2696     int use_value;
2697     const char *use_string = 0;
2698     struct scan_info *scan_info_array;
2699     ZebraScanEntry *glist;
2700     int ords[32], ord_no = 0;
2701     int ptr[32];
2702
2703     int bases_ok = 0;     /* no of databases with OK attribute */
2704     int errCode = 0;      /* err code (if any is not OK) */
2705     char *errString = 0;  /* addinfo */
2706
2707     unsigned reg_id;
2708     char *search_type = NULL;
2709     char rank_type[128];
2710     int complete_flag;
2711     int sort_flag;
2712     NMEM rset_nmem = NULL; 
2713     struct rset_key_control *kc = 0;
2714
2715     *list = 0;
2716     *is_partial = 0;
2717
2718     if (attributeset == VAL_NONE)
2719         attributeset = VAL_BIB1;
2720
2721     if (!limit_set)
2722     {
2723         AttrType termset;
2724         int termset_value_numeric;
2725         const char *termset_value_string;
2726         attr_init(&termset, zapt, 8);
2727         termset_value_numeric =
2728             attr_find_ex(&termset, NULL, &termset_value_string);
2729         if (termset_value_numeric != -1)
2730         {
2731             char resname[32];
2732             const char *termset_name = 0;
2733             
2734             if (termset_value_numeric != -2)
2735             {
2736                 
2737                 sprintf(resname, "%d", termset_value_numeric);
2738                 termset_name = resname;
2739             }
2740             else
2741                 termset_name = termset_value_string;
2742             
2743             limit_set = resultSetRef (zh, termset_name);
2744         }
2745     }
2746         
2747     yaz_log(YLOG_DEBUG, "position = %d, num = %d set=%d",
2748             pos, num, attributeset);
2749         
2750     attr_init(&use, zapt, 1);
2751     use_value = attr_find_ex(&use, &attributeset, &use_string);
2752
2753     if (zebra_maps_attr(zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2754                         rank_type, &complete_flag, &sort_flag))
2755     {
2756         *num_entries = 0;
2757         zebra_setError(zh, YAZ_BIB1_UNSUPP_ATTRIBUTE_TYPE, 0);
2758         return ZEBRA_FAIL;
2759     }
2760     yaz_log(YLOG_DEBUG, "use_value = %d", use_value);
2761
2762     if (use_value == -1)
2763         use_value = 1016;
2764     for (base_no = 0; base_no < num_bases && ord_no < 32; base_no++)
2765     {
2766         data1_local_attribute *local_attr;
2767         attent attp;
2768         int ord;
2769
2770         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2771         {
2772             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2773                            basenames[base_no]);
2774             *num_entries = 0;
2775             return ZEBRA_FAIL;
2776         }
2777
2778         if (use_string &&
2779             (ord = zebraExplain_lookup_attr_str(zh->reg->zei,
2780                                                 use_string)) >= 0)
2781         {
2782             /* we have a match for a raw string attribute */
2783             if (ord > 0)
2784                 ords[ord_no++] = ord;
2785             attp.local_attributes = 0;  /* no more attributes */
2786         }
2787         else
2788         {
2789             int r;
2790             
2791             if ((r = att_getentbyatt (zh, &attp, attributeset, use_value,
2792                                       use_string)))
2793             {
2794                 yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d",
2795                         attributeset, use_value);
2796                 if (r == -1)
2797                 {
2798                     errCode = YAZ_BIB1_UNSUPP_USE_ATTRIBUTE;
2799                     if (use_string)
2800                         zebra_setError(zh, YAZ_BIB1_UNSUPP_USE_ATTRIBUTE,
2801                                        use_string);
2802                     else
2803                         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_USE_ATTRIBUTE,
2804                                             use_value);
2805                 }   
2806                 else
2807                 {
2808                     zebra_setError(zh, YAZ_BIB1_UNSUPP_ATTRIBUTE_SET, 0);
2809                 }
2810                 continue;
2811             }
2812         }
2813         bases_ok++;
2814         for (local_attr = attp.local_attributes; local_attr && ord_no < 32;
2815              local_attr = local_attr->next)
2816         {
2817             ord = zebraExplain_lookup_attr_su(zh->reg->zei,
2818                                               attp.attset_ordinal,
2819                                               local_attr->local);
2820             if (ord > 0)
2821                 ords[ord_no++] = ord;
2822         }
2823     }
2824     if (!bases_ok && errCode)
2825     {
2826         zebra_setError(zh, errCode, errString);
2827         *num_entries = 0;
2828         return ZEBRA_FAIL;
2829     }
2830     if (ord_no == 0)
2831     {
2832         *num_entries = 0;
2833         return ZEBRA_OK;
2834     }
2835     /* prepare dictionary scanning */
2836     if (num < 1)
2837     {
2838         *num_entries = 0;
2839         return ZEBRA_OK;
2840     }
2841     before = pos-1;
2842     if (before < 0)
2843         before = 0;
2844     after = 1+num-pos;
2845     if (after < 0)
2846         after = 0;
2847     yaz_log(YLOG_DEBUG, "rpn_scan pos=%d num=%d before=%d "
2848             "after=%d before+after=%d",
2849             pos, num, before, after, before+after);
2850     scan_info_array = (struct scan_info *)
2851         odr_malloc(stream, ord_no * sizeof(*scan_info_array));
2852     for (i = 0; i < ord_no; i++)
2853     {
2854         int j, prefix_len = 0;
2855         int before_tmp = before, after_tmp = after;
2856         struct scan_info *scan_info = scan_info_array + i;
2857         struct rpn_char_map_info rcmi;
2858
2859         rpn_char_map_prepare (zh->reg, reg_id, &rcmi);
2860
2861         scan_info->before = before;
2862         scan_info->after = after;
2863         scan_info->odr = stream;
2864
2865         scan_info->list = (struct scan_info_entry *)
2866             odr_malloc(stream, (before+after) * sizeof(*scan_info->list));
2867         for (j = 0; j<before+after; j++)
2868             scan_info->list[j].term = NULL;
2869
2870         prefix_len += key_SU_encode (ords[i], termz + prefix_len);
2871         termz[prefix_len++] = reg_id;
2872         termz[prefix_len] = 0;
2873         strcpy(scan_info->prefix, termz);
2874
2875         if (trans_scan_term(zh, zapt, termz+prefix_len, reg_id) == ZEBRA_FAIL)
2876             return ZEBRA_FAIL;
2877         
2878         dict_scan(zh->reg->dict, termz, &before_tmp, &after_tmp,
2879                   scan_info, scan_handle);
2880     }
2881     glist = (ZebraScanEntry *)
2882         odr_malloc(stream, (before+after)*sizeof(*glist));
2883
2884     rset_nmem = nmem_create();
2885     kc = zebra_key_control_create(zh);
2886
2887     /* consider terms after main term */
2888     for (i = 0; i < ord_no; i++)
2889         ptr[i] = before;
2890     
2891     *is_partial = 0;
2892     for (i = 0; i<after; i++)
2893     {
2894         int j, j0 = -1;
2895         const char *mterm = NULL;
2896         const char *tst;
2897         RSET rset = 0;
2898         int lo = i + pos-1; /* offset in result list */
2899
2900         /* find: j0 is the first of the minimal values */
2901         for (j = 0; j < ord_no; j++)
2902         {
2903             if (ptr[j] < before+after && ptr[j] >= 0 &&
2904                 (tst = scan_info_array[j].list[ptr[j]].term) &&
2905                 (!mterm || strcmp (tst, mterm) < 0))
2906             {
2907                 j0 = j;
2908                 mterm = tst;
2909             }
2910         }
2911         if (j0 == -1)
2912             break;  /* no value found, stop */
2913
2914         /* get result set for first one , but only if it's within bounds */
2915         if (lo >= 0)
2916         {
2917             /* get result set for first term */
2918             zebra_term_untrans_iconv(zh, stream->mem, reg_id,
2919                                      &glist[lo].term, mterm);
2920             rset = rset_trunc(zh, &scan_info_array[j0].list[ptr[j0]].isam_p, 1,
2921                               glist[lo].term, strlen(glist[lo].term),
2922                               NULL, 0, zapt->term->which, rset_nmem, 
2923                               kc, kc->scope, 0, reg_id);
2924         }
2925         ptr[j0]++; /* move index for this set .. */
2926         /* get result set for remaining scan terms */
2927         for (j = j0+1; j<ord_no; j++)
2928         {
2929             if (ptr[j] < before+after && ptr[j] >= 0 &&
2930                 (tst = scan_info_array[j].list[ptr[j]].term) &&
2931                 !strcmp (tst, mterm))
2932             {
2933                 if (lo >= 0)
2934                 {
2935                     RSET rsets[2];
2936                     
2937                     rsets[0] = rset;
2938                     rsets[1] =
2939                         rset_trunc(
2940                             zh, &scan_info_array[j].list[ptr[j]].isam_p, 1,
2941                             glist[lo].term,
2942                             strlen(glist[lo].term), NULL, 0,
2943                             zapt->term->which,rset_nmem,
2944                             kc, kc->scope, 0, reg_id);
2945                     rset = rsmulti_or_create(rset_nmem, kc,
2946                                              kc->scope, 0 /* termid */,
2947                                              2, rsets);
2948                 }
2949                 ptr[j]++;
2950             }
2951         }
2952         if (lo >= 0)
2953         {
2954             zint count;
2955             /* merge with limit_set if given */
2956             if (limit_set)
2957             {
2958                 RSET rsets[2];
2959                 rsets[0] = rset;
2960                 rsets[1] = rset_dup(limit_set);
2961                 
2962                 rset = rsmulti_and_create(rset_nmem, kc,
2963                                           kc->scope,
2964                                           2, rsets);
2965             }
2966             /* count it */
2967             count_set(zh, rset, &count);
2968             glist[lo].occurrences = count;
2969             rset_delete(rset);
2970         }
2971     }
2972     if (i < after)
2973     {
2974         *num_entries -= (after-i);
2975         *is_partial = 1;
2976         if (*num_entries < 0)
2977         {
2978             (*kc->dec)(kc);
2979             nmem_destroy(rset_nmem);
2980             *num_entries = 0;
2981             return ZEBRA_OK;
2982         }
2983     }
2984     /* consider terms before main term */
2985     for (i = 0; i<ord_no; i++)
2986         ptr[i] = 0;
2987     
2988     for (i = 0; i<before; i++)
2989     {
2990         int j, j0 = -1;
2991         const char *mterm = NULL;
2992         const char *tst;
2993         RSET rset;
2994         int lo = before-1-i; /* offset in result list */
2995         zint count;
2996         
2997         for (j = 0; j <ord_no; j++)
2998         {
2999             if (ptr[j] < before && ptr[j] >= 0 &&
3000                 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
3001                 (!mterm || strcmp (tst, mterm) > 0))
3002             {
3003                 j0 = j;
3004                     mterm = tst;
3005             }
3006         }
3007         if (j0 == -1)
3008             break;
3009         
3010         zebra_term_untrans_iconv(zh, stream->mem, reg_id,
3011                                  &glist[lo].term, mterm);
3012         
3013         rset = rset_trunc
3014             (zh, &scan_info_array[j0].list[before-1-ptr[j0]].isam_p, 1,
3015              glist[lo].term, strlen(glist[lo].term),
3016              NULL, 0, zapt->term->which, rset_nmem,
3017              kc, kc->scope, 0, reg_id);
3018         
3019         ptr[j0]++;
3020         
3021         for (j = j0+1; j<ord_no; j++)
3022         {
3023             if (ptr[j] < before && ptr[j] >= 0 &&
3024                 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
3025                 !strcmp (tst, mterm))
3026             {
3027                 RSET rsets[2];
3028                 
3029                 rsets[0] = rset;
3030                 rsets[1] = rset_trunc(
3031                     zh,
3032                     &scan_info_array[j].list[before-1-ptr[j]].isam_p, 1,
3033                     glist[lo].term,
3034                     strlen(glist[lo].term), NULL, 0,
3035                     zapt->term->which, rset_nmem,
3036                     kc, kc->scope, 0, reg_id);
3037                 rset = rsmulti_or_create(rset_nmem, kc,
3038                                          kc->scope, 0 /* termid */, 2, rsets);
3039                 
3040                 ptr[j]++;
3041             }
3042         }
3043         if (limit_set)
3044         {
3045             RSET rsets[2];
3046             rsets[0] = rset;
3047             rsets[1] = rset_dup(limit_set);
3048             
3049             rset = rsmulti_and_create(rset_nmem, kc,
3050                                       kc->scope, 2, rsets);
3051         }
3052         count_set(zh, rset, &count);
3053         glist[lo].occurrences = count;
3054         rset_delete (rset);
3055     }
3056     (*kc->dec)(kc);
3057     nmem_destroy(rset_nmem);
3058     i = before-i;
3059     if (i)
3060     {
3061         *is_partial = 1;
3062         *position -= i;
3063         *num_entries -= i;
3064         if (*num_entries <= 0)
3065         {
3066             *num_entries = 0;
3067             return ZEBRA_OK;
3068         }
3069     }
3070     
3071     *list = glist + i;               /* list is set to first 'real' entry */
3072     
3073     yaz_log(YLOG_DEBUG, "position = %d, num_entries = %d",
3074             *position, *num_entries);
3075     return ZEBRA_OK;
3076 }
3077