Added support for term hit counts. This was not in place for earlier
[idzebra-moved-to-github.git] / index / zrpn.c
1 /* $Id: zrpn.c,v 1.192 2005-05-24 11:35:42 adam Exp $
2    Copyright (C) 1995-2005
3    Index Data ApS
4
5 This file is part of the Zebra server.
6
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra.  If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA.
21 */
22
23 #include <stdio.h>
24 #include <assert.h>
25 #ifdef WIN32
26 #include <io.h>
27 #else
28 #include <unistd.h>
29 #endif
30 #include <ctype.h>
31
32 #include <yaz/diagbib1.h>
33 #include "index.h"
34 #include <zebra_xpath.h>
35
36 #include <charmap.h>
37 #include <rset.h>
38
39 struct rpn_char_map_info
40 {
41     ZebraMaps zm;
42     int reg_type;
43 };
44
45 typedef struct
46 {
47     int type;
48     int major;
49     int minor;
50     Z_AttributesPlusTerm *zapt;
51 } AttrType;
52
53
/* Lazily initialised log level for the "rpn" module; log_level_set is
   non-zero once log_level_rpn has been looked up. Both start at zero
   (static storage duration). */
static int log_level_set;
static int log_level_rpn;
56
57 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
58 {
59     struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
60     const char **out = zebra_maps_input(p->zm, p->reg_type, from, len, 0);
61 #if 0
62     if (out && *out)
63     {
64         const char *outp = *out;
65         yaz_log(YLOG_LOG, "---");
66         while (*outp)
67         {
68             yaz_log(YLOG_LOG, "%02X", *outp);
69             outp++;
70         }
71     }
72 #endif
73     return out;
74 }
75
76 static void rpn_char_map_prepare(struct zebra_register *reg, int reg_type,
77                                   struct rpn_char_map_info *map_info)
78 {
79     map_info->zm = reg->zebra_maps;
80     map_info->reg_type = reg_type;
81     dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
82 }
83
84 static int attr_find_ex(AttrType *src, oid_value *attributeSetP,
85                          const char **string_value)
86 {
87     int num_attributes;
88
89     num_attributes = src->zapt->attributes->num_attributes;
90     while (src->major < num_attributes)
91     {
92         Z_AttributeElement *element;
93
94         element = src->zapt->attributes->attributes[src->major];
95         if (src->type == *element->attributeType)
96         {
97             switch (element->which) 
98             {
99             case Z_AttributeValue_numeric:
100                 ++(src->major);
101                 if (element->attributeSet && attributeSetP)
102                 {
103                     oident *attrset;
104
105                     attrset = oid_getentbyoid(element->attributeSet);
106                     *attributeSetP = attrset->value;
107                 }
108                 return *element->value.numeric;
109                 break;
110             case Z_AttributeValue_complex:
111                 if (src->minor >= element->value.complex->num_list)
112                     break;
113                 if (element->attributeSet && attributeSetP)
114                 {
115                     oident *attrset;
116                     
117                     attrset = oid_getentbyoid(element->attributeSet);
118                     *attributeSetP = attrset->value;
119                 }
120                 if (element->value.complex->list[src->minor]->which ==  
121                     Z_StringOrNumeric_numeric)
122                 {
123                     ++(src->minor);
124                     return
125                         *element->value.complex->list[src->minor-1]->u.numeric;
126                 }
127                 else if (element->value.complex->list[src->minor]->which ==  
128                          Z_StringOrNumeric_string)
129                 {
130                     if (!string_value)
131                         break;
132                     ++(src->minor);
133                     *string_value = 
134                         element->value.complex->list[src->minor-1]->u.string;
135                     return -2;
136                 }
137                 else
138                     break;
139             default:
140                 assert(0);
141             }
142         }
143         ++(src->major);
144     }
145     return -1;
146 }
147
148 static int attr_find(AttrType *src, oid_value *attributeSetP)
149 {
150     return attr_find_ex(src, attributeSetP, 0);
151 }
152
153 static void attr_init(AttrType *src, Z_AttributesPlusTerm *zapt,
154                        int type)
155 {
156     src->zapt = zapt;
157     src->type = type;
158     src->major = 0;
159     src->minor = 0;
160 }
161
162 #define TERM_COUNT        
163        
164 struct grep_info {        
165 #ifdef TERM_COUNT        
166     int *term_no;        
167 #endif        
168     ISAM_P *isam_p_buf;
169     int isam_p_size;        
170     int isam_p_indx;
171     ZebraHandle zh;
172     int reg_type;
173     ZebraSet termset;
174 };        
175
176 static void term_untrans(ZebraHandle zh, int reg_type,
177                            char *dst, const char *src)
178 {
179     int len = 0;
180     while (*src)
181     {
182         const char *cp = zebra_maps_output(zh->reg->zebra_maps,
183                                            reg_type, &src);
184         if (!cp && len < IT_MAX_WORD-1)
185             dst[len++] = *src++;
186         else
187             while (*cp && len < IT_MAX_WORD-1)
188                 dst[len++] = *cp++;
189     }
190     dst[len] = '\0';
191 }
192
193 static void add_isam_p(const char *name, const char *info,
194                        struct grep_info *p)
195 {
196     if (!log_level_set)
197     {
198         log_level_rpn = yaz_log_module_level("rpn");
199         log_level_set = 1;
200     }
201     if (p->isam_p_indx == p->isam_p_size)
202     {
203         ISAM_P *new_isam_p_buf;
204 #ifdef TERM_COUNT        
205         int *new_term_no;        
206 #endif
207         p->isam_p_size = 2*p->isam_p_size + 100;
208         new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
209                                             p->isam_p_size);
210         if (p->isam_p_buf)
211         {
212             memcpy(new_isam_p_buf, p->isam_p_buf,
213                     p->isam_p_indx * sizeof(*p->isam_p_buf));
214             xfree(p->isam_p_buf);
215         }
216         p->isam_p_buf = new_isam_p_buf;
217
218 #ifdef TERM_COUNT
219         new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
220         if (p->term_no)
221         {
222             memcpy(new_term_no, p->isam_p_buf,
223                     p->isam_p_indx * sizeof(*p->term_no));
224             xfree(p->term_no);
225         }
226         p->term_no = new_term_no;
227 #endif
228     }
229     assert(*info == sizeof(*p->isam_p_buf));
230     memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
231
232 #if 1
233     if (p->termset)
234     {
235         const char *db;
236         int set, use;
237         char term_tmp[IT_MAX_WORD];
238         int su_code = 0;
239         int len = key_SU_decode (&su_code, name);
240         
241         term_untrans  (p->zh, p->reg_type, term_tmp, name+len+1);
242         yaz_log(log_level_rpn, "grep: %d %c %s", su_code, name[len], term_tmp);
243         zebraExplain_lookup_ord (p->zh->reg->zei,
244                                  su_code, &db, &set, &use);
245         yaz_log(log_level_rpn, "grep:  set=%d use=%d db=%s", set, use, db);
246         
247         resultSetAddTerm(p->zh, p->termset, name[len], db,
248                          set, use, term_tmp);
249     }
250 #endif
251     (p->isam_p_indx)++;
252 }
253
/* grep_handle: grep callback; p is the struct grep_info that collects
   the hit. Always returns 0. */
static int grep_handle(char *name, const char *info, void *p)
{
    struct grep_info *collector = (struct grep_info *) p;

    add_isam_p(name, info, collector);
    return 0;
}
259
260 static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src,
261                     const char *ct1, const char *ct2, int first)
262 {
263     const char *s1, *s0 = *src;
264     const char **map;
265
266     /* skip white space */
267     while (*s0)
268     {
269         if (ct1 && strchr(ct1, *s0))
270             break;
271         if (ct2 && strchr(ct2, *s0))
272             break;
273         s1 = s0;
274         map = zebra_maps_input(zebra_maps, reg_type, &s1, strlen(s1), first);
275         if (**map != *CHR_SPACE)
276             break;
277         s0 = s1;
278     }
279     *src = s0;
280     return *s0;
281 }
282
283
/* esc_str: format in_buf as a readable dump for logging.
 *  Each input byte becomes "XX:c  " (hex value, then the character itself
 *  or '?' when it is outside 32..126). Output stops early, followed by
 *  "..", once fewer than 20 bytes of out_buf remain unused.
 *  out_size must be greater than 20.
 */
static void esc_str(char *out_buf, int out_size,
                    const char *in_buf, int in_size)
{
    int idx = 0;

    assert(out_buf);
    assert(in_buf);
    assert(out_size > 20);
    out_buf[0] = '\0';
    while (idx < in_size)
    {
        const int byte = in_buf[idx++] & 0xff;
        const int shown = (byte >= 32 && byte <= 126) ? byte : '?';
        char *tail = out_buf + strlen(out_buf);

        sprintf(tail, "%02X:%c  ", byte, shown);
        if (strlen(out_buf) > out_size-20)
        {
            strcat(out_buf, "..");
            break;
        }
    }
}
309
310 #define REGEX_CHARS " []()|.*+?!"
311
312 /* term_100: handle term, where trunc = none(no operators at all) */
313 static int term_100(ZebraMaps zebra_maps, int reg_type,
314                     const char **src, char *dst, int space_split,
315                     char *dst_term)
316 {
317     const char *s0;
318     const char **map;
319     int i = 0;
320     int j = 0;
321
322     const char *space_start = 0;
323     const char *space_end = 0;
324
325     if (!term_pre(zebra_maps, reg_type, src, NULL, NULL, !space_split))
326         return 0;
327     s0 = *src;
328     while (*s0)
329     {
330         const char *s1 = s0;
331         int q_map_match = 0;
332         map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
333                                 &q_map_match);
334         if (space_split)
335         {
336             if (**map == *CHR_SPACE)
337                 break;
338         }
339         else  /* complete subfield only. */
340         {
341             if (**map == *CHR_SPACE)
342             {   /* save space mapping for later  .. */
343                 space_start = s1;
344                 space_end = s0;
345                 continue;
346             }
347             else if (space_start)
348             {   /* reload last space */
349                 while (space_start < space_end)
350                 {
351                     if (strchr(REGEX_CHARS, *space_start))
352                         dst[i++] = '\\';
353                     dst_term[j++] = *space_start;
354                     dst[i++] = *space_start++;
355                 }
356                 /* and reset */
357                 space_start = space_end = 0;
358             }
359         }
360         /* add non-space char */
361         memcpy(dst_term+j, s1, s0 - s1);
362         j += (s0 - s1);
363         if (!q_map_match)
364         {
365             while (s1 < s0)
366             {
367                 if (strchr(REGEX_CHARS, *s1))
368                     dst[i++] = '\\';
369                 dst[i++] = *s1++;
370             }
371         }
372         else
373         {
374             char tmpbuf[80];
375             esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
376             
377             strcpy(dst + i, map[0]);
378             i += strlen(map[0]);
379         }
380     }
381     dst[i] = '\0';
382     dst_term[j] = '\0';
383     *src = s0;
384     return i;
385 }
386
387 /* term_101: handle term, where trunc = Process # */
388 static int term_101(ZebraMaps zebra_maps, int reg_type,
389                     const char **src, char *dst, int space_split,
390                     char *dst_term)
391 {
392     const char *s0;
393     const char **map;
394     int i = 0;
395     int j = 0;
396
397     if (!term_pre(zebra_maps, reg_type, src, "#", "#", !space_split))
398         return 0;
399     s0 = *src;
400     while (*s0)
401     {
402         if (*s0 == '#')
403         {
404             dst[i++] = '.';
405             dst[i++] = '*';
406             dst_term[j++] = *s0++;
407         }
408         else
409         {
410             const char *s1 = s0;
411             int q_map_match = 0;
412             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
413                                     &q_map_match);
414             if (space_split && **map == *CHR_SPACE)
415                 break;
416
417             /* add non-space char */
418             memcpy(dst_term+j, s1, s0 - s1);
419             j += (s0 - s1);
420             if (!q_map_match)
421             {
422                 while (s1 < s0)
423                 {
424                     if (strchr(REGEX_CHARS, *s1))
425                         dst[i++] = '\\';
426                     dst[i++] = *s1++;
427                 }
428             }
429             else
430             {
431                 char tmpbuf[80];
432                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
433                 
434                 strcpy(dst + i, map[0]);
435                 i += strlen(map[0]);
436             }
437         }
438     }
439     dst[i] = '\0';
440     dst_term[j++] = '\0';
441     *src = s0;
442     return i;
443 }
444
445 /* term_103: handle term, where trunc = re-2 (regular expressions) */
446 static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src,
447                     char *dst, int *errors, int space_split,
448                     char *dst_term)
449 {
450     int i = 0;
451     int j = 0;
452     const char *s0;
453     const char **map;
454
455     if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split))
456         return 0;
457     s0 = *src;
458     if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
459         isdigit(((const unsigned char *)s0)[1]))
460     {
461         *errors = s0[1] - '0';
462         s0 += 3;
463         if (*errors > 3)
464             *errors = 3;
465     }
466     while (*s0)
467     {
468         if (strchr("^\\()[].*+?|-", *s0))
469         {
470             dst_term[j++] = *s0;
471             dst[i++] = *s0++;
472         }
473         else
474         {
475             const char *s1 = s0;
476             int q_map_match = 0;
477             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
478                                     &q_map_match);
479             if (space_split && **map == *CHR_SPACE)
480                 break;
481
482             /* add non-space char */
483             memcpy(dst_term+j, s1, s0 - s1);
484             j += (s0 - s1);
485             if (!q_map_match)
486             {
487                 while (s1 < s0)
488                 {
489                     if (strchr(REGEX_CHARS, *s1))
490                         dst[i++] = '\\';
491                     dst[i++] = *s1++;
492                 }
493             }
494             else
495             {
496                 char tmpbuf[80];
497                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
498                 
499                 strcpy(dst + i, map[0]);
500                 i += strlen(map[0]);
501             }
502         }
503     }
504     dst[i] = '\0';
505     dst_term[j] = '\0';
506     *src = s0;
507     
508     return i;
509 }
510
511 /* term_103: handle term, where trunc = re-1 (regular expressions) */
512 static int term_102(ZebraMaps zebra_maps, int reg_type, const char **src,
513                     char *dst, int space_split, char *dst_term)
514 {
515     return term_103(zebra_maps, reg_type, src, dst, NULL, space_split,
516                     dst_term);
517 }
518
519
520 /* term_104: handle term, where trunc = Process # and ! */
521 static int term_104(ZebraMaps zebra_maps, int reg_type,
522                     const char **src, char *dst, int space_split,
523                     char *dst_term)
524 {
525     const char *s0;
526     const char **map;
527     int i = 0;
528     int j = 0;
529
530     if (!term_pre(zebra_maps, reg_type, src, "?*#", "?*#", !space_split))
531         return 0;
532     s0 = *src;
533     while (*s0)
534     {
535         if (*s0 == '?')
536         {
537             dst_term[j++] = *s0++;
538             if (*s0 >= '0' && *s0 <= '9')
539             {
540                 int limit = 0;
541                 while (*s0 >= '0' && *s0 <= '9')
542                 {
543                     limit = limit * 10 + (*s0 - '0');
544                     dst_term[j++] = *s0++;
545                 }
546                 if (limit > 20)
547                     limit = 20;
548                 while (--limit >= 0)
549                 {
550                     dst[i++] = '.';
551                     dst[i++] = '?';
552                 }
553             }
554             else
555             {
556                 dst[i++] = '.';
557                 dst[i++] = '*';
558             }
559         }
560         else if (*s0 == '*')
561         {
562             dst[i++] = '.';
563             dst[i++] = '*';
564             dst_term[j++] = *s0++;
565         }
566         else if (*s0 == '#')
567         {
568             dst[i++] = '.';
569             dst_term[j++] = *s0++;
570         }
571         else
572         {
573             const char *s1 = s0;
574             int q_map_match = 0;
575             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
576                                     &q_map_match);
577             if (space_split && **map == *CHR_SPACE)
578                 break;
579
580             /* add non-space char */
581             memcpy(dst_term+j, s1, s0 - s1);
582             j += (s0 - s1);
583             if (!q_map_match)
584             {
585                 while (s1 < s0)
586                 {
587                     if (strchr(REGEX_CHARS, *s1))
588                         dst[i++] = '\\';
589                     dst[i++] = *s1++;
590                 }
591             }
592             else
593             {
594                 char tmpbuf[80];
595                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
596                 
597                 strcpy(dst + i, map[0]);
598                 i += strlen(map[0]);
599             }
600         }
601     }
602     dst[i] = '\0';
603     dst_term[j++] = '\0';
604     *src = s0;
605     return i;
606 }
607
608 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
609 static int term_105(ZebraMaps zebra_maps, int reg_type,
610                     const char **src, char *dst, int space_split,
611                     char *dst_term, int right_truncate)
612 {
613     const char *s0;
614     const char **map;
615     int i = 0;
616     int j = 0;
617
618     if (!term_pre(zebra_maps, reg_type, src, "*!", "*!", !space_split))
619         return 0;
620     s0 = *src;
621     while (*s0)
622     {
623         if (*s0 == '*')
624         {
625             dst[i++] = '.';
626             dst[i++] = '*';
627             dst_term[j++] = *s0++;
628         }
629         else if (*s0 == '!')
630         {
631             dst[i++] = '.';
632             dst_term[j++] = *s0++;
633         }
634         else
635         {
636             const char *s1 = s0;
637             int q_map_match = 0;
638             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
639                                     &q_map_match);
640             if (space_split && **map == *CHR_SPACE)
641                 break;
642
643             /* add non-space char */
644             memcpy(dst_term+j, s1, s0 - s1);
645             j += (s0 - s1);
646             if (!q_map_match)
647             {
648                 while (s1 < s0)
649                 {
650                     if (strchr(REGEX_CHARS, *s1))
651                         dst[i++] = '\\';
652                     dst[i++] = *s1++;
653                 }
654             }
655             else
656             {
657                 char tmpbuf[80];
658                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
659                 
660                 strcpy(dst + i, map[0]);
661                 i += strlen(map[0]);
662             }
663         }
664     }
665     if (right_truncate)
666     {
667         dst[i++] = '.';
668         dst[i++] = '*';
669     }
670     dst[i] = '\0';
671     
672     dst_term[j++] = '\0';
673     *src = s0;
674     return i;
675 }
676
677
678 /* gen_regular_rel - generate regular expression from relation
679  *  val:     border value (inclusive)
680  *  islt:    1 if <=; 0 if >=.
681  */
682 static void gen_regular_rel(char *dst, int val, int islt)
683 {
684     int dst_p;
685     int w, d, i;
686     int pos = 0;
687     char numstr[20];
688
689     yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
690     if (val >= 0)
691     {
692         if (islt)
693             strcpy(dst, "(-[0-9]+|(");
694         else
695             strcpy(dst, "((");
696     } 
697     else
698     {
699         if (!islt)
700         {
701             strcpy(dst, "([0-9]+|-(");
702             dst_p = strlen(dst);
703             islt = 1;
704         }
705         else
706         {
707             strcpy(dst, "(-(");
708             islt = 0;
709         }
710         val = -val;
711     }
712     dst_p = strlen(dst);
713     sprintf(numstr, "%d", val);
714     for (w = strlen(numstr); --w >= 0; pos++)
715     {
716         d = numstr[w];
717         if (pos > 0)
718         {
719             if (islt)
720             {
721                 if (d == '0')
722                     continue;
723                 d--;
724             } 
725             else
726             {
727                 if (d == '9')
728                     continue;
729                 d++;
730             }
731         }
732         
733         strcpy(dst + dst_p, numstr);
734         dst_p = strlen(dst) - pos - 1;
735
736         if (islt)
737         {
738             if (d != '0')
739             {
740                 dst[dst_p++] = '[';
741                 dst[dst_p++] = '0';
742                 dst[dst_p++] = '-';
743                 dst[dst_p++] = d;
744                 dst[dst_p++] = ']';
745             }
746             else
747                 dst[dst_p++] = d;
748         }
749         else
750         {
751             if (d != '9')
752             { 
753                 dst[dst_p++] = '[';
754                 dst[dst_p++] = d;
755                 dst[dst_p++] = '-';
756                 dst[dst_p++] = '9';
757                 dst[dst_p++] = ']';
758             }
759             else
760                 dst[dst_p++] = d;
761         }
762         for (i = 0; i<pos; i++)
763         {
764             dst[dst_p++] = '[';
765             dst[dst_p++] = '0';
766             dst[dst_p++] = '-';
767             dst[dst_p++] = '9';
768             dst[dst_p++] = ']';
769         }
770         dst[dst_p++] = '|';
771     }
772     dst[dst_p] = '\0';
773     if (islt)
774     {
775         /* match everything less than 10^(pos-1) */
776         strcat(dst, "0*");
777         for (i = 1; i<pos; i++)
778             strcat(dst, "[0-9]?");
779     }
780     else
781     {
782         /* match everything greater than 10^pos */
783         for (i = 0; i <= pos; i++)
784             strcat(dst, "[0-9]");
785         strcat(dst, "[0-9]*");
786     }
787     strcat(dst, "))");
788 }
789
/* string_rel_add_char: copy one (possibly backslash-escaped) character
 * from src[*indx] to *term_p, advancing both. An escaped character is
 * copied together with its backslash.
 */
void string_rel_add_char(char **term_p, const char *src, int *indx)
{
    char *out = *term_p;
    int pos = *indx;

    if (src[pos] == '\\')
        *out++ = src[pos++];
    *out++ = src[pos++];

    *indx = pos;
    *term_p = out;
}
796
797 /*
798  *   >  abc     ([b-].*|a[c-].*|ab[d-].*|abc.+)
799  *              ([^-a].*|a[^-b].*ab[^-c].*|abc.+)
800  *   >= abc     ([b-].*|a[c-].*|ab[c-].*)
801  *              ([^-a].*|a[^-b].*|ab[c-].*)
802  *   <  abc     ([-0].*|a[-a].*|ab[-b].*)
803  *              ([^a-].*|a[^b-].*|ab[^c-].*)
804  *   <= abc     ([-0].*|a[-a].*|ab[-b].*|abc)
805  *              ([^a-].*|a[^b-].*|ab[^c-].*|abc)
806  */
807 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
808                            const char **term_sub, char *term_dict,
809                            oid_value attributeSet,
810                            int reg_type, int space_split, char *term_dst,
811                            int *error_code)
812 {
813     AttrType relation;
814     int relation_value;
815     int i;
816     char *term_tmp = term_dict + strlen(term_dict);
817     char term_component[2*IT_MAX_WORD+20];
818
819     attr_init(&relation, zapt, 2);
820     relation_value = attr_find(&relation, NULL);
821
822     *error_code = 0;
823     yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
824     switch (relation_value)
825     {
826     case 1:
827         if (!term_100(zh->reg->zebra_maps, reg_type,
828                       term_sub, term_component,
829                       space_split, term_dst))
830             return 0;
831         yaz_log(log_level_rpn, "Relation <");
832         
833         *term_tmp++ = '(';
834         for (i = 0; term_component[i]; )
835         {
836             int j = 0;
837
838             if (i)
839                 *term_tmp++ = '|';
840             while (j < i)
841                 string_rel_add_char(&term_tmp, term_component, &j);
842
843             *term_tmp++ = '[';
844
845             *term_tmp++ = '^';
846             string_rel_add_char(&term_tmp, term_component, &i);
847             *term_tmp++ = '-';
848
849             *term_tmp++ = ']';
850             *term_tmp++ = '.';
851             *term_tmp++ = '*';
852
853             if ((term_tmp - term_dict) > IT_MAX_WORD)
854                 break;
855         }
856         *term_tmp++ = ')';
857         *term_tmp = '\0';
858         break;
859     case 2:
860         if (!term_100(zh->reg->zebra_maps, reg_type,
861                       term_sub, term_component,
862                       space_split, term_dst))
863             return 0;
864         yaz_log(log_level_rpn, "Relation <=");
865
866         *term_tmp++ = '(';
867         for (i = 0; term_component[i]; )
868         {
869             int j = 0;
870
871             while (j < i)
872                 string_rel_add_char(&term_tmp, term_component, &j);
873             *term_tmp++ = '[';
874
875             *term_tmp++ = '^';
876             string_rel_add_char(&term_tmp, term_component, &i);
877             *term_tmp++ = '-';
878
879             *term_tmp++ = ']';
880             *term_tmp++ = '.';
881             *term_tmp++ = '*';
882
883             *term_tmp++ = '|';
884
885             if ((term_tmp - term_dict) > IT_MAX_WORD)
886                 break;
887         }
888         for (i = 0; term_component[i]; )
889             string_rel_add_char(&term_tmp, term_component, &i);
890         *term_tmp++ = ')';
891         *term_tmp = '\0';
892         break;
893     case 5:
894         if (!term_100 (zh->reg->zebra_maps, reg_type,
895                        term_sub, term_component, space_split, term_dst))
896             return 0;
897         yaz_log(log_level_rpn, "Relation >");
898
899         *term_tmp++ = '(';
900         for (i = 0; term_component[i];)
901         {
902             int j = 0;
903
904             while (j < i)
905                 string_rel_add_char(&term_tmp, term_component, &j);
906             *term_tmp++ = '[';
907             
908             *term_tmp++ = '^';
909             *term_tmp++ = '-';
910             string_rel_add_char(&term_tmp, term_component, &i);
911
912             *term_tmp++ = ']';
913             *term_tmp++ = '.';
914             *term_tmp++ = '*';
915
916             *term_tmp++ = '|';
917
918             if ((term_tmp - term_dict) > IT_MAX_WORD)
919                 break;
920         }
921         for (i = 0; term_component[i];)
922             string_rel_add_char(&term_tmp, term_component, &i);
923         *term_tmp++ = '.';
924         *term_tmp++ = '+';
925         *term_tmp++ = ')';
926         *term_tmp = '\0';
927         break;
928     case 4:
929         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
930                       term_component, space_split, term_dst))
931             return 0;
932         yaz_log(log_level_rpn, "Relation >=");
933
934         *term_tmp++ = '(';
935         for (i = 0; term_component[i];)
936         {
937             int j = 0;
938
939             if (i)
940                 *term_tmp++ = '|';
941             while (j < i)
942                 string_rel_add_char(&term_tmp, term_component, &j);
943             *term_tmp++ = '[';
944
945             if (term_component[i+1])
946             {
947                 *term_tmp++ = '^';
948                 *term_tmp++ = '-';
949                 string_rel_add_char(&term_tmp, term_component, &i);
950             }
951             else
952             {
953                 string_rel_add_char(&term_tmp, term_component, &i);
954                 *term_tmp++ = '-';
955             }
956             *term_tmp++ = ']';
957             *term_tmp++ = '.';
958             *term_tmp++ = '*';
959
960             if ((term_tmp - term_dict) > IT_MAX_WORD)
961                 break;
962         }
963         *term_tmp++ = ')';
964         *term_tmp = '\0';
965         break;
966     case 3:
967     case 102:
968     case -1:
969         yaz_log(log_level_rpn, "Relation =");
970         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
971                       term_component, space_split, term_dst))
972             return 0;
973         strcat(term_tmp, "(");
974         strcat(term_tmp, term_component);
975         strcat(term_tmp, ")");
976         break;
977     default:
978         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
979         return 0;
980     }
981     return 1;
982 }
983
984 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
985                              const char **term_sub, 
986                              oid_value attributeSet, NMEM stream,
987                              struct grep_info *grep_info,
988                              int reg_type, int complete_flag,
989                              int num_bases, char **basenames,
990                              char *term_dst, int xpath_use);
991
992 static ZEBRA_RES term_trunc(ZebraHandle zh,
993                             Z_AttributesPlusTerm *zapt,
994                             const char **term_sub, 
995                             oid_value attributeSet, NMEM stream,
996                             struct grep_info *grep_info,
997                             int reg_type, int complete_flag,
998                             int num_bases, char **basenames,
999                             char *term_dst,
1000                             const char *rank_type, int xpath_use,
1001                             NMEM rset_nmem,
1002                             RSET *rset,
1003                             struct rset_key_control *kc)
1004 {
1005     ZEBRA_RES res;
1006     *rset = 0;
1007     grep_info->isam_p_indx = 0;
1008     res = string_term(zh, zapt, term_sub, attributeSet, stream, grep_info,
1009                       reg_type, complete_flag, num_bases, basenames,
1010                       term_dst, xpath_use);
1011     if (res != ZEBRA_OK)
1012         return res;
1013     if (!*term_sub)  /* no more terms ? */
1014         return res;
1015     yaz_log(log_level_rpn, "term: %s", term_dst);
1016     *rset = rset_trunc(zh, grep_info->isam_p_buf,
1017                        grep_info->isam_p_indx, term_dst,
1018                        strlen(term_dst), rank_type, 1 /* preserve pos */,
1019                        zapt->term->which, rset_nmem,
1020                        kc, kc->scope);
1021     if (!*rset)
1022         return ZEBRA_FAIL;
1023     return ZEBRA_OK;
1024 }
1025
1026 static char *nmem_strdup_i(NMEM nmem, int v)
1027 {
1028     char val_str[64];
1029     sprintf(val_str, "%d", v);
1030     return nmem_strdup(nmem, val_str);
1031 }
1032
1033 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1034                              const char **term_sub, 
1035                              oid_value attributeSet, NMEM stream,
1036                              struct grep_info *grep_info,
1037                              int reg_type, int complete_flag,
1038                              int num_bases, char **basenames,
1039                              char *term_dst, int xpath_use)
1040 {
1041     char term_dict[2*IT_MAX_WORD+4000];
1042     int j, r, base_no;
1043     AttrType truncation;
1044     int truncation_value;
1045     AttrType use;
1046     int use_value;
1047     const char *use_string = 0;
1048     oid_value curAttributeSet = attributeSet;
1049     const char *termp;
1050     struct rpn_char_map_info rcmi;
1051     int space_split = complete_flag ? 0 : 1;
1052
1053     int bases_ok = 0;     /* no of databases with OK attribute */
1054     int errCode = 0;      /* err code (if any is not OK) */
1055     char *errString = 0;  /* addinfo */
1056
1057     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1058     attr_init(&use, zapt, 1);
1059     use_value = attr_find_ex(&use, &curAttributeSet, &use_string);
1060     yaz_log(log_level_rpn, "string_term, use value %d", use_value);
1061     attr_init(&truncation, zapt, 5);
1062     truncation_value = attr_find(&truncation, NULL);
1063     yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1064
1065     if (use_value == -1)    /* no attribute - assumy "any" */
1066         use_value = 1016;
1067     for (base_no = 0; base_no < num_bases; base_no++)
1068     {
1069         int ord = -1;
1070         int attr_ok = 0;
1071         int regex_range = 0;
1072         int init_pos = 0;
1073         attent attp;
1074         data1_local_attribute id_xpath_attr;
1075         data1_local_attribute *local_attr;
1076         int max_pos, prefix_len = 0;
1077         int relation_error;
1078
1079         termp = *term_sub;
1080
1081         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1082         {
1083             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1084                            basenames[base_no]);
1085             return ZEBRA_FAIL;
1086         }
1087         if (xpath_use > 0 && use_value == -2) 
1088         {
1089             /* xpath mode and we have a string attribute */
1090             attp.local_attributes = &id_xpath_attr;
1091             attp.attset_ordinal = VAL_IDXPATH;
1092             id_xpath_attr.next = 0;
1093
1094             use_value = xpath_use;  /* xpath_use as use-attribute now */
1095             id_xpath_attr.local = use_value;
1096         }
1097         else if (curAttributeSet == VAL_IDXPATH && use_value >= 0)
1098         {
1099             /* X-Path attribute, use numeric value directly */
1100             attp.local_attributes = &id_xpath_attr;
1101             attp.attset_ordinal = VAL_IDXPATH;
1102             id_xpath_attr.next = 0;
1103             id_xpath_attr.local = use_value;
1104         }
1105         else if (use_string &&
1106                  (ord = zebraExplain_lookup_attr_str(zh->reg->zei,
1107                                                      use_string)) >= 0)
1108         {
1109             /* we have a match for a raw string attribute */
1110             char ord_buf[32];
1111             int i, ord_len;
1112
1113             if (prefix_len)
1114                 term_dict[prefix_len++] = '|';
1115             else
1116                 term_dict[prefix_len++] = '(';
1117             
1118             ord_len = key_SU_encode (ord, ord_buf);
1119             for (i = 0; i<ord_len; i++)
1120             {
1121                 term_dict[prefix_len++] = 1;
1122                 term_dict[prefix_len++] = ord_buf[i];
1123             }
1124             attp.local_attributes = 0;  /* no more attributes */
1125         }
1126         else 
1127         {
1128             /* lookup in the .att files . Allow string as well */
1129             if ((r = att_getentbyatt (zh, &attp, curAttributeSet, use_value,
1130                                       use_string)))
1131             {
1132                 yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
1133                         curAttributeSet, use_value, r);
1134                 if (r == -1)
1135                 {
1136                     /* set was found, but value wasn't defined */
1137                     errCode = YAZ_BIB1_UNSUPP_USE_ATTRIBUTE;
1138                     if (use_string)
1139                         errString = nmem_strdup(stream, use_string);
1140                     else
1141                         errString = nmem_strdup_i (stream, use_value);
1142                 }
1143                 else
1144                 {
1145                     int oid[OID_SIZE];
1146                     struct oident oident;
1147                     
1148                     oident.proto = PROTO_Z3950;
1149                     oident.oclass = CLASS_ATTSET;
1150                     oident.value = curAttributeSet;
1151                     oid_ent_to_oid (&oident, oid);
1152                     
1153                     errCode = YAZ_BIB1_UNSUPP_ATTRIBUTE_SET;
1154                     errString = nmem_strdup(stream, oident.desc);
1155                 }
1156                 continue;
1157             }
1158         }
1159         for (local_attr = attp.local_attributes; local_attr;
1160              local_attr = local_attr->next)
1161         {
1162             char ord_buf[32];
1163             int i, ord_len;
1164             
1165             ord = zebraExplain_lookup_attr_su(zh->reg->zei,
1166                                               attp.attset_ordinal,
1167                                               local_attr->local);
1168             if (ord < 0)
1169                 continue;
1170             if (prefix_len)
1171                 term_dict[prefix_len++] = '|';
1172             else
1173                 term_dict[prefix_len++] = '(';
1174             
1175             ord_len = key_SU_encode (ord, ord_buf);
1176             for (i = 0; i<ord_len; i++)
1177             {
1178                 term_dict[prefix_len++] = 1;
1179                 term_dict[prefix_len++] = ord_buf[i];
1180             }
1181         }
1182         bases_ok++;
1183         if (prefix_len)
1184             attr_ok = 1;
1185
1186         term_dict[prefix_len++] = ')';
1187         term_dict[prefix_len++] = 1;
1188         term_dict[prefix_len++] = reg_type;
1189         yaz_log(log_level_rpn, "reg_type = %d", term_dict[prefix_len-1]);
1190         term_dict[prefix_len] = '\0';
1191         j = prefix_len;
1192         switch (truncation_value)
1193         {
1194         case -1:         /* not specified */
1195         case 100:        /* do not truncate */
1196             if (!string_relation (zh, zapt, &termp, term_dict,
1197                                   attributeSet,
1198                                   reg_type, space_split, term_dst,
1199                                   &relation_error))
1200             {
1201                 if (relation_error)
1202                 {
1203                     zebra_setError(zh, relation_error, 0);
1204                     return ZEBRA_FAIL;
1205                 }
1206                 *term_sub = 0;
1207                 return ZEBRA_OK;
1208             }
1209             break;
1210         case 1:          /* right truncation */
1211             term_dict[j++] = '(';
1212             if (!term_100(zh->reg->zebra_maps, reg_type,
1213                           &termp, term_dict + j, space_split, term_dst))
1214             {
1215                 *term_sub = 0;
1216                 return ZEBRA_OK;
1217             }
1218             strcat(term_dict, ".*)");
1219             break;
1220         case 2:          /* keft truncation */
1221             term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1222             if (!term_100(zh->reg->zebra_maps, reg_type,
1223                           &termp, term_dict + j, space_split, term_dst))
1224             {
1225                 *term_sub = 0;
1226                 return ZEBRA_OK;
1227             }
1228             strcat(term_dict, ")");
1229             break;
1230         case 3:          /* left&right truncation */
1231             term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1232             if (!term_100(zh->reg->zebra_maps, reg_type,
1233                           &termp, term_dict + j, space_split, term_dst))
1234             {
1235                 *term_sub = 0;
1236                 return ZEBRA_OK;
1237             }
1238             strcat(term_dict, ".*)");
1239             break;
1240         case 101:        /* process # in term */
1241             term_dict[j++] = '(';
1242             if (!term_101(zh->reg->zebra_maps, reg_type,
1243                           &termp, term_dict + j, space_split, term_dst))
1244             {
1245                 *term_sub = 0;
1246                 return ZEBRA_OK;
1247             }
1248             strcat(term_dict, ")");
1249             break;
1250         case 102:        /* Regexp-1 */
1251             term_dict[j++] = '(';
1252             if (!term_102(zh->reg->zebra_maps, reg_type,
1253                           &termp, term_dict + j, space_split, term_dst))
1254             {
1255                 *term_sub = 0;
1256                 return ZEBRA_OK;
1257             }
1258             strcat(term_dict, ")");
1259             break;
1260         case 103:       /* Regexp-2 */
1261             regex_range = 1;
1262             term_dict[j++] = '(';
1263             init_pos = 2;
1264             if (!term_103(zh->reg->zebra_maps, reg_type,
1265                           &termp, term_dict + j, &regex_range,
1266                           space_split, term_dst))
1267             {
1268                 *term_sub = 0;
1269                 return ZEBRA_OK;
1270             }
1271             strcat(term_dict, ")");
1272             break;
1273         case 104:        /* process # and ! in term */
1274             term_dict[j++] = '(';
1275             if (!term_104(zh->reg->zebra_maps, reg_type,
1276                           &termp, term_dict + j, space_split, term_dst))
1277             {
1278                 *term_sub = 0;
1279                 return ZEBRA_OK;
1280             }
1281             strcat(term_dict, ")");
1282             break;
1283         case 105:        /* process * and ! in term */
1284             term_dict[j++] = '(';
1285             if (!term_105(zh->reg->zebra_maps, reg_type,
1286                           &termp, term_dict + j, space_split, term_dst, 1))
1287             {
1288                 *term_sub = 0;
1289                 return ZEBRA_OK;
1290             }
1291             strcat(term_dict, ")");
1292             break;
1293         case 106:        /* process * and ! in term */
1294             term_dict[j++] = '(';
1295             if (!term_105(zh->reg->zebra_maps, reg_type,
1296                           &termp, term_dict + j, space_split, term_dst, 0))
1297             {
1298                 *term_sub = 0;
1299                 return ZEBRA_OK;
1300             }
1301             strcat(term_dict, ")");
1302             break;
1303         default:
1304             zebra_setError_zint(zh,
1305                                 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1306                                 truncation_value);
1307             return ZEBRA_FAIL;
1308         }
1309         if (attr_ok)
1310         {
1311             char buf[80];
1312             const char *input = term_dict + prefix_len;
1313             esc_str(buf, sizeof(buf), input, strlen(input));
1314         }
1315         if (attr_ok)
1316         {
1317             yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_dict+prefix_len);
1318             r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
1319                                  grep_info, &max_pos, init_pos,
1320                                  grep_handle);
1321             if (r)
1322                 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1323         }
1324     }
1325     if (!bases_ok)
1326     {
1327         zebra_setError(zh, errCode, errString);
1328         return ZEBRA_FAIL;
1329     }
1330     *term_sub = termp;
1331     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1332     return ZEBRA_OK;
1333 }
1334
1335
1336 /* convert APT search term to UTF8 */
1337 static ZEBRA_RES zapt_term_to_utf8(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1338                                    char *termz)
1339 {
1340     size_t sizez;
1341     Z_Term *term = zapt->term;
1342
1343     switch (term->which)
1344     {
1345     case Z_Term_general:
1346         if (zh->iconv_to_utf8 != 0)
1347         {
1348             char *inbuf = term->u.general->buf;
1349             size_t inleft = term->u.general->len;
1350             char *outbuf = termz;
1351             size_t outleft = IT_MAX_WORD-1;
1352             size_t ret;
1353
1354             ret = yaz_iconv(zh->iconv_to_utf8, &inbuf, &inleft,
1355                         &outbuf, &outleft);
1356             if (ret == (size_t)(-1))
1357             {
1358                 ret = yaz_iconv(zh->iconv_to_utf8, 0, 0, 0, 0);
1359                 zebra_setError(
1360                     zh, 
1361                     YAZ_BIB1_QUERY_TERM_INCLUDES_CHARS_THAT_DO_NOT_TRANSLATE_INTO_,
1362                     0);
1363                 return ZEBRA_FAIL;
1364             }
1365             *outbuf = 0;
1366         }
1367         else
1368         {
1369             sizez = term->u.general->len;
1370             if (sizez > IT_MAX_WORD-1)
1371                 sizez = IT_MAX_WORD-1;
1372             memcpy (termz, term->u.general->buf, sizez);
1373             termz[sizez] = '\0';
1374         }
1375         break;
1376     case Z_Term_characterString:
1377         sizez = strlen(term->u.characterString);
1378         if (sizez > IT_MAX_WORD-1)
1379             sizez = IT_MAX_WORD-1;
1380         memcpy (termz, term->u.characterString, sizez);
1381         termz[sizez] = '\0';
1382         break;
1383     default:
1384         zebra_setError(zh, YAZ_BIB1_UNSUPP_CODED_VALUE_FOR_TERM, 0);
1385         return ZEBRA_FAIL;
1386     }
1387     return ZEBRA_OK;
1388 }
1389
1390 /* convert APT SCAN term to internal cmap */
1391 static ZEBRA_RES trans_scan_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1392                                  char *termz, int reg_type)
1393 {
1394     char termz0[IT_MAX_WORD];
1395
1396     if (zapt_term_to_utf8(zh, zapt, termz0) == ZEBRA_FAIL)
1397         return ZEBRA_FAIL;    /* error */
1398     else
1399     {
1400         const char **map;
1401         const char *cp = (const char *) termz0;
1402         const char *cp_end = cp + strlen(cp);
1403         const char *src;
1404         int i = 0;
1405         const char *space_map = NULL;
1406         int len;
1407             
1408         while ((len = (cp_end - cp)) > 0)
1409         {
1410             map = zebra_maps_input(zh->reg->zebra_maps, reg_type, &cp, len, 0);
1411             if (**map == *CHR_SPACE)
1412                 space_map = *map;
1413             else
1414             {
1415                 if (i && space_map)
1416                     for (src = space_map; *src; src++)
1417                         termz[i++] = *src;
1418                 space_map = NULL;
1419                 for (src = *map; *src; src++)
1420                     termz[i++] = *src;
1421             }
1422         }
1423         termz[i] = '\0';
1424     }
1425     return ZEBRA_OK;
1426 }
1427
1428 char *normalize_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1429                      const char *termz, NMEM stream, unsigned reg_id)
1430 {
1431     WRBUF wrbuf = 0;
1432     AttrType truncation;
1433     int truncation_value;
1434     char *ex_list = 0;
1435
1436     attr_init(&truncation, zapt, 5);
1437     truncation_value = attr_find(&truncation, NULL);
1438
1439     switch (truncation_value)
1440     {
1441     default:
1442         ex_list = "";
1443         break;
1444     case 101:
1445         ex_list = "#";
1446         break;
1447     case 102:
1448     case 103:
1449         ex_list = 0;
1450         break;
1451     case 104:
1452         ex_list = "!#";
1453         break;
1454     case 105:
1455         ex_list = "!*";
1456         break;
1457     }
1458     if (ex_list)
1459         wrbuf = zebra_replace(zh->reg->zebra_maps, reg_id, ex_list,
1460                               termz, strlen(termz));
1461     if (!wrbuf)
1462         return nmem_strdup(stream, termz);
1463     else
1464     {
1465         char *buf = (char*) nmem_malloc(stream, wrbuf_len(wrbuf)+1);
1466         memcpy (buf, wrbuf_buf(wrbuf), wrbuf_len(wrbuf));
1467         buf[wrbuf_len(wrbuf)] = '\0';
1468         return buf;
1469     }
1470 }
1471
1472 static void grep_info_delete(struct grep_info *grep_info)
1473 {
1474 #ifdef TERM_COUNT
1475     xfree(grep_info->term_no);
1476 #endif
1477     xfree(grep_info->isam_p_buf);
1478 }
1479
1480 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1481                                    Z_AttributesPlusTerm *zapt,
1482                                    struct grep_info *grep_info,
1483                                    int reg_type)
1484 {
1485     AttrType termset;
1486     int termset_value_numeric;
1487     const char *termset_value_string;
1488
1489 #ifdef TERM_COUNT
1490     grep_info->term_no = 0;
1491 #endif
1492     grep_info->isam_p_size = 0;
1493     grep_info->isam_p_buf = NULL;
1494     grep_info->zh = zh;
1495     grep_info->reg_type = reg_type;
1496     grep_info->termset = 0;
1497
1498     if (!zapt)
1499         return ZEBRA_OK;
1500     attr_init(&termset, zapt, 8);
1501     termset_value_numeric =
1502         attr_find_ex(&termset, NULL, &termset_value_string);
1503     if (termset_value_numeric != -1)
1504     {
1505         char resname[32];
1506         const char *termset_name = 0;
1507         if (termset_value_numeric != -2)
1508         {
1509     
1510             sprintf(resname, "%d", termset_value_numeric);
1511             termset_name = resname;
1512         }
1513         else
1514             termset_name = termset_value_string;
1515         yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1516         grep_info->termset = resultSetAdd(zh, termset_name, 1);
1517         if (!grep_info->termset)
1518         {
1519             zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1520             return ZEBRA_FAIL;
1521         }
1522     }
1523     return ZEBRA_OK;
1524 }
1525                                
1526
1527 static ZEBRA_RES term_list_trunc(ZebraHandle zh,
1528                                  Z_AttributesPlusTerm *zapt,
1529                                  const char *termz_org,
1530                                  oid_value attributeSet,
1531                                  NMEM stream,
1532                                  int reg_type, int complete_flag,
1533                                  const char *rank_type, int xpath_use,
1534                                  int num_bases, char **basenames, 
1535                                  NMEM rset_nmem,
1536                                  RSET **result_sets, int *num_result_sets,
1537                                  struct rset_key_control *kc)
1538 {
1539     char term_dst[IT_MAX_WORD+1];
1540     struct grep_info grep_info;
1541     char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type);
1542     const char *termp = termz;
1543     int alloc_sets = 0;
1544
1545     *num_result_sets = 0;
1546     *term_dst = 0;
1547     if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1548         return ZEBRA_FAIL;
1549     while(1)
1550     { 
1551         ZEBRA_RES res;
1552
1553         if (alloc_sets == *num_result_sets)
1554         {
1555             int add = 10;
1556             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1557                                               sizeof(*rnew));
1558             if (alloc_sets)
1559                 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1560             alloc_sets = alloc_sets + add;
1561             *result_sets = rnew;
1562         }
1563         res = term_trunc(zh, zapt, &termp, attributeSet,
1564                          stream, &grep_info,
1565                          reg_type, complete_flag,
1566                          num_bases, basenames,
1567                          term_dst, rank_type,
1568                          xpath_use, rset_nmem,
1569                          &(*result_sets)[*num_result_sets],
1570                          kc);
1571         if (res != ZEBRA_OK)
1572         {
1573             int i;
1574             for (i = 0; i < *num_result_sets; i++)
1575                 rset_delete((*result_sets)[i]);
1576             grep_info_delete (&grep_info);
1577             return res;
1578         }
1579         if ((*result_sets)[*num_result_sets] == 0)
1580             break;
1581         (*num_result_sets)++;
1582     }
1583     grep_info_delete(&grep_info);
1584     return ZEBRA_OK;
1585 }
1586
1587 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1588                                        Z_AttributesPlusTerm *zapt,
1589                                        const char *termz_org,
1590                                        oid_value attributeSet,
1591                                        NMEM stream,
1592                                        int reg_type, int complete_flag,
1593                                        const char *rank_type, int xpath_use,
1594                                        int num_bases, char **basenames, 
1595                                        NMEM rset_nmem,
1596                                        RSET *rset,
1597                                        struct rset_key_control *kc)
1598 {
1599     RSET *result_sets = 0;
1600     int num_result_sets = 0;
1601     ZEBRA_RES res =
1602         term_list_trunc(zh, zapt, termz_org, attributeSet,
1603                         stream, reg_type, complete_flag,
1604                         rank_type, xpath_use,
1605                         num_bases, basenames,
1606                         rset_nmem,
1607                         &result_sets, &num_result_sets, kc);
1608     if (res != ZEBRA_OK)
1609         return res;
1610     if (num_result_sets == 0)
1611         *rset = rsnull_create (rset_nmem, kc, 0); 
1612     else if (num_result_sets == 1)
1613         *rset = result_sets[0];
1614     else
1615         *rset = rsprox_create(rset_nmem, kc, kc->scope,
1616                               num_result_sets, result_sets,
1617                               1 /* ordered */, 0 /* exclusion */,
1618                               3 /* relation */, 1 /* distance */);
1619     if (!*rset)
1620         return ZEBRA_FAIL;
1621     return ZEBRA_OK;
1622 }
1623
1624 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1625                                         Z_AttributesPlusTerm *zapt,
1626                                         const char *termz_org,
1627                                         oid_value attributeSet,
1628                                         NMEM stream,
1629                                         int reg_type, int complete_flag,
1630                                         const char *rank_type,
1631                                         int xpath_use,
1632                                         int num_bases, char **basenames,
1633                                         NMEM rset_nmem,
1634                                         RSET *rset,
1635                                         struct rset_key_control *kc)
1636 {
1637     RSET *result_sets = 0;
1638     int num_result_sets = 0;
1639     ZEBRA_RES res =
1640         term_list_trunc(zh, zapt, termz_org, attributeSet,
1641                         stream, reg_type, complete_flag,
1642                         rank_type, xpath_use,
1643                         num_bases, basenames,
1644                         rset_nmem,
1645                         &result_sets, &num_result_sets, kc);
1646     if (res != ZEBRA_OK)
1647         return res;
1648     if (num_result_sets == 0)
1649         *rset = rsnull_create (rset_nmem, kc, 0); 
1650     else if (num_result_sets == 1)
1651         *rset = result_sets[0];
1652     else
1653         *rset = rsmulti_or_create(rset_nmem, kc, kc->scope, 0 /* termid */,
1654                                   num_result_sets, result_sets);
1655     if (!*rset)
1656         return ZEBRA_FAIL;
1657     return ZEBRA_OK;
1658 }
1659
1660 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1661                                          Z_AttributesPlusTerm *zapt,
1662                                          const char *termz_org,
1663                                          oid_value attributeSet,
1664                                          NMEM stream,
1665                                          int reg_type, int complete_flag,
1666                                          const char *rank_type, 
1667                                          int xpath_use,
1668                                          int num_bases, char **basenames,
1669                                          NMEM rset_nmem,
1670                                          RSET *rset,
1671                                          struct rset_key_control *kc)
1672 {
1673     RSET *result_sets = 0;
1674     int num_result_sets = 0;
1675     ZEBRA_RES res =
1676         term_list_trunc(zh, zapt, termz_org, attributeSet,
1677                         stream, reg_type, complete_flag,
1678                         rank_type, xpath_use,
1679                         num_bases, basenames,
1680                         rset_nmem,
1681                         &result_sets, &num_result_sets,
1682                         kc);
1683     if (res != ZEBRA_OK)
1684         return res;
1685     if (num_result_sets == 0)
1686         *rset = rsnull_create (rset_nmem, kc, 0); 
1687     else if (num_result_sets == 1)
1688         *rset = result_sets[0];
1689     else
1690         *rset = rsmulti_and_create(rset_nmem, kc, kc->scope,
1691                                    num_result_sets, result_sets);
1692     if (!*rset)
1693         return ZEBRA_FAIL;
1694     return ZEBRA_OK;
1695 }
1696
1697 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1698                             const char **term_sub,
1699                             char *term_dict,
1700                             oid_value attributeSet,
1701                             struct grep_info *grep_info,
1702                             int *max_pos,
1703                             int reg_type,
1704                             char *term_dst,
1705                             int *error_code)
1706 {
1707     AttrType relation;
1708     int relation_value;
1709     int term_value;
1710     int r;
1711     char *term_tmp = term_dict + strlen(term_dict);
1712
1713     *error_code = 0;
1714     attr_init(&relation, zapt, 2);
1715     relation_value = attr_find(&relation, NULL);
1716
1717     yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1718
1719     if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1720                   term_dst))
1721         return 0;
1722     term_value = atoi (term_tmp);
1723     switch (relation_value)
1724     {
1725     case 1:
1726         yaz_log(log_level_rpn, "Relation <");
1727         gen_regular_rel(term_tmp, term_value-1, 1);
1728         break;
1729     case 2:
1730         yaz_log(log_level_rpn, "Relation <=");
1731         gen_regular_rel(term_tmp, term_value, 1);
1732         break;
1733     case 4:
1734         yaz_log(log_level_rpn, "Relation >=");
1735         gen_regular_rel(term_tmp, term_value, 0);
1736         break;
1737     case 5:
1738         yaz_log(log_level_rpn, "Relation >");
1739         gen_regular_rel(term_tmp, term_value+1, 0);
1740         break;
1741     case -1:
1742     case 3:
1743         yaz_log(log_level_rpn, "Relation =");
1744         sprintf(term_tmp, "(0*%d)", term_value);
1745         break;
1746     default:
1747         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1748         return 0;
1749     }
1750     yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_tmp);
1751     r = dict_lookup_grep(zh->reg->dict, term_dict, 0, grep_info, max_pos,
1752                           0, grep_handle);
1753     if (r)
1754         yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1755     yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1756     return 1;
1757 }
1758
1759 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1760                               const char **term_sub, 
1761                               oid_value attributeSet,
1762                               struct grep_info *grep_info,
1763                               int reg_type, int complete_flag,
1764                               int num_bases, char **basenames,
1765                               char *term_dst, int xpath_use, NMEM stream)
1766 {
1767     char term_dict[2*IT_MAX_WORD+2];
1768     int r, base_no;
1769     AttrType use;
1770     int use_value;
1771     const char *use_string = 0;
1772     oid_value curAttributeSet = attributeSet;
1773     const char *termp;
1774     struct rpn_char_map_info rcmi;
1775
1776     int bases_ok = 0;     /* no of databases with OK attribute */
1777     int errCode = 0;      /* err code (if any is not OK) */
1778     char *errString = 0;  /* addinfo */
1779
1780     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1781     attr_init(&use, zapt, 1);
1782     use_value = attr_find_ex(&use, &curAttributeSet, &use_string);
1783
1784     if (use_value == -1)
1785         use_value = 1016;
1786
1787     for (base_no = 0; base_no < num_bases; base_no++)
1788     {
1789         attent attp;
1790         data1_local_attribute id_xpath_attr;
1791         data1_local_attribute *local_attr;
1792         int max_pos, prefix_len = 0;
1793         int relation_error = 0;
1794
1795         termp = *term_sub;
1796         if (use_value == -2)  /* string attribute (assume IDXPATH/any) */
1797         {
1798             use_value = xpath_use;
1799             attp.local_attributes = &id_xpath_attr;
1800             attp.attset_ordinal = VAL_IDXPATH;
1801             id_xpath_attr.next = 0;
1802             id_xpath_attr.local = use_value;
1803         }
1804         else if (curAttributeSet == VAL_IDXPATH)
1805         {
1806             attp.local_attributes = &id_xpath_attr;
1807             attp.attset_ordinal = VAL_IDXPATH;
1808             id_xpath_attr.next = 0;
1809             id_xpath_attr.local = use_value;
1810         }
1811         else
1812         {
1813             if ((r = att_getentbyatt (zh, &attp, curAttributeSet, use_value,
1814                                             use_string)))
1815             {
1816                 yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
1817                       curAttributeSet, use_value, r);
1818                 if (r == -1)
1819                 {
1820                     errCode = YAZ_BIB1_UNSUPP_USE_ATTRIBUTE;
1821                     if (use_string)
1822                         errString = nmem_strdup(stream, use_string);
1823                     else
1824                         errString = nmem_strdup_i (stream, use_value);
1825                 }
1826                 else
1827                     errCode = YAZ_BIB1_UNSUPP_ATTRIBUTE_SET;
1828                 continue;
1829             }
1830         }
1831         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1832         {
1833             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1834                            basenames[base_no]);
1835             return ZEBRA_FAIL;
1836         }
1837         for (local_attr = attp.local_attributes; local_attr;
1838              local_attr = local_attr->next)
1839         {
1840             int ord;
1841             char ord_buf[32];
1842             int i, ord_len;
1843
1844             ord = zebraExplain_lookup_attr_su(zh->reg->zei,
1845                                               attp.attset_ordinal,
1846                                               local_attr->local);
1847             if (ord < 0)
1848                 continue;
1849             if (prefix_len)
1850                 term_dict[prefix_len++] = '|';
1851             else
1852                 term_dict[prefix_len++] = '(';
1853
1854             ord_len = key_SU_encode (ord, ord_buf);
1855             for (i = 0; i<ord_len; i++)
1856             {
1857                 term_dict[prefix_len++] = 1;
1858                 term_dict[prefix_len++] = ord_buf[i];
1859             }
1860         }
1861         if (!prefix_len)
1862         {
1863             zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_USE_ATTRIBUTE, use_value);
1864             continue;
1865         }
1866         bases_ok++;
1867         term_dict[prefix_len++] = ')';        
1868         term_dict[prefix_len++] = 1;
1869         term_dict[prefix_len++] = reg_type;
1870         yaz_log(YLOG_DEBUG, "reg_type = %d", term_dict[prefix_len-1]);
1871         term_dict[prefix_len] = '\0';
1872         if (!numeric_relation(zh, zapt, &termp, term_dict,
1873                               attributeSet, grep_info, &max_pos, reg_type,
1874                               term_dst, &relation_error))
1875         {
1876             if (relation_error)
1877             {
1878                 zebra_setError(zh, relation_error, 0);
1879                 return ZEBRA_FAIL;
1880             }
1881             *term_sub = 0;
1882             return ZEBRA_OK;
1883         }
1884     }
1885     if (!bases_ok)
1886     {
1887         zebra_setError(zh, errCode, errString);
1888         return ZEBRA_FAIL;
1889     }
1890     *term_sub = termp;
1891     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1892     return ZEBRA_OK;
1893 }
1894
1895 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1896                                         Z_AttributesPlusTerm *zapt,
1897                                         const char *termz,
1898                                         oid_value attributeSet,
1899                                         NMEM stream,
1900                                         int reg_type, int complete_flag,
1901                                         const char *rank_type, int xpath_use,
1902                                         int num_bases, char **basenames,
1903                                         NMEM rset_nmem,
1904                                         RSET *rset,
1905                                         struct rset_key_control *kc)
1906 {
1907     char term_dst[IT_MAX_WORD+1];
1908     const char *termp = termz;
1909     RSET *result_sets = 0;
1910     int num_result_sets = 0;
1911     ZEBRA_RES res;
1912     struct grep_info grep_info;
1913     int alloc_sets = 0;
1914
1915     yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1916     if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1917         return ZEBRA_FAIL;
1918     while (1)
1919     { 
1920         if (alloc_sets == num_result_sets)
1921         {
1922             int add = 10;
1923             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1924                                               sizeof(*rnew));
1925             if (alloc_sets)
1926                 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1927             alloc_sets = alloc_sets + add;
1928             result_sets = rnew;
1929         }
1930         yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1931         grep_info.isam_p_indx = 0;
1932         res = numeric_term(zh, zapt, &termp, attributeSet, &grep_info,
1933                            reg_type, complete_flag, num_bases, basenames,
1934                            term_dst, xpath_use,
1935                            stream);
1936         if (res == ZEBRA_FAIL || termp == 0)
1937             break;
1938         yaz_log(YLOG_DEBUG, "term: %s", term_dst);
1939         result_sets[num_result_sets] =
1940             rset_trunc(zh, grep_info.isam_p_buf,
1941                        grep_info.isam_p_indx, term_dst,
1942                        strlen(term_dst), rank_type,
1943                        0 /* preserve position */,
1944                        zapt->term->which, rset_nmem, 
1945                        kc, kc->scope);
1946         if (!result_sets[num_result_sets])
1947             break;
1948         num_result_sets++;
1949     }
1950     grep_info_delete(&grep_info);
1951     if (termp)
1952     {
1953         int i;
1954         for (i = 0; i<num_result_sets; i++)
1955             rset_delete(result_sets[i]);
1956         return ZEBRA_FAIL;
1957     }
1958     if (num_result_sets == 0)
1959         *rset = rsnull_create(rset_nmem, kc, 0);
1960     if (num_result_sets == 1)
1961         *rset = result_sets[0];
1962     else
1963         *rset = rsmulti_and_create(rset_nmem, kc, kc->scope,
1964                                    num_result_sets, result_sets);
1965     if (!*rset)
1966         return ZEBRA_FAIL;
1967     return ZEBRA_OK;
1968 }
1969
1970 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1971                                       Z_AttributesPlusTerm *zapt,
1972                                       const char *termz,
1973                                       oid_value attributeSet,
1974                                       NMEM stream,
1975                                       const char *rank_type, NMEM rset_nmem,
1976                                       RSET *rset,
1977                                       struct rset_key_control *kc)
1978 {
1979     RSFD rsfd;
1980     struct it_key key;
1981     int sys;
1982     *rset = rstemp_create(rset_nmem, kc, kc->scope,
1983                           res_get (zh->res, "setTmpDir"),0 );
1984     rsfd = rset_open(*rset, RSETF_WRITE);
1985     
1986     sys = atoi(termz);
1987     if (sys <= 0)
1988         sys = 1;
1989     key.mem[0] = sys;
1990     key.mem[1] = 1;
1991     key.len = 2;
1992     rset_write (rsfd, &key);
1993     rset_close (rsfd);
1994     return ZEBRA_OK;
1995 }
1996
1997 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1998                                oid_value attributeSet, NMEM stream,
1999                                Z_SortKeySpecList *sort_sequence,
2000                                const char *rank_type,
2001                                NMEM rset_nmem,
2002                                RSET *rset,
2003                                struct rset_key_control *kc)
2004 {
2005     int i;
2006     int sort_relation_value;
2007     AttrType sort_relation_type;
2008     Z_SortKeySpec *sks;
2009     Z_SortKey *sk;
2010     int oid[OID_SIZE];
2011     oident oe;
2012     char termz[20];
2013     
2014     attr_init(&sort_relation_type, zapt, 7);
2015     sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
2016
2017     if (!sort_sequence->specs)
2018     {
2019         sort_sequence->num_specs = 10;
2020         sort_sequence->specs = (Z_SortKeySpec **)
2021             nmem_malloc(stream, sort_sequence->num_specs *
2022                          sizeof(*sort_sequence->specs));
2023         for (i = 0; i<sort_sequence->num_specs; i++)
2024             sort_sequence->specs[i] = 0;
2025     }
2026     if (zapt->term->which != Z_Term_general)
2027         i = 0;
2028     else
2029         i = atoi_n ((char *) zapt->term->u.general->buf,
2030                     zapt->term->u.general->len);
2031     if (i >= sort_sequence->num_specs)
2032         i = 0;
2033     sprintf(termz, "%d", i);
2034
2035     oe.proto = PROTO_Z3950;
2036     oe.oclass = CLASS_ATTSET;
2037     oe.value = attributeSet;
2038     if (!oid_ent_to_oid (&oe, oid))
2039         return ZEBRA_FAIL;
2040
2041     sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
2042     sks->sortElement = (Z_SortElement *)
2043         nmem_malloc(stream, sizeof(*sks->sortElement));
2044     sks->sortElement->which = Z_SortElement_generic;
2045     sk = sks->sortElement->u.generic = (Z_SortKey *)
2046         nmem_malloc(stream, sizeof(*sk));
2047     sk->which = Z_SortKey_sortAttributes;
2048     sk->u.sortAttributes = (Z_SortAttributes *)
2049         nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
2050
2051     sk->u.sortAttributes->id = oid;
2052     sk->u.sortAttributes->list = zapt->attributes;
2053
2054     sks->sortRelation = (int *)
2055         nmem_malloc(stream, sizeof(*sks->sortRelation));
2056     if (sort_relation_value == 1)
2057         *sks->sortRelation = Z_SortKeySpec_ascending;
2058     else if (sort_relation_value == 2)
2059         *sks->sortRelation = Z_SortKeySpec_descending;
2060     else 
2061         *sks->sortRelation = Z_SortKeySpec_ascending;
2062
2063     sks->caseSensitivity = (int *)
2064         nmem_malloc(stream, sizeof(*sks->caseSensitivity));
2065     *sks->caseSensitivity = 0;
2066
2067     sks->which = Z_SortKeySpec_null;
2068     sks->u.null = odr_nullval ();
2069     sort_sequence->specs[i] = sks;
2070     *rset = rsnull_create (rset_nmem, kc, 0);
2071     return ZEBRA_OK;
2072 }
2073
2074
2075 static int parse_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2076                        oid_value attributeSet,
2077                        struct xpath_location_step *xpath, int max, NMEM mem)
2078 {
2079     oid_value curAttributeSet = attributeSet;
2080     AttrType use;
2081     const char *use_string = 0;
2082     
2083     attr_init(&use, zapt, 1);
2084     attr_find_ex(&use, &curAttributeSet, &use_string);
2085
2086     if (!use_string || *use_string != '/')
2087         return -1;
2088
2089     return zebra_parse_xpath_str(use_string, xpath, max, mem);
2090 }
2091  
2092                
2093
2094 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
2095                         int reg_type, const char *term, int use,
2096                         oid_value curAttributeSet, NMEM rset_nmem,
2097                         struct rset_key_control *kc)
2098 {
2099     RSET rset;
2100     struct grep_info grep_info;
2101     char term_dict[2048];
2102     char ord_buf[32];
2103     int prefix_len = 0;
2104     int ord = zebraExplain_lookup_attr_su(zh->reg->zei, curAttributeSet, use);
2105     int ord_len, i, r, max_pos;
2106     int term_type = Z_Term_characterString;
2107     const char *flags = "void";
2108
2109     if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, '0') == ZEBRA_FAIL)
2110         return rsnull_create(rset_nmem, kc, 0);
2111     
2112     if (ord < 0)
2113         return rsnull_create(rset_nmem, kc, 0);
2114     if (prefix_len)
2115         term_dict[prefix_len++] = '|';
2116     else
2117         term_dict[prefix_len++] = '(';
2118     
2119     ord_len = key_SU_encode (ord, ord_buf);
2120     for (i = 0; i<ord_len; i++)
2121     {
2122         term_dict[prefix_len++] = 1;
2123         term_dict[prefix_len++] = ord_buf[i];
2124     }
2125     term_dict[prefix_len++] = ')';
2126     term_dict[prefix_len++] = 1;
2127     term_dict[prefix_len++] = reg_type;
2128     
2129     strcpy(term_dict+prefix_len, term);
2130     
2131     grep_info.isam_p_indx = 0;
2132     r = dict_lookup_grep(zh->reg->dict, term_dict, 0,
2133                           &grep_info, &max_pos, 0, grep_handle);
2134     yaz_log(YLOG_DEBUG, "%s %d positions", term,
2135              grep_info.isam_p_indx);
2136     rset = rset_trunc(zh, grep_info.isam_p_buf,
2137                       grep_info.isam_p_indx, term, strlen(term),
2138                       flags, 1, term_type,rset_nmem,
2139                       kc, kc->scope);
2140     grep_info_delete(&grep_info);
2141     return rset;
2142 }
2143
2144 static
2145 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2146                            oid_value attributeSet,
2147                            int num_bases, char **basenames,
2148                            NMEM stream, const char *rank_type, RSET rset,
2149                            int xpath_len, struct xpath_location_step *xpath,
2150                            NMEM rset_nmem,
2151                            RSET *rset_out,
2152                            struct rset_key_control *kc)
2153 {
2154     oid_value curAttributeSet = attributeSet;
2155     int base_no;
2156     int i;
2157
2158     if (xpath_len < 0)
2159     {
2160         *rset_out = rset;
2161         return ZEBRA_OK;
2162     }
2163
2164     yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2165     for (i = 0; i<xpath_len; i++)
2166     {
2167         yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2168
2169     }
2170
2171     curAttributeSet = VAL_IDXPATH;
2172
2173     /*
2174       //a    ->    a/.*
2175       //a/b  ->    b/a/.*
2176       /a     ->    a/
2177       /a/b   ->    b/a/
2178
2179       /      ->    none
2180
2181    a[@attr = value]/b[@other = othervalue]
2182
2183  /e/@a val      range(e/,range(@a,freetext(w,1015,val),@a),e/)
2184  /a/b val       range(b/a/,freetext(w,1016,val),b/a/)
2185  /a/b/@c val    range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2186  /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2187  /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2188  /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
2189       
2190     */
2191
2192     dict_grep_cmap (zh->reg->dict, 0, 0);
2193
2194     for (base_no = 0; base_no < num_bases; base_no++)
2195     {
2196         int level = xpath_len;
2197         int first_path = 1;
2198         
2199         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2200         {
2201             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2202                            basenames[base_no]);
2203             *rset_out = rset;
2204             return ZEBRA_FAIL;
2205         }
2206         while (--level >= 0)
2207         {
2208             char xpath_rev[128];
2209             int i, len;
2210             RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
2211
2212             *xpath_rev = 0;
2213             len = 0;
2214             for (i = level; i >= 1; --i)
2215             {
2216                 const char *cp = xpath[i].part;
2217                 if (*cp)
2218                 {
2219                     for (;*cp; cp++)
2220                         if (*cp == '*')
2221                         {
2222                             memcpy (xpath_rev + len, "[^/]*", 5);
2223                             len += 5;
2224                         }
2225                         else if (*cp == ' ')
2226                         {
2227
2228                             xpath_rev[len++] = 1;
2229                             xpath_rev[len++] = ' ';
2230                         }
2231
2232                         else
2233                             xpath_rev[len++] = *cp;
2234                     xpath_rev[len++] = '/';
2235                 }
2236                 else if (i == 1)  /* // case */
2237                 {
2238                     xpath_rev[len++] = '.';
2239                     xpath_rev[len++] = '*';
2240                 }
2241             }
2242             xpath_rev[len] = 0;
2243
2244             if (xpath[level].predicate &&
2245                 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2246                 xpath[level].predicate->u.relation.name[0])
2247             {
2248                 WRBUF wbuf = wrbuf_alloc();
2249                 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2250                 if (xpath[level].predicate->u.relation.value)
2251                 {
2252                     const char *cp = xpath[level].predicate->u.relation.value;
2253                     wrbuf_putc(wbuf, '=');
2254                     
2255                     while (*cp)
2256                     {
2257                         if (strchr(REGEX_CHARS, *cp))
2258                             wrbuf_putc(wbuf, '\\');
2259                         wrbuf_putc(wbuf, *cp);
2260                         cp++;
2261                     }
2262                 }
2263                 wrbuf_puts(wbuf, "");
2264                 rset_attr = xpath_trunc(
2265                     zh, stream, '0', wrbuf_buf(wbuf), 3, 
2266                     curAttributeSet, rset_nmem, kc);
2267                 wrbuf_free(wbuf, 1);
2268             } 
2269             else 
2270             {
2271                 if (!first_path)
2272                     continue;
2273             }
2274             yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level, xpath_rev);
2275             if (strlen(xpath_rev))
2276             {
2277                 rset_start_tag = xpath_trunc(zh, stream, '0', 
2278                         xpath_rev, 1, curAttributeSet, rset_nmem, kc);
2279             
2280                 rset_end_tag = xpath_trunc(zh, stream, '0', 
2281                         xpath_rev, 2, curAttributeSet, rset_nmem, kc);
2282
2283                 rset = rsbetween_create(rset_nmem, kc, kc->scope,
2284                                         rset_start_tag, rset,
2285                                         rset_end_tag, rset_attr);
2286             }
2287             first_path = 0;
2288         }
2289     }
2290     *rset_out = rset;
2291     return ZEBRA_OK;
2292 }
2293
2294 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2295                                 oid_value attributeSet, NMEM stream,
2296                                 Z_SortKeySpecList *sort_sequence,
2297                                 int num_bases, char **basenames, 
2298                                 NMEM rset_nmem,
2299                                 RSET *rset,
2300                                 struct rset_key_control *kc)
2301 {
2302     ZEBRA_RES res = ZEBRA_OK;
2303     unsigned reg_id;
2304     char *search_type = NULL;
2305     char rank_type[128];
2306     int complete_flag;
2307     int sort_flag;
2308     char termz[IT_MAX_WORD+1];
2309     int xpath_len;
2310     int xpath_use = 0;
2311     struct xpath_location_step xpath[10];
2312
2313     if (!log_level_set)
2314     {
2315         log_level_rpn = yaz_log_module_level("rpn");
2316         log_level_set = 1;
2317     }
2318     zebra_maps_attr(zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2319                     rank_type, &complete_flag, &sort_flag);
2320     
2321     yaz_log(YLOG_DEBUG, "reg_id=%c", reg_id);
2322     yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2323     yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2324     yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2325
2326     if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2327         return ZEBRA_FAIL;
2328
2329     if (sort_flag)
2330         return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2331                              rank_type, rset_nmem, rset, kc);
2332     xpath_len = parse_xpath(zh, zapt, attributeSet, xpath, 10, stream);
2333     if (xpath_len >= 0)
2334     {
2335         xpath_use = 1016;
2336         if (xpath[xpath_len-1].part[0] == '@')
2337             xpath_use = 1015;
2338     }
2339
2340     if (!strcmp(search_type, "phrase"))
2341     {
2342         res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2343                                     reg_id, complete_flag, rank_type,
2344                                     xpath_use,
2345                                     num_bases, basenames, rset_nmem,
2346                                     rset, kc);
2347     }
2348     else if (!strcmp(search_type, "and-list"))
2349     {
2350         res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2351                                       reg_id, complete_flag, rank_type,
2352                                       xpath_use,
2353                                       num_bases, basenames, rset_nmem,
2354                                       rset, kc);
2355     }
2356     else if (!strcmp(search_type, "or-list"))
2357     {
2358         res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2359                                      reg_id, complete_flag, rank_type,
2360                                      xpath_use,
2361                                      num_bases, basenames, rset_nmem,
2362                                      rset, kc);
2363     }
2364     else if (!strcmp(search_type, "local"))
2365     {
2366         res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2367                                    rank_type, rset_nmem, rset, kc);
2368     }
2369     else if (!strcmp(search_type, "numeric"))
2370     {
2371         res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2372                                      reg_id, complete_flag, rank_type,
2373                                      xpath_use,
2374                                      num_bases, basenames, rset_nmem,
2375                                      rset, kc);
2376     }
2377     else
2378     {
2379         zebra_setError(zh, YAZ_BIB1_UNSUPP_STRUCTURE_ATTRIBUTE, 0);
2380         res = ZEBRA_FAIL;
2381     }
2382     if (res != ZEBRA_OK)
2383         return res;
2384     if (!*rset)
2385         return ZEBRA_FAIL;
2386     return rpn_search_xpath(zh, attributeSet, num_bases, basenames,
2387                             stream, rank_type, *rset, 
2388                             xpath_len, xpath, rset_nmem, rset, kc);
2389 }
2390
2391 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2392                                       oid_value attributeSet, 
2393                                       NMEM stream, NMEM rset_nmem,
2394                                       Z_SortKeySpecList *sort_sequence,
2395                                       int num_bases, char **basenames,
2396                                       RSET **result_sets, int *num_result_sets,
2397                                       Z_Operator *parent_op,
2398                                       struct rset_key_control *kc);
2399
2400 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2401                          oid_value attributeSet, 
2402                          NMEM stream, NMEM rset_nmem,
2403                          Z_SortKeySpecList *sort_sequence,
2404                          int num_bases, char **basenames,
2405                          RSET *result_set)
2406 {
2407     RSET *result_sets = 0;
2408     int num_result_sets = 0;
2409     ZEBRA_RES res;
2410     struct rset_key_control *kc = zebra_key_control_create(zh);
2411
2412     res = rpn_search_structure(zh, zs, attributeSet,
2413                                stream, rset_nmem,
2414                                sort_sequence, 
2415                                num_bases, basenames,
2416                                &result_sets, &num_result_sets,
2417                                0 /* no parent op */,
2418                                kc);
2419     if (res != ZEBRA_OK)
2420     {
2421         int i;
2422         for (i = 0; i<num_result_sets; i++)
2423             rset_delete(result_sets[i]);
2424         *result_set = 0;
2425     }
2426     else
2427     {
2428         assert(num_result_sets == 1);
2429         assert(result_sets);
2430         assert(*result_sets);
2431         *result_set = *result_sets;
2432     }
2433     (*kc->dec)(kc);
2434     return res;
2435 }
2436
2437 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2438                                oid_value attributeSet, 
2439                                NMEM stream, NMEM rset_nmem,
2440                                Z_SortKeySpecList *sort_sequence,
2441                                int num_bases, char **basenames,
2442                                RSET **result_sets, int *num_result_sets,
2443                                Z_Operator *parent_op,
2444                                struct rset_key_control *kc)
2445 {
2446     *num_result_sets = 0;
2447     if (zs->which == Z_RPNStructure_complex)
2448     {
2449         ZEBRA_RES res;
2450         Z_Operator *zop = zs->u.complex->roperator;
2451         RSET *result_sets_l = 0;
2452         int num_result_sets_l = 0;
2453         RSET *result_sets_r = 0;
2454         int num_result_sets_r = 0;
2455
2456         res = rpn_search_structure(zh, zs->u.complex->s1,
2457                                    attributeSet, stream, rset_nmem,
2458                                    sort_sequence,
2459                                    num_bases, basenames,
2460                                    &result_sets_l, &num_result_sets_l,
2461                                    zop, kc);
2462         if (res != ZEBRA_OK)
2463         {
2464             int i;
2465             for (i = 0; i<num_result_sets_l; i++)
2466                 rset_delete(result_sets_l[i]);
2467             return res;
2468         }
2469         res = rpn_search_structure(zh, zs->u.complex->s2,
2470                                    attributeSet, stream, rset_nmem,
2471                                    sort_sequence,
2472                                    num_bases, basenames,
2473                                    &result_sets_r, &num_result_sets_r,
2474                                    zop, kc);
2475         if (res != ZEBRA_OK)
2476         {
2477             int i;
2478             for (i = 0; i<num_result_sets_l; i++)
2479                 rset_delete(result_sets_l[i]);
2480             for (i = 0; i<num_result_sets_r; i++)
2481                 rset_delete(result_sets_r[i]);
2482             return res;
2483         }
2484
2485         /* make a new list of result for all children */
2486         *num_result_sets = num_result_sets_l + num_result_sets_r;
2487         *result_sets = nmem_malloc(stream, *num_result_sets * 
2488                                    sizeof(**result_sets));
2489         memcpy(*result_sets, result_sets_l, 
2490                num_result_sets_l * sizeof(**result_sets));
2491         memcpy(*result_sets + num_result_sets_l, result_sets_r, 
2492                num_result_sets_r * sizeof(**result_sets));
2493
2494         if (!parent_op || parent_op->which != zop->which
2495             || (zop->which != Z_Operator_and &&
2496                 zop->which != Z_Operator_or))
2497         {
2498             /* parent node different from this one (or non-present) */
2499             /* we must combine result sets now */
2500             RSET rset;
2501             switch (zop->which)
2502             {
2503             case Z_Operator_and:
2504                 rset = rsmulti_and_create(rset_nmem, kc,
2505                                           kc->scope,
2506                                           *num_result_sets, *result_sets);
2507                 break;
2508             case Z_Operator_or:
2509                 rset = rsmulti_or_create(rset_nmem, kc,
2510                                          kc->scope, 0, /* termid */
2511                                          *num_result_sets, *result_sets);
2512                 break;
2513             case Z_Operator_and_not:
2514                 rset = rsbool_create_not(rset_nmem, kc,
2515                                          kc->scope,
2516                                          (*result_sets)[0],
2517                                          (*result_sets)[1]);
2518                 break;
2519             case Z_Operator_prox:
2520                 if (zop->u.prox->which != Z_ProximityOperator_known)
2521                 {
2522                     zebra_setError(zh, 
2523                                    YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2524                                    0);
2525                     return ZEBRA_FAIL;
2526                 }
2527                 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2528                 {
2529                     zebra_setError_zint(zh,
2530                                         YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2531                                         *zop->u.prox->u.known);
2532                     return ZEBRA_FAIL;
2533                 }
2534                 else
2535                 {
2536                     rset = rsprox_create(rset_nmem, kc,
2537                                          kc->scope,
2538                                          *num_result_sets, *result_sets, 
2539                                          *zop->u.prox->ordered,
2540                                          (!zop->u.prox->exclusion ? 
2541                                           0 : *zop->u.prox->exclusion),
2542                                          *zop->u.prox->relationType,
2543                                          *zop->u.prox->distance );
2544                 }
2545                 break;
2546             default:
2547                 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
2548                 return ZEBRA_FAIL;
2549             }
2550             *num_result_sets = 1;
2551             *result_sets = nmem_malloc(stream, *num_result_sets * 
2552                                        sizeof(**result_sets));
2553             (*result_sets)[0] = rset;
2554         }
2555     }
2556     else if (zs->which == Z_RPNStructure_simple)
2557     {
2558         RSET rset;
2559         ZEBRA_RES res;
2560
2561         if (zs->u.simple->which == Z_Operand_APT)
2562         {
2563             yaz_log(YLOG_DEBUG, "rpn_search_APT");
2564             res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2565                                  attributeSet, stream, sort_sequence,
2566                                  num_bases, basenames, rset_nmem, &rset,
2567                                  kc);
2568             if (res != ZEBRA_OK)
2569                 return res;
2570         }
2571         else if (zs->u.simple->which == Z_Operand_resultSetId)
2572         {
2573             yaz_log(YLOG_DEBUG, "rpn_search_ref");
2574             rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2575             if (!rset)
2576             {
2577                 zebra_setError(zh, 
2578                                YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2579                                zs->u.simple->u.resultSetId);
2580                 return ZEBRA_FAIL;
2581             }
2582             rset_dup(rset);
2583         }
2584         else
2585         {
2586             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2587             return ZEBRA_FAIL;
2588         }
2589         *num_result_sets = 1;
2590         *result_sets = nmem_malloc(stream, *num_result_sets * 
2591                                    sizeof(**result_sets));
2592         (*result_sets)[0] = rset;
2593     }
2594     else
2595     {
2596         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2597         return ZEBRA_FAIL;
2598     }
2599     return ZEBRA_OK;
2600 }
2601
2602 struct scan_info_entry {
2603     char *term;
2604     ISAM_P isam_p;
2605 };
2606
2607 struct scan_info {
2608     struct scan_info_entry *list;
2609     ODR odr;
2610     int before, after;
2611     char prefix[20];
2612 };
2613
2614 static int scan_handle (char *name, const char *info, int pos, void *client)
2615 {
2616     int len_prefix, idx;
2617     struct scan_info *scan_info = (struct scan_info *) client;
2618
2619     len_prefix = strlen(scan_info->prefix);
2620     if (memcmp (name, scan_info->prefix, len_prefix))
2621         return 1;
2622     if (pos > 0)
2623         idx = scan_info->after - pos + scan_info->before;
2624     else
2625         idx = - pos - 1;
2626
2627     if (idx < 0)
2628         return 0;
2629     scan_info->list[idx].term = (char *)
2630         odr_malloc(scan_info->odr, strlen(name + len_prefix)+1);
2631     strcpy(scan_info->list[idx].term, name + len_prefix);
2632     assert (*info == sizeof(ISAM_P));
2633     memcpy (&scan_info->list[idx].isam_p, info+1, sizeof(ISAM_P));
2634     return 0;
2635 }
2636
2637 static void scan_term_untrans (ZebraHandle zh, NMEM stream, int reg_type,
2638                                char **dst, const char *src)
2639 {
2640     char term_src[IT_MAX_WORD];
2641     char term_dst[IT_MAX_WORD];
2642     
2643     term_untrans (zh, reg_type, term_src, src);
2644
2645     if (zh->iconv_from_utf8 != 0)
2646     {
2647         int len;
2648         char *inbuf = term_src;
2649         size_t inleft = strlen(term_src);
2650         char *outbuf = term_dst;
2651         size_t outleft = sizeof(term_dst)-1;
2652         size_t ret;
2653         
2654         ret = yaz_iconv (zh->iconv_from_utf8, &inbuf, &inleft,
2655                          &outbuf, &outleft);
2656         if (ret == (size_t)(-1))
2657             len = 0;
2658         else
2659             len = outbuf - term_dst;
2660         *dst = nmem_malloc(stream, len + 1);
2661         if (len > 0)
2662             memcpy (*dst, term_dst, len);
2663         (*dst)[len] = '\0';
2664     }
2665     else
2666         *dst = nmem_strdup(stream, term_src);
2667 }
2668
2669 static void count_set (RSET r, int *count)
2670 {
2671     zint psysno = 0;
2672     int kno = 0;
2673     struct it_key key;
2674     RSFD rfd;
2675
2676     yaz_log(YLOG_DEBUG, "count_set");
2677
2678     *count = 0;
2679     rfd = rset_open (r, RSETF_READ);
2680     while (rset_read (rfd, &key,0 /* never mind terms */))
2681     {
2682         if (key.mem[0] != psysno)
2683         {
2684             psysno = key.mem[0];
2685             (*count)++;
2686         }
2687         kno++;
2688     }
2689     rset_close (rfd);
2690     yaz_log(YLOG_DEBUG, "%d keys, %d records", kno, *count);
2691 }
2692
2693 ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
2694                    oid_value attributeset,
2695                    int num_bases, char **basenames,
2696                    int *position, int *num_entries, ZebraScanEntry **list,
2697                    int *is_partial, RSET limit_set, int return_zero)
2698 {
2699     int i;
2700     int pos = *position;
2701     int num = *num_entries;
2702     int before;
2703     int after;
2704     int base_no;
2705     char termz[IT_MAX_WORD+20];
2706     AttrType use;
2707     int use_value;
2708     const char *use_string = 0;
2709     struct scan_info *scan_info_array;
2710     ZebraScanEntry *glist;
2711     int ords[32], ord_no = 0;
2712     int ptr[32];
2713
2714     int bases_ok = 0;     /* no of databases with OK attribute */
2715     int errCode = 0;      /* err code (if any is not OK) */
2716     char *errString = 0;  /* addinfo */
2717
2718     unsigned reg_id;
2719     char *search_type = NULL;
2720     char rank_type[128];
2721     int complete_flag;
2722     int sort_flag;
2723     NMEM rset_nmem = NULL; 
2724     struct rset_key_control *kc = 0;
2725
2726     *list = 0;
2727     *is_partial = 0;
2728
2729     if (attributeset == VAL_NONE)
2730         attributeset = VAL_BIB1;
2731
2732     if (!limit_set)
2733     {
2734         AttrType termset;
2735         int termset_value_numeric;
2736         const char *termset_value_string;
2737         attr_init(&termset, zapt, 8);
2738         termset_value_numeric =
2739             attr_find_ex(&termset, NULL, &termset_value_string);
2740         if (termset_value_numeric != -1)
2741         {
2742             char resname[32];
2743             const char *termset_name = 0;
2744             
2745             if (termset_value_numeric != -2)
2746             {
2747                 
2748                 sprintf(resname, "%d", termset_value_numeric);
2749                 termset_name = resname;
2750             }
2751             else
2752                 termset_name = termset_value_string;
2753             
2754             limit_set = resultSetRef (zh, termset_name);
2755         }
2756     }
2757         
2758     yaz_log(YLOG_DEBUG, "position = %d, num = %d set=%d",
2759             pos, num, attributeset);
2760         
2761     attr_init(&use, zapt, 1);
2762     use_value = attr_find_ex(&use, &attributeset, &use_string);
2763
2764     if (zebra_maps_attr(zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2765                         rank_type, &complete_flag, &sort_flag))
2766     {
2767         *num_entries = 0;
2768         zebra_setError(zh, YAZ_BIB1_UNSUPP_ATTRIBUTE_TYPE, 0);
2769         return ZEBRA_FAIL;
2770     }
2771     yaz_log(YLOG_DEBUG, "use_value = %d", use_value);
2772
2773     if (use_value == -1)
2774         use_value = 1016;
2775     for (base_no = 0; base_no < num_bases && ord_no < 32; base_no++)
2776     {
2777         data1_local_attribute *local_attr;
2778         attent attp;
2779         int ord;
2780
2781         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2782         {
2783             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2784                            basenames[base_no]);
2785             *num_entries = 0;
2786             return ZEBRA_FAIL;
2787         }
2788
2789         if (use_string &&
2790             (ord = zebraExplain_lookup_attr_str(zh->reg->zei,
2791                                                 use_string)) >= 0)
2792         {
2793             /* we have a match for a raw string attribute */
2794             if (ord > 0)
2795                 ords[ord_no++] = ord;
2796             attp.local_attributes = 0;  /* no more attributes */
2797         }
2798         else
2799         {
2800             int r;
2801             
2802             if ((r = att_getentbyatt (zh, &attp, attributeset, use_value,
2803                                       use_string)))
2804             {
2805                 yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d",
2806                         attributeset, use_value);
2807                 if (r == -1)
2808                 {
2809                     errCode = YAZ_BIB1_UNSUPP_USE_ATTRIBUTE;
2810                     if (use_string)
2811                         zebra_setError(zh, YAZ_BIB1_UNSUPP_USE_ATTRIBUTE,
2812                                        use_string);
2813                     else
2814                         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_USE_ATTRIBUTE,
2815                                             use_value);
2816                 }   
2817                 else
2818                 {
2819                     zebra_setError(zh, YAZ_BIB1_UNSUPP_ATTRIBUTE_SET, 0);
2820                 }
2821                 continue;
2822             }
2823         }
2824         bases_ok++;
2825         for (local_attr = attp.local_attributes; local_attr && ord_no < 32;
2826              local_attr = local_attr->next)
2827         {
2828             ord = zebraExplain_lookup_attr_su(zh->reg->zei,
2829                                               attp.attset_ordinal,
2830                                               local_attr->local);
2831             if (ord > 0)
2832                 ords[ord_no++] = ord;
2833         }
2834     }
2835     if (!bases_ok && errCode)
2836     {
2837         zebra_setError(zh, errCode, errString);
2838         *num_entries = 0;
2839         return ZEBRA_FAIL;
2840     }
2841     if (ord_no == 0)
2842     {
2843         *num_entries = 0;
2844         return ZEBRA_OK;
2845     }
2846     /* prepare dictionary scanning */
2847     if (num < 1)
2848     {
2849         *num_entries = 0;
2850         return ZEBRA_OK;
2851     }
2852     before = pos-1;
2853     if (before < 0)
2854         before = 0;
2855     after = 1+num-pos;
2856     if (after < 0)
2857         after = 0;
2858     yaz_log(YLOG_DEBUG, "rpn_scan pos=%d num=%d before=%d "
2859             "after=%d before+after=%d",
2860             pos, num, before, after, before+after);
2861     scan_info_array = (struct scan_info *)
2862         odr_malloc(stream, ord_no * sizeof(*scan_info_array));
2863     for (i = 0; i < ord_no; i++)
2864     {
2865         int j, prefix_len = 0;
2866         int before_tmp = before, after_tmp = after;
2867         struct scan_info *scan_info = scan_info_array + i;
2868         struct rpn_char_map_info rcmi;
2869
2870         rpn_char_map_prepare (zh->reg, reg_id, &rcmi);
2871
2872         scan_info->before = before;
2873         scan_info->after = after;
2874         scan_info->odr = stream;
2875
2876         scan_info->list = (struct scan_info_entry *)
2877             odr_malloc(stream, (before+after) * sizeof(*scan_info->list));
2878         for (j = 0; j<before+after; j++)
2879             scan_info->list[j].term = NULL;
2880
2881         prefix_len += key_SU_encode (ords[i], termz + prefix_len);
2882         termz[prefix_len++] = reg_id;
2883         termz[prefix_len] = 0;
2884         strcpy(scan_info->prefix, termz);
2885
2886         if (trans_scan_term(zh, zapt, termz+prefix_len, reg_id) == ZEBRA_FAIL)
2887             return ZEBRA_FAIL;
2888         
2889         dict_scan(zh->reg->dict, termz, &before_tmp, &after_tmp,
2890                   scan_info, scan_handle);
2891     }
2892     glist = (ZebraScanEntry *)
2893         odr_malloc(stream, (before+after)*sizeof(*glist));
2894
2895     rset_nmem = nmem_create();
2896     kc = zebra_key_control_create(zh);
2897
2898     /* consider terms after main term */
2899     for (i = 0; i < ord_no; i++)
2900         ptr[i] = before;
2901     
2902     *is_partial = 0;
2903     for (i = 0; i<after; i++)
2904     {
2905         int j, j0 = -1;
2906         const char *mterm = NULL;
2907         const char *tst;
2908         RSET rset = 0;
2909         int lo = i + pos-1; /* offset in result list */
2910
2911         /* find: j0 is the first of the minimal values */
2912         for (j = 0; j < ord_no; j++)
2913         {
2914             if (ptr[j] < before+after && ptr[j] >= 0 &&
2915                 (tst = scan_info_array[j].list[ptr[j]].term) &&
2916                 (!mterm || strcmp (tst, mterm) < 0))
2917             {
2918                 j0 = j;
2919                 mterm = tst;
2920             }
2921         }
2922         if (j0 == -1)
2923             break;  /* no value found, stop */
2924
2925         /* get result set for first one , but only if it's within bounds */
2926         if (lo >= 0)
2927         {
2928             /* get result set for first term */
2929             scan_term_untrans(zh, stream->mem, reg_id,
2930                               &glist[lo].term, mterm);
2931             rset = rset_trunc(zh, &scan_info_array[j0].list[ptr[j0]].isam_p, 1,
2932                               glist[lo].term, strlen(glist[lo].term),
2933                               NULL, 0, zapt->term->which, rset_nmem, 
2934                               kc, kc->scope);
2935         }
2936         ptr[j0]++; /* move index for this set .. */
2937         /* get result set for remaining scan terms */
2938         for (j = j0+1; j<ord_no; j++)
2939         {
2940             if (ptr[j] < before+after && ptr[j] >= 0 &&
2941                 (tst = scan_info_array[j].list[ptr[j]].term) &&
2942                 !strcmp (tst, mterm))
2943             {
2944                 if (lo >= 0)
2945                 {
2946                     RSET rsets[2];
2947                     
2948                     rsets[0] = rset;
2949                     rsets[1] =
2950                         rset_trunc(
2951                             zh, &scan_info_array[j].list[ptr[j]].isam_p, 1,
2952                             glist[lo].term,
2953                             strlen(glist[lo].term), NULL, 0,
2954                             zapt->term->which,rset_nmem,
2955                             kc, kc->scope);
2956                     rset = rsmulti_or_create(rset_nmem, kc,
2957                                              kc->scope, 0 /* termid */,
2958                                              2, rsets);
2959                 }
2960                 ptr[j]++;
2961             }
2962         }
2963         if (lo >= 0)
2964         {
2965             /* merge with limit_set if given */
2966             if (limit_set)
2967             {
2968                 RSET rsets[2];
2969                 rsets[0] = rset;
2970                 rsets[1] = rset_dup(limit_set);
2971                 
2972                 rset = rsmulti_and_create(rset_nmem, kc,
2973                                           kc->scope,
2974                                           2, rsets);
2975             }
2976             /* count it */
2977             count_set(rset, &glist[lo].occurrences);
2978             rset_delete(rset);
2979         }
2980     }
2981     if (i < after)
2982     {
2983         *num_entries -= (after-i);
2984         *is_partial = 1;
2985         if (*num_entries < 0)
2986         {
2987             (*kc->dec)(kc);
2988             nmem_destroy(rset_nmem);
2989             *num_entries = 0;
2990             return ZEBRA_OK;
2991         }
2992     }
2993     /* consider terms before main term */
2994     for (i = 0; i<ord_no; i++)
2995         ptr[i] = 0;
2996     
2997     for (i = 0; i<before; i++)
2998     {
2999         int j, j0 = -1;
3000         const char *mterm = NULL;
3001         const char *tst;
3002         RSET rset;
3003         int lo = before-1-i; /* offset in result list */
3004         
3005         for (j = 0; j <ord_no; j++)
3006         {
3007             if (ptr[j] < before && ptr[j] >= 0 &&
3008                 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
3009                 (!mterm || strcmp (tst, mterm) > 0))
3010             {
3011                 j0 = j;
3012                     mterm = tst;
3013             }
3014         }
3015         if (j0 == -1)
3016             break;
3017         
3018         scan_term_untrans (zh, stream->mem, reg_id,
3019                            &glist[lo].term, mterm);
3020         
3021         rset = rset_trunc
3022             (zh, &scan_info_array[j0].list[before-1-ptr[j0]].isam_p, 1,
3023              glist[lo].term, strlen(glist[lo].term),
3024              NULL, 0, zapt->term->which,rset_nmem,
3025              kc, kc->scope);
3026         
3027         ptr[j0]++;
3028         
3029         for (j = j0+1; j<ord_no; j++)
3030         {
3031             if (ptr[j] < before && ptr[j] >= 0 &&
3032                 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
3033                 !strcmp (tst, mterm))
3034             {
3035                 RSET rsets[2];
3036                 
3037                 rsets[0] = rset;
3038                 rsets[1] = rset_trunc(
3039                     zh,
3040                     &scan_info_array[j].list[before-1-ptr[j]].isam_p, 1,
3041                     glist[lo].term,
3042                     strlen(glist[lo].term), NULL, 0,
3043                     zapt->term->which, rset_nmem,
3044                     kc, kc->scope);
3045                 rset = rsmulti_or_create(rset_nmem, kc,
3046                                          kc->scope, 0 /* termid */, 2, rsets);
3047                 
3048                 ptr[j]++;
3049             }
3050         }
3051         if (limit_set)
3052         {
3053             RSET rsets[2];
3054             rsets[0] = rset;
3055             rsets[1] = rset_dup(limit_set);
3056             
3057             rset = rsmulti_and_create(rset_nmem, kc,
3058                                       kc->scope, 2, rsets);
3059         }
3060         count_set (rset, &glist[lo].occurrences);
3061         rset_delete (rset);
3062     }
3063     (*kc->dec)(kc);
3064     nmem_destroy(rset_nmem);
3065     i = before-i;
3066     if (i)
3067     {
3068         *is_partial = 1;
3069         *position -= i;
3070         *num_entries -= i;
3071         if (*num_entries <= 0)
3072         {
3073             *num_entries = 0;
3074             return ZEBRA_OK;
3075         }
3076     }
3077     
3078     *list = glist + i;               /* list is set to first 'real' entry */
3079     
3080     yaz_log(YLOG_DEBUG, "position = %d, num_entries = %d",
3081             *position, *num_entries);
3082     return ZEBRA_OK;
3083 }
3084