Fixed and added a few Doxygen comments. Got rid of redundant function
[idzebra-moved-to-github.git] / index / zrpn.c
1 /* $Id: zrpn.c,v 1.194 2005-06-02 11:59:54 adam Exp $
2    Copyright (C) 1995-2005
3    Index Data ApS
4
5 This file is part of the Zebra server.
6
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra.  If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA.
21 */
22
23 #include <stdio.h>
24 #include <assert.h>
25 #ifdef WIN32
26 #include <io.h>
27 #else
28 #include <unistd.h>
29 #endif
30 #include <ctype.h>
31
32 #include <yaz/diagbib1.h>
33 #include "index.h"
34 #include <zebra_xpath.h>
35
36 #include <charmap.h>
37 #include <rset.h>
38
39 struct rpn_char_map_info
40 {
41     ZebraMaps zm;
42     int reg_type;
43 };
44
45 typedef struct
46 {
47     int type;
48     int major;
49     int minor;
50     Z_AttributesPlusTerm *zapt;
51 } AttrType;
52
53
/* Non-zero once log_level_rpn has been looked up (see add_isam_p). */
static int log_level_set = 0;
/* Log level used for the "rpn" log module; 0 until it has been looked up. */
static int log_level_rpn = 0;
56
57 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
58 {
59     struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
60     const char **out = zebra_maps_input(p->zm, p->reg_type, from, len, 0);
61 #if 0
62     if (out && *out)
63     {
64         const char *outp = *out;
65         yaz_log(YLOG_LOG, "---");
66         while (*outp)
67         {
68             yaz_log(YLOG_LOG, "%02X", *outp);
69             outp++;
70         }
71     }
72 #endif
73     return out;
74 }
75
76 static void rpn_char_map_prepare(struct zebra_register *reg, int reg_type,
77                                   struct rpn_char_map_info *map_info)
78 {
79     map_info->zm = reg->zebra_maps;
80     map_info->reg_type = reg_type;
81     dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
82 }
83
84 static int attr_find_ex(AttrType *src, oid_value *attributeSetP,
85                          const char **string_value)
86 {
87     int num_attributes;
88
89     num_attributes = src->zapt->attributes->num_attributes;
90     while (src->major < num_attributes)
91     {
92         Z_AttributeElement *element;
93
94         element = src->zapt->attributes->attributes[src->major];
95         if (src->type == *element->attributeType)
96         {
97             switch (element->which) 
98             {
99             case Z_AttributeValue_numeric:
100                 ++(src->major);
101                 if (element->attributeSet && attributeSetP)
102                 {
103                     oident *attrset;
104
105                     attrset = oid_getentbyoid(element->attributeSet);
106                     *attributeSetP = attrset->value;
107                 }
108                 return *element->value.numeric;
109                 break;
110             case Z_AttributeValue_complex:
111                 if (src->minor >= element->value.complex->num_list)
112                     break;
113                 if (element->attributeSet && attributeSetP)
114                 {
115                     oident *attrset;
116                     
117                     attrset = oid_getentbyoid(element->attributeSet);
118                     *attributeSetP = attrset->value;
119                 }
120                 if (element->value.complex->list[src->minor]->which ==  
121                     Z_StringOrNumeric_numeric)
122                 {
123                     ++(src->minor);
124                     return
125                         *element->value.complex->list[src->minor-1]->u.numeric;
126                 }
127                 else if (element->value.complex->list[src->minor]->which ==  
128                          Z_StringOrNumeric_string)
129                 {
130                     if (!string_value)
131                         break;
132                     ++(src->minor);
133                     *string_value = 
134                         element->value.complex->list[src->minor-1]->u.string;
135                     return -2;
136                 }
137                 else
138                     break;
139             default:
140                 assert(0);
141             }
142         }
143         ++(src->major);
144     }
145     return -1;
146 }
147
148 static int attr_find(AttrType *src, oid_value *attributeSetP)
149 {
150     return attr_find_ex(src, attributeSetP, 0);
151 }
152
153 static void attr_init(AttrType *src, Z_AttributesPlusTerm *zapt,
154                        int type)
155 {
156     src->zapt = zapt;
157     src->type = type;
158     src->major = 0;
159     src->minor = 0;
160 }
161
162 #define TERM_COUNT        
163        
164 struct grep_info {        
165 #ifdef TERM_COUNT        
166     int *term_no;        
167 #endif        
168     ISAM_P *isam_p_buf;
169     int isam_p_size;        
170     int isam_p_indx;
171     ZebraHandle zh;
172     int reg_type;
173     ZebraSet termset;
174 };        
175
176 void zebra_term_untrans(ZebraHandle zh, int reg_type,
177                         char *dst, const char *src)
178 {
179     int len = 0;
180     while (*src)
181     {
182         const char *cp = zebra_maps_output(zh->reg->zebra_maps,
183                                            reg_type, &src);
184         if (!cp && len < IT_MAX_WORD-1)
185             dst[len++] = *src++;
186         else
187             while (*cp && len < IT_MAX_WORD-1)
188                 dst[len++] = *cp++;
189     }
190     dst[len] = '\0';
191 }
192
193 static void add_isam_p(const char *name, const char *info,
194                        struct grep_info *p)
195 {
196     if (!log_level_set)
197     {
198         log_level_rpn = yaz_log_module_level("rpn");
199         log_level_set = 1;
200     }
201     if (p->isam_p_indx == p->isam_p_size)
202     {
203         ISAM_P *new_isam_p_buf;
204 #ifdef TERM_COUNT        
205         int *new_term_no;        
206 #endif
207         p->isam_p_size = 2*p->isam_p_size + 100;
208         new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
209                                             p->isam_p_size);
210         if (p->isam_p_buf)
211         {
212             memcpy(new_isam_p_buf, p->isam_p_buf,
213                     p->isam_p_indx * sizeof(*p->isam_p_buf));
214             xfree(p->isam_p_buf);
215         }
216         p->isam_p_buf = new_isam_p_buf;
217
218 #ifdef TERM_COUNT
219         new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
220         if (p->term_no)
221         {
222             memcpy(new_term_no, p->isam_p_buf,
223                     p->isam_p_indx * sizeof(*p->term_no));
224             xfree(p->term_no);
225         }
226         p->term_no = new_term_no;
227 #endif
228     }
229     assert(*info == sizeof(*p->isam_p_buf));
230     memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
231
232 #if 1
233     if (p->termset)
234     {
235         const char *db;
236         int set, use;
237         char term_tmp[IT_MAX_WORD];
238         int su_code = 0;
239         int len = key_SU_decode (&su_code, name);
240         
241         zebra_term_untrans  (p->zh, p->reg_type, term_tmp, name+len+1);
242         yaz_log(log_level_rpn, "grep: %d %c %s", su_code, name[len], term_tmp);
243         zebraExplain_lookup_ord (p->zh->reg->zei,
244                                  su_code, &db, &set, &use);
245         yaz_log(log_level_rpn, "grep:  set=%d use=%d db=%s", set, use, db);
246         
247         resultSetAddTerm(p->zh, p->termset, name[len], db,
248                          set, use, term_tmp);
249     }
250 #endif
251     (p->isam_p_indx)++;
252 }
253
/* grep_handle - hit callback: stores the hit in the grep_info passed as
 * p via add_isam_p().  Always returns 0.
 */
static int grep_handle(char *name, const char *info, void *p)
{
    struct grep_info *collector = (struct grep_info *) p;

    add_isam_p(name, info, collector);
    return 0;
}
259
260 static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src,
261                     const char *ct1, const char *ct2, int first)
262 {
263     const char *s1, *s0 = *src;
264     const char **map;
265
266     /* skip white space */
267     while (*s0)
268     {
269         if (ct1 && strchr(ct1, *s0))
270             break;
271         if (ct2 && strchr(ct2, *s0))
272             break;
273         s1 = s0;
274         map = zebra_maps_input(zebra_maps, reg_type, &s1, strlen(s1), first);
275         if (**map != *CHR_SPACE)
276             break;
277         s0 = s1;
278     }
279     *src = s0;
280     return *s0;
281 }
282
283
/* esc_str - render a byte buffer as readable text for debugging.
 *  out_buf/out_size:  destination; out_size must exceed 20
 *  in_buf/in_size:    bytes to render
 * Each byte becomes "HH:c  " (hex value, then the character itself, or
 * '?' when it is outside 32..126).  When the output gets within 20
 * bytes of out_size, ".." is appended and the rest is dropped.
 */
static void esc_str(char *out_buf, int out_size,
                    const char *in_buf, int in_size)
{
    size_t used = 0;
    int idx;

    assert(out_buf);
    assert(in_buf);
    assert(out_size > 20);
    out_buf[0] = '\0';
    for (idx = 0; idx < in_size; idx++)
    {
        const int byte = in_buf[idx] & 0xff;
        const int shown = (byte >= 32 && byte <= 126) ? byte : '?';

        used += sprintf(out_buf + used, "%02X:%c  ", byte, shown);
        if (used > (size_t) (out_size - 20))
        {
            strcpy(out_buf + used, "..");
            break;
        }
    }
}
309
310 #define REGEX_CHARS " []()|.*+?!"
311
312 /* term_100: handle term, where trunc = none(no operators at all) */
313 static int term_100(ZebraMaps zebra_maps, int reg_type,
314                     const char **src, char *dst, int space_split,
315                     char *dst_term)
316 {
317     const char *s0;
318     const char **map;
319     int i = 0;
320     int j = 0;
321
322     const char *space_start = 0;
323     const char *space_end = 0;
324
325     if (!term_pre(zebra_maps, reg_type, src, NULL, NULL, !space_split))
326         return 0;
327     s0 = *src;
328     while (*s0)
329     {
330         const char *s1 = s0;
331         int q_map_match = 0;
332         map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
333                                 &q_map_match);
334         if (space_split)
335         {
336             if (**map == *CHR_SPACE)
337                 break;
338         }
339         else  /* complete subfield only. */
340         {
341             if (**map == *CHR_SPACE)
342             {   /* save space mapping for later  .. */
343                 space_start = s1;
344                 space_end = s0;
345                 continue;
346             }
347             else if (space_start)
348             {   /* reload last space */
349                 while (space_start < space_end)
350                 {
351                     if (strchr(REGEX_CHARS, *space_start))
352                         dst[i++] = '\\';
353                     dst_term[j++] = *space_start;
354                     dst[i++] = *space_start++;
355                 }
356                 /* and reset */
357                 space_start = space_end = 0;
358             }
359         }
360         /* add non-space char */
361         memcpy(dst_term+j, s1, s0 - s1);
362         j += (s0 - s1);
363         if (!q_map_match)
364         {
365             while (s1 < s0)
366             {
367                 if (strchr(REGEX_CHARS, *s1))
368                     dst[i++] = '\\';
369                 dst[i++] = *s1++;
370             }
371         }
372         else
373         {
374             char tmpbuf[80];
375             esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
376             
377             strcpy(dst + i, map[0]);
378             i += strlen(map[0]);
379         }
380     }
381     dst[i] = '\0';
382     dst_term[j] = '\0';
383     *src = s0;
384     return i;
385 }
386
387 /* term_101: handle term, where trunc = Process # */
388 static int term_101(ZebraMaps zebra_maps, int reg_type,
389                     const char **src, char *dst, int space_split,
390                     char *dst_term)
391 {
392     const char *s0;
393     const char **map;
394     int i = 0;
395     int j = 0;
396
397     if (!term_pre(zebra_maps, reg_type, src, "#", "#", !space_split))
398         return 0;
399     s0 = *src;
400     while (*s0)
401     {
402         if (*s0 == '#')
403         {
404             dst[i++] = '.';
405             dst[i++] = '*';
406             dst_term[j++] = *s0++;
407         }
408         else
409         {
410             const char *s1 = s0;
411             int q_map_match = 0;
412             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
413                                     &q_map_match);
414             if (space_split && **map == *CHR_SPACE)
415                 break;
416
417             /* add non-space char */
418             memcpy(dst_term+j, s1, s0 - s1);
419             j += (s0 - s1);
420             if (!q_map_match)
421             {
422                 while (s1 < s0)
423                 {
424                     if (strchr(REGEX_CHARS, *s1))
425                         dst[i++] = '\\';
426                     dst[i++] = *s1++;
427                 }
428             }
429             else
430             {
431                 char tmpbuf[80];
432                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
433                 
434                 strcpy(dst + i, map[0]);
435                 i += strlen(map[0]);
436             }
437         }
438     }
439     dst[i] = '\0';
440     dst_term[j++] = '\0';
441     *src = s0;
442     return i;
443 }
444
445 /* term_103: handle term, where trunc = re-2 (regular expressions) */
446 static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src,
447                     char *dst, int *errors, int space_split,
448                     char *dst_term)
449 {
450     int i = 0;
451     int j = 0;
452     const char *s0;
453     const char **map;
454
455     if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split))
456         return 0;
457     s0 = *src;
458     if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
459         isdigit(((const unsigned char *)s0)[1]))
460     {
461         *errors = s0[1] - '0';
462         s0 += 3;
463         if (*errors > 3)
464             *errors = 3;
465     }
466     while (*s0)
467     {
468         if (strchr("^\\()[].*+?|-", *s0))
469         {
470             dst_term[j++] = *s0;
471             dst[i++] = *s0++;
472         }
473         else
474         {
475             const char *s1 = s0;
476             int q_map_match = 0;
477             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
478                                     &q_map_match);
479             if (space_split && **map == *CHR_SPACE)
480                 break;
481
482             /* add non-space char */
483             memcpy(dst_term+j, s1, s0 - s1);
484             j += (s0 - s1);
485             if (!q_map_match)
486             {
487                 while (s1 < s0)
488                 {
489                     if (strchr(REGEX_CHARS, *s1))
490                         dst[i++] = '\\';
491                     dst[i++] = *s1++;
492                 }
493             }
494             else
495             {
496                 char tmpbuf[80];
497                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
498                 
499                 strcpy(dst + i, map[0]);
500                 i += strlen(map[0]);
501             }
502         }
503     }
504     dst[i] = '\0';
505     dst_term[j] = '\0';
506     *src = s0;
507     
508     return i;
509 }
510
511 /* term_103: handle term, where trunc = re-1 (regular expressions) */
512 static int term_102(ZebraMaps zebra_maps, int reg_type, const char **src,
513                     char *dst, int space_split, char *dst_term)
514 {
515     return term_103(zebra_maps, reg_type, src, dst, NULL, space_split,
516                     dst_term);
517 }
518
519
520 /* term_104: handle term, where trunc = Process # and ! */
521 static int term_104(ZebraMaps zebra_maps, int reg_type,
522                     const char **src, char *dst, int space_split,
523                     char *dst_term)
524 {
525     const char *s0;
526     const char **map;
527     int i = 0;
528     int j = 0;
529
530     if (!term_pre(zebra_maps, reg_type, src, "?*#", "?*#", !space_split))
531         return 0;
532     s0 = *src;
533     while (*s0)
534     {
535         if (*s0 == '?')
536         {
537             dst_term[j++] = *s0++;
538             if (*s0 >= '0' && *s0 <= '9')
539             {
540                 int limit = 0;
541                 while (*s0 >= '0' && *s0 <= '9')
542                 {
543                     limit = limit * 10 + (*s0 - '0');
544                     dst_term[j++] = *s0++;
545                 }
546                 if (limit > 20)
547                     limit = 20;
548                 while (--limit >= 0)
549                 {
550                     dst[i++] = '.';
551                     dst[i++] = '?';
552                 }
553             }
554             else
555             {
556                 dst[i++] = '.';
557                 dst[i++] = '*';
558             }
559         }
560         else if (*s0 == '*')
561         {
562             dst[i++] = '.';
563             dst[i++] = '*';
564             dst_term[j++] = *s0++;
565         }
566         else if (*s0 == '#')
567         {
568             dst[i++] = '.';
569             dst_term[j++] = *s0++;
570         }
571         else
572         {
573             const char *s1 = s0;
574             int q_map_match = 0;
575             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
576                                     &q_map_match);
577             if (space_split && **map == *CHR_SPACE)
578                 break;
579
580             /* add non-space char */
581             memcpy(dst_term+j, s1, s0 - s1);
582             j += (s0 - s1);
583             if (!q_map_match)
584             {
585                 while (s1 < s0)
586                 {
587                     if (strchr(REGEX_CHARS, *s1))
588                         dst[i++] = '\\';
589                     dst[i++] = *s1++;
590                 }
591             }
592             else
593             {
594                 char tmpbuf[80];
595                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
596                 
597                 strcpy(dst + i, map[0]);
598                 i += strlen(map[0]);
599             }
600         }
601     }
602     dst[i] = '\0';
603     dst_term[j++] = '\0';
604     *src = s0;
605     return i;
606 }
607
608 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
609 static int term_105(ZebraMaps zebra_maps, int reg_type,
610                     const char **src, char *dst, int space_split,
611                     char *dst_term, int right_truncate)
612 {
613     const char *s0;
614     const char **map;
615     int i = 0;
616     int j = 0;
617
618     if (!term_pre(zebra_maps, reg_type, src, "*!", "*!", !space_split))
619         return 0;
620     s0 = *src;
621     while (*s0)
622     {
623         if (*s0 == '*')
624         {
625             dst[i++] = '.';
626             dst[i++] = '*';
627             dst_term[j++] = *s0++;
628         }
629         else if (*s0 == '!')
630         {
631             dst[i++] = '.';
632             dst_term[j++] = *s0++;
633         }
634         else
635         {
636             const char *s1 = s0;
637             int q_map_match = 0;
638             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
639                                     &q_map_match);
640             if (space_split && **map == *CHR_SPACE)
641                 break;
642
643             /* add non-space char */
644             memcpy(dst_term+j, s1, s0 - s1);
645             j += (s0 - s1);
646             if (!q_map_match)
647             {
648                 while (s1 < s0)
649                 {
650                     if (strchr(REGEX_CHARS, *s1))
651                         dst[i++] = '\\';
652                     dst[i++] = *s1++;
653                 }
654             }
655             else
656             {
657                 char tmpbuf[80];
658                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
659                 
660                 strcpy(dst + i, map[0]);
661                 i += strlen(map[0]);
662             }
663         }
664     }
665     if (right_truncate)
666     {
667         dst[i++] = '.';
668         dst[i++] = '*';
669     }
670     dst[i] = '\0';
671     
672     dst_term[j++] = '\0';
673     *src = s0;
674     return i;
675 }
676
677
678 /* gen_regular_rel - generate regular expression from relation
679  *  val:     border value (inclusive)
680  *  islt:    1 if <=; 0 if >=.
681  */
682 static void gen_regular_rel(char *dst, int val, int islt)
683 {
684     int dst_p;
685     int w, d, i;
686     int pos = 0;
687     char numstr[20];
688
689     yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
690     if (val >= 0)
691     {
692         if (islt)
693             strcpy(dst, "(-[0-9]+|(");
694         else
695             strcpy(dst, "((");
696     } 
697     else
698     {
699         if (!islt)
700         {
701             strcpy(dst, "([0-9]+|-(");
702             dst_p = strlen(dst);
703             islt = 1;
704         }
705         else
706         {
707             strcpy(dst, "(-(");
708             islt = 0;
709         }
710         val = -val;
711     }
712     dst_p = strlen(dst);
713     sprintf(numstr, "%d", val);
714     for (w = strlen(numstr); --w >= 0; pos++)
715     {
716         d = numstr[w];
717         if (pos > 0)
718         {
719             if (islt)
720             {
721                 if (d == '0')
722                     continue;
723                 d--;
724             } 
725             else
726             {
727                 if (d == '9')
728                     continue;
729                 d++;
730             }
731         }
732         
733         strcpy(dst + dst_p, numstr);
734         dst_p = strlen(dst) - pos - 1;
735
736         if (islt)
737         {
738             if (d != '0')
739             {
740                 dst[dst_p++] = '[';
741                 dst[dst_p++] = '0';
742                 dst[dst_p++] = '-';
743                 dst[dst_p++] = d;
744                 dst[dst_p++] = ']';
745             }
746             else
747                 dst[dst_p++] = d;
748         }
749         else
750         {
751             if (d != '9')
752             { 
753                 dst[dst_p++] = '[';
754                 dst[dst_p++] = d;
755                 dst[dst_p++] = '-';
756                 dst[dst_p++] = '9';
757                 dst[dst_p++] = ']';
758             }
759             else
760                 dst[dst_p++] = d;
761         }
762         for (i = 0; i<pos; i++)
763         {
764             dst[dst_p++] = '[';
765             dst[dst_p++] = '0';
766             dst[dst_p++] = '-';
767             dst[dst_p++] = '9';
768             dst[dst_p++] = ']';
769         }
770         dst[dst_p++] = '|';
771     }
772     dst[dst_p] = '\0';
773     if (islt)
774     {
775         /* match everything less than 10^(pos-1) */
776         strcat(dst, "0*");
777         for (i = 1; i<pos; i++)
778             strcat(dst, "[0-9]?");
779     }
780     else
781     {
782         /* match everything greater than 10^pos */
783         for (i = 0; i <= pos; i++)
784             strcat(dst, "[0-9]");
785         strcat(dst, "[0-9]*");
786     }
787     strcat(dst, "))");
788 }
789
/* string_rel_add_char - copy one (possibly backslash-escaped) character
 * from src[*indx] to **term_p.  A backslash is copied together with the
 * character following it.  Both *term_p and *indx are advanced past
 * what was copied.
 */
void string_rel_add_char(char **term_p, const char *src, int *indx)
{
    char *out = *term_p;
    int at = *indx;

    if (src[at] == '\\')
        *out++ = src[at++];
    *out++ = src[at++];

    *term_p = out;
    *indx = at;
}
796
797 /*
798  *   >  abc     ([b-].*|a[c-].*|ab[d-].*|abc.+)
799  *              ([^-a].*|a[^-b].*ab[^-c].*|abc.+)
800  *   >= abc     ([b-].*|a[c-].*|ab[c-].*)
801  *              ([^-a].*|a[^-b].*|ab[c-].*)
802  *   <  abc     ([-0].*|a[-a].*|ab[-b].*)
803  *              ([^a-].*|a[^b-].*|ab[^c-].*)
804  *   <= abc     ([-0].*|a[-a].*|ab[-b].*|abc)
805  *              ([^a-].*|a[^b-].*|ab[^c-].*|abc)
806  */
807 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
808                            const char **term_sub, char *term_dict,
809                            oid_value attributeSet,
810                            int reg_type, int space_split, char *term_dst,
811                            int *error_code)
812 {
813     AttrType relation;
814     int relation_value;
815     int i;
816     char *term_tmp = term_dict + strlen(term_dict);
817     char term_component[2*IT_MAX_WORD+20];
818
819     attr_init(&relation, zapt, 2);
820     relation_value = attr_find(&relation, NULL);
821
822     *error_code = 0;
823     yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
824     switch (relation_value)
825     {
826     case 1:
827         if (!term_100(zh->reg->zebra_maps, reg_type,
828                       term_sub, term_component,
829                       space_split, term_dst))
830             return 0;
831         yaz_log(log_level_rpn, "Relation <");
832         
833         *term_tmp++ = '(';
834         for (i = 0; term_component[i]; )
835         {
836             int j = 0;
837
838             if (i)
839                 *term_tmp++ = '|';
840             while (j < i)
841                 string_rel_add_char(&term_tmp, term_component, &j);
842
843             *term_tmp++ = '[';
844
845             *term_tmp++ = '^';
846             string_rel_add_char(&term_tmp, term_component, &i);
847             *term_tmp++ = '-';
848
849             *term_tmp++ = ']';
850             *term_tmp++ = '.';
851             *term_tmp++ = '*';
852
853             if ((term_tmp - term_dict) > IT_MAX_WORD)
854                 break;
855         }
856         *term_tmp++ = ')';
857         *term_tmp = '\0';
858         break;
859     case 2:
860         if (!term_100(zh->reg->zebra_maps, reg_type,
861                       term_sub, term_component,
862                       space_split, term_dst))
863             return 0;
864         yaz_log(log_level_rpn, "Relation <=");
865
866         *term_tmp++ = '(';
867         for (i = 0; term_component[i]; )
868         {
869             int j = 0;
870
871             while (j < i)
872                 string_rel_add_char(&term_tmp, term_component, &j);
873             *term_tmp++ = '[';
874
875             *term_tmp++ = '^';
876             string_rel_add_char(&term_tmp, term_component, &i);
877             *term_tmp++ = '-';
878
879             *term_tmp++ = ']';
880             *term_tmp++ = '.';
881             *term_tmp++ = '*';
882
883             *term_tmp++ = '|';
884
885             if ((term_tmp - term_dict) > IT_MAX_WORD)
886                 break;
887         }
888         for (i = 0; term_component[i]; )
889             string_rel_add_char(&term_tmp, term_component, &i);
890         *term_tmp++ = ')';
891         *term_tmp = '\0';
892         break;
893     case 5:
894         if (!term_100 (zh->reg->zebra_maps, reg_type,
895                        term_sub, term_component, space_split, term_dst))
896             return 0;
897         yaz_log(log_level_rpn, "Relation >");
898
899         *term_tmp++ = '(';
900         for (i = 0; term_component[i];)
901         {
902             int j = 0;
903
904             while (j < i)
905                 string_rel_add_char(&term_tmp, term_component, &j);
906             *term_tmp++ = '[';
907             
908             *term_tmp++ = '^';
909             *term_tmp++ = '-';
910             string_rel_add_char(&term_tmp, term_component, &i);
911
912             *term_tmp++ = ']';
913             *term_tmp++ = '.';
914             *term_tmp++ = '*';
915
916             *term_tmp++ = '|';
917
918             if ((term_tmp - term_dict) > IT_MAX_WORD)
919                 break;
920         }
921         for (i = 0; term_component[i];)
922             string_rel_add_char(&term_tmp, term_component, &i);
923         *term_tmp++ = '.';
924         *term_tmp++ = '+';
925         *term_tmp++ = ')';
926         *term_tmp = '\0';
927         break;
928     case 4:
929         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
930                       term_component, space_split, term_dst))
931             return 0;
932         yaz_log(log_level_rpn, "Relation >=");
933
934         *term_tmp++ = '(';
935         for (i = 0; term_component[i];)
936         {
937             int j = 0;
938
939             if (i)
940                 *term_tmp++ = '|';
941             while (j < i)
942                 string_rel_add_char(&term_tmp, term_component, &j);
943             *term_tmp++ = '[';
944
945             if (term_component[i+1])
946             {
947                 *term_tmp++ = '^';
948                 *term_tmp++ = '-';
949                 string_rel_add_char(&term_tmp, term_component, &i);
950             }
951             else
952             {
953                 string_rel_add_char(&term_tmp, term_component, &i);
954                 *term_tmp++ = '-';
955             }
956             *term_tmp++ = ']';
957             *term_tmp++ = '.';
958             *term_tmp++ = '*';
959
960             if ((term_tmp - term_dict) > IT_MAX_WORD)
961                 break;
962         }
963         *term_tmp++ = ')';
964         *term_tmp = '\0';
965         break;
966     case 3:
967     case 102:
968     case -1:
969         yaz_log(log_level_rpn, "Relation =");
970         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
971                       term_component, space_split, term_dst))
972             return 0;
973         strcat(term_tmp, "(");
974         strcat(term_tmp, term_component);
975         strcat(term_tmp, ")");
976         break;
977     default:
978         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
979         return 0;
980     }
981     return 1;
982 }
983
984 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
985                              const char **term_sub, 
986                              oid_value attributeSet, NMEM stream,
987                              struct grep_info *grep_info,
988                              int reg_type, int complete_flag,
989                              int num_bases, char **basenames,
990                              char *term_dst, int xpath_use);
991
992 static ZEBRA_RES term_trunc(ZebraHandle zh,
993                             Z_AttributesPlusTerm *zapt,
994                             const char **term_sub, 
995                             oid_value attributeSet, NMEM stream,
996                             struct grep_info *grep_info,
997                             int reg_type, int complete_flag,
998                             int num_bases, char **basenames,
999                             char *term_dst,
1000                             const char *rank_type, int xpath_use,
1001                             NMEM rset_nmem,
1002                             RSET *rset,
1003                             struct rset_key_control *kc)
1004 {
1005     ZEBRA_RES res;
1006     *rset = 0;
1007     grep_info->isam_p_indx = 0;
1008     res = string_term(zh, zapt, term_sub, attributeSet, stream, grep_info,
1009                       reg_type, complete_flag, num_bases, basenames,
1010                       term_dst, xpath_use);
1011     if (res != ZEBRA_OK)
1012         return res;
1013     if (!*term_sub)  /* no more terms ? */
1014         return res;
1015     yaz_log(log_level_rpn, "term: %s", term_dst);
1016     *rset = rset_trunc(zh, grep_info->isam_p_buf,
1017                        grep_info->isam_p_indx, term_dst,
1018                        strlen(term_dst), rank_type, 1 /* preserve pos */,
1019                        zapt->term->which, rset_nmem,
1020                        kc, kc->scope);
1021     if (!*rset)
1022         return ZEBRA_FAIL;
1023     return ZEBRA_OK;
1024 }
1025
1026 static char *nmem_strdup_i(NMEM nmem, int v)
1027 {
1028     char val_str[64];
1029     sprintf(val_str, "%d", v);
1030     return nmem_strdup(nmem, val_str);
1031 }
1032
/**
  \brief Parse one term and grep the dictionary for it in each database
  \param zh Zebra handle
  \param zapt attributes-plus-term node; use (type 1) and truncation
         (type 5) attributes are read from it
  \param term_sub in: where parsing of the term text starts; out: the
         position reached by the term helper, or 0 when a helper reported
         that no term could be extracted
  \param attributeSet default attribute set
  \param stream memory used for the error addinfo strings
  \param grep_info state handed to dict_lookup_grep
  \param reg_type register type; stored as last byte of the dictionary prefix
  \param complete_flag when non-zero, splitting on space is turned off
  \param num_bases number of databases
  \param basenames array of database names
  \param term_dst buffer handed on to the term_xxx / string_relation helpers
  \param xpath_use use attribute substituted for a string-valued use
         attribute when greater than zero
  \retval ZEBRA_OK lookup done (or no term left, see term_sub)
  \retval ZEBRA_FAIL database unavailable, unsupported relation or
          truncation attribute, or no database accepted the use attribute
*/
1033 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1034                              const char **term_sub, 
1035                              oid_value attributeSet, NMEM stream,
1036                              struct grep_info *grep_info,
1037                              int reg_type, int complete_flag,
1038                              int num_bases, char **basenames,
1039                              char *term_dst, int xpath_use)
1040 {
1041     char term_dict[2*IT_MAX_WORD+4000];
1042     int j, r, base_no;
1043     AttrType truncation;
1044     int truncation_value;
1045     AttrType use;
1046     int use_value;
1047     const char *use_string = 0;
1048     oid_value curAttributeSet = attributeSet;
1049     const char *termp;
1050     struct rpn_char_map_info rcmi;
1051     int space_split = complete_flag ? 0 : 1;
1052
1053     int bases_ok = 0;     /* no of databases with OK attribute */
1054     int errCode = 0;      /* err code (if any is not OK) */
1055     char *errString = 0;  /* addinfo */
1056
1057     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1058     attr_init(&use, zapt, 1);
1059     use_value = attr_find_ex(&use, &curAttributeSet, &use_string);
1060     yaz_log(log_level_rpn, "string_term, use value %d", use_value);
1061     attr_init(&truncation, zapt, 5);
1062     truncation_value = attr_find(&truncation, NULL);
1063     yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1064
1065     if (use_value == -1)    /* no attribute - assume "any" */
1066         use_value = 1016;
1067     for (base_no = 0; base_no < num_bases; base_no++)
1068     {
1069         int ord = -1;
1070         int attr_ok = 0;
1071         int regex_range = 0;
1072         int init_pos = 0;
1073         attent attp;
1074         data1_local_attribute id_xpath_attr;
1075         data1_local_attribute *local_attr;
1076         int max_pos, prefix_len = 0;
1077         int relation_error;
1078
        /* every database parses the same term, so restart from *term_sub */
1079         termp = *term_sub;
1080
1081         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1082         {
1083             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1084                            basenames[base_no]);
1085             return ZEBRA_FAIL;
1086         }
1087         if (xpath_use > 0 && use_value == -2) 
1088         {
1089             /* xpath mode and we have a string attribute */
1090             attp.local_attributes = &id_xpath_attr;
1091             attp.attset_ordinal = VAL_IDXPATH;
1092             id_xpath_attr.next = 0;
1093
            /* NOTE(review): use_value is overwritten here, so this branch
               is not taken again for later databases - confirm intended */
1094             use_value = xpath_use;  /* xpath_use as use-attribute now */
1095             id_xpath_attr.local = use_value;
1096         }
1097         else if (curAttributeSet == VAL_IDXPATH && use_value >= 0)
1098         {
1099             /* X-Path attribute, use numeric value directly */
1100             attp.local_attributes = &id_xpath_attr;
1101             attp.attset_ordinal = VAL_IDXPATH;
1102             id_xpath_attr.next = 0;
1103             id_xpath_attr.local = use_value;
1104         }
1105         else if (use_string &&
1106                  (ord = zebraExplain_lookup_attr_str(zh->reg->zei,
1107                                                      use_string)) >= 0)
1108         {
1109             /* we have a match for a raw string attribute */
1110             char ord_buf[32];
1111             int i, ord_len;
1112
1113             if (prefix_len)
1114                 term_dict[prefix_len++] = '|';
1115             else
1116                 term_dict[prefix_len++] = '(';
1117             
            /* each byte of the encoded ordinal is stored preceded by a
               byte with value 1 */
1118             ord_len = key_SU_encode (ord, ord_buf);
1119             for (i = 0; i<ord_len; i++)
1120             {
1121                 term_dict[prefix_len++] = 1;
1122                 term_dict[prefix_len++] = ord_buf[i];
1123             }
1124             attp.local_attributes = 0;  /* no more attributes */
1125         }
1126         else 
1127         {
1128             /* lookup in the .att files . Allow string as well */
1129             if ((r = att_getentbyatt (zh, &attp, curAttributeSet, use_value,
1130                                       use_string)))
1131             {
1132                 yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
1133                         curAttributeSet, use_value, r);
1134                 if (r == -1)
1135                 {
1136                     /* set was found, but value wasn't defined */
1137                     errCode = YAZ_BIB1_UNSUPP_USE_ATTRIBUTE;
1138                     if (use_string)
1139                         errString = nmem_strdup(stream, use_string);
1140                     else
1141                         errString = nmem_strdup_i (stream, use_value);
1142                 }
1143                 else
1144                 {
1145                     int oid[OID_SIZE];
1146                     struct oident oident;
1147                     
1148                     oident.proto = PROTO_Z3950;
1149                     oident.oclass = CLASS_ATTSET;
1150                     oident.value = curAttributeSet;
1151                     oid_ent_to_oid (&oident, oid);
1152                     
                    /* NOTE(review): oident.desc is not assigned in this
                       function; presumably oid_ent_to_oid fills it in -
                       confirm */
1153                     errCode = YAZ_BIB1_UNSUPP_ATTRIBUTE_SET;
1154                     errString = nmem_strdup(stream, oident.desc);
1155                 }
                /* skip this database; the error is only reported after
                   the loop if no database at all was OK */
1156                 continue;
1157             }
1158         }
        /* add one alternative to the prefix for every local attribute
           that has an ordinal; the prefix becomes (ord|ord|...) */
1159         for (local_attr = attp.local_attributes; local_attr;
1160              local_attr = local_attr->next)
1161         {
1162             char ord_buf[32];
1163             int i, ord_len;
1164             
1165             ord = zebraExplain_lookup_attr_su(zh->reg->zei,
1166                                               attp.attset_ordinal,
1167                                               local_attr->local);
1168             if (ord < 0)
1169                 continue;
1170             if (prefix_len)
1171                 term_dict[prefix_len++] = '|';
1172             else
1173                 term_dict[prefix_len++] = '(';
1174             
1175             ord_len = key_SU_encode (ord, ord_buf);
1176             for (i = 0; i<ord_len; i++)
1177             {
1178                 term_dict[prefix_len++] = 1;
1179                 term_dict[prefix_len++] = ord_buf[i];
1180             }
1181         }
        /* the database counts as OK once the attribute lookup passed;
           attr_ok additionally requires that at least one ordinal was
           written, otherwise the dictionary lookup below is skipped */
1182         bases_ok++;
1183         if (prefix_len)
1184             attr_ok = 1;
1185
        /* close the ordinal group and append the register type */
1186         term_dict[prefix_len++] = ')';
1187         term_dict[prefix_len++] = 1;
1188         term_dict[prefix_len++] = reg_type;
1189         yaz_log(log_level_rpn, "reg_type = %d", term_dict[prefix_len-1]);
1190         term_dict[prefix_len] = '\0';
        /* j is the write position for the term expression after the prefix */
1191         j = prefix_len;
        /* In every case below a helper returning 0 means no term was
           extracted: *term_sub is cleared and ZEBRA_OK returned, unless
           string_relation reported an error code */
1192         switch (truncation_value)
1193         {
1194         case -1:         /* not specified */
1195         case 100:        /* do not truncate */
1196             if (!string_relation (zh, zapt, &termp, term_dict,
1197                                   attributeSet,
1198                                   reg_type, space_split, term_dst,
1199                                   &relation_error))
1200             {
1201                 if (relation_error)
1202                 {
1203                     zebra_setError(zh, relation_error, 0);
1204                     return ZEBRA_FAIL;
1205                 }
1206                 *term_sub = 0;
1207                 return ZEBRA_OK;
1208             }
1209             break;
1210         case 1:          /* right truncation */
1211             term_dict[j++] = '(';
1212             if (!term_100(zh->reg->zebra_maps, reg_type,
1213                           &termp, term_dict + j, space_split, term_dst))
1214             {
1215                 *term_sub = 0;
1216                 return ZEBRA_OK;
1217             }
1218             strcat(term_dict, ".*)");
1219             break;
1220         case 2:          /* left truncation */
1221             term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1222             if (!term_100(zh->reg->zebra_maps, reg_type,
1223                           &termp, term_dict + j, space_split, term_dst))
1224             {
1225                 *term_sub = 0;
1226                 return ZEBRA_OK;
1227             }
1228             strcat(term_dict, ")");
1229             break;
1230         case 3:          /* left&right truncation */
1231             term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1232             if (!term_100(zh->reg->zebra_maps, reg_type,
1233                           &termp, term_dict + j, space_split, term_dst))
1234             {
1235                 *term_sub = 0;
1236                 return ZEBRA_OK;
1237             }
1238             strcat(term_dict, ".*)");
1239             break;
1240         case 101:        /* process # in term */
1241             term_dict[j++] = '(';
1242             if (!term_101(zh->reg->zebra_maps, reg_type,
1243                           &termp, term_dict + j, space_split, term_dst))
1244             {
1245                 *term_sub = 0;
1246                 return ZEBRA_OK;
1247             }
1248             strcat(term_dict, ")");
1249             break;
1250         case 102:        /* Regexp-1 */
1251             term_dict[j++] = '(';
1252             if (!term_102(zh->reg->zebra_maps, reg_type,
1253                           &termp, term_dict + j, space_split, term_dst))
1254             {
1255                 *term_sub = 0;
1256                 return ZEBRA_OK;
1257             }
1258             strcat(term_dict, ")");
1259             break;
1260         case 103:       /* Regexp-2 */
            /* only this case changes regex_range and init_pos, which are
               handed to dict_lookup_grep below */
1261             regex_range = 1;
1262             term_dict[j++] = '(';
1263             init_pos = 2;
1264             if (!term_103(zh->reg->zebra_maps, reg_type,
1265                           &termp, term_dict + j, &regex_range,
1266                           space_split, term_dst))
1267             {
1268                 *term_sub = 0;
1269                 return ZEBRA_OK;
1270             }
1271             strcat(term_dict, ")");
1272             break;
1273         case 104:        /* process # and ! in term */
1274             term_dict[j++] = '(';
1275             if (!term_104(zh->reg->zebra_maps, reg_type,
1276                           &termp, term_dict + j, space_split, term_dst))
1277             {
1278                 *term_sub = 0;
1279                 return ZEBRA_OK;
1280             }
1281             strcat(term_dict, ")");
1282             break;
1283         case 105:        /* process * and ! in term */
1284             term_dict[j++] = '(';
1285             if (!term_105(zh->reg->zebra_maps, reg_type,
1286                           &termp, term_dict + j, space_split, term_dst, 1))
1287             {
1288                 *term_sub = 0;
1289                 return ZEBRA_OK;
1290             }
1291             strcat(term_dict, ")");
1292             break;
            /* same helper as 105, but with 0 instead of 1 as the last
               argument; the meaning of that flag is defined by term_105 */
1293         case 106:        /* process * and ! in term */
1294             term_dict[j++] = '(';
1295             if (!term_105(zh->reg->zebra_maps, reg_type,
1296                           &termp, term_dict + j, space_split, term_dst, 0))
1297             {
1298                 *term_sub = 0;
1299                 return ZEBRA_OK;
1300             }
1301             strcat(term_dict, ")");
1302             break;
1303         default:
1304             zebra_setError_zint(zh,
1305                                 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1306                                 truncation_value);
1307             return ZEBRA_FAIL;
1308         }
        /* NOTE(review): buf is filled by esc_str but never read again in
           this function - looks like a leftover of removed logging */
1309         if (attr_ok)
1310         {
1311             char buf[80];
1312             const char *input = term_dict + prefix_len;
1313             esc_str(buf, sizeof(buf), input, strlen(input));
1314         }
1315         if (attr_ok)
1316         {
1317             yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_dict+prefix_len);
1318             r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
1319                                  grep_info, &max_pos, init_pos,
1320                                  grep_handle);
            /* a failing lookup is only logged; it does not fail the search */
1321             if (r)
1322                 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1323         }
1324     }
    /* no database passed the attribute lookup: report the error recorded
       for the last database that failed */
1325     if (!bases_ok)
1326     {
1327         zebra_setError(zh, errCode, errString);
1328         return ZEBRA_FAIL;
1329     }
    /* tell the caller how far the term text was consumed */
1330     *term_sub = termp;
1331     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1332     return ZEBRA_OK;
1333 }
1334
1335
1336 /* convert APT search term to UTF8 */
1337 static ZEBRA_RES zapt_term_to_utf8(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1338                                    char *termz)
1339 {
1340     size_t sizez;
1341     Z_Term *term = zapt->term;
1342
1343     switch (term->which)
1344     {
1345     case Z_Term_general:
1346         if (zh->iconv_to_utf8 != 0)
1347         {
1348             char *inbuf = term->u.general->buf;
1349             size_t inleft = term->u.general->len;
1350             char *outbuf = termz;
1351             size_t outleft = IT_MAX_WORD-1;
1352             size_t ret;
1353
1354             ret = yaz_iconv(zh->iconv_to_utf8, &inbuf, &inleft,
1355                         &outbuf, &outleft);
1356             if (ret == (size_t)(-1))
1357             {
1358                 ret = yaz_iconv(zh->iconv_to_utf8, 0, 0, 0, 0);
1359                 zebra_setError(
1360                     zh, 
1361                     YAZ_BIB1_QUERY_TERM_INCLUDES_CHARS_THAT_DO_NOT_TRANSLATE_INTO_,
1362                     0);
1363                 return ZEBRA_FAIL;
1364             }
1365             *outbuf = 0;
1366         }
1367         else
1368         {
1369             sizez = term->u.general->len;
1370             if (sizez > IT_MAX_WORD-1)
1371                 sizez = IT_MAX_WORD-1;
1372             memcpy (termz, term->u.general->buf, sizez);
1373             termz[sizez] = '\0';
1374         }
1375         break;
1376     case Z_Term_characterString:
1377         sizez = strlen(term->u.characterString);
1378         if (sizez > IT_MAX_WORD-1)
1379             sizez = IT_MAX_WORD-1;
1380         memcpy (termz, term->u.characterString, sizez);
1381         termz[sizez] = '\0';
1382         break;
1383     default:
1384         zebra_setError(zh, YAZ_BIB1_UNSUPP_CODED_VALUE_FOR_TERM, 0);
1385         return ZEBRA_FAIL;
1386     }
1387     return ZEBRA_OK;
1388 }
1389
1390 /* convert APT SCAN term to internal cmap */
1391 static ZEBRA_RES trans_scan_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1392                                  char *termz, int reg_type)
1393 {
1394     char termz0[IT_MAX_WORD];
1395
1396     if (zapt_term_to_utf8(zh, zapt, termz0) == ZEBRA_FAIL)
1397         return ZEBRA_FAIL;    /* error */
1398     else
1399     {
1400         const char **map;
1401         const char *cp = (const char *) termz0;
1402         const char *cp_end = cp + strlen(cp);
1403         const char *src;
1404         int i = 0;
1405         const char *space_map = NULL;
1406         int len;
1407             
1408         while ((len = (cp_end - cp)) > 0)
1409         {
1410             map = zebra_maps_input(zh->reg->zebra_maps, reg_type, &cp, len, 0);
1411             if (**map == *CHR_SPACE)
1412                 space_map = *map;
1413             else
1414             {
1415                 if (i && space_map)
1416                     for (src = space_map; *src; src++)
1417                         termz[i++] = *src;
1418                 space_map = NULL;
1419                 for (src = *map; *src; src++)
1420                     termz[i++] = *src;
1421             }
1422         }
1423         termz[i] = '\0';
1424     }
1425     return ZEBRA_OK;
1426 }
1427
1428 char *normalize_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1429                      const char *termz, NMEM stream, unsigned reg_id)
1430 {
1431     WRBUF wrbuf = 0;
1432     AttrType truncation;
1433     int truncation_value;
1434     char *ex_list = 0;
1435
1436     attr_init(&truncation, zapt, 5);
1437     truncation_value = attr_find(&truncation, NULL);
1438
1439     switch (truncation_value)
1440     {
1441     default:
1442         ex_list = "";
1443         break;
1444     case 101:
1445         ex_list = "#";
1446         break;
1447     case 102:
1448     case 103:
1449         ex_list = 0;
1450         break;
1451     case 104:
1452         ex_list = "!#";
1453         break;
1454     case 105:
1455         ex_list = "!*";
1456         break;
1457     }
1458     if (ex_list)
1459         wrbuf = zebra_replace(zh->reg->zebra_maps, reg_id, ex_list,
1460                               termz, strlen(termz));
1461     if (!wrbuf)
1462         return nmem_strdup(stream, termz);
1463     else
1464     {
1465         char *buf = (char*) nmem_malloc(stream, wrbuf_len(wrbuf)+1);
1466         memcpy (buf, wrbuf_buf(wrbuf), wrbuf_len(wrbuf));
1467         buf[wrbuf_len(wrbuf)] = '\0';
1468         return buf;
1469     }
1470 }
1471
1472 static void grep_info_delete(struct grep_info *grep_info)
1473 {
1474 #ifdef TERM_COUNT
1475     xfree(grep_info->term_no);
1476 #endif
1477     xfree(grep_info->isam_p_buf);
1478 }
1479
1480 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1481                                    Z_AttributesPlusTerm *zapt,
1482                                    struct grep_info *grep_info,
1483                                    int reg_type)
1484 {
1485     AttrType termset;
1486     int termset_value_numeric;
1487     const char *termset_value_string;
1488
1489 #ifdef TERM_COUNT
1490     grep_info->term_no = 0;
1491 #endif
1492     grep_info->isam_p_size = 0;
1493     grep_info->isam_p_buf = NULL;
1494     grep_info->zh = zh;
1495     grep_info->reg_type = reg_type;
1496     grep_info->termset = 0;
1497
1498     if (!zapt)
1499         return ZEBRA_OK;
1500     attr_init(&termset, zapt, 8);
1501     termset_value_numeric =
1502         attr_find_ex(&termset, NULL, &termset_value_string);
1503     if (termset_value_numeric != -1)
1504     {
1505         char resname[32];
1506         const char *termset_name = 0;
1507         if (termset_value_numeric != -2)
1508         {
1509     
1510             sprintf(resname, "%d", termset_value_numeric);
1511             termset_name = resname;
1512         }
1513         else
1514             termset_name = termset_value_string;
1515         yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1516         grep_info->termset = resultSetAdd(zh, termset_name, 1);
1517         if (!grep_info->termset)
1518         {
1519             zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1520             return ZEBRA_FAIL;
1521         }
1522     }
1523     return ZEBRA_OK;
1524 }
1525                                
1526 /**
1527   \brief Create result set(s) for list of terms
1528   \param zh Zebra Handle
1529   \param termz_org term as used in query but converted to UTF-8
1530   \param attributeSet default attribute set
1531   \param stream memory for result
1532   \param reg_type register type ('w', 'p',..)
1533   \param complete_flag whether it's phrases or not
1534   \param rank_type term flags for ranking
1535   \param xpath_use use attribute for X-Path (-1 for no X-path)
1536   \param num_bases number of databases
1537   \param basenames array of databases
1538   \param rset_mem memory for result sets
1539   \param result_sets output result set for each term in list (output)
1540   \param number number of output result sets
1541   \param kc rset key control to be used for created result sets
1542 */
1543 static ZEBRA_RES term_list_trunc(ZebraHandle zh,
1544                                  Z_AttributesPlusTerm *zapt,
1545                                  const char *termz_org,
1546                                  oid_value attributeSet,
1547                                  NMEM stream,
1548                                  int reg_type, int complete_flag,
1549                                  const char *rank_type, int xpath_use,
1550                                  int num_bases, char **basenames, 
1551                                  NMEM rset_nmem,
1552                                  RSET **result_sets, int *num_result_sets,
1553                                  struct rset_key_control *kc)
1554 {
1555     char term_dst[IT_MAX_WORD+1];
1556     struct grep_info grep_info;
1557     char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type);
1558     const char *termp = termz;
1559     int alloc_sets = 0;
1560
1561     *num_result_sets = 0;
1562     *term_dst = 0;
1563     if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1564         return ZEBRA_FAIL;
1565     while(1)
1566     { 
1567         ZEBRA_RES res;
1568
1569         if (alloc_sets == *num_result_sets)
1570         {
1571             int add = 10;
1572             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1573                                               sizeof(*rnew));
1574             if (alloc_sets)
1575                 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1576             alloc_sets = alloc_sets + add;
1577             *result_sets = rnew;
1578         }
1579         res = term_trunc(zh, zapt, &termp, attributeSet,
1580                          stream, &grep_info,
1581                          reg_type, complete_flag,
1582                          num_bases, basenames,
1583                          term_dst, rank_type,
1584                          xpath_use, rset_nmem,
1585                          &(*result_sets)[*num_result_sets],
1586                          kc);
1587         if (res != ZEBRA_OK)
1588         {
1589             int i;
1590             for (i = 0; i < *num_result_sets; i++)
1591                 rset_delete((*result_sets)[i]);
1592             grep_info_delete (&grep_info);
1593             return res;
1594         }
1595         if ((*result_sets)[*num_result_sets] == 0)
1596             break;
1597         (*num_result_sets)++;
1598     }
1599     grep_info_delete(&grep_info);
1600     return ZEBRA_OK;
1601 }
1602
1603 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1604                                        Z_AttributesPlusTerm *zapt,
1605                                        const char *termz_org,
1606                                        oid_value attributeSet,
1607                                        NMEM stream,
1608                                        int reg_type, int complete_flag,
1609                                        const char *rank_type, int xpath_use,
1610                                        int num_bases, char **basenames, 
1611                                        NMEM rset_nmem,
1612                                        RSET *rset,
1613                                        struct rset_key_control *kc)
1614 {
1615     RSET *result_sets = 0;
1616     int num_result_sets = 0;
1617     ZEBRA_RES res =
1618         term_list_trunc(zh, zapt, termz_org, attributeSet,
1619                         stream, reg_type, complete_flag,
1620                         rank_type, xpath_use,
1621                         num_bases, basenames,
1622                         rset_nmem,
1623                         &result_sets, &num_result_sets, kc);
1624     if (res != ZEBRA_OK)
1625         return res;
1626     if (num_result_sets == 0)
1627         *rset = rsnull_create (rset_nmem, kc, 0); 
1628     else if (num_result_sets == 1)
1629         *rset = result_sets[0];
1630     else
1631         *rset = rsprox_create(rset_nmem, kc, kc->scope,
1632                               num_result_sets, result_sets,
1633                               1 /* ordered */, 0 /* exclusion */,
1634                               3 /* relation */, 1 /* distance */);
1635     if (!*rset)
1636         return ZEBRA_FAIL;
1637     return ZEBRA_OK;
1638 }
1639
1640 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1641                                         Z_AttributesPlusTerm *zapt,
1642                                         const char *termz_org,
1643                                         oid_value attributeSet,
1644                                         NMEM stream,
1645                                         int reg_type, int complete_flag,
1646                                         const char *rank_type,
1647                                         int xpath_use,
1648                                         int num_bases, char **basenames,
1649                                         NMEM rset_nmem,
1650                                         RSET *rset,
1651                                         struct rset_key_control *kc)
1652 {
1653     RSET *result_sets = 0;
1654     int num_result_sets = 0;
1655     ZEBRA_RES res =
1656         term_list_trunc(zh, zapt, termz_org, attributeSet,
1657                         stream, reg_type, complete_flag,
1658                         rank_type, xpath_use,
1659                         num_bases, basenames,
1660                         rset_nmem,
1661                         &result_sets, &num_result_sets, kc);
1662     if (res != ZEBRA_OK)
1663         return res;
1664     if (num_result_sets == 0)
1665         *rset = rsnull_create (rset_nmem, kc, 0); 
1666     else if (num_result_sets == 1)
1667         *rset = result_sets[0];
1668     else
1669         *rset = rsmulti_or_create(rset_nmem, kc, kc->scope, 0 /* termid */,
1670                                   num_result_sets, result_sets);
1671     if (!*rset)
1672         return ZEBRA_FAIL;
1673     return ZEBRA_OK;
1674 }
1675
1676 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1677                                          Z_AttributesPlusTerm *zapt,
1678                                          const char *termz_org,
1679                                          oid_value attributeSet,
1680                                          NMEM stream,
1681                                          int reg_type, int complete_flag,
1682                                          const char *rank_type, 
1683                                          int xpath_use,
1684                                          int num_bases, char **basenames,
1685                                          NMEM rset_nmem,
1686                                          RSET *rset,
1687                                          struct rset_key_control *kc)
1688 {
1689     RSET *result_sets = 0;
1690     int num_result_sets = 0;
1691     ZEBRA_RES res =
1692         term_list_trunc(zh, zapt, termz_org, attributeSet,
1693                         stream, reg_type, complete_flag,
1694                         rank_type, xpath_use,
1695                         num_bases, basenames,
1696                         rset_nmem,
1697                         &result_sets, &num_result_sets,
1698                         kc);
1699     if (res != ZEBRA_OK)
1700         return res;
1701     if (num_result_sets == 0)
1702         *rset = rsnull_create (rset_nmem, kc, 0); 
1703     else if (num_result_sets == 1)
1704         *rset = result_sets[0];
1705     else
1706         *rset = rsmulti_and_create(rset_nmem, kc, kc->scope,
1707                                    num_result_sets, result_sets);
1708     if (!*rset)
1709         return ZEBRA_FAIL;
1710     return ZEBRA_OK;
1711 }
1712
1713 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1714                             const char **term_sub,
1715                             char *term_dict,
1716                             oid_value attributeSet,
1717                             struct grep_info *grep_info,
1718                             int *max_pos,
1719                             int reg_type,
1720                             char *term_dst,
1721                             int *error_code)
1722 {
1723     AttrType relation;
1724     int relation_value;
1725     int term_value;
1726     int r;
1727     char *term_tmp = term_dict + strlen(term_dict);
1728
1729     *error_code = 0;
1730     attr_init(&relation, zapt, 2);
1731     relation_value = attr_find(&relation, NULL);
1732
1733     yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1734
1735     if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1736                   term_dst))
1737         return 0;
1738     term_value = atoi (term_tmp);
1739     switch (relation_value)
1740     {
1741     case 1:
1742         yaz_log(log_level_rpn, "Relation <");
1743         gen_regular_rel(term_tmp, term_value-1, 1);
1744         break;
1745     case 2:
1746         yaz_log(log_level_rpn, "Relation <=");
1747         gen_regular_rel(term_tmp, term_value, 1);
1748         break;
1749     case 4:
1750         yaz_log(log_level_rpn, "Relation >=");
1751         gen_regular_rel(term_tmp, term_value, 0);
1752         break;
1753     case 5:
1754         yaz_log(log_level_rpn, "Relation >");
1755         gen_regular_rel(term_tmp, term_value+1, 0);
1756         break;
1757     case -1:
1758     case 3:
1759         yaz_log(log_level_rpn, "Relation =");
1760         sprintf(term_tmp, "(0*%d)", term_value);
1761         break;
1762     default:
1763         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1764         return 0;
1765     }
1766     yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_tmp);
1767     r = dict_lookup_grep(zh->reg->dict, term_dict, 0, grep_info, max_pos,
1768                           0, grep_handle);
1769     if (r)
1770         yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1771     yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1772     return 1;
1773 }
1774
1775 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1776                               const char **term_sub, 
1777                               oid_value attributeSet,
1778                               struct grep_info *grep_info,
1779                               int reg_type, int complete_flag,
1780                               int num_bases, char **basenames,
1781                               char *term_dst, int xpath_use, NMEM stream)
1782 {
1783     char term_dict[2*IT_MAX_WORD+2];
1784     int r, base_no;
1785     AttrType use;
1786     int use_value;
1787     const char *use_string = 0;
1788     oid_value curAttributeSet = attributeSet;
1789     const char *termp;
1790     struct rpn_char_map_info rcmi;
1791
1792     int bases_ok = 0;     /* no of databases with OK attribute */
1793     int errCode = 0;      /* err code (if any is not OK) */
1794     char *errString = 0;  /* addinfo */
1795
1796     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1797     attr_init(&use, zapt, 1);
1798     use_value = attr_find_ex(&use, &curAttributeSet, &use_string);
1799
1800     if (use_value == -1)
1801         use_value = 1016;
1802
1803     for (base_no = 0; base_no < num_bases; base_no++)
1804     {
1805         attent attp;
1806         data1_local_attribute id_xpath_attr;
1807         data1_local_attribute *local_attr;
1808         int max_pos, prefix_len = 0;
1809         int relation_error = 0;
1810
1811         termp = *term_sub;
1812         if (use_value == -2)  /* string attribute (assume IDXPATH/any) */
1813         {
1814             use_value = xpath_use;
1815             attp.local_attributes = &id_xpath_attr;
1816             attp.attset_ordinal = VAL_IDXPATH;
1817             id_xpath_attr.next = 0;
1818             id_xpath_attr.local = use_value;
1819         }
1820         else if (curAttributeSet == VAL_IDXPATH)
1821         {
1822             attp.local_attributes = &id_xpath_attr;
1823             attp.attset_ordinal = VAL_IDXPATH;
1824             id_xpath_attr.next = 0;
1825             id_xpath_attr.local = use_value;
1826         }
1827         else
1828         {
1829             if ((r = att_getentbyatt (zh, &attp, curAttributeSet, use_value,
1830                                             use_string)))
1831             {
1832                 yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
1833                       curAttributeSet, use_value, r);
1834                 if (r == -1)
1835                 {
1836                     errCode = YAZ_BIB1_UNSUPP_USE_ATTRIBUTE;
1837                     if (use_string)
1838                         errString = nmem_strdup(stream, use_string);
1839                     else
1840                         errString = nmem_strdup_i (stream, use_value);
1841                 }
1842                 else
1843                     errCode = YAZ_BIB1_UNSUPP_ATTRIBUTE_SET;
1844                 continue;
1845             }
1846         }
1847         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1848         {
1849             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1850                            basenames[base_no]);
1851             return ZEBRA_FAIL;
1852         }
1853         for (local_attr = attp.local_attributes; local_attr;
1854              local_attr = local_attr->next)
1855         {
1856             int ord;
1857             char ord_buf[32];
1858             int i, ord_len;
1859
1860             ord = zebraExplain_lookup_attr_su(zh->reg->zei,
1861                                               attp.attset_ordinal,
1862                                               local_attr->local);
1863             if (ord < 0)
1864                 continue;
1865             if (prefix_len)
1866                 term_dict[prefix_len++] = '|';
1867             else
1868                 term_dict[prefix_len++] = '(';
1869
1870             ord_len = key_SU_encode (ord, ord_buf);
1871             for (i = 0; i<ord_len; i++)
1872             {
1873                 term_dict[prefix_len++] = 1;
1874                 term_dict[prefix_len++] = ord_buf[i];
1875             }
1876         }
1877         if (!prefix_len)
1878         {
1879             zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_USE_ATTRIBUTE, use_value);
1880             continue;
1881         }
1882         bases_ok++;
1883         term_dict[prefix_len++] = ')';        
1884         term_dict[prefix_len++] = 1;
1885         term_dict[prefix_len++] = reg_type;
1886         yaz_log(YLOG_DEBUG, "reg_type = %d", term_dict[prefix_len-1]);
1887         term_dict[prefix_len] = '\0';
1888         if (!numeric_relation(zh, zapt, &termp, term_dict,
1889                               attributeSet, grep_info, &max_pos, reg_type,
1890                               term_dst, &relation_error))
1891         {
1892             if (relation_error)
1893             {
1894                 zebra_setError(zh, relation_error, 0);
1895                 return ZEBRA_FAIL;
1896             }
1897             *term_sub = 0;
1898             return ZEBRA_OK;
1899         }
1900     }
1901     if (!bases_ok)
1902     {
1903         zebra_setError(zh, errCode, errString);
1904         return ZEBRA_FAIL;
1905     }
1906     *term_sub = termp;
1907     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1908     return ZEBRA_OK;
1909 }
1910
1911 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1912                                         Z_AttributesPlusTerm *zapt,
1913                                         const char *termz,
1914                                         oid_value attributeSet,
1915                                         NMEM stream,
1916                                         int reg_type, int complete_flag,
1917                                         const char *rank_type, int xpath_use,
1918                                         int num_bases, char **basenames,
1919                                         NMEM rset_nmem,
1920                                         RSET *rset,
1921                                         struct rset_key_control *kc)
1922 {
1923     char term_dst[IT_MAX_WORD+1];
1924     const char *termp = termz;
1925     RSET *result_sets = 0;
1926     int num_result_sets = 0;
1927     ZEBRA_RES res;
1928     struct grep_info grep_info;
1929     int alloc_sets = 0;
1930
1931     yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1932     if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1933         return ZEBRA_FAIL;
1934     while (1)
1935     { 
1936         if (alloc_sets == num_result_sets)
1937         {
1938             int add = 10;
1939             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1940                                               sizeof(*rnew));
1941             if (alloc_sets)
1942                 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1943             alloc_sets = alloc_sets + add;
1944             result_sets = rnew;
1945         }
1946         yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1947         grep_info.isam_p_indx = 0;
1948         res = numeric_term(zh, zapt, &termp, attributeSet, &grep_info,
1949                            reg_type, complete_flag, num_bases, basenames,
1950                            term_dst, xpath_use,
1951                            stream);
1952         if (res == ZEBRA_FAIL || termp == 0)
1953             break;
1954         yaz_log(YLOG_DEBUG, "term: %s", term_dst);
1955         result_sets[num_result_sets] =
1956             rset_trunc(zh, grep_info.isam_p_buf,
1957                        grep_info.isam_p_indx, term_dst,
1958                        strlen(term_dst), rank_type,
1959                        0 /* preserve position */,
1960                        zapt->term->which, rset_nmem, 
1961                        kc, kc->scope);
1962         if (!result_sets[num_result_sets])
1963             break;
1964         num_result_sets++;
1965     }
1966     grep_info_delete(&grep_info);
1967     if (termp)
1968     {
1969         int i;
1970         for (i = 0; i<num_result_sets; i++)
1971             rset_delete(result_sets[i]);
1972         return ZEBRA_FAIL;
1973     }
1974     if (num_result_sets == 0)
1975         *rset = rsnull_create(rset_nmem, kc, 0);
1976     if (num_result_sets == 1)
1977         *rset = result_sets[0];
1978     else
1979         *rset = rsmulti_and_create(rset_nmem, kc, kc->scope,
1980                                    num_result_sets, result_sets);
1981     if (!*rset)
1982         return ZEBRA_FAIL;
1983     return ZEBRA_OK;
1984 }
1985
1986 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1987                                       Z_AttributesPlusTerm *zapt,
1988                                       const char *termz,
1989                                       oid_value attributeSet,
1990                                       NMEM stream,
1991                                       const char *rank_type, NMEM rset_nmem,
1992                                       RSET *rset,
1993                                       struct rset_key_control *kc)
1994 {
1995     RSFD rsfd;
1996     struct it_key key;
1997     int sys;
1998     *rset = rstemp_create(rset_nmem, kc, kc->scope,
1999                           res_get (zh->res, "setTmpDir"),0 );
2000     rsfd = rset_open(*rset, RSETF_WRITE);
2001     
2002     sys = atoi(termz);
2003     if (sys <= 0)
2004         sys = 1;
2005     key.mem[0] = sys;
2006     key.mem[1] = 1;
2007     key.len = 2;
2008     rset_write (rsfd, &key);
2009     rset_close (rsfd);
2010     return ZEBRA_OK;
2011 }
2012
2013 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2014                                oid_value attributeSet, NMEM stream,
2015                                Z_SortKeySpecList *sort_sequence,
2016                                const char *rank_type,
2017                                NMEM rset_nmem,
2018                                RSET *rset,
2019                                struct rset_key_control *kc)
2020 {
2021     int i;
2022     int sort_relation_value;
2023     AttrType sort_relation_type;
2024     Z_SortKeySpec *sks;
2025     Z_SortKey *sk;
2026     int oid[OID_SIZE];
2027     oident oe;
2028     char termz[20];
2029     
2030     attr_init(&sort_relation_type, zapt, 7);
2031     sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
2032
2033     if (!sort_sequence->specs)
2034     {
2035         sort_sequence->num_specs = 10;
2036         sort_sequence->specs = (Z_SortKeySpec **)
2037             nmem_malloc(stream, sort_sequence->num_specs *
2038                          sizeof(*sort_sequence->specs));
2039         for (i = 0; i<sort_sequence->num_specs; i++)
2040             sort_sequence->specs[i] = 0;
2041     }
2042     if (zapt->term->which != Z_Term_general)
2043         i = 0;
2044     else
2045         i = atoi_n ((char *) zapt->term->u.general->buf,
2046                     zapt->term->u.general->len);
2047     if (i >= sort_sequence->num_specs)
2048         i = 0;
2049     sprintf(termz, "%d", i);
2050
2051     oe.proto = PROTO_Z3950;
2052     oe.oclass = CLASS_ATTSET;
2053     oe.value = attributeSet;
2054     if (!oid_ent_to_oid (&oe, oid))
2055         return ZEBRA_FAIL;
2056
2057     sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
2058     sks->sortElement = (Z_SortElement *)
2059         nmem_malloc(stream, sizeof(*sks->sortElement));
2060     sks->sortElement->which = Z_SortElement_generic;
2061     sk = sks->sortElement->u.generic = (Z_SortKey *)
2062         nmem_malloc(stream, sizeof(*sk));
2063     sk->which = Z_SortKey_sortAttributes;
2064     sk->u.sortAttributes = (Z_SortAttributes *)
2065         nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
2066
2067     sk->u.sortAttributes->id = oid;
2068     sk->u.sortAttributes->list = zapt->attributes;
2069
2070     sks->sortRelation = (int *)
2071         nmem_malloc(stream, sizeof(*sks->sortRelation));
2072     if (sort_relation_value == 1)
2073         *sks->sortRelation = Z_SortKeySpec_ascending;
2074     else if (sort_relation_value == 2)
2075         *sks->sortRelation = Z_SortKeySpec_descending;
2076     else 
2077         *sks->sortRelation = Z_SortKeySpec_ascending;
2078
2079     sks->caseSensitivity = (int *)
2080         nmem_malloc(stream, sizeof(*sks->caseSensitivity));
2081     *sks->caseSensitivity = 0;
2082
2083     sks->which = Z_SortKeySpec_null;
2084     sks->u.null = odr_nullval ();
2085     sort_sequence->specs[i] = sks;
2086     *rset = rsnull_create (rset_nmem, kc, 0);
2087     return ZEBRA_OK;
2088 }
2089
2090
2091 static int parse_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2092                        oid_value attributeSet,
2093                        struct xpath_location_step *xpath, int max, NMEM mem)
2094 {
2095     oid_value curAttributeSet = attributeSet;
2096     AttrType use;
2097     const char *use_string = 0;
2098     
2099     attr_init(&use, zapt, 1);
2100     attr_find_ex(&use, &curAttributeSet, &use_string);
2101
2102     if (!use_string || *use_string != '/')
2103         return -1;
2104
2105     return zebra_parse_xpath_str(use_string, xpath, max, mem);
2106 }
2107  
2108                
2109
2110 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
2111                         int reg_type, const char *term, int use,
2112                         oid_value curAttributeSet, NMEM rset_nmem,
2113                         struct rset_key_control *kc)
2114 {
2115     RSET rset;
2116     struct grep_info grep_info;
2117     char term_dict[2048];
2118     char ord_buf[32];
2119     int prefix_len = 0;
2120     int ord = zebraExplain_lookup_attr_su(zh->reg->zei, curAttributeSet, use);
2121     int ord_len, i, r, max_pos;
2122     int term_type = Z_Term_characterString;
2123     const char *flags = "void";
2124
2125     if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, '0') == ZEBRA_FAIL)
2126         return rsnull_create(rset_nmem, kc, 0);
2127     
2128     if (ord < 0)
2129         return rsnull_create(rset_nmem, kc, 0);
2130     if (prefix_len)
2131         term_dict[prefix_len++] = '|';
2132     else
2133         term_dict[prefix_len++] = '(';
2134     
2135     ord_len = key_SU_encode (ord, ord_buf);
2136     for (i = 0; i<ord_len; i++)
2137     {
2138         term_dict[prefix_len++] = 1;
2139         term_dict[prefix_len++] = ord_buf[i];
2140     }
2141     term_dict[prefix_len++] = ')';
2142     term_dict[prefix_len++] = 1;
2143     term_dict[prefix_len++] = reg_type;
2144     
2145     strcpy(term_dict+prefix_len, term);
2146     
2147     grep_info.isam_p_indx = 0;
2148     r = dict_lookup_grep(zh->reg->dict, term_dict, 0,
2149                           &grep_info, &max_pos, 0, grep_handle);
2150     yaz_log(YLOG_DEBUG, "%s %d positions", term,
2151              grep_info.isam_p_indx);
2152     rset = rset_trunc(zh, grep_info.isam_p_buf,
2153                       grep_info.isam_p_indx, term, strlen(term),
2154                       flags, 1, term_type,rset_nmem,
2155                       kc, kc->scope);
2156     grep_info_delete(&grep_info);
2157     return rset;
2158 }
2159
2160 static
2161 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2162                            oid_value attributeSet,
2163                            int num_bases, char **basenames,
2164                            NMEM stream, const char *rank_type, RSET rset,
2165                            int xpath_len, struct xpath_location_step *xpath,
2166                            NMEM rset_nmem,
2167                            RSET *rset_out,
2168                            struct rset_key_control *kc)
2169 {
2170     oid_value curAttributeSet = attributeSet;
2171     int base_no;
2172     int i;
2173
2174     if (xpath_len < 0)
2175     {
2176         *rset_out = rset;
2177         return ZEBRA_OK;
2178     }
2179
2180     yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2181     for (i = 0; i<xpath_len; i++)
2182     {
2183         yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2184
2185     }
2186
2187     curAttributeSet = VAL_IDXPATH;
2188
2189     /*
2190       //a    ->    a/.*
2191       //a/b  ->    b/a/.*
2192       /a     ->    a/
2193       /a/b   ->    b/a/
2194
2195       /      ->    none
2196
2197    a[@attr = value]/b[@other = othervalue]
2198
2199  /e/@a val      range(e/,range(@a,freetext(w,1015,val),@a),e/)
2200  /a/b val       range(b/a/,freetext(w,1016,val),b/a/)
2201  /a/b/@c val    range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2202  /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2203  /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2204  /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
2205       
2206     */
2207
2208     dict_grep_cmap (zh->reg->dict, 0, 0);
2209
2210     for (base_no = 0; base_no < num_bases; base_no++)
2211     {
2212         int level = xpath_len;
2213         int first_path = 1;
2214         
2215         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2216         {
2217             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2218                            basenames[base_no]);
2219             *rset_out = rset;
2220             return ZEBRA_FAIL;
2221         }
2222         while (--level >= 0)
2223         {
2224             char xpath_rev[128];
2225             int i, len;
2226             RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
2227
2228             *xpath_rev = 0;
2229             len = 0;
2230             for (i = level; i >= 1; --i)
2231             {
2232                 const char *cp = xpath[i].part;
2233                 if (*cp)
2234                 {
2235                     for (;*cp; cp++)
2236                         if (*cp == '*')
2237                         {
2238                             memcpy (xpath_rev + len, "[^/]*", 5);
2239                             len += 5;
2240                         }
2241                         else if (*cp == ' ')
2242                         {
2243
2244                             xpath_rev[len++] = 1;
2245                             xpath_rev[len++] = ' ';
2246                         }
2247
2248                         else
2249                             xpath_rev[len++] = *cp;
2250                     xpath_rev[len++] = '/';
2251                 }
2252                 else if (i == 1)  /* // case */
2253                 {
2254                     xpath_rev[len++] = '.';
2255                     xpath_rev[len++] = '*';
2256                 }
2257             }
2258             xpath_rev[len] = 0;
2259
2260             if (xpath[level].predicate &&
2261                 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2262                 xpath[level].predicate->u.relation.name[0])
2263             {
2264                 WRBUF wbuf = wrbuf_alloc();
2265                 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2266                 if (xpath[level].predicate->u.relation.value)
2267                 {
2268                     const char *cp = xpath[level].predicate->u.relation.value;
2269                     wrbuf_putc(wbuf, '=');
2270                     
2271                     while (*cp)
2272                     {
2273                         if (strchr(REGEX_CHARS, *cp))
2274                             wrbuf_putc(wbuf, '\\');
2275                         wrbuf_putc(wbuf, *cp);
2276                         cp++;
2277                     }
2278                 }
2279                 wrbuf_puts(wbuf, "");
2280                 rset_attr = xpath_trunc(
2281                     zh, stream, '0', wrbuf_buf(wbuf), 3, 
2282                     curAttributeSet, rset_nmem, kc);
2283                 wrbuf_free(wbuf, 1);
2284             } 
2285             else 
2286             {
2287                 if (!first_path)
2288                     continue;
2289             }
2290             yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level, xpath_rev);
2291             if (strlen(xpath_rev))
2292             {
2293                 rset_start_tag = xpath_trunc(zh, stream, '0', 
2294                         xpath_rev, 1, curAttributeSet, rset_nmem, kc);
2295             
2296                 rset_end_tag = xpath_trunc(zh, stream, '0', 
2297                         xpath_rev, 2, curAttributeSet, rset_nmem, kc);
2298
2299                 rset = rsbetween_create(rset_nmem, kc, kc->scope,
2300                                         rset_start_tag, rset,
2301                                         rset_end_tag, rset_attr);
2302             }
2303             first_path = 0;
2304         }
2305     }
2306     *rset_out = rset;
2307     return ZEBRA_OK;
2308 }
2309
2310 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2311                                 oid_value attributeSet, NMEM stream,
2312                                 Z_SortKeySpecList *sort_sequence,
2313                                 int num_bases, char **basenames, 
2314                                 NMEM rset_nmem,
2315                                 RSET *rset,
2316                                 struct rset_key_control *kc)
2317 {
2318     ZEBRA_RES res = ZEBRA_OK;
2319     unsigned reg_id;
2320     char *search_type = NULL;
2321     char rank_type[128];
2322     int complete_flag;
2323     int sort_flag;
2324     char termz[IT_MAX_WORD+1];
2325     int xpath_len;
2326     int xpath_use = 0;
2327     struct xpath_location_step xpath[10];
2328
2329     if (!log_level_set)
2330     {
2331         log_level_rpn = yaz_log_module_level("rpn");
2332         log_level_set = 1;
2333     }
2334     zebra_maps_attr(zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2335                     rank_type, &complete_flag, &sort_flag);
2336     
2337     yaz_log(YLOG_DEBUG, "reg_id=%c", reg_id);
2338     yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2339     yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2340     yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2341
2342     if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2343         return ZEBRA_FAIL;
2344
2345     if (sort_flag)
2346         return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2347                              rank_type, rset_nmem, rset, kc);
2348     /* consider if an X-Path query is used */
2349     xpath_len = parse_xpath(zh, zapt, attributeSet, xpath, 10, stream);
2350     if (xpath_len >= 0)
2351     {
2352         xpath_use = 1016;  /* searching for element by default */
2353         if (xpath[xpath_len-1].part[0] == '@') 
2354             xpath_use = 1015;  /* last step an attribute .. */
2355     }
2356
2357     /* search using one of the various search type strategies
2358        termz is our UTF-8 search term
2359        attributeSet is top-level default attribute set 
2360        stream is ODR for search
2361        reg_id is the register type
2362        complete_flag is 1 for complete subfield, 0 for incomplete
2363        xpath_use is use-attribute to be used for X-Path search, 0 for none
2364     */
2365     if (!strcmp(search_type, "phrase"))
2366     {
2367         res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2368                                     reg_id, complete_flag, rank_type,
2369                                     xpath_use,
2370                                     num_bases, basenames, rset_nmem,
2371                                     rset, kc);
2372     }
2373     else if (!strcmp(search_type, "and-list"))
2374     {
2375         res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2376                                       reg_id, complete_flag, rank_type,
2377                                       xpath_use,
2378                                       num_bases, basenames, rset_nmem,
2379                                       rset, kc);
2380     }
2381     else if (!strcmp(search_type, "or-list"))
2382     {
2383         res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2384                                      reg_id, complete_flag, rank_type,
2385                                      xpath_use,
2386                                      num_bases, basenames, rset_nmem,
2387                                      rset, kc);
2388     }
2389     else if (!strcmp(search_type, "local"))
2390     {
2391         res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2392                                    rank_type, rset_nmem, rset, kc);
2393     }
2394     else if (!strcmp(search_type, "numeric"))
2395     {
2396         res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2397                                      reg_id, complete_flag, rank_type,
2398                                      xpath_use,
2399                                      num_bases, basenames, rset_nmem,
2400                                      rset, kc);
2401     }
2402     else
2403     {
2404         zebra_setError(zh, YAZ_BIB1_UNSUPP_STRUCTURE_ATTRIBUTE, 0);
2405         res = ZEBRA_FAIL;
2406     }
2407     if (res != ZEBRA_OK)
2408         return res;
2409     if (!*rset)
2410         return ZEBRA_FAIL;
2411     return rpn_search_xpath(zh, attributeSet, num_bases, basenames,
2412                             stream, rank_type, *rset, 
2413                             xpath_len, xpath, rset_nmem, rset, kc);
2414 }
2415
2416 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2417                                       oid_value attributeSet, 
2418                                       NMEM stream, NMEM rset_nmem,
2419                                       Z_SortKeySpecList *sort_sequence,
2420                                       int num_bases, char **basenames,
2421                                       RSET **result_sets, int *num_result_sets,
2422                                       Z_Operator *parent_op,
2423                                       struct rset_key_control *kc);
2424
2425 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2426                          oid_value attributeSet, 
2427                          NMEM stream, NMEM rset_nmem,
2428                          Z_SortKeySpecList *sort_sequence,
2429                          int num_bases, char **basenames,
2430                          RSET *result_set)
2431 {
2432     RSET *result_sets = 0;
2433     int num_result_sets = 0;
2434     ZEBRA_RES res;
2435     struct rset_key_control *kc = zebra_key_control_create(zh);
2436
2437     res = rpn_search_structure(zh, zs, attributeSet,
2438                                stream, rset_nmem,
2439                                sort_sequence, 
2440                                num_bases, basenames,
2441                                &result_sets, &num_result_sets,
2442                                0 /* no parent op */,
2443                                kc);
2444     if (res != ZEBRA_OK)
2445     {
2446         int i;
2447         for (i = 0; i<num_result_sets; i++)
2448             rset_delete(result_sets[i]);
2449         *result_set = 0;
2450     }
2451     else
2452     {
2453         assert(num_result_sets == 1);
2454         assert(result_sets);
2455         assert(*result_sets);
2456         *result_set = *result_sets;
2457     }
2458     (*kc->dec)(kc);
2459     return res;
2460 }
2461
2462 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2463                                oid_value attributeSet, 
2464                                NMEM stream, NMEM rset_nmem,
2465                                Z_SortKeySpecList *sort_sequence,
2466                                int num_bases, char **basenames,
2467                                RSET **result_sets, int *num_result_sets,
2468                                Z_Operator *parent_op,
2469                                struct rset_key_control *kc)
2470 {
2471     *num_result_sets = 0;
2472     if (zs->which == Z_RPNStructure_complex)
2473     {
2474         ZEBRA_RES res;
2475         Z_Operator *zop = zs->u.complex->roperator;
2476         RSET *result_sets_l = 0;
2477         int num_result_sets_l = 0;
2478         RSET *result_sets_r = 0;
2479         int num_result_sets_r = 0;
2480
2481         res = rpn_search_structure(zh, zs->u.complex->s1,
2482                                    attributeSet, stream, rset_nmem,
2483                                    sort_sequence,
2484                                    num_bases, basenames,
2485                                    &result_sets_l, &num_result_sets_l,
2486                                    zop, kc);
2487         if (res != ZEBRA_OK)
2488         {
2489             int i;
2490             for (i = 0; i<num_result_sets_l; i++)
2491                 rset_delete(result_sets_l[i]);
2492             return res;
2493         }
2494         res = rpn_search_structure(zh, zs->u.complex->s2,
2495                                    attributeSet, stream, rset_nmem,
2496                                    sort_sequence,
2497                                    num_bases, basenames,
2498                                    &result_sets_r, &num_result_sets_r,
2499                                    zop, kc);
2500         if (res != ZEBRA_OK)
2501         {
2502             int i;
2503             for (i = 0; i<num_result_sets_l; i++)
2504                 rset_delete(result_sets_l[i]);
2505             for (i = 0; i<num_result_sets_r; i++)
2506                 rset_delete(result_sets_r[i]);
2507             return res;
2508         }
2509
2510         /* make a new list of result for all children */
2511         *num_result_sets = num_result_sets_l + num_result_sets_r;
2512         *result_sets = nmem_malloc(stream, *num_result_sets * 
2513                                    sizeof(**result_sets));
2514         memcpy(*result_sets, result_sets_l, 
2515                num_result_sets_l * sizeof(**result_sets));
2516         memcpy(*result_sets + num_result_sets_l, result_sets_r, 
2517                num_result_sets_r * sizeof(**result_sets));
2518
2519         if (!parent_op || parent_op->which != zop->which
2520             || (zop->which != Z_Operator_and &&
2521                 zop->which != Z_Operator_or))
2522         {
2523             /* parent node different from this one (or non-present) */
2524             /* we must combine result sets now */
2525             RSET rset;
2526             switch (zop->which)
2527             {
2528             case Z_Operator_and:
2529                 rset = rsmulti_and_create(rset_nmem, kc,
2530                                           kc->scope,
2531                                           *num_result_sets, *result_sets);
2532                 break;
2533             case Z_Operator_or:
2534                 rset = rsmulti_or_create(rset_nmem, kc,
2535                                          kc->scope, 0, /* termid */
2536                                          *num_result_sets, *result_sets);
2537                 break;
2538             case Z_Operator_and_not:
2539                 rset = rsbool_create_not(rset_nmem, kc,
2540                                          kc->scope,
2541                                          (*result_sets)[0],
2542                                          (*result_sets)[1]);
2543                 break;
2544             case Z_Operator_prox:
2545                 if (zop->u.prox->which != Z_ProximityOperator_known)
2546                 {
2547                     zebra_setError(zh, 
2548                                    YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2549                                    0);
2550                     return ZEBRA_FAIL;
2551                 }
2552                 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2553                 {
2554                     zebra_setError_zint(zh,
2555                                         YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2556                                         *zop->u.prox->u.known);
2557                     return ZEBRA_FAIL;
2558                 }
2559                 else
2560                 {
2561                     rset = rsprox_create(rset_nmem, kc,
2562                                          kc->scope,
2563                                          *num_result_sets, *result_sets, 
2564                                          *zop->u.prox->ordered,
2565                                          (!zop->u.prox->exclusion ? 
2566                                           0 : *zop->u.prox->exclusion),
2567                                          *zop->u.prox->relationType,
2568                                          *zop->u.prox->distance );
2569                 }
2570                 break;
2571             default:
2572                 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
2573                 return ZEBRA_FAIL;
2574             }
2575             *num_result_sets = 1;
2576             *result_sets = nmem_malloc(stream, *num_result_sets * 
2577                                        sizeof(**result_sets));
2578             (*result_sets)[0] = rset;
2579         }
2580     }
2581     else if (zs->which == Z_RPNStructure_simple)
2582     {
2583         RSET rset;
2584         ZEBRA_RES res;
2585
2586         if (zs->u.simple->which == Z_Operand_APT)
2587         {
2588             yaz_log(YLOG_DEBUG, "rpn_search_APT");
2589             res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2590                                  attributeSet, stream, sort_sequence,
2591                                  num_bases, basenames, rset_nmem, &rset,
2592                                  kc);
2593             if (res != ZEBRA_OK)
2594                 return res;
2595         }
2596         else if (zs->u.simple->which == Z_Operand_resultSetId)
2597         {
2598             yaz_log(YLOG_DEBUG, "rpn_search_ref");
2599             rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2600             if (!rset)
2601             {
2602                 zebra_setError(zh, 
2603                                YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2604                                zs->u.simple->u.resultSetId);
2605                 return ZEBRA_FAIL;
2606             }
2607             rset_dup(rset);
2608         }
2609         else
2610         {
2611             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2612             return ZEBRA_FAIL;
2613         }
2614         *num_result_sets = 1;
2615         *result_sets = nmem_malloc(stream, *num_result_sets * 
2616                                    sizeof(**result_sets));
2617         (*result_sets)[0] = rset;
2618     }
2619     else
2620     {
2621         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2622         return ZEBRA_FAIL;
2623     }
2624     return ZEBRA_OK;
2625 }
2626
2627 struct scan_info_entry {
2628     char *term;
2629     ISAM_P isam_p;
2630 };
2631
2632 struct scan_info {
2633     struct scan_info_entry *list;
2634     ODR odr;
2635     int before, after;
2636     char prefix[20];
2637 };
2638
2639 static int scan_handle (char *name, const char *info, int pos, void *client)
2640 {
2641     int len_prefix, idx;
2642     struct scan_info *scan_info = (struct scan_info *) client;
2643
2644     len_prefix = strlen(scan_info->prefix);
2645     if (memcmp (name, scan_info->prefix, len_prefix))
2646         return 1;
2647     if (pos > 0)
2648         idx = scan_info->after - pos + scan_info->before;
2649     else
2650         idx = - pos - 1;
2651
2652     if (idx < 0)
2653         return 0;
2654     scan_info->list[idx].term = (char *)
2655         odr_malloc(scan_info->odr, strlen(name + len_prefix)+1);
2656     strcpy(scan_info->list[idx].term, name + len_prefix);
2657     assert (*info == sizeof(ISAM_P));
2658     memcpy (&scan_info->list[idx].isam_p, info+1, sizeof(ISAM_P));
2659     return 0;
2660 }
2661
2662 void zebra_term_untrans_iconv(ZebraHandle zh, NMEM stream, int reg_type,
2663                               char **dst, const char *src)
2664 {
2665     char term_src[IT_MAX_WORD];
2666     char term_dst[IT_MAX_WORD];
2667     
2668     zebra_term_untrans (zh, reg_type, term_src, src);
2669
2670     if (zh->iconv_from_utf8 != 0)
2671     {
2672         int len;
2673         char *inbuf = term_src;
2674         size_t inleft = strlen(term_src);
2675         char *outbuf = term_dst;
2676         size_t outleft = sizeof(term_dst)-1;
2677         size_t ret;
2678         
2679         ret = yaz_iconv (zh->iconv_from_utf8, &inbuf, &inleft,
2680                          &outbuf, &outleft);
2681         if (ret == (size_t)(-1))
2682             len = 0;
2683         else
2684             len = outbuf - term_dst;
2685         *dst = nmem_malloc(stream, len + 1);
2686         if (len > 0)
2687             memcpy (*dst, term_dst, len);
2688         (*dst)[len] = '\0';
2689     }
2690     else
2691         *dst = nmem_strdup(stream, term_src);
2692 }
2693
2694 static void count_set (RSET r, int *count)
2695 {
2696     zint psysno = 0;
2697     int kno = 0;
2698     struct it_key key;
2699     RSFD rfd;
2700
2701     yaz_log(YLOG_DEBUG, "count_set");
2702
2703     *count = 0;
2704     rfd = rset_open (r, RSETF_READ);
2705     while (rset_read (rfd, &key,0 /* never mind terms */))
2706     {
2707         if (key.mem[0] != psysno)
2708         {
2709             psysno = key.mem[0];
2710             (*count)++;
2711         }
2712         kno++;
2713     }
2714     rset_close (rfd);
2715     yaz_log(YLOG_DEBUG, "%d keys, %d records", kno, *count);
2716 }
2717
2718 ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
2719                    oid_value attributeset,
2720                    int num_bases, char **basenames,
2721                    int *position, int *num_entries, ZebraScanEntry **list,
2722                    int *is_partial, RSET limit_set, int return_zero)
2723 {
2724     int i;
2725     int pos = *position;
2726     int num = *num_entries;
2727     int before;
2728     int after;
2729     int base_no;
2730     char termz[IT_MAX_WORD+20];
2731     AttrType use;
2732     int use_value;
2733     const char *use_string = 0;
2734     struct scan_info *scan_info_array;
2735     ZebraScanEntry *glist;
2736     int ords[32], ord_no = 0;
2737     int ptr[32];
2738
2739     int bases_ok = 0;     /* no of databases with OK attribute */
2740     int errCode = 0;      /* err code (if any is not OK) */
2741     char *errString = 0;  /* addinfo */
2742
2743     unsigned reg_id;
2744     char *search_type = NULL;
2745     char rank_type[128];
2746     int complete_flag;
2747     int sort_flag;
2748     NMEM rset_nmem = NULL; 
2749     struct rset_key_control *kc = 0;
2750
2751     *list = 0;
2752     *is_partial = 0;
2753
2754     if (attributeset == VAL_NONE)
2755         attributeset = VAL_BIB1;
2756
2757     if (!limit_set)
2758     {
2759         AttrType termset;
2760         int termset_value_numeric;
2761         const char *termset_value_string;
2762         attr_init(&termset, zapt, 8);
2763         termset_value_numeric =
2764             attr_find_ex(&termset, NULL, &termset_value_string);
2765         if (termset_value_numeric != -1)
2766         {
2767             char resname[32];
2768             const char *termset_name = 0;
2769             
2770             if (termset_value_numeric != -2)
2771             {
2772                 
2773                 sprintf(resname, "%d", termset_value_numeric);
2774                 termset_name = resname;
2775             }
2776             else
2777                 termset_name = termset_value_string;
2778             
2779             limit_set = resultSetRef (zh, termset_name);
2780         }
2781     }
2782         
2783     yaz_log(YLOG_DEBUG, "position = %d, num = %d set=%d",
2784             pos, num, attributeset);
2785         
2786     attr_init(&use, zapt, 1);
2787     use_value = attr_find_ex(&use, &attributeset, &use_string);
2788
2789     if (zebra_maps_attr(zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2790                         rank_type, &complete_flag, &sort_flag))
2791     {
2792         *num_entries = 0;
2793         zebra_setError(zh, YAZ_BIB1_UNSUPP_ATTRIBUTE_TYPE, 0);
2794         return ZEBRA_FAIL;
2795     }
2796     yaz_log(YLOG_DEBUG, "use_value = %d", use_value);
2797
2798     if (use_value == -1)
2799         use_value = 1016;
2800     for (base_no = 0; base_no < num_bases && ord_no < 32; base_no++)
2801     {
2802         data1_local_attribute *local_attr;
2803         attent attp;
2804         int ord;
2805
2806         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2807         {
2808             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2809                            basenames[base_no]);
2810             *num_entries = 0;
2811             return ZEBRA_FAIL;
2812         }
2813
2814         if (use_string &&
2815             (ord = zebraExplain_lookup_attr_str(zh->reg->zei,
2816                                                 use_string)) >= 0)
2817         {
2818             /* we have a match for a raw string attribute */
2819             if (ord > 0)
2820                 ords[ord_no++] = ord;
2821             attp.local_attributes = 0;  /* no more attributes */
2822         }
2823         else
2824         {
2825             int r;
2826             
2827             if ((r = att_getentbyatt (zh, &attp, attributeset, use_value,
2828                                       use_string)))
2829             {
2830                 yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d",
2831                         attributeset, use_value);
2832                 if (r == -1)
2833                 {
2834                     errCode = YAZ_BIB1_UNSUPP_USE_ATTRIBUTE;
2835                     if (use_string)
2836                         zebra_setError(zh, YAZ_BIB1_UNSUPP_USE_ATTRIBUTE,
2837                                        use_string);
2838                     else
2839                         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_USE_ATTRIBUTE,
2840                                             use_value);
2841                 }   
2842                 else
2843                 {
2844                     zebra_setError(zh, YAZ_BIB1_UNSUPP_ATTRIBUTE_SET, 0);
2845                 }
2846                 continue;
2847             }
2848         }
2849         bases_ok++;
2850         for (local_attr = attp.local_attributes; local_attr && ord_no < 32;
2851              local_attr = local_attr->next)
2852         {
2853             ord = zebraExplain_lookup_attr_su(zh->reg->zei,
2854                                               attp.attset_ordinal,
2855                                               local_attr->local);
2856             if (ord > 0)
2857                 ords[ord_no++] = ord;
2858         }
2859     }
2860     if (!bases_ok && errCode)
2861     {
2862         zebra_setError(zh, errCode, errString);
2863         *num_entries = 0;
2864         return ZEBRA_FAIL;
2865     }
2866     if (ord_no == 0)
2867     {
2868         *num_entries = 0;
2869         return ZEBRA_OK;
2870     }
2871     /* prepare dictionary scanning */
2872     if (num < 1)
2873     {
2874         *num_entries = 0;
2875         return ZEBRA_OK;
2876     }
2877     before = pos-1;
2878     if (before < 0)
2879         before = 0;
2880     after = 1+num-pos;
2881     if (after < 0)
2882         after = 0;
2883     yaz_log(YLOG_DEBUG, "rpn_scan pos=%d num=%d before=%d "
2884             "after=%d before+after=%d",
2885             pos, num, before, after, before+after);
2886     scan_info_array = (struct scan_info *)
2887         odr_malloc(stream, ord_no * sizeof(*scan_info_array));
2888     for (i = 0; i < ord_no; i++)
2889     {
2890         int j, prefix_len = 0;
2891         int before_tmp = before, after_tmp = after;
2892         struct scan_info *scan_info = scan_info_array + i;
2893         struct rpn_char_map_info rcmi;
2894
2895         rpn_char_map_prepare (zh->reg, reg_id, &rcmi);
2896
2897         scan_info->before = before;
2898         scan_info->after = after;
2899         scan_info->odr = stream;
2900
2901         scan_info->list = (struct scan_info_entry *)
2902             odr_malloc(stream, (before+after) * sizeof(*scan_info->list));
2903         for (j = 0; j<before+after; j++)
2904             scan_info->list[j].term = NULL;
2905
2906         prefix_len += key_SU_encode (ords[i], termz + prefix_len);
2907         termz[prefix_len++] = reg_id;
2908         termz[prefix_len] = 0;
2909         strcpy(scan_info->prefix, termz);
2910
2911         if (trans_scan_term(zh, zapt, termz+prefix_len, reg_id) == ZEBRA_FAIL)
2912             return ZEBRA_FAIL;
2913         
2914         dict_scan(zh->reg->dict, termz, &before_tmp, &after_tmp,
2915                   scan_info, scan_handle);
2916     }
2917     glist = (ZebraScanEntry *)
2918         odr_malloc(stream, (before+after)*sizeof(*glist));
2919
2920     rset_nmem = nmem_create();
2921     kc = zebra_key_control_create(zh);
2922
2923     /* consider terms after main term */
2924     for (i = 0; i < ord_no; i++)
2925         ptr[i] = before;
2926     
2927     *is_partial = 0;
2928     for (i = 0; i<after; i++)
2929     {
2930         int j, j0 = -1;
2931         const char *mterm = NULL;
2932         const char *tst;
2933         RSET rset = 0;
2934         int lo = i + pos-1; /* offset in result list */
2935
2936         /* find: j0 is the first of the minimal values */
2937         for (j = 0; j < ord_no; j++)
2938         {
2939             if (ptr[j] < before+after && ptr[j] >= 0 &&
2940                 (tst = scan_info_array[j].list[ptr[j]].term) &&
2941                 (!mterm || strcmp (tst, mterm) < 0))
2942             {
2943                 j0 = j;
2944                 mterm = tst;
2945             }
2946         }
2947         if (j0 == -1)
2948             break;  /* no value found, stop */
2949
2950         /* get result set for first one , but only if it's within bounds */
2951         if (lo >= 0)
2952         {
2953             /* get result set for first term */
2954             zebra_term_untrans_iconv(zh, stream->mem, reg_id,
2955                                      &glist[lo].term, mterm);
2956             rset = rset_trunc(zh, &scan_info_array[j0].list[ptr[j0]].isam_p, 1,
2957                               glist[lo].term, strlen(glist[lo].term),
2958                               NULL, 0, zapt->term->which, rset_nmem, 
2959                               kc, kc->scope);
2960         }
2961         ptr[j0]++; /* move index for this set .. */
2962         /* get result set for remaining scan terms */
2963         for (j = j0+1; j<ord_no; j++)
2964         {
2965             if (ptr[j] < before+after && ptr[j] >= 0 &&
2966                 (tst = scan_info_array[j].list[ptr[j]].term) &&
2967                 !strcmp (tst, mterm))
2968             {
2969                 if (lo >= 0)
2970                 {
2971                     RSET rsets[2];
2972                     
2973                     rsets[0] = rset;
2974                     rsets[1] =
2975                         rset_trunc(
2976                             zh, &scan_info_array[j].list[ptr[j]].isam_p, 1,
2977                             glist[lo].term,
2978                             strlen(glist[lo].term), NULL, 0,
2979                             zapt->term->which,rset_nmem,
2980                             kc, kc->scope);
2981                     rset = rsmulti_or_create(rset_nmem, kc,
2982                                              kc->scope, 0 /* termid */,
2983                                              2, rsets);
2984                 }
2985                 ptr[j]++;
2986             }
2987         }
2988         if (lo >= 0)
2989         {
2990             /* merge with limit_set if given */
2991             if (limit_set)
2992             {
2993                 RSET rsets[2];
2994                 rsets[0] = rset;
2995                 rsets[1] = rset_dup(limit_set);
2996                 
2997                 rset = rsmulti_and_create(rset_nmem, kc,
2998                                           kc->scope,
2999                                           2, rsets);
3000             }
3001             /* count it */
3002             count_set(rset, &glist[lo].occurrences);
3003             rset_delete(rset);
3004         }
3005     }
3006     if (i < after)
3007     {
3008         *num_entries -= (after-i);
3009         *is_partial = 1;
3010         if (*num_entries < 0)
3011         {
3012             (*kc->dec)(kc);
3013             nmem_destroy(rset_nmem);
3014             *num_entries = 0;
3015             return ZEBRA_OK;
3016         }
3017     }
3018     /* consider terms before main term */
3019     for (i = 0; i<ord_no; i++)
3020         ptr[i] = 0;
3021     
3022     for (i = 0; i<before; i++)
3023     {
3024         int j, j0 = -1;
3025         const char *mterm = NULL;
3026         const char *tst;
3027         RSET rset;
3028         int lo = before-1-i; /* offset in result list */
3029         
3030         for (j = 0; j <ord_no; j++)
3031         {
3032             if (ptr[j] < before && ptr[j] >= 0 &&
3033                 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
3034                 (!mterm || strcmp (tst, mterm) > 0))
3035             {
3036                 j0 = j;
3037                     mterm = tst;
3038             }
3039         }
3040         if (j0 == -1)
3041             break;
3042         
3043         zebra_term_untrans_iconv(zh, stream->mem, reg_id,
3044                                  &glist[lo].term, mterm);
3045         
3046         rset = rset_trunc
3047             (zh, &scan_info_array[j0].list[before-1-ptr[j0]].isam_p, 1,
3048              glist[lo].term, strlen(glist[lo].term),
3049              NULL, 0, zapt->term->which, rset_nmem,
3050              kc, kc->scope);
3051         
3052         ptr[j0]++;
3053         
3054         for (j = j0+1; j<ord_no; j++)
3055         {
3056             if (ptr[j] < before && ptr[j] >= 0 &&
3057                 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
3058                 !strcmp (tst, mterm))
3059             {
3060                 RSET rsets[2];
3061                 
3062                 rsets[0] = rset;
3063                 rsets[1] = rset_trunc(
3064                     zh,
3065                     &scan_info_array[j].list[before-1-ptr[j]].isam_p, 1,
3066                     glist[lo].term,
3067                     strlen(glist[lo].term), NULL, 0,
3068                     zapt->term->which, rset_nmem,
3069                     kc, kc->scope);
3070                 rset = rsmulti_or_create(rset_nmem, kc,
3071                                          kc->scope, 0 /* termid */, 2, rsets);
3072                 
3073                 ptr[j]++;
3074             }
3075         }
3076         if (limit_set)
3077         {
3078             RSET rsets[2];
3079             rsets[0] = rset;
3080             rsets[1] = rset_dup(limit_set);
3081             
3082             rset = rsmulti_and_create(rset_nmem, kc,
3083                                       kc->scope, 2, rsets);
3084         }
3085         count_set (rset, &glist[lo].occurrences);
3086         rset_delete (rset);
3087     }
3088     (*kc->dec)(kc);
3089     nmem_destroy(rset_nmem);
3090     i = before-i;
3091     if (i)
3092     {
3093         *is_partial = 1;
3094         *position -= i;
3095         *num_entries -= i;
3096         if (*num_entries <= 0)
3097         {
3098             *num_entries = 0;
3099             return ZEBRA_OK;
3100         }
3101     }
3102     
3103     *list = glist + i;               /* list is set to first 'real' entry */
3104     
3105     yaz_log(YLOG_DEBUG, "position = %d, num_entries = %d",
3106             *position, *num_entries);
3107     return ZEBRA_OK;
3108 }
3109