Added support for term references (queryIDs) for searchResult.
[idzebra-moved-to-github.git] / index / zrpn.c
1 /* $Id: zrpn.c,v 1.201 2005-06-22 19:42:38 adam Exp $
2    Copyright (C) 1995-2005
3    Index Data ApS
4
5 This file is part of the Zebra server.
6
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra.  If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA.
21 */
22
23 #include <stdio.h>
24 #include <assert.h>
25 #ifdef WIN32
26 #include <io.h>
27 #endif
28 #if HAVE_UNISTD_H
29 #include <unistd.h>
30 #endif
31 #include <ctype.h>
32
33 #include <yaz/diagbib1.h>
34 #include "index.h"
35 #include <zebra_xpath.h>
36
37 #include <charmap.h>
38 #include <rset.h>
39
40 struct rpn_char_map_info
41 {
42     ZebraMaps zm;
43     int reg_type;
44 };
45
46 typedef struct
47 {
48     int type;
49     int major;
50     int minor;
51     Z_AttributesPlusTerm *zapt;
52 } AttrType;
53
/* Non-zero once log_level_rpn has been looked up. */
static int log_level_set = 0;
/* Level of the "rpn" log module; add_isam_p fetches it on first use. */
static int log_level_rpn = 0;
56
57 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
58 {
59     struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
60     const char **out = zebra_maps_input(p->zm, p->reg_type, from, len, 0);
61 #if 0
62     if (out && *out)
63     {
64         const char *outp = *out;
65         yaz_log(YLOG_LOG, "---");
66         while (*outp)
67         {
68             yaz_log(YLOG_LOG, "%02X", *outp);
69             outp++;
70         }
71     }
72 #endif
73     return out;
74 }
75
76 static void rpn_char_map_prepare(struct zebra_register *reg, int reg_type,
77                                   struct rpn_char_map_info *map_info)
78 {
79     map_info->zm = reg->zebra_maps;
80     map_info->reg_type = reg_type;
81     dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
82 }
83
84 static int attr_find_ex(AttrType *src, oid_value *attributeSetP,
85                          const char **string_value)
86 {
87     int num_attributes;
88
89     num_attributes = src->zapt->attributes->num_attributes;
90     while (src->major < num_attributes)
91     {
92         Z_AttributeElement *element;
93
94         element = src->zapt->attributes->attributes[src->major];
95         if (src->type == *element->attributeType)
96         {
97             switch (element->which) 
98             {
99             case Z_AttributeValue_numeric:
100                 ++(src->major);
101                 if (element->attributeSet && attributeSetP)
102                 {
103                     oident *attrset;
104
105                     attrset = oid_getentbyoid(element->attributeSet);
106                     *attributeSetP = attrset->value;
107                 }
108                 return *element->value.numeric;
109                 break;
110             case Z_AttributeValue_complex:
111                 if (src->minor >= element->value.complex->num_list)
112                     break;
113                 if (element->attributeSet && attributeSetP)
114                 {
115                     oident *attrset;
116                     
117                     attrset = oid_getentbyoid(element->attributeSet);
118                     *attributeSetP = attrset->value;
119                 }
120                 if (element->value.complex->list[src->minor]->which ==  
121                     Z_StringOrNumeric_numeric)
122                 {
123                     ++(src->minor);
124                     return
125                         *element->value.complex->list[src->minor-1]->u.numeric;
126                 }
127                 else if (element->value.complex->list[src->minor]->which ==  
128                          Z_StringOrNumeric_string)
129                 {
130                     if (!string_value)
131                         break;
132                     ++(src->minor);
133                     *string_value = 
134                         element->value.complex->list[src->minor-1]->u.string;
135                     return -2;
136                 }
137                 else
138                     break;
139             default:
140                 assert(0);
141             }
142         }
143         ++(src->major);
144     }
145     return -1;
146 }
147
148 static int attr_find(AttrType *src, oid_value *attributeSetP)
149 {
150     return attr_find_ex(src, attributeSetP, 0);
151 }
152
153 static void attr_init(AttrType *src, Z_AttributesPlusTerm *zapt,
154                        int type)
155 {
156     src->zapt = zapt;
157     src->type = type;
158     src->major = 0;
159     src->minor = 0;
160 }
161
162 #define TERM_COUNT        
163        
164 struct grep_info {        
165 #ifdef TERM_COUNT        
166     int *term_no;        
167 #endif        
168     ISAM_P *isam_p_buf;
169     int isam_p_size;        
170     int isam_p_indx;
171     ZebraHandle zh;
172     int reg_type;
173     ZebraSet termset;
174 };        
175
176 void zebra_term_untrans(ZebraHandle zh, int reg_type,
177                         char *dst, const char *src)
178 {
179     int len = 0;
180     while (*src)
181     {
182         const char *cp = zebra_maps_output(zh->reg->zebra_maps,
183                                            reg_type, &src);
184         if (!cp && len < IT_MAX_WORD-1)
185             dst[len++] = *src++;
186         else
187             while (*cp && len < IT_MAX_WORD-1)
188                 dst[len++] = *cp++;
189     }
190     dst[len] = '\0';
191 }
192
193 static void add_isam_p(const char *name, const char *info,
194                        struct grep_info *p)
195 {
196     if (!log_level_set)
197     {
198         log_level_rpn = yaz_log_module_level("rpn");
199         log_level_set = 1;
200     }
201     if (p->isam_p_indx == p->isam_p_size)
202     {
203         ISAM_P *new_isam_p_buf;
204 #ifdef TERM_COUNT        
205         int *new_term_no;        
206 #endif
207         p->isam_p_size = 2*p->isam_p_size + 100;
208         new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
209                                             p->isam_p_size);
210         if (p->isam_p_buf)
211         {
212             memcpy(new_isam_p_buf, p->isam_p_buf,
213                     p->isam_p_indx * sizeof(*p->isam_p_buf));
214             xfree(p->isam_p_buf);
215         }
216         p->isam_p_buf = new_isam_p_buf;
217
218 #ifdef TERM_COUNT
219         new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
220         if (p->term_no)
221         {
222             memcpy(new_term_no, p->isam_p_buf,
223                     p->isam_p_indx * sizeof(*p->term_no));
224             xfree(p->term_no);
225         }
226         p->term_no = new_term_no;
227 #endif
228     }
229     assert(*info == sizeof(*p->isam_p_buf));
230     memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
231
232 #if 1
233     if (p->termset)
234     {
235         const char *db;
236         int set, use;
237         char term_tmp[IT_MAX_WORD];
238         int su_code = 0;
239         int len = key_SU_decode (&su_code, name);
240         
241         zebra_term_untrans  (p->zh, p->reg_type, term_tmp, name+len+1);
242         yaz_log(log_level_rpn, "grep: %d %c %s", su_code, name[len], term_tmp);
243         zebraExplain_lookup_ord (p->zh->reg->zei,
244                                  su_code, &db, &set, &use);
245         yaz_log(log_level_rpn, "grep:  set=%d use=%d db=%s", set, use, db);
246         
247         resultSetAddTerm(p->zh, p->termset, name[len], db,
248                          set, use, term_tmp);
249     }
250 #endif
251     (p->isam_p_indx)++;
252 }
253
/* grep_handle: dictionary grep callback; p is the struct grep_info that
   collects the hit.  Always returns 0. */
static int grep_handle(char *name, const char *info, void *p)
{
    struct grep_info *state = p;

    add_isam_p(name, info, state);
    return 0;
}
259
260 static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src,
261                     const char *ct1, const char *ct2, int first)
262 {
263     const char *s1, *s0 = *src;
264     const char **map;
265
266     /* skip white space */
267     while (*s0)
268     {
269         if (ct1 && strchr(ct1, *s0))
270             break;
271         if (ct2 && strchr(ct2, *s0))
272             break;
273         s1 = s0;
274         map = zebra_maps_input(zebra_maps, reg_type, &s1, strlen(s1), first);
275         if (**map != *CHR_SPACE)
276             break;
277         s0 = s1;
278     }
279     *src = s0;
280     return *s0;
281 }
282
283
/*
 * esc_str: produce a printable dump of in_buf for diagnostics.
 *  out_buf, out_size:  destination buffer and its size (must exceed 20)
 *  in_buf, in_size:    bytes to dump
 * Each byte becomes "HH:c  " (hex value, then the character itself or '?'
 * when outside 32..126).  Once the output grows past out_size-20 the dump
 * is cut short and ".." is appended.
 */
static void esc_str(char *out_buf, int out_size,
                    const char *in_buf, int in_size)
{
    size_t used = 0;
    int pos;

    assert(out_buf);
    assert(in_buf);
    assert(out_size > 20);
    out_buf[0] = '\0';
    for (pos = 0; pos < in_size; pos++)
    {
        int byte = in_buf[pos] & 0xff;
        int shown = (byte >= 32 && byte <= 126) ? byte : '?';

        used += sprintf(out_buf + used, "%02X:%c  ", byte, shown);
        if (used > (size_t) (out_size - 20))
        {
            strcat(out_buf, "..");
            break;
        }
    }
}
309
/* Characters that the term_1xx functions prefix with a backslash when
   copying unmapped input into the dictionary regular expression. */
#define REGEX_CHARS " []()|.*+?!"
311
312 /* term_100: handle term, where trunc = none(no operators at all) */
313 static int term_100(ZebraMaps zebra_maps, int reg_type,
314                     const char **src, char *dst, int space_split,
315                     char *dst_term)
316 {
317     const char *s0;
318     const char **map;
319     int i = 0;
320     int j = 0;
321
322     const char *space_start = 0;
323     const char *space_end = 0;
324
325     if (!term_pre(zebra_maps, reg_type, src, NULL, NULL, !space_split))
326         return 0;
327     s0 = *src;
328     while (*s0)
329     {
330         const char *s1 = s0;
331         int q_map_match = 0;
332         map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
333                                 &q_map_match);
334         if (space_split)
335         {
336             if (**map == *CHR_SPACE)
337                 break;
338         }
339         else  /* complete subfield only. */
340         {
341             if (**map == *CHR_SPACE)
342             {   /* save space mapping for later  .. */
343                 space_start = s1;
344                 space_end = s0;
345                 continue;
346             }
347             else if (space_start)
348             {   /* reload last space */
349                 while (space_start < space_end)
350                 {
351                     if (strchr(REGEX_CHARS, *space_start))
352                         dst[i++] = '\\';
353                     dst_term[j++] = *space_start;
354                     dst[i++] = *space_start++;
355                 }
356                 /* and reset */
357                 space_start = space_end = 0;
358             }
359         }
360         /* add non-space char */
361         memcpy(dst_term+j, s1, s0 - s1);
362         j += (s0 - s1);
363         if (!q_map_match)
364         {
365             while (s1 < s0)
366             {
367                 if (strchr(REGEX_CHARS, *s1))
368                     dst[i++] = '\\';
369                 dst[i++] = *s1++;
370             }
371         }
372         else
373         {
374             char tmpbuf[80];
375             esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
376             
377             strcpy(dst + i, map[0]);
378             i += strlen(map[0]);
379         }
380     }
381     dst[i] = '\0';
382     dst_term[j] = '\0';
383     *src = s0;
384     return i;
385 }
386
387 /* term_101: handle term, where trunc = Process # */
388 static int term_101(ZebraMaps zebra_maps, int reg_type,
389                     const char **src, char *dst, int space_split,
390                     char *dst_term)
391 {
392     const char *s0;
393     const char **map;
394     int i = 0;
395     int j = 0;
396
397     if (!term_pre(zebra_maps, reg_type, src, "#", "#", !space_split))
398         return 0;
399     s0 = *src;
400     while (*s0)
401     {
402         if (*s0 == '#')
403         {
404             dst[i++] = '.';
405             dst[i++] = '*';
406             dst_term[j++] = *s0++;
407         }
408         else
409         {
410             const char *s1 = s0;
411             int q_map_match = 0;
412             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
413                                     &q_map_match);
414             if (space_split && **map == *CHR_SPACE)
415                 break;
416
417             /* add non-space char */
418             memcpy(dst_term+j, s1, s0 - s1);
419             j += (s0 - s1);
420             if (!q_map_match)
421             {
422                 while (s1 < s0)
423                 {
424                     if (strchr(REGEX_CHARS, *s1))
425                         dst[i++] = '\\';
426                     dst[i++] = *s1++;
427                 }
428             }
429             else
430             {
431                 char tmpbuf[80];
432                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
433                 
434                 strcpy(dst + i, map[0]);
435                 i += strlen(map[0]);
436             }
437         }
438     }
439     dst[i] = '\0';
440     dst_term[j++] = '\0';
441     *src = s0;
442     return i;
443 }
444
445 /* term_103: handle term, where trunc = re-2 (regular expressions) */
446 static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src,
447                     char *dst, int *errors, int space_split,
448                     char *dst_term)
449 {
450     int i = 0;
451     int j = 0;
452     const char *s0;
453     const char **map;
454
455     if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split))
456         return 0;
457     s0 = *src;
458     if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
459         isdigit(((const unsigned char *)s0)[1]))
460     {
461         *errors = s0[1] - '0';
462         s0 += 3;
463         if (*errors > 3)
464             *errors = 3;
465     }
466     while (*s0)
467     {
468         if (strchr("^\\()[].*+?|-", *s0))
469         {
470             dst_term[j++] = *s0;
471             dst[i++] = *s0++;
472         }
473         else
474         {
475             const char *s1 = s0;
476             int q_map_match = 0;
477             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
478                                     &q_map_match);
479             if (space_split && **map == *CHR_SPACE)
480                 break;
481
482             /* add non-space char */
483             memcpy(dst_term+j, s1, s0 - s1);
484             j += (s0 - s1);
485             if (!q_map_match)
486             {
487                 while (s1 < s0)
488                 {
489                     if (strchr(REGEX_CHARS, *s1))
490                         dst[i++] = '\\';
491                     dst[i++] = *s1++;
492                 }
493             }
494             else
495             {
496                 char tmpbuf[80];
497                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
498                 
499                 strcpy(dst + i, map[0]);
500                 i += strlen(map[0]);
501             }
502         }
503     }
504     dst[i] = '\0';
505     dst_term[j] = '\0';
506     *src = s0;
507     
508     return i;
509 }
510
511 /* term_103: handle term, where trunc = re-1 (regular expressions) */
512 static int term_102(ZebraMaps zebra_maps, int reg_type, const char **src,
513                     char *dst, int space_split, char *dst_term)
514 {
515     return term_103(zebra_maps, reg_type, src, dst, NULL, space_split,
516                     dst_term);
517 }
518
519
520 /* term_104: handle term, where trunc = Process # and ! */
521 static int term_104(ZebraMaps zebra_maps, int reg_type,
522                     const char **src, char *dst, int space_split,
523                     char *dst_term)
524 {
525     const char *s0;
526     const char **map;
527     int i = 0;
528     int j = 0;
529
530     if (!term_pre(zebra_maps, reg_type, src, "?*#", "?*#", !space_split))
531         return 0;
532     s0 = *src;
533     while (*s0)
534     {
535         if (*s0 == '?')
536         {
537             dst_term[j++] = *s0++;
538             if (*s0 >= '0' && *s0 <= '9')
539             {
540                 int limit = 0;
541                 while (*s0 >= '0' && *s0 <= '9')
542                 {
543                     limit = limit * 10 + (*s0 - '0');
544                     dst_term[j++] = *s0++;
545                 }
546                 if (limit > 20)
547                     limit = 20;
548                 while (--limit >= 0)
549                 {
550                     dst[i++] = '.';
551                     dst[i++] = '?';
552                 }
553             }
554             else
555             {
556                 dst[i++] = '.';
557                 dst[i++] = '*';
558             }
559         }
560         else if (*s0 == '*')
561         {
562             dst[i++] = '.';
563             dst[i++] = '*';
564             dst_term[j++] = *s0++;
565         }
566         else if (*s0 == '#')
567         {
568             dst[i++] = '.';
569             dst_term[j++] = *s0++;
570         }
571         else
572         {
573             const char *s1 = s0;
574             int q_map_match = 0;
575             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
576                                     &q_map_match);
577             if (space_split && **map == *CHR_SPACE)
578                 break;
579
580             /* add non-space char */
581             memcpy(dst_term+j, s1, s0 - s1);
582             j += (s0 - s1);
583             if (!q_map_match)
584             {
585                 while (s1 < s0)
586                 {
587                     if (strchr(REGEX_CHARS, *s1))
588                         dst[i++] = '\\';
589                     dst[i++] = *s1++;
590                 }
591             }
592             else
593             {
594                 char tmpbuf[80];
595                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
596                 
597                 strcpy(dst + i, map[0]);
598                 i += strlen(map[0]);
599             }
600         }
601     }
602     dst[i] = '\0';
603     dst_term[j++] = '\0';
604     *src = s0;
605     return i;
606 }
607
608 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
609 static int term_105(ZebraMaps zebra_maps, int reg_type,
610                     const char **src, char *dst, int space_split,
611                     char *dst_term, int right_truncate)
612 {
613     const char *s0;
614     const char **map;
615     int i = 0;
616     int j = 0;
617
618     if (!term_pre(zebra_maps, reg_type, src, "*!", "*!", !space_split))
619         return 0;
620     s0 = *src;
621     while (*s0)
622     {
623         if (*s0 == '*')
624         {
625             dst[i++] = '.';
626             dst[i++] = '*';
627             dst_term[j++] = *s0++;
628         }
629         else if (*s0 == '!')
630         {
631             dst[i++] = '.';
632             dst_term[j++] = *s0++;
633         }
634         else
635         {
636             const char *s1 = s0;
637             int q_map_match = 0;
638             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
639                                     &q_map_match);
640             if (space_split && **map == *CHR_SPACE)
641                 break;
642
643             /* add non-space char */
644             memcpy(dst_term+j, s1, s0 - s1);
645             j += (s0 - s1);
646             if (!q_map_match)
647             {
648                 while (s1 < s0)
649                 {
650                     if (strchr(REGEX_CHARS, *s1))
651                         dst[i++] = '\\';
652                     dst[i++] = *s1++;
653                 }
654             }
655             else
656             {
657                 char tmpbuf[80];
658                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
659                 
660                 strcpy(dst + i, map[0]);
661                 i += strlen(map[0]);
662             }
663         }
664     }
665     if (right_truncate)
666     {
667         dst[i++] = '.';
668         dst[i++] = '*';
669     }
670     dst[i] = '\0';
671     
672     dst_term[j++] = '\0';
673     *src = s0;
674     return i;
675 }
676
677
678 /* gen_regular_rel - generate regular expression from relation
679  *  val:     border value (inclusive)
680  *  islt:    1 if <=; 0 if >=.
681  */
682 static void gen_regular_rel(char *dst, int val, int islt)
683 {
684     int dst_p;
685     int w, d, i;
686     int pos = 0;
687     char numstr[20];
688
689     yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
690     if (val >= 0)
691     {
692         if (islt)
693             strcpy(dst, "(-[0-9]+|(");
694         else
695             strcpy(dst, "((");
696     } 
697     else
698     {
699         if (!islt)
700         {
701             strcpy(dst, "([0-9]+|-(");
702             dst_p = strlen(dst);
703             islt = 1;
704         }
705         else
706         {
707             strcpy(dst, "(-(");
708             islt = 0;
709         }
710         val = -val;
711     }
712     dst_p = strlen(dst);
713     sprintf(numstr, "%d", val);
714     for (w = strlen(numstr); --w >= 0; pos++)
715     {
716         d = numstr[w];
717         if (pos > 0)
718         {
719             if (islt)
720             {
721                 if (d == '0')
722                     continue;
723                 d--;
724             } 
725             else
726             {
727                 if (d == '9')
728                     continue;
729                 d++;
730             }
731         }
732         
733         strcpy(dst + dst_p, numstr);
734         dst_p = strlen(dst) - pos - 1;
735
736         if (islt)
737         {
738             if (d != '0')
739             {
740                 dst[dst_p++] = '[';
741                 dst[dst_p++] = '0';
742                 dst[dst_p++] = '-';
743                 dst[dst_p++] = d;
744                 dst[dst_p++] = ']';
745             }
746             else
747                 dst[dst_p++] = d;
748         }
749         else
750         {
751             if (d != '9')
752             { 
753                 dst[dst_p++] = '[';
754                 dst[dst_p++] = d;
755                 dst[dst_p++] = '-';
756                 dst[dst_p++] = '9';
757                 dst[dst_p++] = ']';
758             }
759             else
760                 dst[dst_p++] = d;
761         }
762         for (i = 0; i<pos; i++)
763         {
764             dst[dst_p++] = '[';
765             dst[dst_p++] = '0';
766             dst[dst_p++] = '-';
767             dst[dst_p++] = '9';
768             dst[dst_p++] = ']';
769         }
770         dst[dst_p++] = '|';
771     }
772     dst[dst_p] = '\0';
773     if (islt)
774     {
775         /* match everything less than 10^(pos-1) */
776         strcat(dst, "0*");
777         for (i = 1; i<pos; i++)
778             strcat(dst, "[0-9]?");
779     }
780     else
781     {
782         /* match everything greater than 10^pos */
783         for (i = 0; i <= pos; i++)
784             strcat(dst, "[0-9]");
785         strcat(dst, "[0-9]*");
786     }
787     strcat(dst, "))");
788 }
789
/*
 * string_rel_add_char: copy one logical character from src[*indx] to
 * *term_p.  A backslash is copied together with the character after it.
 * Both *term_p and *indx are advanced past what was copied.
 */
void string_rel_add_char(char **term_p, const char *src, int *indx)
{
    char *out = *term_p;
    int at = *indx;

    if (src[at] == '\\')
        *out++ = src[at++];
    *out++ = src[at++];

    *term_p = out;
    *indx = at;
}
796
797 /*
798  *   >  abc     ([b-].*|a[c-].*|ab[d-].*|abc.+)
799  *              ([^-a].*|a[^-b].*ab[^-c].*|abc.+)
800  *   >= abc     ([b-].*|a[c-].*|ab[c-].*)
801  *              ([^-a].*|a[^-b].*|ab[c-].*)
802  *   <  abc     ([-0].*|a[-a].*|ab[-b].*)
803  *              ([^a-].*|a[^b-].*|ab[^c-].*)
804  *   <= abc     ([-0].*|a[-a].*|ab[-b].*|abc)
805  *              ([^a-].*|a[^b-].*|ab[^c-].*|abc)
806  */
807 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
808                            const char **term_sub, char *term_dict,
809                            oid_value attributeSet,
810                            int reg_type, int space_split, char *term_dst,
811                            int *error_code)
812 {
813     AttrType relation;
814     int relation_value;
815     int i;
816     char *term_tmp = term_dict + strlen(term_dict);
817     char term_component[2*IT_MAX_WORD+20];
818
819     attr_init(&relation, zapt, 2);
820     relation_value = attr_find(&relation, NULL);
821
822     *error_code = 0;
823     yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
824     switch (relation_value)
825     {
826     case 1:
827         if (!term_100(zh->reg->zebra_maps, reg_type,
828                       term_sub, term_component,
829                       space_split, term_dst))
830             return 0;
831         yaz_log(log_level_rpn, "Relation <");
832         
833         *term_tmp++ = '(';
834         for (i = 0; term_component[i]; )
835         {
836             int j = 0;
837
838             if (i)
839                 *term_tmp++ = '|';
840             while (j < i)
841                 string_rel_add_char(&term_tmp, term_component, &j);
842
843             *term_tmp++ = '[';
844
845             *term_tmp++ = '^';
846             string_rel_add_char(&term_tmp, term_component, &i);
847             *term_tmp++ = '-';
848
849             *term_tmp++ = ']';
850             *term_tmp++ = '.';
851             *term_tmp++ = '*';
852
853             if ((term_tmp - term_dict) > IT_MAX_WORD)
854                 break;
855         }
856         *term_tmp++ = ')';
857         *term_tmp = '\0';
858         break;
859     case 2:
860         if (!term_100(zh->reg->zebra_maps, reg_type,
861                       term_sub, term_component,
862                       space_split, term_dst))
863             return 0;
864         yaz_log(log_level_rpn, "Relation <=");
865
866         *term_tmp++ = '(';
867         for (i = 0; term_component[i]; )
868         {
869             int j = 0;
870
871             while (j < i)
872                 string_rel_add_char(&term_tmp, term_component, &j);
873             *term_tmp++ = '[';
874
875             *term_tmp++ = '^';
876             string_rel_add_char(&term_tmp, term_component, &i);
877             *term_tmp++ = '-';
878
879             *term_tmp++ = ']';
880             *term_tmp++ = '.';
881             *term_tmp++ = '*';
882
883             *term_tmp++ = '|';
884
885             if ((term_tmp - term_dict) > IT_MAX_WORD)
886                 break;
887         }
888         for (i = 0; term_component[i]; )
889             string_rel_add_char(&term_tmp, term_component, &i);
890         *term_tmp++ = ')';
891         *term_tmp = '\0';
892         break;
893     case 5:
894         if (!term_100 (zh->reg->zebra_maps, reg_type,
895                        term_sub, term_component, space_split, term_dst))
896             return 0;
897         yaz_log(log_level_rpn, "Relation >");
898
899         *term_tmp++ = '(';
900         for (i = 0; term_component[i];)
901         {
902             int j = 0;
903
904             while (j < i)
905                 string_rel_add_char(&term_tmp, term_component, &j);
906             *term_tmp++ = '[';
907             
908             *term_tmp++ = '^';
909             *term_tmp++ = '-';
910             string_rel_add_char(&term_tmp, term_component, &i);
911
912             *term_tmp++ = ']';
913             *term_tmp++ = '.';
914             *term_tmp++ = '*';
915
916             *term_tmp++ = '|';
917
918             if ((term_tmp - term_dict) > IT_MAX_WORD)
919                 break;
920         }
921         for (i = 0; term_component[i];)
922             string_rel_add_char(&term_tmp, term_component, &i);
923         *term_tmp++ = '.';
924         *term_tmp++ = '+';
925         *term_tmp++ = ')';
926         *term_tmp = '\0';
927         break;
928     case 4:
929         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
930                       term_component, space_split, term_dst))
931             return 0;
932         yaz_log(log_level_rpn, "Relation >=");
933
934         *term_tmp++ = '(';
935         for (i = 0; term_component[i];)
936         {
937             int j = 0;
938
939             if (i)
940                 *term_tmp++ = '|';
941             while (j < i)
942                 string_rel_add_char(&term_tmp, term_component, &j);
943             *term_tmp++ = '[';
944
945             if (term_component[i+1])
946             {
947                 *term_tmp++ = '^';
948                 *term_tmp++ = '-';
949                 string_rel_add_char(&term_tmp, term_component, &i);
950             }
951             else
952             {
953                 string_rel_add_char(&term_tmp, term_component, &i);
954                 *term_tmp++ = '-';
955             }
956             *term_tmp++ = ']';
957             *term_tmp++ = '.';
958             *term_tmp++ = '*';
959
960             if ((term_tmp - term_dict) > IT_MAX_WORD)
961                 break;
962         }
963         *term_tmp++ = ')';
964         *term_tmp = '\0';
965         break;
966     case 3:
967     case 102:
968     case -1:
969         yaz_log(log_level_rpn, "Relation =");
970         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
971                       term_component, space_split, term_dst))
972             return 0;
973         strcat(term_tmp, "(");
974         strcat(term_tmp, term_component);
975         strcat(term_tmp, ")");
976         break;
977     default:
978         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
979         return 0;
980     }
981     return 1;
982 }
983
984 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
985                              const char **term_sub, 
986                              oid_value attributeSet, NMEM stream,
987                              struct grep_info *grep_info,
988                              int reg_type, int complete_flag,
989                              int num_bases, char **basenames,
990                              char *term_dst, int xpath_use,
991                              struct ord_list **ol);
992
993 static ZEBRA_RES term_limits_APT(ZebraHandle zh,
994                                  Z_AttributesPlusTerm *zapt,
995                                  zint *hits_limit_value,
996                                  const char **term_ref_id_str)
997 {
998     AttrType term_ref_id_attr;
999     AttrType hits_limit_attr;
1000  
1001     attr_init(&hits_limit_attr, zapt, 9);
1002     *hits_limit_value  = attr_find(&hits_limit_attr, NULL);
1003
1004     attr_init(&term_ref_id_attr, zapt, 10);
1005     attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
1006
1007     /* no limit given ? */
1008     if (*hits_limit_value == -1)
1009         if (*term_ref_id_str)
1010         {
1011             /* use global if term_ref is present */
1012             *hits_limit_value = zh->approx_limit;
1013         }
1014         else
1015         {
1016             /* no counting if term_ref is not present */
1017             *hits_limit_value = 0;
1018         }
1019     else if (*hits_limit_value == 0)
1020     {
1021         /* 0 is the same as global limit */
1022         *hits_limit_value = zh->approx_limit;
1023     }
1024     yaz_log(YLOG_LOG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
1025             *term_ref_id_str ? *term_ref_id_str : "none",
1026             *hits_limit_value);
1027     return ZEBRA_OK;
1028 }
1029
1030 static ZEBRA_RES term_trunc(ZebraHandle zh,
1031                             Z_AttributesPlusTerm *zapt,
1032                             const char **term_sub, 
1033                             oid_value attributeSet, NMEM stream,
1034                             struct grep_info *grep_info,
1035                             int reg_type, int complete_flag,
1036                             int num_bases, char **basenames,
1037                             char *term_dst,
1038                             const char *rank_type, int xpath_use,
1039                             NMEM rset_nmem,
1040                             RSET *rset,
1041                             struct rset_key_control *kc)
1042 {
1043     ZEBRA_RES res;
1044     struct ord_list *ol;
1045     zint hits_limit_value;
1046     const char *term_ref_id_str = 0;
1047     *rset = 0;
1048
1049     term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str);
1050     grep_info->isam_p_indx = 0;
1051     res = string_term(zh, zapt, term_sub, attributeSet, stream, grep_info,
1052                       reg_type, complete_flag, num_bases, basenames,
1053                       term_dst, xpath_use, &ol);
1054     if (res != ZEBRA_OK)
1055         return res;
1056     if (!*term_sub)  /* no more terms ? */
1057         return res;
1058     yaz_log(log_level_rpn, "term: %s", term_dst);
1059     *rset = rset_trunc(zh, grep_info->isam_p_buf,
1060                        grep_info->isam_p_indx, term_dst,
1061                        strlen(term_dst), rank_type, 1 /* preserve pos */,
1062                        zapt->term->which, rset_nmem,
1063                        kc, kc->scope, ol, reg_type, hits_limit_value,
1064                        term_ref_id_str);
1065     if (!*rset)
1066         return ZEBRA_FAIL;
1067     return ZEBRA_OK;
1068 }
1069
1070 static char *nmem_strdup_i(NMEM nmem, int v)
1071 {
1072     char val_str[64];
1073     sprintf(val_str, "%d", v);
1074     return nmem_strdup(nmem, val_str);
1075 }
1076
1077 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1078                              const char **term_sub, 
1079                              oid_value attributeSet, NMEM stream,
1080                              struct grep_info *grep_info,
1081                              int reg_type, int complete_flag,
1082                              int num_bases, char **basenames,
1083                              char *term_dst, int xpath_use,
1084                              struct ord_list **ol)
1085 {
1086     char term_dict[2*IT_MAX_WORD+4000];
1087     int j, r, base_no;
1088     AttrType truncation;
1089     int truncation_value;
1090     AttrType use;
1091     int use_value;
1092     const char *use_string = 0;
1093     oid_value curAttributeSet = attributeSet;
1094     const char *termp;
1095     struct rpn_char_map_info rcmi;
1096     int space_split = complete_flag ? 0 : 1;
1097
1098     int bases_ok = 0;     /* no of databases with OK attribute */
1099     int errCode = 0;      /* err code (if any is not OK) */
1100     char *errString = 0;  /* addinfo */
1101
1102
1103     *ol = ord_list_create(stream);
1104
1105     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1106     attr_init(&use, zapt, 1);
1107     use_value = attr_find_ex(&use, &curAttributeSet, &use_string);
1108     yaz_log(log_level_rpn, "string_term, use value %d", use_value);
1109     attr_init(&truncation, zapt, 5);
1110     truncation_value = attr_find(&truncation, NULL);
1111     yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1112
1113     if (use_value == -1)    /* no attribute - assumy "any" */
1114         use_value = 1016;
1115     for (base_no = 0; base_no < num_bases; base_no++)
1116     {
1117         int ord = -1;
1118         int attr_ok = 0;
1119         int regex_range = 0;
1120         int init_pos = 0;
1121         attent attp;
1122         data1_local_attribute id_xpath_attr;
1123         data1_local_attribute *local_attr;
1124         int max_pos, prefix_len = 0;
1125         int relation_error;
1126
1127         termp = *term_sub;
1128
1129         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1130         {
1131             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1132                            basenames[base_no]);
1133             return ZEBRA_FAIL;
1134         }
1135         if (xpath_use > 0 && use_value == -2) 
1136         {
1137             /* xpath mode and we have a string attribute */
1138             attp.local_attributes = &id_xpath_attr;
1139             attp.attset_ordinal = VAL_IDXPATH;
1140             id_xpath_attr.next = 0;
1141
1142             use_value = xpath_use;  /* xpath_use as use-attribute now */
1143             id_xpath_attr.local = use_value;
1144         }
1145         else if (curAttributeSet == VAL_IDXPATH && use_value >= 0)
1146         {
1147             /* X-Path attribute, use numeric value directly */
1148             attp.local_attributes = &id_xpath_attr;
1149             attp.attset_ordinal = VAL_IDXPATH;
1150             id_xpath_attr.next = 0;
1151             id_xpath_attr.local = use_value;
1152         }
1153         else if (use_string &&
1154                  (ord = zebraExplain_lookup_attr_str(zh->reg->zei,
1155                                                      use_string)) >= 0)
1156         {
1157             /* we have a match for a raw string attribute */
1158             char ord_buf[32];
1159             int i, ord_len;
1160
1161             if (prefix_len)
1162                 term_dict[prefix_len++] = '|';
1163             else
1164                 term_dict[prefix_len++] = '(';
1165             
1166             ord_len = key_SU_encode (ord, ord_buf);
1167             for (i = 0; i<ord_len; i++)
1168             {
1169                 term_dict[prefix_len++] = 1;
1170                 term_dict[prefix_len++] = ord_buf[i];
1171             }
1172             attp.local_attributes = 0;  /* no more attributes */
1173             *ol = ord_list_append(stream, *ol, ord);
1174         }
1175         else 
1176         {
1177             /* lookup in the .att files . Allow string as well */
1178             if ((r = att_getentbyatt (zh, &attp, curAttributeSet, use_value,
1179                                       use_string)))
1180             {
1181                 yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
1182                         curAttributeSet, use_value, r);
1183                 if (r == -1)
1184                 {
1185                     /* set was found, but value wasn't defined */
1186                     errCode = YAZ_BIB1_UNSUPP_USE_ATTRIBUTE;
1187                     if (use_string)
1188                         errString = nmem_strdup(stream, use_string);
1189                     else
1190                         errString = nmem_strdup_i (stream, use_value);
1191                 }
1192                 else
1193                 {
1194                     int oid[OID_SIZE];
1195                     struct oident oident;
1196                     
1197                     oident.proto = PROTO_Z3950;
1198                     oident.oclass = CLASS_ATTSET;
1199                     oident.value = curAttributeSet;
1200                     oid_ent_to_oid (&oident, oid);
1201                     
1202                     errCode = YAZ_BIB1_UNSUPP_ATTRIBUTE_SET;
1203                     errString = nmem_strdup(stream, oident.desc);
1204                 }
1205                 continue;
1206             }
1207         }
1208         for (local_attr = attp.local_attributes; local_attr;
1209              local_attr = local_attr->next)
1210         {
1211             char ord_buf[32];
1212             int i, ord_len;
1213             
1214             ord = zebraExplain_lookup_attr_su(zh->reg->zei,
1215                                               attp.attset_ordinal,
1216                                               local_attr->local);
1217             if (ord < 0)
1218                 continue;
1219             *ol = ord_list_append(stream, *ol, ord);
1220             if (prefix_len)
1221                 term_dict[prefix_len++] = '|';
1222             else
1223                 term_dict[prefix_len++] = '(';
1224             
1225             ord_len = key_SU_encode (ord, ord_buf);
1226             for (i = 0; i<ord_len; i++)
1227             {
1228                 term_dict[prefix_len++] = 1;
1229                 term_dict[prefix_len++] = ord_buf[i];
1230             }
1231         }
1232         bases_ok++;
1233         if (prefix_len)
1234             attr_ok = 1;
1235
1236         term_dict[prefix_len++] = ')';
1237         term_dict[prefix_len++] = 1;
1238         term_dict[prefix_len++] = reg_type;
1239         yaz_log(log_level_rpn, "reg_type = %d", term_dict[prefix_len-1]);
1240         term_dict[prefix_len] = '\0';
1241         j = prefix_len;
1242         switch (truncation_value)
1243         {
1244         case -1:         /* not specified */
1245         case 100:        /* do not truncate */
1246             if (!string_relation (zh, zapt, &termp, term_dict,
1247                                   attributeSet,
1248                                   reg_type, space_split, term_dst,
1249                                   &relation_error))
1250             {
1251                 if (relation_error)
1252                 {
1253                     zebra_setError(zh, relation_error, 0);
1254                     return ZEBRA_FAIL;
1255                 }
1256                 *term_sub = 0;
1257                 return ZEBRA_OK;
1258             }
1259             break;
1260         case 1:          /* right truncation */
1261             term_dict[j++] = '(';
1262             if (!term_100(zh->reg->zebra_maps, reg_type,
1263                           &termp, term_dict + j, space_split, term_dst))
1264             {
1265                 *term_sub = 0;
1266                 return ZEBRA_OK;
1267             }
1268             strcat(term_dict, ".*)");
1269             break;
1270         case 2:          /* keft truncation */
1271             term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1272             if (!term_100(zh->reg->zebra_maps, reg_type,
1273                           &termp, term_dict + j, space_split, term_dst))
1274             {
1275                 *term_sub = 0;
1276                 return ZEBRA_OK;
1277             }
1278             strcat(term_dict, ")");
1279             break;
1280         case 3:          /* left&right truncation */
1281             term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1282             if (!term_100(zh->reg->zebra_maps, reg_type,
1283                           &termp, term_dict + j, space_split, term_dst))
1284             {
1285                 *term_sub = 0;
1286                 return ZEBRA_OK;
1287             }
1288             strcat(term_dict, ".*)");
1289             break;
1290         case 101:        /* process # in term */
1291             term_dict[j++] = '(';
1292             if (!term_101(zh->reg->zebra_maps, reg_type,
1293                           &termp, term_dict + j, space_split, term_dst))
1294             {
1295                 *term_sub = 0;
1296                 return ZEBRA_OK;
1297             }
1298             strcat(term_dict, ")");
1299             break;
1300         case 102:        /* Regexp-1 */
1301             term_dict[j++] = '(';
1302             if (!term_102(zh->reg->zebra_maps, reg_type,
1303                           &termp, term_dict + j, space_split, term_dst))
1304             {
1305                 *term_sub = 0;
1306                 return ZEBRA_OK;
1307             }
1308             strcat(term_dict, ")");
1309             break;
1310         case 103:       /* Regexp-2 */
1311             regex_range = 1;
1312             term_dict[j++] = '(';
1313             init_pos = 2;
1314             if (!term_103(zh->reg->zebra_maps, reg_type,
1315                           &termp, term_dict + j, &regex_range,
1316                           space_split, term_dst))
1317             {
1318                 *term_sub = 0;
1319                 return ZEBRA_OK;
1320             }
1321             strcat(term_dict, ")");
1322             break;
1323         case 104:        /* process # and ! in term */
1324             term_dict[j++] = '(';
1325             if (!term_104(zh->reg->zebra_maps, reg_type,
1326                           &termp, term_dict + j, space_split, term_dst))
1327             {
1328                 *term_sub = 0;
1329                 return ZEBRA_OK;
1330             }
1331             strcat(term_dict, ")");
1332             break;
1333         case 105:        /* process * and ! in term */
1334             term_dict[j++] = '(';
1335             if (!term_105(zh->reg->zebra_maps, reg_type,
1336                           &termp, term_dict + j, space_split, term_dst, 1))
1337             {
1338                 *term_sub = 0;
1339                 return ZEBRA_OK;
1340             }
1341             strcat(term_dict, ")");
1342             break;
1343         case 106:        /* process * and ! in term */
1344             term_dict[j++] = '(';
1345             if (!term_105(zh->reg->zebra_maps, reg_type,
1346                           &termp, term_dict + j, space_split, term_dst, 0))
1347             {
1348                 *term_sub = 0;
1349                 return ZEBRA_OK;
1350             }
1351             strcat(term_dict, ")");
1352             break;
1353         default:
1354             zebra_setError_zint(zh,
1355                                 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1356                                 truncation_value);
1357             return ZEBRA_FAIL;
1358         }
1359         if (attr_ok)
1360         {
1361             char buf[80];
1362             const char *input = term_dict + prefix_len;
1363             esc_str(buf, sizeof(buf), input, strlen(input));
1364         }
1365         if (attr_ok)
1366         {
1367             yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_dict+prefix_len);
1368             r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
1369                                  grep_info, &max_pos, init_pos,
1370                                  grep_handle);
1371             if (r)
1372                 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1373         }
1374     }
1375     if (!bases_ok)
1376     {
1377         zebra_setError(zh, errCode, errString);
1378         return ZEBRA_FAIL;
1379     }
1380     *term_sub = termp;
1381     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1382     return ZEBRA_OK;
1383 }
1384
1385
1386 /* convert APT search term to UTF8 */
1387 static ZEBRA_RES zapt_term_to_utf8(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1388                                    char *termz)
1389 {
1390     size_t sizez;
1391     Z_Term *term = zapt->term;
1392
1393     switch (term->which)
1394     {
1395     case Z_Term_general:
1396         if (zh->iconv_to_utf8 != 0)
1397         {
1398             char *inbuf = term->u.general->buf;
1399             size_t inleft = term->u.general->len;
1400             char *outbuf = termz;
1401             size_t outleft = IT_MAX_WORD-1;
1402             size_t ret;
1403
1404             ret = yaz_iconv(zh->iconv_to_utf8, &inbuf, &inleft,
1405                         &outbuf, &outleft);
1406             if (ret == (size_t)(-1))
1407             {
1408                 ret = yaz_iconv(zh->iconv_to_utf8, 0, 0, 0, 0);
1409                 zebra_setError(
1410                     zh, 
1411                     YAZ_BIB1_QUERY_TERM_INCLUDES_CHARS_THAT_DO_NOT_TRANSLATE_INTO_,
1412                     0);
1413                 return ZEBRA_FAIL;
1414             }
1415             *outbuf = 0;
1416         }
1417         else
1418         {
1419             sizez = term->u.general->len;
1420             if (sizez > IT_MAX_WORD-1)
1421                 sizez = IT_MAX_WORD-1;
1422             memcpy (termz, term->u.general->buf, sizez);
1423             termz[sizez] = '\0';
1424         }
1425         break;
1426     case Z_Term_characterString:
1427         sizez = strlen(term->u.characterString);
1428         if (sizez > IT_MAX_WORD-1)
1429             sizez = IT_MAX_WORD-1;
1430         memcpy (termz, term->u.characterString, sizez);
1431         termz[sizez] = '\0';
1432         break;
1433     default:
1434         zebra_setError(zh, YAZ_BIB1_UNSUPP_CODED_VALUE_FOR_TERM, 0);
1435         return ZEBRA_FAIL;
1436     }
1437     return ZEBRA_OK;
1438 }
1439
1440 /* convert APT SCAN term to internal cmap */
1441 static ZEBRA_RES trans_scan_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1442                                  char *termz, int reg_type)
1443 {
1444     char termz0[IT_MAX_WORD];
1445
1446     if (zapt_term_to_utf8(zh, zapt, termz0) == ZEBRA_FAIL)
1447         return ZEBRA_FAIL;    /* error */
1448     else
1449     {
1450         const char **map;
1451         const char *cp = (const char *) termz0;
1452         const char *cp_end = cp + strlen(cp);
1453         const char *src;
1454         int i = 0;
1455         const char *space_map = NULL;
1456         int len;
1457             
1458         while ((len = (cp_end - cp)) > 0)
1459         {
1460             map = zebra_maps_input(zh->reg->zebra_maps, reg_type, &cp, len, 0);
1461             if (**map == *CHR_SPACE)
1462                 space_map = *map;
1463             else
1464             {
1465                 if (i && space_map)
1466                     for (src = space_map; *src; src++)
1467                         termz[i++] = *src;
1468                 space_map = NULL;
1469                 for (src = *map; *src; src++)
1470                     termz[i++] = *src;
1471             }
1472         }
1473         termz[i] = '\0';
1474     }
1475     return ZEBRA_OK;
1476 }
1477
1478 static void grep_info_delete(struct grep_info *grep_info)
1479 {
1480 #ifdef TERM_COUNT
1481     xfree(grep_info->term_no);
1482 #endif
1483     xfree(grep_info->isam_p_buf);
1484 }
1485
1486 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1487                                    Z_AttributesPlusTerm *zapt,
1488                                    struct grep_info *grep_info,
1489                                    int reg_type)
1490 {
1491     AttrType termset;
1492     int termset_value_numeric;
1493     const char *termset_value_string;
1494
1495 #ifdef TERM_COUNT
1496     grep_info->term_no = 0;
1497 #endif
1498     grep_info->isam_p_size = 0;
1499     grep_info->isam_p_buf = NULL;
1500     grep_info->zh = zh;
1501     grep_info->reg_type = reg_type;
1502     grep_info->termset = 0;
1503
1504     if (!zapt)
1505         return ZEBRA_OK;
1506     attr_init(&termset, zapt, 8);
1507     termset_value_numeric =
1508         attr_find_ex(&termset, NULL, &termset_value_string);
1509     if (termset_value_numeric != -1)
1510     {
1511         char resname[32];
1512         const char *termset_name = 0;
1513         if (termset_value_numeric != -2)
1514         {
1515     
1516             sprintf(resname, "%d", termset_value_numeric);
1517             termset_name = resname;
1518         }
1519         else
1520             termset_name = termset_value_string;
1521         yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1522         grep_info->termset = resultSetAdd(zh, termset_name, 1);
1523         if (!grep_info->termset)
1524         {
1525             zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1526             return ZEBRA_FAIL;
1527         }
1528     }
1529     return ZEBRA_OK;
1530 }
1531                                
1532 /**
1533   \brief Create result set(s) for list of terms
1534   \param zh Zebra Handle
1535   \param termz term as used in query but converted to UTF-8
1536   \param attributeSet default attribute set
1537   \param stream memory for result
1538   \param reg_type register type ('w', 'p',..)
1539   \param complete_flag whether it's phrases or not
1540   \param rank_type term flags for ranking
1541   \param xpath_use use attribute for X-Path (-1 for no X-path)
1542   \param num_bases number of databases
1543   \param basenames array of databases
1544   \param rset_mem memory for result sets
1545   \param result_sets output result set for each term in list (output)
1546   \param number number of output result sets
1547   \param kc rset key control to be used for created result sets
1548 */
1549 static ZEBRA_RES term_list_trunc(ZebraHandle zh,
1550                                  Z_AttributesPlusTerm *zapt,
1551                                  const char *termz,
1552                                  oid_value attributeSet,
1553                                  NMEM stream,
1554                                  int reg_type, int complete_flag,
1555                                  const char *rank_type, int xpath_use,
1556                                  int num_bases, char **basenames, 
1557                                  NMEM rset_nmem,
1558                                  RSET **result_sets, int *num_result_sets,
1559                                  struct rset_key_control *kc)
1560 {
1561     char term_dst[IT_MAX_WORD+1];
1562     struct grep_info grep_info;
1563     const char *termp = termz;
1564     int alloc_sets = 0;
1565
1566     *num_result_sets = 0;
1567     *term_dst = 0;
1568     if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1569         return ZEBRA_FAIL;
1570     while(1)
1571     { 
1572         ZEBRA_RES res;
1573
1574         if (alloc_sets == *num_result_sets)
1575         {
1576             int add = 10;
1577             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1578                                               sizeof(*rnew));
1579             if (alloc_sets)
1580                 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1581             alloc_sets = alloc_sets + add;
1582             *result_sets = rnew;
1583         }
1584         res = term_trunc(zh, zapt, &termp, attributeSet,
1585                          stream, &grep_info,
1586                          reg_type, complete_flag,
1587                          num_bases, basenames,
1588                          term_dst, rank_type,
1589                          xpath_use, rset_nmem,
1590                          &(*result_sets)[*num_result_sets],
1591                          kc);
1592         if (res != ZEBRA_OK)
1593         {
1594             int i;
1595             for (i = 0; i < *num_result_sets; i++)
1596                 rset_delete((*result_sets)[i]);
1597             grep_info_delete (&grep_info);
1598             return res;
1599         }
1600         if ((*result_sets)[*num_result_sets] == 0)
1601             break;
1602         (*num_result_sets)++;
1603     }
1604     grep_info_delete(&grep_info);
1605     return ZEBRA_OK;
1606 }
1607
1608 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1609                                        Z_AttributesPlusTerm *zapt,
1610                                        const char *termz_org,
1611                                        oid_value attributeSet,
1612                                        NMEM stream,
1613                                        int reg_type, int complete_flag,
1614                                        const char *rank_type, int xpath_use,
1615                                        int num_bases, char **basenames, 
1616                                        NMEM rset_nmem,
1617                                        RSET *rset,
1618                                        struct rset_key_control *kc)
1619 {
1620     RSET *result_sets = 0;
1621     int num_result_sets = 0;
1622     ZEBRA_RES res =
1623         term_list_trunc(zh, zapt, termz_org, attributeSet,
1624                         stream, reg_type, complete_flag,
1625                         rank_type, xpath_use,
1626                         num_bases, basenames,
1627                         rset_nmem,
1628                         &result_sets, &num_result_sets, kc);
1629     if (res != ZEBRA_OK)
1630         return res;
1631     if (num_result_sets == 0)
1632         *rset = rsnull_create (rset_nmem, kc, 0); 
1633     else if (num_result_sets == 1)
1634         *rset = result_sets[0];
1635     else
1636         *rset = rsprox_create(rset_nmem, kc, kc->scope,
1637                               num_result_sets, result_sets,
1638                               1 /* ordered */, 0 /* exclusion */,
1639                               3 /* relation */, 1 /* distance */);
1640     if (!*rset)
1641         return ZEBRA_FAIL;
1642     return ZEBRA_OK;
1643 }
1644
1645 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1646                                         Z_AttributesPlusTerm *zapt,
1647                                         const char *termz_org,
1648                                         oid_value attributeSet,
1649                                         NMEM stream,
1650                                         int reg_type, int complete_flag,
1651                                         const char *rank_type,
1652                                         int xpath_use,
1653                                         int num_bases, char **basenames,
1654                                         NMEM rset_nmem,
1655                                         RSET *rset,
1656                                         struct rset_key_control *kc)
1657 {
1658     RSET *result_sets = 0;
1659     int num_result_sets = 0;
1660     ZEBRA_RES res =
1661         term_list_trunc(zh, zapt, termz_org, attributeSet,
1662                         stream, reg_type, complete_flag,
1663                         rank_type, xpath_use,
1664                         num_bases, basenames,
1665                         rset_nmem,
1666                         &result_sets, &num_result_sets, kc);
1667     if (res != ZEBRA_OK)
1668         return res;
1669     if (num_result_sets == 0)
1670         *rset = rsnull_create (rset_nmem, kc, 0); 
1671     else if (num_result_sets == 1)
1672         *rset = result_sets[0];
1673     else
1674         *rset = rsmulti_or_create(rset_nmem, kc, kc->scope, 0 /* termid */,
1675                                   num_result_sets, result_sets);
1676     if (!*rset)
1677         return ZEBRA_FAIL;
1678     return ZEBRA_OK;
1679 }
1680
1681 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1682                                          Z_AttributesPlusTerm *zapt,
1683                                          const char *termz_org,
1684                                          oid_value attributeSet,
1685                                          NMEM stream,
1686                                          int reg_type, int complete_flag,
1687                                          const char *rank_type, 
1688                                          int xpath_use,
1689                                          int num_bases, char **basenames,
1690                                          NMEM rset_nmem,
1691                                          RSET *rset,
1692                                          struct rset_key_control *kc)
1693 {
1694     RSET *result_sets = 0;
1695     int num_result_sets = 0;
1696     ZEBRA_RES res =
1697         term_list_trunc(zh, zapt, termz_org, attributeSet,
1698                         stream, reg_type, complete_flag,
1699                         rank_type, xpath_use,
1700                         num_bases, basenames,
1701                         rset_nmem,
1702                         &result_sets, &num_result_sets,
1703                         kc);
1704     if (res != ZEBRA_OK)
1705         return res;
1706     if (num_result_sets == 0)
1707         *rset = rsnull_create (rset_nmem, kc, 0); 
1708     else if (num_result_sets == 1)
1709         *rset = result_sets[0];
1710     else
1711         *rset = rsmulti_and_create(rset_nmem, kc, kc->scope,
1712                                    num_result_sets, result_sets);
1713     if (!*rset)
1714         return ZEBRA_FAIL;
1715     return ZEBRA_OK;
1716 }
1717
1718 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1719                             const char **term_sub,
1720                             char *term_dict,
1721                             oid_value attributeSet,
1722                             struct grep_info *grep_info,
1723                             int *max_pos,
1724                             int reg_type,
1725                             char *term_dst,
1726                             int *error_code)
1727 {
1728     AttrType relation;
1729     int relation_value;
1730     int term_value;
1731     int r;
1732     char *term_tmp = term_dict + strlen(term_dict);
1733
1734     *error_code = 0;
1735     attr_init(&relation, zapt, 2);
1736     relation_value = attr_find(&relation, NULL);
1737
1738     yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1739
1740     if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1741                   term_dst))
1742         return 0;
1743     term_value = atoi (term_tmp);
1744     switch (relation_value)
1745     {
1746     case 1:
1747         yaz_log(log_level_rpn, "Relation <");
1748         gen_regular_rel(term_tmp, term_value-1, 1);
1749         break;
1750     case 2:
1751         yaz_log(log_level_rpn, "Relation <=");
1752         gen_regular_rel(term_tmp, term_value, 1);
1753         break;
1754     case 4:
1755         yaz_log(log_level_rpn, "Relation >=");
1756         gen_regular_rel(term_tmp, term_value, 0);
1757         break;
1758     case 5:
1759         yaz_log(log_level_rpn, "Relation >");
1760         gen_regular_rel(term_tmp, term_value+1, 0);
1761         break;
1762     case -1:
1763     case 3:
1764         yaz_log(log_level_rpn, "Relation =");
1765         sprintf(term_tmp, "(0*%d)", term_value);
1766         break;
1767     default:
1768         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1769         return 0;
1770     }
1771     yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_tmp);
1772     r = dict_lookup_grep(zh->reg->dict, term_dict, 0, grep_info, max_pos,
1773                           0, grep_handle);
1774     if (r)
1775         yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1776     yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1777     return 1;
1778 }
1779
1780 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1781                               const char **term_sub, 
1782                               oid_value attributeSet,
1783                               struct grep_info *grep_info,
1784                               int reg_type, int complete_flag,
1785                               int num_bases, char **basenames,
1786                               char *term_dst, int xpath_use, NMEM stream)
1787 {
1788     char term_dict[2*IT_MAX_WORD+2];
1789     int r, base_no;
1790     AttrType use;
1791     int use_value;
1792     const char *use_string = 0;
1793     oid_value curAttributeSet = attributeSet;
1794     const char *termp;
1795     struct rpn_char_map_info rcmi;
1796
1797     int bases_ok = 0;     /* no of databases with OK attribute */
1798     int errCode = 0;      /* err code (if any is not OK) */
1799     char *errString = 0;  /* addinfo */
1800
1801     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1802     attr_init(&use, zapt, 1);
1803     use_value = attr_find_ex(&use, &curAttributeSet, &use_string);
1804
1805     if (use_value == -1)
1806         use_value = 1016;
1807
1808     for (base_no = 0; base_no < num_bases; base_no++)
1809     {
1810         attent attp;
1811         data1_local_attribute id_xpath_attr;
1812         data1_local_attribute *local_attr;
1813         int max_pos, prefix_len = 0;
1814         int relation_error = 0;
1815
1816         termp = *term_sub;
1817         if (use_value == -2)  /* string attribute (assume IDXPATH/any) */
1818         {
1819             use_value = xpath_use;
1820             attp.local_attributes = &id_xpath_attr;
1821             attp.attset_ordinal = VAL_IDXPATH;
1822             id_xpath_attr.next = 0;
1823             id_xpath_attr.local = use_value;
1824         }
1825         else if (curAttributeSet == VAL_IDXPATH)
1826         {
1827             attp.local_attributes = &id_xpath_attr;
1828             attp.attset_ordinal = VAL_IDXPATH;
1829             id_xpath_attr.next = 0;
1830             id_xpath_attr.local = use_value;
1831         }
1832         else
1833         {
1834             if ((r = att_getentbyatt (zh, &attp, curAttributeSet, use_value,
1835                                             use_string)))
1836             {
1837                 yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
1838                       curAttributeSet, use_value, r);
1839                 if (r == -1)
1840                 {
1841                     errCode = YAZ_BIB1_UNSUPP_USE_ATTRIBUTE;
1842                     if (use_string)
1843                         errString = nmem_strdup(stream, use_string);
1844                     else
1845                         errString = nmem_strdup_i (stream, use_value);
1846                 }
1847                 else
1848                     errCode = YAZ_BIB1_UNSUPP_ATTRIBUTE_SET;
1849                 continue;
1850             }
1851         }
1852         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1853         {
1854             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1855                            basenames[base_no]);
1856             return ZEBRA_FAIL;
1857         }
1858         for (local_attr = attp.local_attributes; local_attr;
1859              local_attr = local_attr->next)
1860         {
1861             int ord;
1862             char ord_buf[32];
1863             int i, ord_len;
1864
1865             ord = zebraExplain_lookup_attr_su(zh->reg->zei,
1866                                               attp.attset_ordinal,
1867                                               local_attr->local);
1868             if (ord < 0)
1869                 continue;
1870             if (prefix_len)
1871                 term_dict[prefix_len++] = '|';
1872             else
1873                 term_dict[prefix_len++] = '(';
1874
1875             ord_len = key_SU_encode (ord, ord_buf);
1876             for (i = 0; i<ord_len; i++)
1877             {
1878                 term_dict[prefix_len++] = 1;
1879                 term_dict[prefix_len++] = ord_buf[i];
1880             }
1881         }
1882         if (!prefix_len)
1883         {
1884             zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_USE_ATTRIBUTE, use_value);
1885             continue;
1886         }
1887         bases_ok++;
1888         term_dict[prefix_len++] = ')';        
1889         term_dict[prefix_len++] = 1;
1890         term_dict[prefix_len++] = reg_type;
1891         yaz_log(YLOG_DEBUG, "reg_type = %d", term_dict[prefix_len-1]);
1892         term_dict[prefix_len] = '\0';
1893         if (!numeric_relation(zh, zapt, &termp, term_dict,
1894                               attributeSet, grep_info, &max_pos, reg_type,
1895                               term_dst, &relation_error))
1896         {
1897             if (relation_error)
1898             {
1899                 zebra_setError(zh, relation_error, 0);
1900                 return ZEBRA_FAIL;
1901             }
1902             *term_sub = 0;
1903             return ZEBRA_OK;
1904         }
1905     }
1906     if (!bases_ok)
1907     {
1908         zebra_setError(zh, errCode, errString);
1909         return ZEBRA_FAIL;
1910     }
1911     *term_sub = termp;
1912     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1913     return ZEBRA_OK;
1914 }
1915
1916                                  
1917 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1918                                         Z_AttributesPlusTerm *zapt,
1919                                         const char *termz,
1920                                         oid_value attributeSet,
1921                                         NMEM stream,
1922                                         int reg_type, int complete_flag,
1923                                         const char *rank_type, int xpath_use,
1924                                         int num_bases, char **basenames,
1925                                         NMEM rset_nmem,
1926                                         RSET *rset,
1927                                         struct rset_key_control *kc)
1928 {
1929     char term_dst[IT_MAX_WORD+1];
1930     const char *termp = termz;
1931     RSET *result_sets = 0;
1932     int num_result_sets = 0;
1933     ZEBRA_RES res;
1934     struct grep_info grep_info;
1935     int alloc_sets = 0;
1936     zint hits_limit_value;
1937     const char *term_ref_id_str = 0;
1938
1939     term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str);
1940
1941     yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1942     if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1943         return ZEBRA_FAIL;
1944     while (1)
1945     { 
1946         if (alloc_sets == num_result_sets)
1947         {
1948             int add = 10;
1949             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1950                                               sizeof(*rnew));
1951             if (alloc_sets)
1952                 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1953             alloc_sets = alloc_sets + add;
1954             result_sets = rnew;
1955         }
1956         yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1957         grep_info.isam_p_indx = 0;
1958         res = numeric_term(zh, zapt, &termp, attributeSet, &grep_info,
1959                            reg_type, complete_flag, num_bases, basenames,
1960                            term_dst, xpath_use,
1961                            stream);
1962         if (res == ZEBRA_FAIL || termp == 0)
1963             break;
1964         yaz_log(YLOG_DEBUG, "term: %s", term_dst);
1965         result_sets[num_result_sets] =
1966             rset_trunc(zh, grep_info.isam_p_buf,
1967                        grep_info.isam_p_indx, term_dst,
1968                        strlen(term_dst), rank_type,
1969                        0 /* preserve position */,
1970                        zapt->term->which, rset_nmem, 
1971                        kc, kc->scope, 0, reg_type,
1972                        hits_limit_value,
1973                        term_ref_id_str);
1974         if (!result_sets[num_result_sets])
1975             break;
1976         num_result_sets++;
1977     }
1978     grep_info_delete(&grep_info);
1979     if (termp)
1980     {
1981         int i;
1982         for (i = 0; i<num_result_sets; i++)
1983             rset_delete(result_sets[i]);
1984         return ZEBRA_FAIL;
1985     }
1986     if (num_result_sets == 0)
1987         *rset = rsnull_create(rset_nmem, kc, 0);
1988     if (num_result_sets == 1)
1989         *rset = result_sets[0];
1990     else
1991         *rset = rsmulti_and_create(rset_nmem, kc, kc->scope,
1992                                    num_result_sets, result_sets);
1993     if (!*rset)
1994         return ZEBRA_FAIL;
1995     return ZEBRA_OK;
1996 }
1997
1998 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1999                                       Z_AttributesPlusTerm *zapt,
2000                                       const char *termz,
2001                                       oid_value attributeSet,
2002                                       NMEM stream,
2003                                       const char *rank_type, NMEM rset_nmem,
2004                                       RSET *rset,
2005                                       struct rset_key_control *kc)
2006 {
2007     RSFD rsfd;
2008     struct it_key key;
2009     int sys;
2010     *rset = rstemp_create(rset_nmem, kc, kc->scope,
2011                           res_get (zh->res, "setTmpDir"),0 );
2012     rsfd = rset_open(*rset, RSETF_WRITE);
2013     
2014     sys = atoi(termz);
2015     if (sys <= 0)
2016         sys = 1;
2017     key.mem[0] = sys;
2018     key.mem[1] = 1;
2019     key.len = 2;
2020     rset_write (rsfd, &key);
2021     rset_close (rsfd);
2022     return ZEBRA_OK;
2023 }
2024
2025 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2026                                oid_value attributeSet, NMEM stream,
2027                                Z_SortKeySpecList *sort_sequence,
2028                                const char *rank_type,
2029                                NMEM rset_nmem,
2030                                RSET *rset,
2031                                struct rset_key_control *kc)
2032 {
2033     int i;
2034     int sort_relation_value;
2035     AttrType sort_relation_type;
2036     Z_SortKeySpec *sks;
2037     Z_SortKey *sk;
2038     int oid[OID_SIZE];
2039     oident oe;
2040     char termz[20];
2041     
2042     attr_init(&sort_relation_type, zapt, 7);
2043     sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
2044
2045     if (!sort_sequence->specs)
2046     {
2047         sort_sequence->num_specs = 10;
2048         sort_sequence->specs = (Z_SortKeySpec **)
2049             nmem_malloc(stream, sort_sequence->num_specs *
2050                          sizeof(*sort_sequence->specs));
2051         for (i = 0; i<sort_sequence->num_specs; i++)
2052             sort_sequence->specs[i] = 0;
2053     }
2054     if (zapt->term->which != Z_Term_general)
2055         i = 0;
2056     else
2057         i = atoi_n ((char *) zapt->term->u.general->buf,
2058                     zapt->term->u.general->len);
2059     if (i >= sort_sequence->num_specs)
2060         i = 0;
2061     sprintf(termz, "%d", i);
2062
2063     oe.proto = PROTO_Z3950;
2064     oe.oclass = CLASS_ATTSET;
2065     oe.value = attributeSet;
2066     if (!oid_ent_to_oid (&oe, oid))
2067         return ZEBRA_FAIL;
2068
2069     sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
2070     sks->sortElement = (Z_SortElement *)
2071         nmem_malloc(stream, sizeof(*sks->sortElement));
2072     sks->sortElement->which = Z_SortElement_generic;
2073     sk = sks->sortElement->u.generic = (Z_SortKey *)
2074         nmem_malloc(stream, sizeof(*sk));
2075     sk->which = Z_SortKey_sortAttributes;
2076     sk->u.sortAttributes = (Z_SortAttributes *)
2077         nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
2078
2079     sk->u.sortAttributes->id = oid;
2080     sk->u.sortAttributes->list = zapt->attributes;
2081
2082     sks->sortRelation = (int *)
2083         nmem_malloc(stream, sizeof(*sks->sortRelation));
2084     if (sort_relation_value == 1)
2085         *sks->sortRelation = Z_SortKeySpec_ascending;
2086     else if (sort_relation_value == 2)
2087         *sks->sortRelation = Z_SortKeySpec_descending;
2088     else 
2089         *sks->sortRelation = Z_SortKeySpec_ascending;
2090
2091     sks->caseSensitivity = (int *)
2092         nmem_malloc(stream, sizeof(*sks->caseSensitivity));
2093     *sks->caseSensitivity = 0;
2094
2095     sks->which = Z_SortKeySpec_null;
2096     sks->u.null = odr_nullval ();
2097     sort_sequence->specs[i] = sks;
2098     *rset = rsnull_create (rset_nmem, kc, 0);
2099     return ZEBRA_OK;
2100 }
2101
2102
2103 static int parse_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2104                        oid_value attributeSet,
2105                        struct xpath_location_step *xpath, int max, NMEM mem)
2106 {
2107     oid_value curAttributeSet = attributeSet;
2108     AttrType use;
2109     const char *use_string = 0;
2110     
2111     attr_init(&use, zapt, 1);
2112     attr_find_ex(&use, &curAttributeSet, &use_string);
2113
2114     if (!use_string || *use_string != '/')
2115         return -1;
2116
2117     return zebra_parse_xpath_str(use_string, xpath, max, mem);
2118 }
2119  
2120                
2121
2122 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
2123                         int reg_type, const char *term, int use,
2124                         oid_value curAttributeSet, NMEM rset_nmem,
2125                         struct rset_key_control *kc)
2126 {
2127     RSET rset;
2128     struct grep_info grep_info;
2129     char term_dict[2048];
2130     char ord_buf[32];
2131     int prefix_len = 0;
2132     int ord = zebraExplain_lookup_attr_su(zh->reg->zei, curAttributeSet, use);
2133     int ord_len, i, r, max_pos;
2134     int term_type = Z_Term_characterString;
2135     const char *flags = "void";
2136
2137     if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, '0') == ZEBRA_FAIL)
2138         return rsnull_create(rset_nmem, kc, 0);
2139     
2140     if (ord < 0)
2141         return rsnull_create(rset_nmem, kc, 0);
2142     if (prefix_len)
2143         term_dict[prefix_len++] = '|';
2144     else
2145         term_dict[prefix_len++] = '(';
2146     
2147     ord_len = key_SU_encode (ord, ord_buf);
2148     for (i = 0; i<ord_len; i++)
2149     {
2150         term_dict[prefix_len++] = 1;
2151         term_dict[prefix_len++] = ord_buf[i];
2152     }
2153     term_dict[prefix_len++] = ')';
2154     term_dict[prefix_len++] = 1;
2155     term_dict[prefix_len++] = reg_type;
2156     
2157     strcpy(term_dict+prefix_len, term);
2158     
2159     grep_info.isam_p_indx = 0;
2160     r = dict_lookup_grep(zh->reg->dict, term_dict, 0,
2161                           &grep_info, &max_pos, 0, grep_handle);
2162     yaz_log(YLOG_DEBUG, "%s %d positions", term,
2163              grep_info.isam_p_indx);
2164     rset = rset_trunc(zh, grep_info.isam_p_buf,
2165                       grep_info.isam_p_indx, term, strlen(term),
2166                       flags, 1, term_type,rset_nmem,
2167                       kc, kc->scope, 0, reg_type, 0 /* hits_limit */,
2168                       0 /* term_ref_id_str */);
2169     grep_info_delete(&grep_info);
2170     return rset;
2171 }
2172
2173 static
2174 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2175                            oid_value attributeSet,
2176                            int num_bases, char **basenames,
2177                            NMEM stream, const char *rank_type, RSET rset,
2178                            int xpath_len, struct xpath_location_step *xpath,
2179                            NMEM rset_nmem,
2180                            RSET *rset_out,
2181                            struct rset_key_control *kc)
2182 {
2183     oid_value curAttributeSet = attributeSet;
2184     int base_no;
2185     int i;
2186
2187     if (xpath_len < 0)
2188     {
2189         *rset_out = rset;
2190         return ZEBRA_OK;
2191     }
2192
2193     yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2194     for (i = 0; i<xpath_len; i++)
2195     {
2196         yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2197
2198     }
2199
2200     curAttributeSet = VAL_IDXPATH;
2201
2202     /*
2203       //a    ->    a/.*
2204       //a/b  ->    b/a/.*
2205       /a     ->    a/
2206       /a/b   ->    b/a/
2207
2208       /      ->    none
2209
2210    a[@attr = value]/b[@other = othervalue]
2211
2212  /e/@a val      range(e/,range(@a,freetext(w,1015,val),@a),e/)
2213  /a/b val       range(b/a/,freetext(w,1016,val),b/a/)
2214  /a/b/@c val    range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2215  /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2216  /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2217  /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
2218       
2219     */
2220
2221     dict_grep_cmap (zh->reg->dict, 0, 0);
2222
2223     for (base_no = 0; base_no < num_bases; base_no++)
2224     {
2225         int level = xpath_len;
2226         int first_path = 1;
2227         
2228         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2229         {
2230             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2231                            basenames[base_no]);
2232             *rset_out = rset;
2233             return ZEBRA_FAIL;
2234         }
2235         while (--level >= 0)
2236         {
2237             char xpath_rev[128];
2238             int i, len;
2239             RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
2240
2241             *xpath_rev = 0;
2242             len = 0;
2243             for (i = level; i >= 1; --i)
2244             {
2245                 const char *cp = xpath[i].part;
2246                 if (*cp)
2247                 {
2248                     for (;*cp; cp++)
2249                         if (*cp == '*')
2250                         {
2251                             memcpy (xpath_rev + len, "[^/]*", 5);
2252                             len += 5;
2253                         }
2254                         else if (*cp == ' ')
2255                         {
2256
2257                             xpath_rev[len++] = 1;
2258                             xpath_rev[len++] = ' ';
2259                         }
2260
2261                         else
2262                             xpath_rev[len++] = *cp;
2263                     xpath_rev[len++] = '/';
2264                 }
2265                 else if (i == 1)  /* // case */
2266                 {
2267                     xpath_rev[len++] = '.';
2268                     xpath_rev[len++] = '*';
2269                 }
2270             }
2271             xpath_rev[len] = 0;
2272
2273             if (xpath[level].predicate &&
2274                 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2275                 xpath[level].predicate->u.relation.name[0])
2276             {
2277                 WRBUF wbuf = wrbuf_alloc();
2278                 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2279                 if (xpath[level].predicate->u.relation.value)
2280                 {
2281                     const char *cp = xpath[level].predicate->u.relation.value;
2282                     wrbuf_putc(wbuf, '=');
2283                     
2284                     while (*cp)
2285                     {
2286                         if (strchr(REGEX_CHARS, *cp))
2287                             wrbuf_putc(wbuf, '\\');
2288                         wrbuf_putc(wbuf, *cp);
2289                         cp++;
2290                     }
2291                 }
2292                 wrbuf_puts(wbuf, "");
2293                 rset_attr = xpath_trunc(
2294                     zh, stream, '0', wrbuf_buf(wbuf), 3, 
2295                     curAttributeSet, rset_nmem, kc);
2296                 wrbuf_free(wbuf, 1);
2297             } 
2298             else 
2299             {
2300                 if (!first_path)
2301                     continue;
2302             }
2303             yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level, xpath_rev);
2304             if (strlen(xpath_rev))
2305             {
2306                 rset_start_tag = xpath_trunc(zh, stream, '0', 
2307                         xpath_rev, 1, curAttributeSet, rset_nmem, kc);
2308             
2309                 rset_end_tag = xpath_trunc(zh, stream, '0', 
2310                         xpath_rev, 2, curAttributeSet, rset_nmem, kc);
2311
2312                 rset = rsbetween_create(rset_nmem, kc, kc->scope,
2313                                         rset_start_tag, rset,
2314                                         rset_end_tag, rset_attr);
2315             }
2316             first_path = 0;
2317         }
2318     }
2319     *rset_out = rset;
2320     return ZEBRA_OK;
2321 }
2322
2323 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2324                                 oid_value attributeSet, NMEM stream,
2325                                 Z_SortKeySpecList *sort_sequence,
2326                                 int num_bases, char **basenames, 
2327                                 NMEM rset_nmem,
2328                                 RSET *rset,
2329                                 struct rset_key_control *kc)
2330 {
2331     ZEBRA_RES res = ZEBRA_OK;
2332     unsigned reg_id;
2333     char *search_type = NULL;
2334     char rank_type[128];
2335     int complete_flag;
2336     int sort_flag;
2337     char termz[IT_MAX_WORD+1];
2338     int xpath_len;
2339     int xpath_use = 0;
2340     struct xpath_location_step xpath[10];
2341
2342     if (!log_level_set)
2343     {
2344         log_level_rpn = yaz_log_module_level("rpn");
2345         log_level_set = 1;
2346     }
2347     zebra_maps_attr(zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2348                     rank_type, &complete_flag, &sort_flag);
2349     
2350     yaz_log(YLOG_DEBUG, "reg_id=%c", reg_id);
2351     yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2352     yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2353     yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2354
2355     if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2356         return ZEBRA_FAIL;
2357
2358     if (sort_flag)
2359         return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2360                              rank_type, rset_nmem, rset, kc);
2361     /* consider if an X-Path query is used */
2362     xpath_len = parse_xpath(zh, zapt, attributeSet, xpath, 10, stream);
2363     if (xpath_len >= 0)
2364     {
2365         xpath_use = 1016;  /* searching for element by default */
2366         if (xpath[xpath_len-1].part[0] == '@') 
2367             xpath_use = 1015;  /* last step an attribute .. */
2368     }
2369
2370     /* search using one of the various search type strategies
2371        termz is our UTF-8 search term
2372        attributeSet is top-level default attribute set 
2373        stream is ODR for search
2374        reg_id is the register type
2375        complete_flag is 1 for complete subfield, 0 for incomplete
2376        xpath_use is use-attribute to be used for X-Path search, 0 for none
2377     */
2378     if (!strcmp(search_type, "phrase"))
2379     {
2380         res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2381                                     reg_id, complete_flag, rank_type,
2382                                     xpath_use,
2383                                     num_bases, basenames, rset_nmem,
2384                                     rset, kc);
2385     }
2386     else if (!strcmp(search_type, "and-list"))
2387     {
2388         res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2389                                       reg_id, complete_flag, rank_type,
2390                                       xpath_use,
2391                                       num_bases, basenames, rset_nmem,
2392                                       rset, kc);
2393     }
2394     else if (!strcmp(search_type, "or-list"))
2395     {
2396         res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2397                                      reg_id, complete_flag, rank_type,
2398                                      xpath_use,
2399                                      num_bases, basenames, rset_nmem,
2400                                      rset, kc);
2401     }
2402     else if (!strcmp(search_type, "local"))
2403     {
2404         res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2405                                    rank_type, rset_nmem, rset, kc);
2406     }
2407     else if (!strcmp(search_type, "numeric"))
2408     {
2409         res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2410                                      reg_id, complete_flag, rank_type,
2411                                      xpath_use,
2412                                      num_bases, basenames, rset_nmem,
2413                                      rset, kc);
2414     }
2415     else
2416     {
2417         zebra_setError(zh, YAZ_BIB1_UNSUPP_STRUCTURE_ATTRIBUTE, 0);
2418         res = ZEBRA_FAIL;
2419     }
2420     if (res != ZEBRA_OK)
2421         return res;
2422     if (!*rset)
2423         return ZEBRA_FAIL;
2424     return rpn_search_xpath(zh, attributeSet, num_bases, basenames,
2425                             stream, rank_type, *rset, 
2426                             xpath_len, xpath, rset_nmem, rset, kc);
2427 }
2428
2429 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2430                                       oid_value attributeSet, 
2431                                       NMEM stream, NMEM rset_nmem,
2432                                       Z_SortKeySpecList *sort_sequence,
2433                                       int num_bases, char **basenames,
2434                                       RSET **result_sets, int *num_result_sets,
2435                                       Z_Operator *parent_op,
2436                                       struct rset_key_control *kc);
2437
2438 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2439                          oid_value attributeSet, 
2440                          NMEM stream, NMEM rset_nmem,
2441                          Z_SortKeySpecList *sort_sequence,
2442                          int num_bases, char **basenames,
2443                          RSET *result_set)
2444 {
2445     RSET *result_sets = 0;
2446     int num_result_sets = 0;
2447     ZEBRA_RES res;
2448     struct rset_key_control *kc = zebra_key_control_create(zh);
2449
2450     res = rpn_search_structure(zh, zs, attributeSet,
2451                                stream, rset_nmem,
2452                                sort_sequence, 
2453                                num_bases, basenames,
2454                                &result_sets, &num_result_sets,
2455                                0 /* no parent op */,
2456                                kc);
2457     if (res != ZEBRA_OK)
2458     {
2459         int i;
2460         for (i = 0; i<num_result_sets; i++)
2461             rset_delete(result_sets[i]);
2462         *result_set = 0;
2463     }
2464     else
2465     {
2466         assert(num_result_sets == 1);
2467         assert(result_sets);
2468         assert(*result_sets);
2469         *result_set = *result_sets;
2470     }
2471     (*kc->dec)(kc);
2472     return res;
2473 }
2474
2475 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2476                                oid_value attributeSet, 
2477                                NMEM stream, NMEM rset_nmem,
2478                                Z_SortKeySpecList *sort_sequence,
2479                                int num_bases, char **basenames,
2480                                RSET **result_sets, int *num_result_sets,
2481                                Z_Operator *parent_op,
2482                                struct rset_key_control *kc)
2483 {
2484     *num_result_sets = 0;
2485     if (zs->which == Z_RPNStructure_complex)
2486     {
2487         ZEBRA_RES res;
2488         Z_Operator *zop = zs->u.complex->roperator;
2489         RSET *result_sets_l = 0;
2490         int num_result_sets_l = 0;
2491         RSET *result_sets_r = 0;
2492         int num_result_sets_r = 0;
2493
2494         res = rpn_search_structure(zh, zs->u.complex->s1,
2495                                    attributeSet, stream, rset_nmem,
2496                                    sort_sequence,
2497                                    num_bases, basenames,
2498                                    &result_sets_l, &num_result_sets_l,
2499                                    zop, kc);
2500         if (res != ZEBRA_OK)
2501         {
2502             int i;
2503             for (i = 0; i<num_result_sets_l; i++)
2504                 rset_delete(result_sets_l[i]);
2505             return res;
2506         }
2507         res = rpn_search_structure(zh, zs->u.complex->s2,
2508                                    attributeSet, stream, rset_nmem,
2509                                    sort_sequence,
2510                                    num_bases, basenames,
2511                                    &result_sets_r, &num_result_sets_r,
2512                                    zop, kc);
2513         if (res != ZEBRA_OK)
2514         {
2515             int i;
2516             for (i = 0; i<num_result_sets_l; i++)
2517                 rset_delete(result_sets_l[i]);
2518             for (i = 0; i<num_result_sets_r; i++)
2519                 rset_delete(result_sets_r[i]);
2520             return res;
2521         }
2522
2523         /* make a new list of result for all children */
2524         *num_result_sets = num_result_sets_l + num_result_sets_r;
2525         *result_sets = nmem_malloc(stream, *num_result_sets * 
2526                                    sizeof(**result_sets));
2527         memcpy(*result_sets, result_sets_l, 
2528                num_result_sets_l * sizeof(**result_sets));
2529         memcpy(*result_sets + num_result_sets_l, result_sets_r, 
2530                num_result_sets_r * sizeof(**result_sets));
2531
2532         if (!parent_op || parent_op->which != zop->which
2533             || (zop->which != Z_Operator_and &&
2534                 zop->which != Z_Operator_or))
2535         {
2536             /* parent node different from this one (or non-present) */
2537             /* we must combine result sets now */
2538             RSET rset;
2539             switch (zop->which)
2540             {
2541             case Z_Operator_and:
2542                 rset = rsmulti_and_create(rset_nmem, kc,
2543                                           kc->scope,
2544                                           *num_result_sets, *result_sets);
2545                 break;
2546             case Z_Operator_or:
2547                 rset = rsmulti_or_create(rset_nmem, kc,
2548                                          kc->scope, 0, /* termid */
2549                                          *num_result_sets, *result_sets);
2550                 break;
2551             case Z_Operator_and_not:
2552                 rset = rsbool_create_not(rset_nmem, kc,
2553                                          kc->scope,
2554                                          (*result_sets)[0],
2555                                          (*result_sets)[1]);
2556                 break;
2557             case Z_Operator_prox:
2558                 if (zop->u.prox->which != Z_ProximityOperator_known)
2559                 {
2560                     zebra_setError(zh, 
2561                                    YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2562                                    0);
2563                     return ZEBRA_FAIL;
2564                 }
2565                 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2566                 {
2567                     zebra_setError_zint(zh,
2568                                         YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2569                                         *zop->u.prox->u.known);
2570                     return ZEBRA_FAIL;
2571                 }
2572                 else
2573                 {
2574                     rset = rsprox_create(rset_nmem, kc,
2575                                          kc->scope,
2576                                          *num_result_sets, *result_sets, 
2577                                          *zop->u.prox->ordered,
2578                                          (!zop->u.prox->exclusion ? 
2579                                           0 : *zop->u.prox->exclusion),
2580                                          *zop->u.prox->relationType,
2581                                          *zop->u.prox->distance );
2582                 }
2583                 break;
2584             default:
2585                 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
2586                 return ZEBRA_FAIL;
2587             }
2588             *num_result_sets = 1;
2589             *result_sets = nmem_malloc(stream, *num_result_sets * 
2590                                        sizeof(**result_sets));
2591             (*result_sets)[0] = rset;
2592         }
2593     }
2594     else if (zs->which == Z_RPNStructure_simple)
2595     {
2596         RSET rset;
2597         ZEBRA_RES res;
2598
2599         if (zs->u.simple->which == Z_Operand_APT)
2600         {
2601             yaz_log(YLOG_DEBUG, "rpn_search_APT");
2602             res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2603                                  attributeSet, stream, sort_sequence,
2604                                  num_bases, basenames, rset_nmem, &rset,
2605                                  kc);
2606             if (res != ZEBRA_OK)
2607                 return res;
2608         }
2609         else if (zs->u.simple->which == Z_Operand_resultSetId)
2610         {
2611             yaz_log(YLOG_DEBUG, "rpn_search_ref");
2612             rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2613             if (!rset)
2614             {
2615                 zebra_setError(zh, 
2616                                YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2617                                zs->u.simple->u.resultSetId);
2618                 return ZEBRA_FAIL;
2619             }
2620             rset_dup(rset);
2621         }
2622         else
2623         {
2624             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2625             return ZEBRA_FAIL;
2626         }
2627         *num_result_sets = 1;
2628         *result_sets = nmem_malloc(stream, *num_result_sets * 
2629                                    sizeof(**result_sets));
2630         (*result_sets)[0] = rset;
2631     }
2632     else
2633     {
2634         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2635         return ZEBRA_FAIL;
2636     }
2637     return ZEBRA_OK;
2638 }
2639
/* One dictionary term collected by scan_handle. */
2640 struct scan_info_entry {
2641     char *term;      /* term text with the scan prefix removed; rpn_scan presets it to NULL */
2642     ISAM_P isam_p;   /* ISAM_P value copied out of the dictionary info block */
2643 };
2644
/* Scan state for one ordinal; passed to scan_handle as its client pointer. */
2645 struct scan_info {
2646     struct scan_info_entry *list;  /* before+after slots (allocated in rpn_scan) */
2647     ODR odr;                       /* ODR used by scan_handle to allocate term copies */
2648     int before, after;             /* slot counts; scan_handle uses them to map pos to a slot */
2649     char prefix[20];               /* NUL-terminated key prefix a dictionary entry must start with */
2650 };
2651
2652 static int scan_handle (char *name, const char *info, int pos, void *client)
2653 {
2654     int len_prefix, idx;
2655     struct scan_info *scan_info = (struct scan_info *) client;
2656
2657     len_prefix = strlen(scan_info->prefix);
2658     if (memcmp (name, scan_info->prefix, len_prefix))
2659         return 1;
2660     if (pos > 0)
2661         idx = scan_info->after - pos + scan_info->before;
2662     else
2663         idx = - pos - 1;
2664
2665     if (idx < 0)
2666         return 0;
2667     scan_info->list[idx].term = (char *)
2668         odr_malloc(scan_info->odr, strlen(name + len_prefix)+1);
2669     strcpy(scan_info->list[idx].term, name + len_prefix);
2670     assert (*info == sizeof(ISAM_P));
2671     memcpy (&scan_info->list[idx].isam_p, info+1, sizeof(ISAM_P));
2672     return 0;
2673 }
2674
2675 void zebra_term_untrans_iconv(ZebraHandle zh, NMEM stream, int reg_type,
2676                               char **dst, const char *src)
2677 {
2678     char term_src[IT_MAX_WORD];
2679     char term_dst[IT_MAX_WORD];
2680     
2681     zebra_term_untrans (zh, reg_type, term_src, src);
2682
2683     if (zh->iconv_from_utf8 != 0)
2684     {
2685         int len;
2686         char *inbuf = term_src;
2687         size_t inleft = strlen(term_src);
2688         char *outbuf = term_dst;
2689         size_t outleft = sizeof(term_dst)-1;
2690         size_t ret;
2691         
2692         ret = yaz_iconv (zh->iconv_from_utf8, &inbuf, &inleft,
2693                          &outbuf, &outleft);
2694         if (ret == (size_t)(-1))
2695             len = 0;
2696         else
2697             len = outbuf - term_dst;
2698         *dst = nmem_malloc(stream, len + 1);
2699         if (len > 0)
2700             memcpy (*dst, term_dst, len);
2701         (*dst)[len] = '\0';
2702     }
2703     else
2704         *dst = nmem_strdup(stream, term_src);
2705 }
2706
2707 static void count_set(ZebraHandle zh, RSET rset, zint *count)
2708 {
2709     zint psysno = 0;
2710     struct it_key key;
2711     RSFD rfd;
2712
2713     yaz_log(YLOG_DEBUG, "count_set");
2714
2715     rset->hits_limit = zh->approx_limit;
2716
2717     *count = 0;
2718     rfd = rset_open(rset, RSETF_READ);
2719     while (rset_read(rfd, &key,0 /* never mind terms */))
2720     {
2721         if (key.mem[0] != psysno)
2722         {
2723             psysno = key.mem[0];
2724             if (rfd->counted_items >= rset->hits_limit)
2725                 break;
2726         }
2727     }
2728     rset_close (rfd);
2729     *count = rset->hits_count;
2730 }
2731
/*
 * Scans the term dictionary around the term given in zapt and builds a
 * list of neighbouring terms, each with an occurrence count.
 *
 * zh           - Zebra handle
 * stream       - ODR used for all list/term allocations made here
 * zapt         - attributes plus term to scan for
 * attributeset - attribute set; VAL_NONE is replaced by VAL_BIB1
 * num_bases,
 * basenames    - databases whose attribute ordinals are scanned
 * position     - in: slot wanted for the scan term (before = pos-1 slots
 *                precede it); out: reduced by the number of leading slots
 *                that could not be filled
 * num_entries  - in: number of entries wanted; out: number delivered
 *                (set to 0 on most failure / nothing-found paths)
 * list         - out: first delivered entry; stays 0 when nothing is
 *                delivered
 * is_partial   - out: set to 1 when fewer terms than requested were found
 * limit_set    - optional result set; every term's set is ANDed with it
 *                before counting.  When 0, attribute type 8 of zapt is
 *                used to look one up via resultSetRef.
 * return_zero  - not referenced in this function body
 *
 * Returns ZEBRA_OK or ZEBRA_FAIL.
 */
2732 ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
2733                    oid_value attributeset,
2734                    int num_bases, char **basenames,
2735                    int *position, int *num_entries, ZebraScanEntry **list,
2736                    int *is_partial, RSET limit_set, int return_zero)
2737 {
2738     int i;
2739     int pos = *position;
2740     int num = *num_entries;
2741     int before;
2742     int after;
2743     int base_no;
2744     char termz[IT_MAX_WORD+20];
2745     AttrType use;
2746     int use_value;
2747     const char *use_string = 0;
2748     struct scan_info *scan_info_array;
2749     ZebraScanEntry *glist;
    /* ords: dictionary ordinals to scan (collection stops at 32);
       ptr: per-ordinal cursor used by the two merge loops below */
2750     int ords[32], ord_no = 0;
2751     int ptr[32];
2752
2753     int bases_ok = 0;     /* no of databases with OK attribute */
2754     int errCode = 0;      /* err code (if any is not OK) */
    /* NOTE(review): errString is never assigned after this point, so the
       zebra_setError(zh, errCode, errString) call below always passes 0 */
2755     char *errString = 0;  /* addinfo */
2756
2757     unsigned reg_id;
2758     char *search_type = NULL;
2759     char rank_type[128];
2760     int complete_flag;
2761     int sort_flag;
2762     NMEM rset_nmem = NULL; 
2763     struct rset_key_control *kc = 0;
2764
2765     *list = 0;
2766     *is_partial = 0;
2767
2768     if (attributeset == VAL_NONE)
2769         attributeset = VAL_BIB1;
2770
    /* no explicit limit set: see whether attribute type 8 names one */
2771     if (!limit_set)
2772     {
2773         AttrType termset;
2774         int termset_value_numeric;
2775         const char *termset_value_string;
2776         attr_init(&termset, zapt, 8);
2777         termset_value_numeric =
2778             attr_find_ex(&termset, NULL, &termset_value_string);
        /* -1 is treated as "attribute absent"; -2 selects the string
           value, any other value is used as a numeric set name */
2779         if (termset_value_numeric != -1)
2780         {
2781             char resname[32];
2782             const char *termset_name = 0;
2783             
2784             if (termset_value_numeric != -2)
2785             {
2786                 
2787                 sprintf(resname, "%d", termset_value_numeric);
2788                 termset_name = resname;
2789             }
2790             else
2791                 termset_name = termset_value_string;
2792             
            /* limit_set stays 0 if resultSetRef finds nothing */
2793             limit_set = resultSetRef (zh, termset_name);
2794         }
2795     }
2796         
2797     yaz_log(YLOG_DEBUG, "position = %d, num = %d set=%d",
2798             pos, num, attributeset);
2799         
    /* attribute type 1: the use attribute, numeric or string */
2800     attr_init(&use, zapt, 1);
2801     use_value = attr_find_ex(&use, &attributeset, &use_string);
2802
2803     if (zebra_maps_attr(zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2804                         rank_type, &complete_flag, &sort_flag))
2805     {
2806         *num_entries = 0;
2807         zebra_setError(zh, YAZ_BIB1_UNSUPP_ATTRIBUTE_TYPE, 0);
2808         return ZEBRA_FAIL;
2809     }
2810     yaz_log(YLOG_DEBUG, "use_value = %d", use_value);
2811
    /* no use attribute given: fall back to 1016
       (presumably Bib-1 "Any" - TODO confirm) */
2812     if (use_value == -1)
2813         use_value = 1016;
    /* collect the dictionary ordinals to scan, across all databases */
2814     for (base_no = 0; base_no < num_bases && ord_no < 32; base_no++)
2815     {
2816         data1_local_attribute *local_attr;
2817         attent attp;
2818         int ord;
2819
2820         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2821         {
2822             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2823                            basenames[base_no]);
2824             *num_entries = 0;
2825             return ZEBRA_FAIL;
2826         }
2827
2828         if (use_string &&
2829             (ord = zebraExplain_lookup_attr_str(zh->reg->zei,
2830                                                 use_string)) >= 0)
2831         {
2832             /* we have a match for a raw string attribute */
            /* NOTE(review): ord == 0 counts as a match above but is not
               added to ords[] here - confirm that is intended */
2833             if (ord > 0)
2834                 ords[ord_no++] = ord;
2835             attp.local_attributes = 0;  /* no more attributes */
2836         }
2837         else
2838         {
2839             int r;
2840             
2841             if ((r = att_getentbyatt (zh, &attp, attributeset, use_value,
2842                                       use_string)))
2843             {
2844                 yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d",
2845                         attributeset, use_value);
2846                 if (r == -1)
2847                 {
2848                     errCode = YAZ_BIB1_UNSUPP_USE_ATTRIBUTE;
2849                     if (use_string)
2850                         zebra_setError(zh, YAZ_BIB1_UNSUPP_USE_ATTRIBUTE,
2851                                        use_string);
2852                     else
2853                         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_USE_ATTRIBUTE,
2854                                             use_value);
2855                 }   
2856                 else
2857                 {
                    /* NOTE(review): errCode is left untouched on this
                       path, so the "!bases_ok && errCode" test after the
                       loop does not turn it into ZEBRA_FAIL */
2858                     zebra_setError(zh, YAZ_BIB1_UNSUPP_ATTRIBUTE_SET, 0);
2859                 }
                /* skip this database, try the next one */
2860                 continue;
2861             }
2862         }
2863         bases_ok++;
2864         for (local_attr = attp.local_attributes; local_attr && ord_no < 32;
2865              local_attr = local_attr->next)
2866         {
2867             ord = zebraExplain_lookup_attr_su(zh->reg->zei,
2868                                               attp.attset_ordinal,
2869                                               local_attr->local);
2870             if (ord > 0)
2871                 ords[ord_no++] = ord;
2872         }
2873     }
    /* fail only when no database accepted the attribute and an error
       code was recorded */
2874     if (!bases_ok && errCode)
2875     {
2876         zebra_setError(zh, errCode, errString);
2877         *num_entries = 0;
2878         return ZEBRA_FAIL;
2879     }
2880     if (ord_no == 0)
2881     {
2882         *num_entries = 0;
2883         return ZEBRA_OK;
2884     }
2885     /* prepare dictionary scanning */
2886     if (num < 1)
2887     {
2888         *num_entries = 0;
2889         return ZEBRA_OK;
2890     }
    /* split the request: 'before' slots precede the scan term's slot,
       'after' slots start at it; both are clamped to >= 0 */
2891     before = pos-1;
2892     if (before < 0)
2893         before = 0;
2894     after = 1+num-pos;
2895     if (after < 0)
2896         after = 0;
2897     yaz_log(YLOG_DEBUG, "rpn_scan pos=%d num=%d before=%d "
2898             "after=%d before+after=%d",
2899             pos, num, before, after, before+after);
2900     scan_info_array = (struct scan_info *)
2901         odr_malloc(stream, ord_no * sizeof(*scan_info_array));
    /* one dictionary scan per ordinal; scan_handle fills the lists */
2902     for (i = 0; i < ord_no; i++)
2903     {
2904         int j, prefix_len = 0;
2905         int before_tmp = before, after_tmp = after;
2906         struct scan_info *scan_info = scan_info_array + i;
2907         struct rpn_char_map_info rcmi;
2908
2909         rpn_char_map_prepare (zh->reg, reg_id, &rcmi);
2910
2911         scan_info->before = before;
2912         scan_info->after = after;
2913         scan_info->odr = stream;
2914
2915         scan_info->list = (struct scan_info_entry *)
2916             odr_malloc(stream, (before+after) * sizeof(*scan_info->list));
        /* a NULL term marks a slot the scan did not fill */
2917         for (j = 0; j<before+after; j++)
2918             scan_info->list[j].term = NULL;
2919
        /* dictionary key = encoded ordinal + register id + term; the
           first two parts are kept as the prefix scan_handle checks */
2920         prefix_len += key_SU_encode (ords[i], termz + prefix_len);
2921         termz[prefix_len++] = reg_id;
2922         termz[prefix_len] = 0;
2923         strcpy(scan_info->prefix, termz);
2924
        /* NOTE(review): unlike the other failure returns, this one does
           not reset *num_entries - confirm callers do not rely on it */
2925         if (trans_scan_term(zh, zapt, termz+prefix_len, reg_id) == ZEBRA_FAIL)
2926             return ZEBRA_FAIL;
2927         
        /* copies of before/after are passed by address, so the values
           used below are not touched by dict_scan */
2928         dict_scan(zh->reg->dict, termz, &before_tmp, &after_tmp,
2929                   scan_info, scan_handle);
2930     }
2931     glist = (ZebraScanEntry *)
2932         odr_malloc(stream, (before+after)*sizeof(*glist));
2933
2934     rset_nmem = nmem_create();
2935     kc = zebra_key_control_create(zh);
2936
2937     /* consider terms after main term */
    /* slots [before, before+after) of each list hold these terms */
2938     for (i = 0; i < ord_no; i++)
2939         ptr[i] = before;
2940     
2941     *is_partial = 0;
2942     for (i = 0; i<after; i++)
2943     {
2944         int j, j0 = -1;
2945         const char *mterm = NULL;
2946         const char *tst;
2947         RSET rset = 0;
2948         int lo = i + pos-1; /* offset in result list */
2949
2950         /* find: j0 is the first of the minimal values */
2951         for (j = 0; j < ord_no; j++)
2952         {
2953             if (ptr[j] < before+after && ptr[j] >= 0 &&
2954                 (tst = scan_info_array[j].list[ptr[j]].term) &&
2955                 (!mterm || strcmp (tst, mterm) < 0))
2956             {
2957                 j0 = j;
2958                 mterm = tst;
2959             }
2960         }
2961         if (j0 == -1)
2962             break;  /* no value found, stop */
2963
2964         /* get result set for first one , but only if it's within bounds */
        /* lo is negative when pos < 1; such terms are consumed from the
           lists but not reported */
2965         if (lo >= 0)
2966         {
2967             /* get result set for first term */
2968             zebra_term_untrans_iconv(zh, stream->mem, reg_id,
2969                                      &glist[lo].term, mterm);
2970             rset = rset_trunc(zh, &scan_info_array[j0].list[ptr[j0]].isam_p, 1,
2971                               glist[lo].term, strlen(glist[lo].term),
2972                               NULL, 0, zapt->term->which, rset_nmem, 
2973                               kc, kc->scope, 0, reg_id, 0 /* hits_limit */,
2974                               0 /* term_ref_id_str */);
2975         }
2976         ptr[j0]++; /* move index for this set .. */
2977         /* get result set for remaining scan terms */
        /* the same term under another ordinal is OR-ed into this entry */
2978         for (j = j0+1; j<ord_no; j++)
2979         {
2980             if (ptr[j] < before+after && ptr[j] >= 0 &&
2981                 (tst = scan_info_array[j].list[ptr[j]].term) &&
2982                 !strcmp (tst, mterm))
2983             {
2984                 if (lo >= 0)
2985                 {
2986                     RSET rsets[2];
2987                     
2988                     rsets[0] = rset;
2989                     rsets[1] =
2990                         rset_trunc(
2991                             zh, &scan_info_array[j].list[ptr[j]].isam_p, 1,
2992                             glist[lo].term,
2993                             strlen(glist[lo].term), NULL, 0,
2994                             zapt->term->which,rset_nmem,
2995                             kc, kc->scope, 0, reg_id, 0 /* hits_limit */,
2996                             0 /* term_ref_id_str */ );
2997                     rset = rsmulti_or_create(rset_nmem, kc,
2998                                              kc->scope, 0 /* termid */,
2999                                              2, rsets);
3000                 }
3001                 ptr[j]++;
3002             }
3003         }
3004         if (lo >= 0)
3005         {
3006             zint count;
3007             /* merge with limit_set if given */
3008             if (limit_set)
3009             {
3010                 RSET rsets[2];
3011                 rsets[0] = rset;
3012                 rsets[1] = rset_dup(limit_set);
3013                 
3014                 rset = rsmulti_and_create(rset_nmem, kc,
3015                                           kc->scope,
3016                                           2, rsets);
3017             }
3018             /* count it */
3019             count_set(zh, rset, &count);
3020             glist[lo].occurrences = count;
3021             rset_delete(rset);
3022         }
3023     }
    /* loop ended early: fewer than 'after' terms were available */
3024     if (i < after)
3025     {
3026         *num_entries -= (after-i);
3027         *is_partial = 1;
3028         if (*num_entries < 0)
3029         {
            /* release the key control and rset memory before leaving */
3030             (*kc->dec)(kc);
3031             nmem_destroy(rset_nmem);
3032             *num_entries = 0;
3033             return ZEBRA_OK;
3034         }
3035     }
3036     /* consider terms before main term */
    /* here ptr[] counts consumed terms; the list slot read is
       before-1-ptr[j], and the largest term is picked each round */
3037     for (i = 0; i<ord_no; i++)
3038         ptr[i] = 0;
3039     
3040     for (i = 0; i<before; i++)
3041     {
3042         int j, j0 = -1;
3043         const char *mterm = NULL;
3044         const char *tst;
3045         RSET rset;
3046         int lo = before-1-i; /* offset in result list */
3047         zint count;
3048         
3049         for (j = 0; j <ord_no; j++)
3050         {
3051             if (ptr[j] < before && ptr[j] >= 0 &&
3052                 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
3053                 (!mterm || strcmp (tst, mterm) > 0))
3054             {
3055                 j0 = j;
3056                     mterm = tst;
3057             }
3058         }
3059         if (j0 == -1)
3060             break;
3061         
3062         zebra_term_untrans_iconv(zh, stream->mem, reg_id,
3063                                  &glist[lo].term, mterm);
3064         
3065         rset = rset_trunc
3066             (zh, &scan_info_array[j0].list[before-1-ptr[j0]].isam_p, 1,
3067              glist[lo].term, strlen(glist[lo].term),
3068              NULL, 0, zapt->term->which, rset_nmem,
3069              kc, kc->scope, 0, reg_id, 0 /* hits_limit */,
3070              0 /* term_ref_id_str */);
3071         
3072         ptr[j0]++;
3073         
        /* OR in the same term found under the remaining ordinals */
3074         for (j = j0+1; j<ord_no; j++)
3075         {
3076             if (ptr[j] < before && ptr[j] >= 0 &&
3077                 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
3078                 !strcmp (tst, mterm))
3079             {
3080                 RSET rsets[2];
3081                 
3082                 rsets[0] = rset;
3083                 rsets[1] = rset_trunc(
3084                     zh,
3085                     &scan_info_array[j].list[before-1-ptr[j]].isam_p, 1,
3086                     glist[lo].term,
3087                     strlen(glist[lo].term), NULL, 0,
3088                     zapt->term->which, rset_nmem,
3089                     kc, kc->scope, 0, reg_id, 0 /* hits_limit */,
3090                     0 /* term_ref_id_str */);
3091                 rset = rsmulti_or_create(rset_nmem, kc,
3092                                          kc->scope, 0 /* termid */, 2, rsets);
3093                 
3094                 ptr[j]++;
3095             }
3096         }
        /* restrict to limit_set, if any, before counting */
3097         if (limit_set)
3098         {
3099             RSET rsets[2];
3100             rsets[0] = rset;
3101             rsets[1] = rset_dup(limit_set);
3102             
3103             rset = rsmulti_and_create(rset_nmem, kc,
3104                                       kc->scope, 2, rsets);
3105         }
3106         count_set(zh, rset, &count);
3107         glist[lo].occurrences = count;
3108         rset_delete (rset);
3109     }
3110     (*kc->dec)(kc);
3111     nmem_destroy(rset_nmem);
    /* i becomes the number of leading slots that stayed unfilled */
3112     i = before-i;
3113     if (i)
3114     {
3115         *is_partial = 1;
3116         *position -= i;
3117         *num_entries -= i;
3118         if (*num_entries <= 0)
3119         {
3120             *num_entries = 0;
3121             return ZEBRA_OK;
3122         }
3123     }
3124     
3125     *list = glist + i;               /* list is set to first 'real' entry */
3126     
3127     yaz_log(YLOG_DEBUG, "position = %d, num_entries = %d",
3128             *position, *num_entries);
3129     return ZEBRA_OK;
3130 }
3131