Fixed bug #245: Time for getting records changes a lot based on record
[idzebra-moved-to-github.git] / index / zrpn.c
1 /* $Id: zrpn.c,v 1.141.2.5 2004-11-26 12:20:32 adam Exp $
2    Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004
3    Index Data Aps
4
5 This file is part of the Zebra server.
6
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra.  If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA.
21 */
22
23
24 #include <stdio.h>
25 #include <assert.h>
26 #ifdef WIN32
27 #include <io.h>
28 #else
29 #include <unistd.h>
30 #endif
31 #include <ctype.h>
32
33 #include "index.h"
34 #include <zebra_xpath.h>
35
36 #include <charmap.h>
37 #include <rstemp.h>
38 #include <rsnull.h>
39 #include <rsbool.h>
40 #include <rsbetween.h>
41 #include <rsprox.h>
42
43 struct rpn_char_map_info {
44     ZebraMaps zm;
45     int reg_type;
46 };
47
48 typedef struct {
49     int type;
50     int major;
51     int minor;
52     Z_AttributesPlusTerm *zapt;
53 } AttrType;
54
55
56 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
57 {
58     struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
59     const char **out = zebra_maps_input(p->zm, p->reg_type, from, len, 0);
60 #if 0
61     if (out && *out)
62     {
63         const char *outp = *out;
64         yaz_log(LOG_LOG, "---");
65         while (*outp)
66         {
67             yaz_log(LOG_LOG, "%02X", *outp);
68             outp++;
69         }
70     }
71 #endif
72     return out;
73 }
74
75 static void rpn_char_map_prepare(struct zebra_register *reg, int reg_type,
76                                   struct rpn_char_map_info *map_info)
77 {
78     map_info->zm = reg->zebra_maps;
79     map_info->reg_type = reg_type;
80     dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
81 }
82
83 static int attr_find_ex(AttrType *src, oid_value *attributeSetP,
84                          const char **string_value)
85 {
86     int num_attributes;
87
88     num_attributes = src->zapt->attributes->num_attributes;
89     while (src->major < num_attributes)
90     {
91         Z_AttributeElement *element;
92
93         element = src->zapt->attributes->attributes[src->major];
94         if (src->type == *element->attributeType)
95         {
96             switch (element->which) 
97             {
98             case Z_AttributeValue_numeric:
99                 ++(src->major);
100                 if (element->attributeSet && attributeSetP)
101                 {
102                     oident *attrset;
103
104                     attrset = oid_getentbyoid(element->attributeSet);
105                     *attributeSetP = attrset->value;
106                 }
107                 return *element->value.numeric;
108                 break;
109             case Z_AttributeValue_complex:
110                 if (src->minor >= element->value.complex->num_list)
111                     break;
112                 if (element->attributeSet && attributeSetP)
113                 {
114                     oident *attrset;
115                     
116                     attrset = oid_getentbyoid(element->attributeSet);
117                     *attributeSetP = attrset->value;
118                 }
119                 if (element->value.complex->list[src->minor]->which ==  
120                     Z_StringOrNumeric_numeric)
121                 {
122                     ++(src->minor);
123                     return
124                         *element->value.complex->list[src->minor-1]->u.numeric;
125                 }
126                 else if (element->value.complex->list[src->minor]->which ==  
127                          Z_StringOrNumeric_string)
128                 {
129                     if (!string_value)
130                         break;
131                     ++(src->minor);
132                     *string_value = 
133                         element->value.complex->list[src->minor-1]->u.string;
134                     return -2;
135                 }
136                 else
137                     break;
138             default:
139                 assert(0);
140             }
141         }
142         ++(src->major);
143     }
144     return -1;
145 }
146
147 static int attr_find(AttrType *src, oid_value *attributeSetP)
148 {
149     return attr_find_ex(src, attributeSetP, 0);
150 }
151
152 static void attr_init(AttrType *src, Z_AttributesPlusTerm *zapt,
153                        int type)
154 {
155     src->zapt = zapt;
156     src->type = type;
157     src->major = 0;
158     src->minor = 0;
159 }
160
161 #define TERM_COUNT        
162        
163 struct grep_info {        
164 #ifdef TERM_COUNT        
165     int *term_no;        
166 #endif        
167     ISAMS_P *isam_p_buf;
168     int isam_p_size;        
169     int isam_p_indx;
170     ZebraHandle zh;
171     int reg_type;
172     ZebraSet termset;
173 };        
174
175 static void term_untrans(ZebraHandle zh, int reg_type,
176                            char *dst, const char *src)
177 {
178     int len = 0;
179     while (*src)
180     {
181         const char *cp = zebra_maps_output(zh->reg->zebra_maps,
182                                             reg_type, &src);
183         if (!cp && len < IT_MAX_WORD-1)
184             dst[len++] = *src++;
185         else
186             while (*cp && len < IT_MAX_WORD-1)
187                 dst[len++] = *cp++;
188     }
189     dst[len] = '\0';
190 }
191
192 static void add_isam_p(const char *name, const char *info,
193                         struct grep_info *p)
194 {
195     if (p->isam_p_indx == p->isam_p_size)
196     {
197         ISAMS_P *new_isam_p_buf;
198 #ifdef TERM_COUNT        
199         int *new_term_no;        
200 #endif
201         p->isam_p_size = 2*p->isam_p_size + 100;
202         new_isam_p_buf = (ISAMS_P *) xmalloc(sizeof(*new_isam_p_buf) *
203                                              p->isam_p_size);
204         if (p->isam_p_buf)
205         {
206             memcpy(new_isam_p_buf, p->isam_p_buf,
207                     p->isam_p_indx * sizeof(*p->isam_p_buf));
208             xfree(p->isam_p_buf);
209         }
210         p->isam_p_buf = new_isam_p_buf;
211
212 #ifdef TERM_COUNT
213         new_term_no = (int *) xmalloc(sizeof(*new_term_no) *
214                                        p->isam_p_size);
215         if (p->term_no)
216         {
217             memcpy(new_term_no, p->isam_p_buf,
218                     p->isam_p_indx * sizeof(*p->term_no));
219             xfree(p->term_no);
220         }
221         p->term_no = new_term_no;
222 #endif
223     }
224     assert(*info == sizeof(*p->isam_p_buf));
225     memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
226
227 #if 1
228     if (p->termset)
229     {
230         const char *db;
231         int set, use;
232         char term_tmp[IT_MAX_WORD];
233         int su_code = 0;
234         int len = key_SU_decode(&su_code, name);
235         
236         term_untrans(p->zh, p->reg_type, term_tmp, name+len+1);
237         yaz_log(LOG_LOG, "grep: %d %c %s", su_code, name[len], term_tmp);
238         zebraExplain_lookup_ord(p->zh->reg->zei,
239                                  su_code, &db, &set, &use);
240         yaz_log(LOG_LOG, "grep:  set=%d use=%d db=%s", set, use, db);
241         
242         resultSetAddTerm(p->zh, p->termset, name[len], db,
243                           set, use, term_tmp);
244     }
245 #endif
246     (p->isam_p_indx)++;
247 }
248
/* Dictionary lookup callback: P is the struct grep_info that collects
   the hit.  Always returns 0. */
static int grep_handle(char *name, const char *info, void *p)
{
    struct grep_info *collector = (struct grep_info *) p;

    add_isam_p(name, info, collector);
    return 0;
}
254
255 static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src,
256                      const char *ct1, const char *ct2, int first)
257 {
258     const char *s1, *s0 = *src;
259     const char **map;
260
261     /* skip white space */
262     while (*s0)
263     {
264         if (ct1 && strchr(ct1, *s0))
265             break;
266         if (ct2 && strchr(ct2, *s0))
267             break;
268         s1 = s0;
269         map = zebra_maps_input(zebra_maps, reg_type, &s1, strlen(s1), first);
270         if (**map != *CHR_SPACE)
271             break;
272         s0 = s1;
273     }
274     *src = s0;
275     return *s0;
276 }
277
278 #define REGEX_CHARS " []()|.*+?!"
279
280 /* term_100: handle term, where trunc=none(no operators at all) */
281 static int term_100(ZebraMaps zebra_maps, int reg_type,
282                      const char **src, char *dst, int space_split,
283                      char *dst_term)
284 {
285     const char *s0, *s1;
286     const char **map;
287     int i = 0;
288     int j = 0;
289
290     const char *space_start = 0;
291     const char *space_end = 0;
292
293     if (!term_pre(zebra_maps, reg_type, src, NULL, NULL, !space_split))
294         return 0;
295     s0 = *src;
296     while (*s0)
297     {
298         s1 = s0;
299         map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0);
300         if (space_split)
301         {
302             if (**map == *CHR_SPACE)
303                 break;
304         }
305         else  /* complete subfield only. */
306         {
307             if (**map == *CHR_SPACE)
308             {   /* save space mapping for later  .. */
309                 space_start = s1;
310                 space_end = s0;
311                 continue;
312             }
313             else if (space_start)
314             {   /* reload last space */
315                 while (space_start < space_end)
316                 {
317                     if (strchr(REGEX_CHARS, *space_start))
318                         dst[i++] = '\\';
319                     dst_term[j++] = *space_start;
320                     dst[i++] = *space_start++;
321                 }
322                 /* and reset */
323                 space_start = space_end = 0;
324             }
325         }
326         /* add non-space char */
327         while (s1 < s0)
328         {
329             if (strchr(REGEX_CHARS, *s1))
330                 dst[i++] = '\\';
331             dst_term[j++] = *s1;
332             dst[i++] = *s1++;
333         }
334     }
335     dst[i] = '\0';
336     dst_term[j] = '\0';
337     *src = s0;
338     return i;
339 }
340
341 /* term_101: handle term, where trunc=Process # */
342 static int term_101(ZebraMaps zebra_maps, int reg_type,
343                      const char **src, char *dst, int space_split,
344                      char *dst_term)
345 {
346     const char *s0, *s1;
347     const char **map;
348     int i = 0;
349     int j = 0;
350
351     if (!term_pre(zebra_maps, reg_type, src, "#", "#", !space_split))
352         return 0;
353     s0 = *src;
354     while (*s0)
355     {
356         if (*s0 == '#')
357         {
358             dst[i++] = '.';
359             dst[i++] = '*';
360             dst_term[j++] = *s0++;
361         }
362         else
363         {
364             s1 = s0;
365             map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0);
366             if (space_split && **map == *CHR_SPACE)
367                 break;
368             while (s1 < s0)
369             {
370                 if (strchr(REGEX_CHARS, *s1))
371                     dst[i++] = '\\';
372                 dst_term[j++] = *s1;
373                 dst[i++] = *s1++;
374             }
375         }
376     }
377     dst[i] = '\0';
378     dst_term[j++] = '\0';
379     *src = s0;
380     return i;
381 }
382
383 /* term_103: handle term, where trunc=re-2 (regular expressions) */
384 static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src,
385                      char *dst, int *errors, int space_split,
386                      char *dst_term)
387 {
388     int i = 0;
389     int j = 0;
390     const char *s0, *s1;
391     const char **map;
392
393     if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split))
394         return 0;
395     s0 = *src;
396     if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
397         isdigit(s0[1]))
398     {
399         *errors = s0[1] - '0';
400         s0 += 3;
401         if (*errors > 3)
402             *errors = 3;
403     }
404     while (*s0)
405     {
406         if (strchr("^\\()[].*+?|-", *s0))
407         {
408             dst_term[j++] = *s0;
409             dst[i++] = *s0++;
410         }
411         else
412         {
413             s1 = s0;
414             map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0);
415             if (**map == *CHR_SPACE)
416                 break;
417             while (s1 < s0)
418             {
419                 if (strchr(REGEX_CHARS, *s1))
420                     dst[i++] = '\\';
421                 dst_term[j++] = *s1;
422                 dst[i++] = *s1++;
423             }
424         }
425     }
426     dst[i] = '\0';
427     dst_term[j] = '\0';
428     *src = s0;
429     return i;
430 }
431
432 /* term_103: handle term, where trunc=re-1 (regular expressions) */
433 static int term_102 (ZebraMaps zebra_maps, int reg_type, const char **src,
434                      char *dst, int space_split, char *dst_term)
435 {
436     return term_103(zebra_maps, reg_type, src, dst, NULL, space_split,
437                      dst_term);
438 }
439
440
441 /* term_104: handle term, where trunc=Process # and ! */
442 static int term_104(ZebraMaps zebra_maps, int reg_type,
443                      const char **src, char *dst, int space_split,
444                      char *dst_term)
445 {
446     const char *s0, *s1;
447     const char **map;
448     int i = 0;
449     int j = 0;
450
451     if (!term_pre(zebra_maps, reg_type, src, "?*#", "?*#", !space_split))
452         return 0;
453     s0 = *src;
454     while (*s0)
455     {
456         if (*s0 == '?')
457         {
458             dst_term[j++] = *s0++;
459             if (*s0 >= '0' && *s0 <= '9')
460             {
461                 int limit = 0;
462                 while (*s0 >= '0' && *s0 <= '9')
463                 {
464                     limit = limit * 10 + (*s0 - '0');
465                     dst_term[j++] = *s0++;
466                 }
467                 if (limit > 20)
468                     limit = 20;
469                 while (--limit >= 0)
470                 {
471                     dst[i++] = '.';
472                     dst[i++] = '?';
473                 }
474             }
475             else
476             {
477                 dst[i++] = '.';
478                 dst[i++] = '*';
479             }
480         }
481         else if (*s0 == '*')
482         {
483             dst[i++] = '.';
484             dst[i++] = '*';
485             dst_term[j++] = *s0++;
486         }
487         else if (*s0 == '#')
488         {
489             dst[i++] = '.';
490             dst_term[j++] = *s0++;
491         }
492         {
493             s1 = s0;
494             map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0);
495             if (space_split && **map == *CHR_SPACE)
496                 break;
497             while (s1 < s0)
498             {
499                 if (strchr(REGEX_CHARS, *s1))
500                     dst[i++] = '\\';
501                 dst_term[j++] = *s1;
502                 dst[i++] = *s1++;
503             }
504         }
505     }
506     dst[i] = '\0';
507     dst_term[j++] = '\0';
508     *src = s0;
509     return i;
510 }
511
512 /* term_105/106: handle term, where trunc=Process * and ! and right trunc */
513 static int term_105 (ZebraMaps zebra_maps, int reg_type,
514                      const char **src, char *dst, int space_split,
515                      char *dst_term, int right_truncate)
516 {
517     const char *s0, *s1;
518     const char **map;
519     int i = 0;
520     int j = 0;
521
522     if (!term_pre(zebra_maps, reg_type, src, "*!", "*!", !space_split))
523         return 0;
524     s0 = *src;
525     while (*s0)
526     {
527         if (*s0 == '*')
528         {
529             dst[i++] = '.';
530             dst[i++] = '*';
531             dst_term[j++] = *s0++;
532         }
533         else if (*s0 == '!')
534         {
535             dst[i++] = '.';
536             dst_term[j++] = *s0++;
537         }
538         {
539             s1 = s0;
540             map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0);
541             if (space_split && **map == *CHR_SPACE)
542                 break;
543             while (s1 < s0)
544             {
545                 if (strchr(REGEX_CHARS, *s1))
546                     dst[i++] = '\\';
547                 dst_term[j++] = *s1;
548                 dst[i++] = *s1++;
549             }
550         }
551     }
552     if (right_truncate)
553     {
554         dst[i++] = '.';
555         dst[i++] = '*';
556     }
557     dst[i] = '\0';
558     
559     dst_term[j++] = '\0';
560     *src = s0;
561     return i;
562 }
563
564
565 /* gen_regular_rel - generate regular expression from relation
566  *  val:     border value (inclusive)
567  *  islt:    1 if <=; 0 if >=.
568  */
569 static void gen_regular_rel(char *dst, int val, int islt)
570 {
571     int dst_p;
572     int w, d, i;
573     int pos = 0;
574     char numstr[20];
575
576     yaz_log(LOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
577     if (val >= 0)
578     {
579         if (islt)
580             strcpy(dst, "(-[0-9]+|(");
581         else
582             strcpy(dst, "((");
583     } 
584     else
585     {
586         if (!islt)
587         {
588             strcpy(dst, "([0-9]+|-(");
589             dst_p = strlen(dst);
590             islt = 1;
591         }
592         else
593         {
594             strcpy(dst, "(-(");
595             islt = 0;
596         }
597         val = -val;
598     }
599     dst_p = strlen(dst);
600     sprintf(numstr, "%d", val);
601     for (w = strlen(numstr); --w >= 0; pos++)
602     {
603         d = numstr[w];
604         if (pos > 0)
605         {
606             if (islt)
607             {
608                 if (d == '0')
609                     continue;
610                 d--;
611             } 
612             else
613             {
614                 if (d == '9')
615                     continue;
616                 d++;
617             }
618         }
619         
620         strcpy(dst + dst_p, numstr);
621         dst_p = strlen(dst) - pos - 1;
622
623         if (islt)
624         {
625             if (d != '0')
626             {
627                 dst[dst_p++] = '[';
628                 dst[dst_p++] = '0';
629                 dst[dst_p++] = '-';
630                 dst[dst_p++] = d;
631                 dst[dst_p++] = ']';
632             }
633             else
634                 dst[dst_p++] = d;
635         }
636         else
637         {
638             if (d != '9')
639             { 
640                 dst[dst_p++] = '[';
641                 dst[dst_p++] = d;
642                 dst[dst_p++] = '-';
643                 dst[dst_p++] = '9';
644                 dst[dst_p++] = ']';
645             }
646             else
647                 dst[dst_p++] = d;
648         }
649         for (i = 0; i<pos; i++)
650         {
651             dst[dst_p++] = '[';
652             dst[dst_p++] = '0';
653             dst[dst_p++] = '-';
654             dst[dst_p++] = '9';
655             dst[dst_p++] = ']';
656         }
657         dst[dst_p++] = '|';
658     }
659     dst[dst_p] = '\0';
660     if (islt)
661     {
662         /* match everything less than 10^(pos-1) */
663         strcat(dst, "0*");
664         for (i = 1; i<pos; i++)
665             strcat(dst, "[0-9]?");
666     }
667     else
668     {
669         /* match everything greater than 10^pos */
670         for (i = 0; i <= pos; i++)
671             strcat(dst, "[0-9]");
672         strcat(dst, "[0-9]*");
673     }
674     strcat(dst, "))");
675 }
676
/* Copy one regex unit from SRC at index *INDX to *TERM_P: a single
   character, or a backslash together with the character after it.
   Both *TERM_P and *INDX are advanced past what was copied. */
void string_rel_add_char(char **term_p, const char *src, int *indx)
{
    char *out = *term_p;
    int pos = *indx;

    if (src[pos] == '\\')
        *out++ = src[pos++];
    *out++ = src[pos++];

    *term_p = out;
    *indx = pos;
}
683
684 /*
685  *   >  abc     ([b-].*|a[c-].*|ab[d-].*|abc.+)
686  *              ([^-a].*|a[^-b].*ab[^-c].*|abc.+)
687  *   >= abc     ([b-].*|a[c-].*|ab[c-].*)
688  *              ([^-a].*|a[^-b].*|ab[c-].*)
689  *   <  abc     ([-0].*|a[-a].*|ab[-b].*)
690  *              ([^a-].*|a[^b-].*|ab[^c-].*)
691  *   <= abc     ([-0].*|a[-a].*|ab[-b].*|abc)
692  *              ([^a-].*|a[^b-].*|ab[^c-].*|abc)
693  */
694 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
695                             const char **term_sub, char *term_dict,
696                             oid_value attributeSet,
697                             int reg_type, int space_split, char *term_dst)
698 {
699     AttrType relation;
700     int relation_value;
701     int i;
702     char *term_tmp = term_dict + strlen(term_dict);
703     char term_component[2*IT_MAX_WORD+20];
704
705     attr_init(&relation, zapt, 2);
706     relation_value = attr_find(&relation, NULL);
707
708     yaz_log(LOG_DEBUG, "string relation value=%d", relation_value);
709     switch (relation_value)
710     {
711     case 1:
712         if (!term_100 (zh->reg->zebra_maps, reg_type,
713                        term_sub, term_component,
714                        space_split, term_dst))
715             return 0;
716         yaz_log(LOG_DEBUG, "Relation <");
717         
718         *term_tmp++ = '(';
719         for (i = 0; term_component[i]; )
720         {
721             int j = 0;
722
723             if (i)
724                 *term_tmp++ = '|';
725             while (j < i)
726                 string_rel_add_char (&term_tmp, term_component, &j);
727
728             *term_tmp++ = '[';
729
730             *term_tmp++ = '^';
731             string_rel_add_char (&term_tmp, term_component, &i);
732             *term_tmp++ = '-';
733
734             *term_tmp++ = ']';
735             *term_tmp++ = '.';
736             *term_tmp++ = '*';
737
738             if ((term_tmp - term_dict) > IT_MAX_WORD)
739                 break;
740         }
741         *term_tmp++ = ')';
742         *term_tmp = '\0';
743         break;
744     case 2:
745         if (!term_100 (zh->reg->zebra_maps, reg_type,
746                        term_sub, term_component,
747                        space_split, term_dst))
748             return 0;
749         yaz_log(LOG_DEBUG, "Relation <=");
750
751         *term_tmp++ = '(';
752         for (i = 0; term_component[i]; )
753         {
754             int j = 0;
755
756             while (j < i)
757                 string_rel_add_char (&term_tmp, term_component, &j);
758             *term_tmp++ = '[';
759
760             *term_tmp++ = '^';
761             string_rel_add_char (&term_tmp, term_component, &i);
762             *term_tmp++ = '-';
763
764             *term_tmp++ = ']';
765             *term_tmp++ = '.';
766             *term_tmp++ = '*';
767
768             *term_tmp++ = '|';
769
770             if ((term_tmp - term_dict) > IT_MAX_WORD)
771                 break;
772         }
773         for (i = 0; term_component[i]; )
774             string_rel_add_char (&term_tmp, term_component, &i);
775         *term_tmp++ = ')';
776         *term_tmp = '\0';
777         break;
778     case 5:
779         if (!term_100 (zh->reg->zebra_maps, reg_type,
780                        term_sub, term_component, space_split, term_dst))
781             return 0;
782         yaz_log(LOG_DEBUG, "Relation >");
783
784         *term_tmp++ = '(';
785         for (i = 0; term_component[i];)
786         {
787             int j = 0;
788
789             while (j < i)
790                 string_rel_add_char (&term_tmp, term_component, &j);
791             *term_tmp++ = '[';
792             
793             *term_tmp++ = '^';
794             *term_tmp++ = '-';
795             string_rel_add_char (&term_tmp, term_component, &i);
796
797             *term_tmp++ = ']';
798             *term_tmp++ = '.';
799             *term_tmp++ = '*';
800
801             *term_tmp++ = '|';
802
803             if ((term_tmp - term_dict) > IT_MAX_WORD)
804                 break;
805         }
806         for (i = 0; term_component[i];)
807             string_rel_add_char (&term_tmp, term_component, &i);
808         *term_tmp++ = '.';
809         *term_tmp++ = '+';
810         *term_tmp++ = ')';
811         *term_tmp = '\0';
812         break;
813     case 4:
814         if (!term_100 (zh->reg->zebra_maps, reg_type, term_sub,
815                        term_component, space_split, term_dst))
816             return 0;
817         yaz_log(LOG_DEBUG, "Relation >=");
818
819         *term_tmp++ = '(';
820         for (i = 0; term_component[i];)
821         {
822             int j = 0;
823
824             if (i)
825                 *term_tmp++ = '|';
826             while (j < i)
827                 string_rel_add_char (&term_tmp, term_component, &j);
828             *term_tmp++ = '[';
829
830             if (term_component[i+1])
831             {
832                 *term_tmp++ = '^';
833                 *term_tmp++ = '-';
834                 string_rel_add_char (&term_tmp, term_component, &i);
835             }
836             else
837             {
838                 string_rel_add_char (&term_tmp, term_component, &i);
839                 *term_tmp++ = '-';
840             }
841             *term_tmp++ = ']';
842             *term_tmp++ = '.';
843             *term_tmp++ = '*';
844
845             if ((term_tmp - term_dict) > IT_MAX_WORD)
846                 break;
847         }
848         *term_tmp++ = ')';
849         *term_tmp = '\0';
850         break;
851     case 3:
852     default:
853         yaz_log(LOG_DEBUG, "Relation =");
854         if (!term_100 (zh->reg->zebra_maps, reg_type, term_sub,
855                        term_component, space_split, term_dst))
856             return 0;
857         strcat (term_tmp, "(");
858         strcat (term_tmp, term_component);
859         strcat (term_tmp, ")");
860     }
861     return 1;
862 }
863
864 static int string_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
865                         const char **term_sub, 
866                         oid_value attributeSet, NMEM stream,
867                         struct grep_info *grep_info,
868                         int reg_type, int complete_flag,
869                         int num_bases, char **basenames,
870                         char *term_dst, int xpath_use);
871
872 static RSET term_trunc (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
873                         const char **term_sub, 
874                         oid_value attributeSet, NMEM stream,
875                         struct grep_info *grep_info,
876                         int reg_type, int complete_flag,
877                         int num_bases, char **basenames,
878                         char *term_dst,
879                         const char *rank_type, int xpath_use)
880 {
881     int r;
882     grep_info->isam_p_indx = 0;
883     r = string_term (zh, zapt, term_sub, attributeSet, stream, grep_info,
884                      reg_type, complete_flag, num_bases, basenames,
885                      term_dst, xpath_use);
886     if (r < 1)
887         return 0;
888     yaz_log(LOG_DEBUG, "term: %s", term_dst);
889     return rset_trunc (zh, grep_info->isam_p_buf,
890                        grep_info->isam_p_indx, term_dst,
891                        strlen(term_dst), rank_type, 1 /* preserve pos */,
892                        zapt->term->which);
893 }
894
895 static char *nmem_strdup_i(NMEM nmem, int v)
896 {
897     char val_str[64];
898     sprintf (val_str, "%d", v);
899     return nmem_strdup (nmem, val_str);
900 }
901     
902 static int string_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
903                         const char **term_sub, 
904                         oid_value attributeSet, NMEM stream,
905                         struct grep_info *grep_info,
906                         int reg_type, int complete_flag,
907                         int num_bases, char **basenames,
908                         char *term_dst, int xpath_use)
909 {
910     char term_dict[2*IT_MAX_WORD+4000];
911     int j, r, base_no;
912     AttrType truncation;
913     int truncation_value;
914     AttrType use;
915     int use_value;
916     const char *use_string = 0;
917     oid_value curAttributeSet = attributeSet;
918     const char *termp;
919     struct rpn_char_map_info rcmi;
920     int space_split = complete_flag ? 0 : 1;
921
922     int bases_ok = 0;     /* no of databases with OK attribute */
923     int errCode = 0;      /* err code (if any is not OK) */
924     char *errString = 0;  /* addinfo */
925
926     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
927     attr_init (&use, zapt, 1);
928     use_value = attr_find_ex (&use, &curAttributeSet, &use_string);
929     yaz_log(LOG_DEBUG, "string_term, use value %d", use_value);
930     attr_init (&truncation, zapt, 5);
931     truncation_value = attr_find (&truncation, NULL);
932     yaz_log(LOG_DEBUG, "truncation value %d", truncation_value);
933
934     if (use_value == -1)    /* no attribute - assumy "any" */
935         use_value = 1016;
936     for (base_no = 0; base_no < num_bases; base_no++)
937     {
938         int attr_ok = 0;
939         int regex_range = 0;
940         int init_pos = 0;
941         attent attp;
942         data1_local_attribute id_xpath_attr;
943         data1_local_attribute *local_attr;
944         int max_pos, prefix_len = 0;
945
946         termp = *term_sub;
947
948         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
949         {
950             zh->errCode = 109; /* Database unavailable */
951             zh->errString = basenames[base_no];
952             return -1;
953         }
954         if (xpath_use > 0 && use_value == -2) 
955         {
956             use_value = xpath_use;
957             attp.local_attributes = &id_xpath_attr;
958             attp.attset_ordinal = VAL_IDXPATH;
959             id_xpath_attr.next = 0;
960             id_xpath_attr.local = use_value;
961         }
962         else if (curAttributeSet == VAL_IDXPATH)
963         {
964             attp.local_attributes = &id_xpath_attr;
965             attp.attset_ordinal = VAL_IDXPATH;
966             id_xpath_attr.next = 0;
967             id_xpath_attr.local = use_value;
968         }
969         else
970         {
971             if ((r=att_getentbyatt (zh, &attp, curAttributeSet, use_value,
972                                             use_string)))
973             {
974                 yaz_log(LOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
975                       curAttributeSet, use_value, r);
976                 if (r == -1)
977                 {
978                     /* set was found, but value wasn't defined */
979                     errCode = 114;
980                     if (use_string)
981                         errString = nmem_strdup(stream, use_string);
982                     else
983                         errString = nmem_strdup_i(stream, use_value);
984                 }
985                 else
986                 {
987                     int oid[OID_SIZE];
988                     struct oident oident;
989                     
990                     oident.proto = PROTO_Z3950;
991                     oident.oclass = CLASS_ATTSET;
992                     oident.value = curAttributeSet;
993                     oid_ent_to_oid (&oident, oid);
994                     
995                     errCode = 121;
996                     errString = nmem_strdup (stream, oident.desc);
997                 }
998                 continue;
999             }
1000         }
1001         for (local_attr = attp.local_attributes; local_attr;
1002              local_attr = local_attr->next)
1003         {
1004             int ord;
1005             char ord_buf[32];
1006             int i, ord_len;
1007             
1008             ord = zebraExplain_lookupSU (zh->reg->zei, attp.attset_ordinal,
1009                                          local_attr->local);
1010             if (ord < 0)
1011                 continue;
1012             if (prefix_len)
1013                 term_dict[prefix_len++] = '|';
1014             else
1015                 term_dict[prefix_len++] = '(';
1016             
1017             ord_len = key_SU_encode (ord, ord_buf);
1018             for (i = 0; i<ord_len; i++)
1019             {
1020                 term_dict[prefix_len++] = 1;
1021                 term_dict[prefix_len++] = ord_buf[i];
1022             }
1023         }
1024         if (!prefix_len)
1025         {
1026 #if 1
1027             bases_ok++;
1028 #else
1029             errCode = 114;
1030             errString = nmem_strdup_i(stream, use_value);
1031             continue;
1032 #endif
1033         }
1034         else
1035         {
1036             attr_ok = 1;
1037             bases_ok++; /* this has OK attributes */
1038         }
1039
1040         term_dict[prefix_len++] = ')';
1041         term_dict[prefix_len++] = 1;
1042         term_dict[prefix_len++] = reg_type;
1043         yaz_log(LOG_DEBUG, "reg_type = %d", term_dict[prefix_len-1]);
1044         term_dict[prefix_len] = '\0';
1045         j = prefix_len;
1046         switch (truncation_value)
1047         {
1048         case -1:         /* not specified */
1049         case 100:        /* do not truncate */
1050             if (!string_relation (zh, zapt, &termp, term_dict,
1051                                   attributeSet,
1052                                   reg_type, space_split, term_dst))
1053                 return 0;
1054             break;
1055         case 1:          /* right truncation */
1056             term_dict[j++] = '(';
1057             if (!term_100 (zh->reg->zebra_maps, reg_type,
1058                            &termp, term_dict + j, space_split, term_dst))
1059                 return 0;
1060             strcat (term_dict, ".*)");
1061             break;
1062         case 2:          /* keft truncation */
1063             term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1064             if (!term_100 (zh->reg->zebra_maps, reg_type,
1065                            &termp, term_dict + j, space_split, term_dst))
1066                 return 0;
1067             strcat (term_dict, ")");
1068             break;
1069         case 3:          /* left&right truncation */
1070             term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1071             if (!term_100 (zh->reg->zebra_maps, reg_type,
1072                            &termp, term_dict + j, space_split, term_dst))
1073                 return 0;
1074             strcat (term_dict, ".*)");
1075             break;
1076         case 101:        /* process # in term */
1077             term_dict[j++] = '(';
1078             if (!term_101 (zh->reg->zebra_maps, reg_type,
1079                            &termp, term_dict + j, space_split, term_dst))
1080                 return 0;
1081             strcat (term_dict, ")");
1082             break;
1083         case 102:        /* Regexp-1 */
1084             term_dict[j++] = '(';
1085             if (!term_102 (zh->reg->zebra_maps, reg_type,
1086                            &termp, term_dict + j, space_split, term_dst))
1087                 return 0;
1088             strcat (term_dict, ")");
1089             break;
1090         case 103:       /* Regexp-2 */
1091             r = 1;
1092             term_dict[j++] = '(';
1093             init_pos = 2;
1094             if (!term_103 (zh->reg->zebra_maps, reg_type,
1095                            &termp, term_dict + j, &regex_range,
1096                            space_split, term_dst))
1097                 return 0;
1098             strcat (term_dict, ")");
1099             break;
1100         case 104:        /* process # and ! in term */
1101             term_dict[j++] = '(';
1102             if (!term_104 (zh->reg->zebra_maps, reg_type,
1103                            &termp, term_dict + j, space_split, term_dst))
1104                 return 0;
1105             strcat (term_dict, ")");
1106             break;
1107         case 105:        /* process * and ! in term */
1108             term_dict[j++] = '(';
1109             if (!term_105 (zh->reg->zebra_maps, reg_type,
1110                            &termp, term_dict + j, space_split, term_dst, 1))
1111                 return 0;
1112             strcat (term_dict, ")");
1113             break;
1114         case 106:        /* process * and ! in term */
1115             term_dict[j++] = '(';
1116             if (!term_105 (zh->reg->zebra_maps, reg_type,
1117                            &termp, term_dict + j, space_split, term_dst, 0))
1118                 return 0;
1119             strcat (term_dict, ")");
1120             break;
1121         default:
1122             zh->errCode = 120;
1123             zh->errString = nmem_strdup_i(stream, truncation_value);
1124             return -1;
1125         }
1126         if (attr_ok)
1127         {
1128             yaz_log(LOG_DEBUG, "dict_lookup_grep: %s", term_dict+prefix_len);
1129             r = dict_lookup_grep (zh->reg->dict, term_dict, regex_range,
1130                                   grep_info, &max_pos, init_pos,
1131                                   grep_handle);
1132             if (r)
1133                 yaz_log(LOG_WARN, "dict_lookup_grep fail %d", r);
1134         }
1135     }
1136     if (!bases_ok)
1137     {
1138         zh->errCode = errCode;
1139         zh->errString = errString;
1140         return -1;
1141     }
1142     *term_sub = termp;
1143     yaz_log(LOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1144     return 1;
1145 }
1146
1147
1148 /* convert APT search term to UTF8 */
1149 static int zapt_term_to_utf8 (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1150                               char *termz)
1151 {
1152     size_t sizez;
1153     Z_Term *term = zapt->term;
1154
1155     switch (term->which)
1156     {
1157     case Z_Term_general:
1158         if (zh->iconv_to_utf8 != 0)
1159         {
1160             char *inbuf = term->u.general->buf;
1161             size_t inleft = term->u.general->len;
1162             char *outbuf = termz;
1163             size_t outleft = IT_MAX_WORD-1;
1164             size_t ret;
1165
1166             ret = yaz_iconv(zh->iconv_to_utf8, &inbuf, &inleft,
1167                         &outbuf, &outleft);
1168             if (ret == (size_t)(-1))
1169             {
1170                 ret = yaz_iconv(zh->iconv_to_utf8, 0, 0, 0, 0);
1171                 zh->errCode = 125;
1172                 return -1;
1173             }
1174             *outbuf = 0;
1175         }
1176         else
1177         {
1178             sizez = term->u.general->len;
1179             if (sizez > IT_MAX_WORD-1)
1180                 sizez = IT_MAX_WORD-1;
1181             memcpy (termz, term->u.general->buf, sizez);
1182             termz[sizez] = '\0';
1183         }
1184         break;
1185     case Z_Term_characterString:
1186         sizez = strlen(term->u.characterString);
1187         if (sizez > IT_MAX_WORD-1)
1188             sizez = IT_MAX_WORD-1;
1189         memcpy (termz, term->u.characterString, sizez);
1190         termz[sizez] = '\0';
1191         break;
1192     default:
1193         zh->errCode = 124;
1194         return -1;
1195     }
1196     return 0;
1197 }
1198
1199 /* convert APT SCAN term to internal cmap */
1200 static int trans_scan_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1201                             char *termz, int reg_type)
1202 {
1203     char termz0[IT_MAX_WORD];
1204
1205     if (zapt_term_to_utf8(zh, zapt, termz0))
1206         return -1;    /* error */
1207     else
1208     {
1209         const char **map;
1210         const char *cp = (const char *) termz0;
1211         const char *cp_end = cp + strlen(cp);
1212         const char *src;
1213         int i = 0;
1214         const char *space_map = NULL;
1215         int len;
1216             
1217         while ((len = (cp_end - cp)) > 0)
1218         {
1219             map = zebra_maps_input (zh->reg->zebra_maps, reg_type, &cp, len, 0);
1220             if (**map == *CHR_SPACE)
1221                 space_map = *map;
1222             else
1223             {
1224                 if (i && space_map)
1225                     for (src = space_map; *src; src++)
1226                         termz[i++] = *src;
1227                 space_map = NULL;
1228                 for (src = *map; *src; src++)
1229                     termz[i++] = *src;
1230             }
1231         }
1232         termz[i] = '\0';
1233     }
1234     return 0;
1235 }
1236
1237 char *normalize_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1238                      const char *termz, NMEM stream, unsigned reg_id)
1239 {
1240     WRBUF wrbuf = 0;
1241     AttrType truncation;
1242     int truncation_value;
1243     char *ex_list = 0;
1244
1245     attr_init (&truncation, zapt, 5);
1246     truncation_value = attr_find (&truncation, NULL);
1247
1248     switch (truncation_value)
1249     {
1250     default:
1251         ex_list = "";
1252         break;
1253     case 101:
1254         ex_list = "#";
1255         break;
1256     case 102:
1257     case 103:
1258         ex_list = 0;
1259         break;
1260     case 104:
1261         ex_list = "!#";
1262         break;
1263     case 105:
1264         ex_list = "!*";
1265         break;
1266     }
1267     if (ex_list)
1268         wrbuf = zebra_replace(zh->reg->zebra_maps, reg_id, ex_list,
1269                               termz, strlen(termz));
1270     if (!wrbuf)
1271         return nmem_strdup(stream, termz);
1272     else
1273     {
1274         char *buf = (char*) nmem_malloc (stream, wrbuf_len(wrbuf)+1);
1275         memcpy (buf, wrbuf_buf(wrbuf), wrbuf_len(wrbuf));
1276         buf[wrbuf_len(wrbuf)] = '\0';
1277         return buf;
1278     }
1279 }
1280
1281 static void grep_info_delete (struct grep_info *grep_info)
1282 {
1283 #ifdef TERM_COUNT
1284     xfree(grep_info->term_no);
1285 #endif
1286     xfree (grep_info->isam_p_buf);
1287 }
1288
1289 static int grep_info_prepare (ZebraHandle zh,
1290                               Z_AttributesPlusTerm *zapt,
1291                               struct grep_info *grep_info,
1292                               int reg_type,
1293                               NMEM stream)
1294 {
1295     AttrType termset;
1296     int termset_value_numeric;
1297     const char *termset_value_string;
1298
1299 #ifdef TERM_COUNT
1300     grep_info->term_no = 0;
1301 #endif
1302     grep_info->isam_p_size = 0;
1303     grep_info->isam_p_buf = NULL;
1304     grep_info->zh = zh;
1305     grep_info->reg_type = reg_type;
1306     grep_info->termset = 0;
1307
1308     if (!zapt)
1309         return 0;
1310     attr_init (&termset, zapt, 8);
1311     termset_value_numeric =
1312         attr_find_ex (&termset, NULL, &termset_value_string);
1313     if (termset_value_numeric != -1)
1314     {
1315         char resname[32];
1316         const char *termset_name = 0;
1317         if (termset_value_numeric != -2)
1318         {
1319             sprintf (resname, "%d", termset_value_numeric);
1320             termset_name = resname;
1321         }
1322         else
1323             termset_name = termset_value_string;
1324         yaz_log(LOG_LOG, "creating termset set %s", termset_name);
1325         grep_info->termset = resultSetAdd (zh, termset_name, 1);
1326         if (!grep_info->termset)
1327         {
1328             zh->errCode = 128;
1329             zh->errString = nmem_strdup (stream, termset_name);
1330             return -1;
1331         }
1332     }
1333     return 0;
1334 }
1335                                
1336
1337 static RSET rpn_search_APT_phrase (ZebraHandle zh,
1338                                    Z_AttributesPlusTerm *zapt,
1339                                    const char *termz_org,
1340                                    oid_value attributeSet,
1341                                    NMEM stream,
1342                                    int reg_type, int complete_flag,
1343                                    const char *rank_type, int xpath_use,
1344                                    int num_bases, char **basenames)
1345 {
1346     char term_dst[IT_MAX_WORD+1];
1347     RSET rset[60], result;
1348     int  rset_no = 0;
1349     struct grep_info grep_info;
1350     char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type);
1351     const char *termp = termz;
1352
1353     *term_dst = 0;
1354     if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream))
1355         return 0;
1356     while (1)
1357     { 
1358         yaz_log(LOG_DEBUG, "APT_phrase termp=%s", termp);
1359         rset[rset_no] = term_trunc (zh, zapt, &termp, attributeSet,
1360                                     stream, &grep_info,
1361                                     reg_type, complete_flag,
1362                                     num_bases, basenames,
1363                                     term_dst, rank_type,
1364                                     xpath_use);
1365         if (!rset[rset_no])
1366             break;
1367         if (++rset_no >= (int) (sizeof(rset)/sizeof(*rset)))
1368             break;
1369     }
1370     grep_info_delete (&grep_info);
1371     if (rset_no == 0)
1372     {
1373         rset_null_parms parms;
1374         
1375         parms.rset_term = rset_term_create (termz, -1, rank_type,
1376                                             zapt->term->which);
1377         return rset_create (rset_kind_null, &parms);
1378     }
1379     else if (rset_no == 1)
1380         return (rset[0]);
1381     else
1382     {
1383         /* new / old prox */
1384         rset_prox_parms parms;
1385         
1386         parms.rset = rset;
1387         parms.rset_no = rset_no;
1388         parms.ordered = 1;
1389         parms.exclusion = 0;
1390         parms.relation = 3;
1391         parms.distance = 1;
1392         parms.key_size = sizeof(struct it_key);
1393         parms.cmp = key_compare_it;
1394         parms.getseq = key_get_seq;
1395         parms.log_item = key_logdump_txt;
1396         result = rset_create(rset_kind_prox, &parms);
1397     }
1398     return result;
1399 }
1400
1401 static RSET rpn_search_APT_or_list (ZebraHandle zh,
1402                                     Z_AttributesPlusTerm *zapt,
1403                                     const char *termz_org,
1404                                     oid_value attributeSet,
1405                                     NMEM stream,
1406                                     int reg_type, int complete_flag,
1407                                     const char *rank_type,
1408                                     int xpath_use,
1409                                     int num_bases, char **basenames)
1410 {
1411     char term_dst[IT_MAX_WORD+1];
1412     RSET rset[60], result;
1413     int i, rset_no = 0;
1414     struct grep_info grep_info;
1415     char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type);
1416     const char *termp = termz;
1417
1418     if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream))
1419         return 0;
1420     while (1)
1421     { 
1422         yaz_log(LOG_DEBUG, "APT_or_list termp=%s", termp);
1423         rset[rset_no] = term_trunc (zh, zapt, &termp, attributeSet,
1424                                     stream, &grep_info,
1425                                     reg_type, complete_flag,
1426                                     num_bases, basenames,
1427                                     term_dst, rank_type,
1428                                     xpath_use);
1429         if (!rset[rset_no])
1430             break;
1431         if (++rset_no >= (int) (sizeof(rset)/sizeof(*rset)))
1432             break;
1433     }
1434     grep_info_delete (&grep_info);
1435     if (rset_no == 0)
1436     {
1437         rset_null_parms parms;
1438         
1439         parms.rset_term = rset_term_create (termz, -1, rank_type,
1440                                             zapt->term->which);
1441         return rset_create (rset_kind_null, &parms);
1442     }
1443     result = rset[0];
1444     for (i = 1; i<rset_no; i++)
1445     {
1446         rset_bool_parms bool_parms;
1447
1448         bool_parms.rset_l = result;
1449         bool_parms.rset_r = rset[i];
1450         bool_parms.key_size = sizeof(struct it_key);
1451         bool_parms.cmp = key_compare_it;
1452         bool_parms.log_item = key_logdump_txt;
1453         result = rset_create (rset_kind_or, &bool_parms);
1454     }
1455     return result;
1456 }
1457
1458 static RSET rpn_search_APT_and_list (ZebraHandle zh,
1459                                      Z_AttributesPlusTerm *zapt,
1460                                      const char *termz_org,
1461                                      oid_value attributeSet,
1462                                      NMEM stream,
1463                                      int reg_type, int complete_flag,
1464                                      const char *rank_type, 
1465                                      int xpath_use,
1466                                      int num_bases, char **basenames)
1467 {
1468     char term_dst[IT_MAX_WORD+1];
1469     RSET rset[60], result;
1470     int i, rset_no = 0;
1471     struct grep_info grep_info;
1472     char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type);
1473     const char *termp = termz;
1474
1475     if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream))
1476         return 0;
1477     while (1)
1478     { 
1479         yaz_log(LOG_DEBUG, "APT_and_list termp=%s", termp);
1480         rset[rset_no] = term_trunc (zh, zapt, &termp, attributeSet,
1481                                     stream, &grep_info,
1482                                     reg_type, complete_flag,
1483                                     num_bases, basenames,
1484                                     term_dst, rank_type,
1485                                     xpath_use);
1486         if (!rset[rset_no])
1487             break;
1488         assert (rset[rset_no]);
1489         if (++rset_no >= (int) (sizeof(rset)/sizeof(*rset)))
1490             break;
1491     }
1492     grep_info_delete (&grep_info);
1493     if (rset_no == 0)
1494     {
1495         rset_null_parms parms;
1496         
1497         parms.rset_term = rset_term_create (termz, -1, rank_type,
1498                                             zapt->term->which);
1499         return rset_create (rset_kind_null, &parms);
1500     }
1501     result = rset[0];
1502     for (i = 1; i<rset_no; i++)
1503     {
1504         rset_bool_parms bool_parms;
1505
1506         bool_parms.rset_l = result;
1507         bool_parms.rset_r = rset[i];
1508         bool_parms.key_size = sizeof(struct it_key);
1509         bool_parms.cmp = key_compare_it;
1510         bool_parms.log_item = key_logdump_txt;
1511         result = rset_create (rset_kind_and, &bool_parms);
1512     }
1513     return result;
1514 }
1515
1516 static int numeric_relation (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1517                              const char **term_sub,
1518                              char *term_dict,
1519                              oid_value attributeSet,
1520                              struct grep_info *grep_info,
1521                              int *max_pos,
1522                              int reg_type,
1523                              char *term_dst)
1524 {
1525     AttrType relation;
1526     int relation_value;
1527     int term_value;
1528     int r;
1529     char *term_tmp = term_dict + strlen(term_dict);
1530
1531     attr_init (&relation, zapt, 2);
1532     relation_value = attr_find (&relation, NULL);
1533
1534     yaz_log(LOG_DEBUG, "numeric relation value=%d", relation_value);
1535
1536     if (!term_100 (zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1537                    term_dst))
1538         return 0;
1539     term_value = atoi (term_tmp);
1540     switch (relation_value)
1541     {
1542     case 1:
1543         yaz_log(LOG_DEBUG, "Relation <");
1544         gen_regular_rel (term_tmp, term_value-1, 1);
1545         break;
1546     case 2:
1547         yaz_log(LOG_DEBUG, "Relation <=");
1548         gen_regular_rel (term_tmp, term_value, 1);
1549         break;
1550     case 4:
1551         yaz_log(LOG_DEBUG, "Relation >=");
1552         gen_regular_rel (term_tmp, term_value, 0);
1553         break;
1554     case 5:
1555         yaz_log(LOG_DEBUG, "Relation >");
1556         gen_regular_rel (term_tmp, term_value+1, 0);
1557         break;
1558     case 3:
1559     default:
1560         yaz_log(LOG_DEBUG, "Relation =");
1561         sprintf (term_tmp, "(0*%d)", term_value);
1562     }
1563     yaz_log(LOG_DEBUG, "dict_lookup_grep: %s", term_tmp);
1564     r = dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info, max_pos,
1565                           0, grep_handle);
1566     if (r)
1567         yaz_log(LOG_WARN, "dict_lookup_grep fail, rel=gt: %d", r);
1568     yaz_log(LOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1569     return 1;
1570 }
1571
1572 static int numeric_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1573                          const char **term_sub, 
1574                          oid_value attributeSet, struct grep_info *grep_info,
1575                          int reg_type, int complete_flag,
1576                          int num_bases, char **basenames,
1577                          char *term_dst, int xpath_use, NMEM stream)
1578 {
1579     char term_dict[2*IT_MAX_WORD+2];
1580     int r, base_no;
1581     AttrType use;
1582     int use_value;
1583     const char *use_string = 0;
1584     oid_value curAttributeSet = attributeSet;
1585     const char *termp;
1586     struct rpn_char_map_info rcmi;
1587
1588     int bases_ok = 0;     /* no of databases with OK attribute */
1589     int errCode = 0;      /* err code (if any is not OK) */
1590     char *errString = 0;  /* addinfo */
1591
1592     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1593     attr_init (&use, zapt, 1);
1594     use_value = attr_find_ex (&use, &curAttributeSet, &use_string);
1595
1596     if (use_value == -1)
1597         use_value = 1016;
1598
1599     for (base_no = 0; base_no < num_bases; base_no++)
1600     {
1601         attent attp;
1602         data1_local_attribute id_xpath_attr;
1603         data1_local_attribute *local_attr;
1604         int max_pos, prefix_len = 0;
1605
1606         termp = *term_sub;
1607         if (use_value == -2)  /* string attribute (assume IDXPATH/any) */
1608         {
1609             use_value = xpath_use;
1610             attp.local_attributes = &id_xpath_attr;
1611             attp.attset_ordinal = VAL_IDXPATH;
1612             id_xpath_attr.next = 0;
1613             id_xpath_attr.local = use_value;
1614         }
1615         else if (curAttributeSet == VAL_IDXPATH)
1616         {
1617             attp.local_attributes = &id_xpath_attr;
1618             attp.attset_ordinal = VAL_IDXPATH;
1619             id_xpath_attr.next = 0;
1620             id_xpath_attr.local = use_value;
1621         }
1622         else
1623         {
1624             if ((r=att_getentbyatt (zh, &attp, curAttributeSet, use_value,
1625                                             use_string)))
1626             {
1627                 yaz_log(LOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
1628                       curAttributeSet, use_value, r);
1629                 if (r == -1)
1630                 {
1631                     errString = nmem_strdup_i (stream, use_value);
1632                     errCode = 114;
1633                 }
1634                 else
1635                     errCode = 121;
1636                 continue;
1637             }
1638         }
1639         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1640         {
1641             zh->errCode = 109; /* Database unavailable */
1642             zh->errString = basenames[base_no];
1643             return -1;
1644         }
1645         for (local_attr = attp.local_attributes; local_attr;
1646              local_attr = local_attr->next)
1647         {
1648             int ord;
1649             char ord_buf[32];
1650             int i, ord_len;
1651
1652             ord = zebraExplain_lookupSU (zh->reg->zei, attp.attset_ordinal,
1653                                           local_attr->local);
1654             if (ord < 0)
1655                 continue;
1656             if (prefix_len)
1657                 term_dict[prefix_len++] = '|';
1658             else
1659                 term_dict[prefix_len++] = '(';
1660
1661             ord_len = key_SU_encode (ord, ord_buf);
1662             for (i = 0; i<ord_len; i++)
1663             {
1664                 term_dict[prefix_len++] = 1;
1665                 term_dict[prefix_len++] = ord_buf[i];
1666             }
1667         }
1668         if (!prefix_len)
1669         {
1670             char val_str[32];
1671             sprintf (val_str, "%d", use_value);
1672             errCode = 114;
1673             errString = nmem_strdup (stream, val_str);
1674             continue;
1675         }
1676         bases_ok++;
1677         term_dict[prefix_len++] = ')';        
1678         term_dict[prefix_len++] = 1;
1679         term_dict[prefix_len++] = reg_type;
1680         yaz_log(LOG_DEBUG, "reg_type = %d", term_dict[prefix_len-1]);
1681         term_dict[prefix_len] = '\0';
1682         if (!numeric_relation (zh, zapt, &termp, term_dict,
1683                                attributeSet, grep_info, &max_pos, reg_type,
1684                                term_dst))
1685             return 0;
1686     }
1687     if (!bases_ok)
1688     {
1689         zh->errCode = errCode;
1690         zh->errString = errString;
1691         return -1;
1692     }
1693     *term_sub = termp;
1694     yaz_log(LOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1695     return 1;
1696 }
1697
1698 static RSET rpn_search_APT_numeric (ZebraHandle zh,
1699                                     Z_AttributesPlusTerm *zapt,
1700                                     const char *termz,
1701                                     oid_value attributeSet,
1702                                     NMEM stream,
1703                                     int reg_type, int complete_flag,
1704                                     const char *rank_type, int xpath_use,
1705                                     int num_bases, char **basenames)
1706 {
1707     char term_dst[IT_MAX_WORD+1];
1708     const char *termp = termz;
1709     RSET rset[60], result;
1710     int i, r, rset_no = 0;
1711     struct grep_info grep_info;
1712
1713     if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream))
1714         return 0;
1715     while (1)
1716     { 
1717         yaz_log(LOG_DEBUG, "APT_numeric termp=%s", termp);
1718         grep_info.isam_p_indx = 0;
1719         r = numeric_term (zh, zapt, &termp, attributeSet, &grep_info,
1720                           reg_type, complete_flag, num_bases, basenames,
1721                           term_dst, xpath_use,
1722                           stream);
1723         if (r < 1)
1724             break;
1725         yaz_log(LOG_DEBUG, "term: %s", term_dst);
1726         rset[rset_no] = rset_trunc (zh, grep_info.isam_p_buf,
1727                                     grep_info.isam_p_indx, term_dst,
1728                                     strlen(term_dst), rank_type,
1729                                     0 /* preserve position */,
1730                                     zapt->term->which);
1731         assert (rset[rset_no]);
1732         if (++rset_no >= (int) (sizeof(rset)/sizeof(*rset)))
1733             break;
1734     }
1735     grep_info_delete (&grep_info);
1736     if (rset_no == 0)
1737     {
1738         rset_null_parms parms;
1739         
1740         parms.rset_term = rset_term_create (term_dst, -1, rank_type,
1741                                             zapt->term->which);
1742         return rset_create (rset_kind_null, &parms);
1743     }
1744     result = rset[0];
1745     for (i = 1; i<rset_no; i++)
1746     {
1747         rset_bool_parms bool_parms;
1748
1749         bool_parms.rset_l = result;
1750         bool_parms.rset_r = rset[i];
1751         bool_parms.key_size = sizeof(struct it_key);
1752         bool_parms.cmp = key_compare_it;
1753         bool_parms.log_item = key_logdump_txt;
1754         result = rset_create (rset_kind_and, &bool_parms);
1755     }
1756     return result;
1757 }
1758
1759 static RSET rpn_search_APT_local (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1760                                   const char *termz,
1761                                   oid_value attributeSet,
1762                                   NMEM stream,
1763                                   const char *rank_type)
1764 {
1765     RSET result;
1766     RSFD rsfd;
1767     struct it_key key;
1768     rset_temp_parms parms;
1769
1770     parms.rset_term = rset_term_create (termz, -1, rank_type,
1771                                         zapt->term->which);
1772     parms.cmp = key_compare_it;
1773     parms.key_size = sizeof (struct it_key);
1774     parms.temp_path = res_get (zh->res, "setTmpDir");
1775     result = rset_create (rset_kind_temp, &parms);
1776     rsfd = rset_open (result, RSETF_WRITE);
1777
1778     key.sysno = atoi (termz);
1779     key.seqno = 1;
1780     if (key.sysno <= 0)
1781         key.sysno = 1;
1782     rset_write (result, rsfd, &key);
1783     rset_close (result, rsfd);
1784     return result;
1785 }
1786
1787 static RSET rpn_sort_spec (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1788                            oid_value attributeSet, NMEM stream,
1789                            Z_SortKeySpecList *sort_sequence,
1790                            const char *rank_type)
1791 {
1792     rset_null_parms parms;    
1793     int i;
1794     int sort_relation_value;
1795     AttrType sort_relation_type;
1796     int use_value;
1797     AttrType use_type;
1798     Z_SortKeySpec *sks;
1799     Z_SortKey *sk;
1800     Z_AttributeElement *ae;
1801     int oid[OID_SIZE];
1802     oident oe;
1803     char termz[20];
1804     
1805     attr_init (&sort_relation_type, zapt, 7);
1806     sort_relation_value = attr_find (&sort_relation_type, &attributeSet);
1807
1808     attr_init (&use_type, zapt, 1);
1809     use_value = attr_find (&use_type, &attributeSet);
1810
1811     if (!sort_sequence->specs)
1812     {
1813         sort_sequence->num_specs = 10;
1814         sort_sequence->specs = (Z_SortKeySpec **)
1815             nmem_malloc (stream, sort_sequence->num_specs *
1816                          sizeof(*sort_sequence->specs));
1817         for (i = 0; i<sort_sequence->num_specs; i++)
1818             sort_sequence->specs[i] = 0;
1819     }
1820     if (zapt->term->which != Z_Term_general)
1821         i = 0;
1822     else
1823         i = atoi_n ((char *) zapt->term->u.general->buf,
1824                     zapt->term->u.general->len);
1825     if (i >= sort_sequence->num_specs)
1826         i = 0;
1827     sprintf (termz, "%d", i);
1828
1829     oe.proto = PROTO_Z3950;
1830     oe.oclass = CLASS_ATTSET;
1831     oe.value = attributeSet;
1832     if (!oid_ent_to_oid (&oe, oid))
1833         return 0;
1834
1835     sks = (Z_SortKeySpec *) nmem_malloc (stream, sizeof(*sks));
1836     sks->sortElement = (Z_SortElement *)
1837         nmem_malloc (stream, sizeof(*sks->sortElement));
1838     sks->sortElement->which = Z_SortElement_generic;
1839     sk = sks->sortElement->u.generic = (Z_SortKey *)
1840         nmem_malloc (stream, sizeof(*sk));
1841     sk->which = Z_SortKey_sortAttributes;
1842     sk->u.sortAttributes = (Z_SortAttributes *)
1843         nmem_malloc (stream, sizeof(*sk->u.sortAttributes));
1844
1845     sk->u.sortAttributes->id = oid;
1846     sk->u.sortAttributes->list = (Z_AttributeList *)
1847         nmem_malloc (stream, sizeof(*sk->u.sortAttributes->list));
1848     sk->u.sortAttributes->list->num_attributes = 1;
1849     sk->u.sortAttributes->list->attributes = (Z_AttributeElement **)
1850         nmem_malloc (stream, sizeof(*sk->u.sortAttributes->list->attributes));
1851     ae = *sk->u.sortAttributes->list->attributes = (Z_AttributeElement *)
1852         nmem_malloc (stream, sizeof(**sk->u.sortAttributes->list->attributes));
1853     ae->attributeSet = 0;
1854     ae->attributeType = (int *)
1855         nmem_malloc (stream, sizeof(*ae->attributeType));
1856     *ae->attributeType = 1;
1857     ae->which = Z_AttributeValue_numeric;
1858     ae->value.numeric = (int *)
1859         nmem_malloc (stream, sizeof(*ae->value.numeric));
1860     *ae->value.numeric = use_value;
1861
1862     sks->sortRelation = (int *)
1863         nmem_malloc (stream, sizeof(*sks->sortRelation));
1864     if (sort_relation_value == 1)
1865         *sks->sortRelation = Z_SortKeySpec_ascending;
1866     else if (sort_relation_value == 2)
1867         *sks->sortRelation = Z_SortKeySpec_descending;
1868     else 
1869         *sks->sortRelation = Z_SortKeySpec_ascending;
1870
1871     sks->caseSensitivity = (int *)
1872         nmem_malloc (stream, sizeof(*sks->caseSensitivity));
1873     *sks->caseSensitivity = 0;
1874
1875     sks->which = Z_SortKeySpec_null;
1876     sks->u.null = odr_nullval ();
1877     sort_sequence->specs[i] = sks;
1878
1879     parms.rset_term = rset_term_create (termz, -1, rank_type,
1880                                         zapt->term->which);
1881     return rset_create (rset_kind_null, &parms);
1882 }
1883
1884
1885 static int parse_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1886                        oid_value attributeSet,
1887                        struct xpath_location_step *xpath, int max, NMEM mem)
1888 {
1889     oid_value curAttributeSet = attributeSet;
1890     AttrType use;
1891     const char *use_string = 0;
1892     
1893     attr_init (&use, zapt, 1);
1894     attr_find_ex (&use, &curAttributeSet, &use_string);
1895
1896     if (!use_string || *use_string != '/')
1897         return -1;
1898
1899     return zebra_parse_xpath_str(use_string, xpath, max, mem);
1900 }
1901  
1902                
1903
1904 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
1905                         int reg_type, const char *term, int use,
1906                         oid_value curAttributeSet)
1907 {
1908     RSET rset;
1909     struct grep_info grep_info;
1910     char term_dict[2048];
1911     char ord_buf[32];
1912     int prefix_len = 0;
1913     int ord = zebraExplain_lookupSU (zh->reg->zei, curAttributeSet, use);
1914     int ord_len, i, r, max_pos;
1915     int term_type = Z_Term_characterString;
1916     const char *flags = "void";
1917
1918     if (grep_info_prepare (zh, 0 /* zapt */, &grep_info, '0', stream))
1919     {
1920         rset_null_parms parms;
1921         
1922         parms.rset_term = rset_term_create (term, strlen(term),
1923                                             flags, term_type);
1924         parms.rset_term->nn = 0;
1925         return rset_create (rset_kind_null, &parms);
1926     }
1927
1928     if (ord < 0)
1929     {
1930         rset_null_parms parms;
1931         
1932         parms.rset_term = rset_term_create (term, strlen(term),
1933                                             flags, term_type);
1934         parms.rset_term->nn = 0;
1935         return rset_create (rset_kind_null, &parms);
1936     }
1937     if (prefix_len)
1938         term_dict[prefix_len++] = '|';
1939     else
1940         term_dict[prefix_len++] = '(';
1941     
1942     ord_len = key_SU_encode (ord, ord_buf);
1943     for (i = 0; i<ord_len; i++)
1944     {
1945         term_dict[prefix_len++] = 1;
1946         term_dict[prefix_len++] = ord_buf[i];
1947     }
1948     term_dict[prefix_len++] = ')';
1949     term_dict[prefix_len++] = 1;
1950     term_dict[prefix_len++] = reg_type;
1951     
1952     strcpy (term_dict+prefix_len, term);
1953     
1954     grep_info.isam_p_indx = 0;
1955     r = dict_lookup_grep (zh->reg->dict, term_dict, 0,
1956                           &grep_info, &max_pos, 0, grep_handle);
1957     yaz_log (LOG_LOG, "%s %d positions", term, grep_info.isam_p_indx);
1958     rset = rset_trunc (zh, grep_info.isam_p_buf,
1959                        grep_info.isam_p_indx, term, strlen(term),
1960                        flags, 1, term_type);
1961     grep_info_delete (&grep_info);
1962     return rset;
1963 }
1964
1965 static RSET rpn_search_xpath (ZebraHandle zh,
1966                               oid_value attributeSet,
1967                               int num_bases, char **basenames,
1968                               NMEM stream, const char *rank_type, RSET rset,
1969                               int xpath_len, struct xpath_location_step *xpath)
1970 {
1971     oid_value curAttributeSet = attributeSet;
1972     int base_no;
1973     int i;
1974
1975     if (xpath_len < 0)
1976         return rset;
1977
1978     yaz_log (LOG_DEBUG, "len=%d", xpath_len);
1979     for (i = 0; i<xpath_len; i++)
1980     {
1981         yaz_log (LOG_DEBUG, "XPATH %d %s", i, xpath[i].part);
1982     }
1983
1984     curAttributeSet = VAL_IDXPATH;
1985
1986     /*
1987       //a    ->    a/.*
1988       //a/b  ->    b/a/.*
1989       /a     ->    a/
1990       /a/b   ->    b/a/
1991
1992       /      ->    none
1993
1994    a[@attr=value]/b[@other=othervalue]
1995
1996  /e/@a val      range(e/,range(@a,freetext(w,1015,val),@a),e/)
1997  /a/b val       range(b/a/,freetext(w,1016,val),b/a/)
1998  /a/b/@c val    range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
1999  /a/b[@c=y] val range(b/a/,freetext(w,1016,val),b/a/,@c=y)
2000  /a[@c=y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c=y)
2001  /a[@c=x]/b[@c=y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c=y),a/,@c=x)
2002       
2003     */
2004
2005     dict_grep_cmap (zh->reg->dict, 0, 0);
2006
2007     for (base_no = 0; base_no < num_bases; base_no++)
2008     {
2009         int level = xpath_len;
2010         int first_path = 1;
2011         
2012         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2013         {
2014             zh->errCode = 109; /* Database unavailable */
2015             zh->errString = basenames[base_no];
2016             return rset;
2017         }
2018         while (--level >= 0)
2019         {
2020             char xpath_rev[128];
2021             int i, len;
2022             rset_between_parms parms;
2023             RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
2024
2025             *xpath_rev = 0;
2026             len = 0;
2027             for (i = level; i >= 1; --i)
2028             {
2029                 const char *cp = xpath[i].part;
2030                 if (*cp)
2031                 {
2032                     for (;*cp; cp++)
2033                         if (*cp == '*')
2034                         {
2035                             memcpy (xpath_rev + len, "[^/]*", 5);
2036                             len += 5;
2037                         }
2038                         else if (*cp == ' ')
2039                         {
2040
2041                             xpath_rev[len++] = 1;
2042                             xpath_rev[len++] = ' ';
2043                         }
2044
2045                         else
2046                             xpath_rev[len++] = *cp;
2047                     xpath_rev[len++] = '/';
2048                 }
2049                 else if (i == 1)  /* // case */
2050                 {
2051                     xpath_rev[len++] = '.';
2052                     xpath_rev[len++] = '*';
2053                 }
2054             }
2055             xpath_rev[len] = 0;
2056
2057             if (xpath[level].predicate &&
2058                 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2059                 xpath[level].predicate->u.relation.name[0])
2060             {
2061                 WRBUF wbuf = wrbuf_alloc();
2062                 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2063                 if (xpath[level].predicate->u.relation.value)
2064                 {
2065                     const char *cp = xpath[level].predicate->u.relation.value;
2066                     wrbuf_putc(wbuf, '=');
2067                     
2068                     while (*cp)
2069                     {
2070                         if (strchr(REGEX_CHARS, *cp))
2071                             wrbuf_putc(wbuf, '\\');
2072                         wrbuf_putc(wbuf, *cp);
2073                         cp++;
2074                     }
2075                 }
2076                 wrbuf_puts(wbuf, "");
2077                 rset_attr = xpath_trunc (
2078                     zh, stream, '0', wrbuf_buf(wbuf), 3, curAttributeSet);
2079                 wrbuf_free(wbuf, 1);
2080             } 
2081             else 
2082             {
2083                 if (!first_path)
2084                     continue;
2085             }
2086             yaz_log (LOG_DEBUG, "xpath_rev (%d) = %s", level, xpath_rev);
2087             if (strlen(xpath_rev))
2088             {
2089                 rset_start_tag = xpath_trunc(zh, stream, 
2090                                          '0', xpath_rev, 1, curAttributeSet);
2091             
2092                 rset_end_tag = xpath_trunc(zh, stream,
2093                                        '0', xpath_rev, 2, curAttributeSet);
2094
2095                 parms.key_size = sizeof(struct it_key);
2096                 parms.cmp = key_compare_it;
2097                 parms.rset_l = rset_start_tag;
2098                 parms.rset_m = rset;
2099                 parms.rset_r = rset_end_tag;
2100                 parms.rset_attr = rset_attr;
2101                 parms.printer = key_print_it;
2102                 rset = rset_create (rset_kind_between, &parms);
2103             }
2104             first_path = 0;
2105         }
2106     }
2107
2108     return rset;
2109 }
2110
2111
2112
2113 static RSET rpn_search_APT (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2114                             oid_value attributeSet, NMEM stream,
2115                             Z_SortKeySpecList *sort_sequence,
2116                             int num_bases, char **basenames)
2117 {
2118     unsigned reg_id;
2119     char *search_type = NULL;
2120     char rank_type[128];
2121     int complete_flag;
2122     int sort_flag;
2123     char termz[IT_MAX_WORD+1];
2124     RSET rset = 0;
2125     int xpath_len;
2126     int xpath_use = 0;
2127     struct xpath_location_step xpath[10];
2128
2129     zebra_maps_attr (zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2130                      rank_type, &complete_flag, &sort_flag);
2131     
2132     yaz_log(LOG_DEBUG, "reg_id=%c", reg_id);
2133     yaz_log(LOG_DEBUG, "complete_flag=%d", complete_flag);
2134     yaz_log(LOG_DEBUG, "search_type=%s", search_type);
2135     yaz_log(LOG_DEBUG, "rank_type=%s", rank_type);
2136
2137     if (zapt_term_to_utf8(zh, zapt, termz))
2138         return 0;
2139
2140     if (sort_flag)
2141         return rpn_sort_spec (zh, zapt, attributeSet, stream, sort_sequence,
2142                               rank_type);
2143     xpath_len = parse_xpath(zh, zapt, attributeSet, xpath, 10, stream);
2144     if (xpath_len >= 0)
2145     {
2146         xpath_use = 1016;
2147         if (xpath[xpath_len-1].part[0] == '@')
2148             xpath_use = 1015;
2149     }
2150
2151     if (!strcmp (search_type, "phrase"))
2152     {
2153         rset = rpn_search_APT_phrase (zh, zapt, termz, attributeSet, stream,
2154                                       reg_id, complete_flag, rank_type,
2155                                       xpath_use,
2156                                       num_bases, basenames);
2157     }
2158     else if (!strcmp (search_type, "and-list"))
2159     {
2160         rset = rpn_search_APT_and_list (zh, zapt, termz, attributeSet, stream,
2161                                         reg_id, complete_flag, rank_type,
2162                                         xpath_use,
2163                                         num_bases, basenames);
2164     }
2165     else if (!strcmp (search_type, "or-list"))
2166     {
2167         rset = rpn_search_APT_or_list (zh, zapt, termz, attributeSet, stream,
2168                                        reg_id, complete_flag, rank_type,
2169                                        xpath_use,
2170                                        num_bases, basenames);
2171     }
2172     else if (!strcmp (search_type, "local"))
2173     {
2174         rset = rpn_search_APT_local (zh, zapt, termz, attributeSet, stream,
2175                                      rank_type);
2176     }
2177     else if (!strcmp (search_type, "numeric"))
2178     {
2179         rset = rpn_search_APT_numeric (zh, zapt, termz, attributeSet, stream,
2180                                        reg_id, complete_flag, rank_type,
2181                                        xpath_use,
2182                                        num_bases, basenames);
2183     }
2184     else if (!strcmp (search_type, "always"))
2185     {
2186         rset = 0;
2187     }
2188     else
2189         zh->errCode = 118;
2190     return rpn_search_xpath (zh, attributeSet, num_bases, basenames,
2191                              stream, rank_type, rset, xpath_len, xpath);
2192 }
2193
2194 static RSET rpn_search_structure (ZebraHandle zh, Z_RPNStructure *zs,
2195                                   oid_value attributeSet, NMEM stream,
2196                                   Z_SortKeySpecList *sort_sequence,
2197                                   int num_bases, char **basenames)
2198 {
2199     RSET r = NULL;
2200     if (zs->which == Z_RPNStructure_complex)
2201     {
2202         Z_Operator *zop = zs->u.complex->roperator;
2203         rset_bool_parms bool_parms;
2204
2205         bool_parms.rset_l = rpn_search_structure (zh, zs->u.complex->s1,
2206                                                   attributeSet, stream,
2207                                                   sort_sequence,
2208                                                   num_bases, basenames);
2209         if (bool_parms.rset_l == NULL)
2210             return NULL;
2211         bool_parms.rset_r = rpn_search_structure (zh, zs->u.complex->s2,
2212                                                   attributeSet, stream,
2213                                                   sort_sequence,
2214                                                   num_bases, basenames);
2215         if (bool_parms.rset_r == NULL)
2216         {
2217             rset_delete (bool_parms.rset_l);
2218             return NULL;
2219         }
2220         bool_parms.key_size = sizeof(struct it_key);
2221         bool_parms.cmp = key_compare_it;
2222         bool_parms.log_item = key_logdump_txt;
2223
2224         switch (zop->which)
2225         {
2226         case Z_Operator_and:
2227             r = rset_create (rset_kind_and, &bool_parms);
2228             break;
2229         case Z_Operator_or:
2230             r = rset_create (rset_kind_or, &bool_parms);
2231             break;
2232         case Z_Operator_and_not:
2233             r = rset_create (rset_kind_not, &bool_parms);
2234             break;
2235         case Z_Operator_prox:
2236             if (zop->u.prox->which != Z_ProximityOperator_known)
2237             {
2238                 zh->errCode = 132;
2239                 return NULL;
2240             }
2241             if (*zop->u.prox->u.known != Z_ProxUnit_word)
2242             {
2243                 char *val = (char *) nmem_malloc (stream, 16);
2244                 zh->errCode = 132;
2245                 zh->errString = val;
2246                 sprintf (val, "%d", *zop->u.prox->u.known);
2247                 return NULL;
2248             }
2249             else
2250             {
2251                 /* new / old prox */
2252                 rset_prox_parms parms;
2253                 RSET twosets[2];
2254                 
2255                 twosets[0] = bool_parms.rset_l;
2256                 twosets[1] = bool_parms.rset_r;
2257                 parms.rset = twosets;
2258                 parms.rset_no = 2;
2259                 parms.ordered = *zop->u.prox->ordered;
2260                 parms.exclusion = (!zop->u.prox->exclusion ? 0 :
2261                                    *zop->u.prox->exclusion);
2262                 parms.relation = *zop->u.prox->relationType;
2263                 parms.distance = *zop->u.prox->distance;
2264                 parms.key_size = sizeof(struct it_key);
2265                 parms.cmp = key_compare_it;
2266                 parms.getseq = key_get_seq;
2267                 parms.log_item = key_logdump_txt;
2268                 r = rset_create(rset_kind_prox, &parms);
2269             }
2270             break;
2271         default:
2272             zh->errCode = 110;
2273             return NULL;
2274         }
2275     }
2276     else if (zs->which == Z_RPNStructure_simple)
2277     {
2278         if (zs->u.simple->which == Z_Operand_APT)
2279         {
2280             yaz_log(LOG_DEBUG, "rpn_search_APT");
2281             r = rpn_search_APT (zh, zs->u.simple->u.attributesPlusTerm,
2282                                 attributeSet, stream, sort_sequence,
2283                                 num_bases, basenames);
2284         }
2285         else if (zs->u.simple->which == Z_Operand_resultSetId)
2286         {
2287             yaz_log(LOG_DEBUG, "rpn_search_ref");
2288             r = resultSetRef (zh, zs->u.simple->u.resultSetId);
2289             if (!r)
2290             {
2291                 r = rset_create (rset_kind_null, NULL);
2292                 zh->errCode = 30;
2293                 zh->errString =
2294                     nmem_strdup (stream, zs->u.simple->u.resultSetId);
2295                 return 0;
2296             }
2297             else
2298                 rset_dup(r);
2299         }
2300         else
2301         {
2302             zh->errCode = 3;
2303             return 0;
2304         }
2305     }
2306     else
2307     {
2308         zh->errCode = 3;
2309         return 0;
2310     }
2311     return r;
2312 }
2313
2314
2315 RSET rpn_search (ZebraHandle zh, NMEM nmem,
2316                  Z_RPNQuery *rpn, int num_bases, char **basenames, 
2317                  const char *setname,
2318                  ZebraSet sset)
2319 {
2320     RSET rset;
2321     oident *attrset;
2322     oid_value attributeSet;
2323     Z_SortKeySpecList *sort_sequence;
2324     int sort_status, i;
2325
2326     zh->errCode = 0;
2327     zh->errString = NULL;
2328     zh->hits = 0;
2329
2330     sort_sequence = (Z_SortKeySpecList *)
2331         nmem_malloc (nmem, sizeof(*sort_sequence));
2332     sort_sequence->num_specs = 10;
2333     sort_sequence->specs = (Z_SortKeySpec **)
2334         nmem_malloc (nmem, sort_sequence->num_specs *
2335                      sizeof(*sort_sequence->specs));
2336     for (i = 0; i<sort_sequence->num_specs; i++)
2337         sort_sequence->specs[i] = 0;
2338     
2339     attrset = oid_getentbyoid (rpn->attributeSetId);
2340     attributeSet = attrset->value;
2341     rset = rpn_search_structure (zh, rpn->RPNStructure, attributeSet,
2342                                  nmem, sort_sequence, num_bases, basenames);
2343     if (!rset)
2344         return 0;
2345
2346     if (zh->errCode)
2347         yaz_log(LOG_DEBUG, "search error: %d", zh->errCode);
2348     
2349     for (i = 0; sort_sequence->specs[i]; i++)
2350         ;
2351     sort_sequence->num_specs = i;
2352     if (!i)
2353         resultSetRank (zh, sset, rset);
2354     else
2355     {
2356         yaz_log(LOG_DEBUG, "resultSetSortSingle in rpn_search");
2357         resultSetSortSingle (zh, nmem, sset, rset,
2358                              sort_sequence, &sort_status);
2359         if (zh->errCode)
2360         {
2361             yaz_log(LOG_DEBUG, "resultSetSortSingle status = %d", zh->errCode);
2362         }
2363     }
2364     return rset;
2365 }
2366
2367 struct scan_info_entry {
2368     char *term;
2369     ISAMS_P isam_p;
2370 };
2371
2372 struct scan_info {
2373     struct scan_info_entry *list;
2374     ODR odr;
2375     int before, after;
2376     char prefix[20];
2377 };
2378
2379 static int scan_handle (char *name, const char *info, int pos, void *client)
2380 {
2381     int len_prefix, idx;
2382     struct scan_info *scan_info = (struct scan_info *) client;
2383
2384     len_prefix = strlen(scan_info->prefix);
2385     if (memcmp (name, scan_info->prefix, len_prefix))
2386         return 1;
2387     if (pos > 0)        idx = scan_info->after - pos + scan_info->before;
2388     else
2389         idx = - pos - 1;
2390     scan_info->list[idx].term = (char *)
2391         odr_malloc (scan_info->odr, strlen(name + len_prefix)+1);
2392     strcpy (scan_info->list[idx].term, name + len_prefix);
2393     assert (*info == sizeof(ISAMS_P));
2394     memcpy (&scan_info->list[idx].isam_p, info+1, sizeof(ISAMS_P));
2395     return 0;
2396 }
2397
2398 static void scan_term_untrans (ZebraHandle zh, NMEM stream, int reg_type,
2399                                char **dst, const char *src)
2400 {
2401     char term_src[IT_MAX_WORD];
2402     char term_dst[IT_MAX_WORD];
2403     
2404     term_untrans (zh, reg_type, term_src, src);
2405
2406     if (zh->iconv_from_utf8 != 0)
2407     {
2408         int len;
2409         char *inbuf = term_src;
2410         size_t inleft = strlen(term_src);
2411         char *outbuf = term_dst;
2412         size_t outleft = sizeof(term_dst)-1;
2413         size_t ret;
2414         
2415         ret = yaz_iconv (zh->iconv_from_utf8, &inbuf, &inleft,
2416                          &outbuf, &outleft);
2417         if (ret == (size_t)(-1))
2418             len = 0;
2419         else
2420             len = outbuf - term_dst;
2421         *dst = nmem_malloc (stream, len + 1);
2422         if (len > 0)
2423             memcpy (*dst, term_dst, len);
2424         (*dst)[len] = '\0';
2425     }
2426     else
2427         *dst = nmem_strdup (stream, term_src);
2428 }
2429
2430 static void count_set (RSET r, int *count)
2431 {
2432     int psysno = 0;
2433     int kno = 0;
2434     struct it_key key;
2435     RSFD rfd;
2436     int term_index;
2437
2438     yaz_log(LOG_DEBUG, "count_set");
2439
2440     *count = 0;
2441     rfd = rset_open (r, RSETF_READ);
2442     while (rset_read (r, rfd, &key, &term_index))
2443     {
2444         if (key.sysno != psysno)
2445         {
2446             psysno = key.sysno;
2447             (*count)++;
2448         }
2449         kno++;
2450     }
2451     rset_close (r, rfd);
2452     yaz_log(LOG_DEBUG, "%d keys, %d records", kno, *count);
2453 }
2454
2455 void rpn_scan (ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
2456                oid_value attributeset,
2457                int num_bases, char **basenames,
2458                int *position, int *num_entries, ZebraScanEntry **list,
2459                int *is_partial, RSET limit_set, int return_zero)
2460 {
2461     int i;
2462     int pos = *position;
2463     int num = *num_entries;
2464     int before;
2465     int after;
2466     int base_no;
2467     char termz[IT_MAX_WORD+20];
2468     AttrType use;
2469     int use_value;
2470     const char *use_string = 0;
2471     struct scan_info *scan_info_array;
2472     ZebraScanEntry *glist;
2473     int ords[32], ord_no = 0;
2474     int ptr[32];
2475
2476     int bases_ok = 0;     /* no of databases with OK attribute */
2477     int errCode = 0;      /* err code (if any is not OK) */
2478     char *errString = 0;  /* addinfo */
2479
2480     unsigned reg_id;
2481     char *search_type = NULL;
2482     char rank_type[128];
2483     int complete_flag;
2484     int sort_flag;
2485
2486     *list = 0;
2487
2488     if (attributeset == VAL_NONE)
2489         attributeset = VAL_BIB1;
2490
2491     if (!limit_set)
2492     {
2493         AttrType termset;
2494         int termset_value_numeric;
2495         const char *termset_value_string;
2496         attr_init (&termset, zapt, 8);
2497         termset_value_numeric =
2498             attr_find_ex (&termset, NULL, &termset_value_string);
2499         if (termset_value_numeric != -1)
2500         {
2501             char resname[32];
2502             const char *termset_name = 0;
2503             
2504             if (termset_value_numeric != -2)
2505             {
2506                 
2507                 sprintf (resname, "%d", termset_value_numeric);
2508                 termset_name = resname;
2509             }
2510             else
2511                 termset_name = termset_value_string;
2512             
2513             limit_set = resultSetRef (zh, termset_name);
2514         }
2515     }
2516         
2517     yaz_log (LOG_DEBUG, "position = %d, num = %d set=%d",
2518              pos, num, attributeset);
2519         
2520     attr_init (&use, zapt, 1);
2521     use_value = attr_find_ex (&use, &attributeset, &use_string);
2522
2523     if (zebra_maps_attr (zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2524                          rank_type, &complete_flag, &sort_flag))
2525     {
2526         *num_entries = 0;
2527         zh->errCode = 113;
2528         return ;
2529     }
2530     yaz_log (LOG_DEBUG, "use_value = %d", use_value);
2531
2532     if (use_value == -1)
2533         use_value = 1016;
2534     for (base_no = 0; base_no < num_bases && ord_no < 32; base_no++)
2535     {
2536         int r;
2537         attent attp;
2538         data1_local_attribute *local_attr;
2539
2540         if ((r=att_getentbyatt (zh, &attp, attributeset, use_value,
2541                                 use_string)))
2542         {
2543             yaz_log(LOG_DEBUG, "att_getentbyatt fail. set=%d use=%d",
2544                   attributeset, use_value);
2545             if (r == -1)
2546             {
2547                 char val_str[32];
2548                 sprintf (val_str, "%d", use_value);
2549                 errCode = 114;
2550                 errString = odr_strdup (stream, val_str);
2551             }   
2552             else
2553                 errCode = 121;
2554             continue;
2555         }
2556         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2557         {
2558             zh->errString = basenames[base_no];
2559             zh->errCode = 109; /* Database unavailable */
2560             *num_entries = 0;
2561             return;
2562         }
2563         bases_ok++;
2564         for (local_attr = attp.local_attributes; local_attr && ord_no < 32;
2565              local_attr = local_attr->next)
2566         {
2567             int ord;
2568
2569             ord = zebraExplain_lookupSU (zh->reg->zei, attp.attset_ordinal,
2570                                          local_attr->local);
2571             if (ord > 0)
2572                 ords[ord_no++] = ord;
2573         }
2574     }
2575     if (!bases_ok && errCode)
2576     {
2577         zh->errCode = errCode;
2578         zh->errString = errString;
2579         *num_entries = 0;
2580     }
2581     if (ord_no == 0)
2582     {
2583         *num_entries = 0;
2584         return;
2585     }
2586     /* prepare dictionary scanning */
2587     before = pos-1;
2588     after = 1+num-pos;
2589     scan_info_array = (struct scan_info *)
2590         odr_malloc (stream, ord_no * sizeof(*scan_info_array));
2591     for (i = 0; i < ord_no; i++)
2592     {
2593         int j, prefix_len = 0;
2594         int before_tmp = before, after_tmp = after;
2595         struct scan_info *scan_info = scan_info_array + i;
2596         struct rpn_char_map_info rcmi;
2597
2598         rpn_char_map_prepare (zh->reg, reg_id, &rcmi);
2599
2600         scan_info->before = before;
2601         scan_info->after = after;
2602         scan_info->odr = stream;
2603
2604         scan_info->list = (struct scan_info_entry *)
2605             odr_malloc (stream, (before+after) * sizeof(*scan_info->list));
2606         for (j = 0; j<before+after; j++)
2607             scan_info->list[j].term = NULL;
2608
2609         prefix_len += key_SU_encode (ords[i], termz + prefix_len);
2610         termz[prefix_len++] = reg_id;
2611         termz[prefix_len] = 0;
2612         strcpy (scan_info->prefix, termz);
2613
2614         if (trans_scan_term (zh, zapt, termz+prefix_len, reg_id))
2615             return ;
2616                     
2617         dict_scan (zh->reg->dict, termz, &before_tmp, &after_tmp,
2618                    scan_info, scan_handle);
2619     }
2620     glist = (ZebraScanEntry *)
2621         odr_malloc (stream, (before+after)*sizeof(*glist));
2622
2623     /* consider terms after main term */
2624     for (i = 0; i < ord_no; i++)
2625         ptr[i] = before;
2626     
2627     *is_partial = 0;
2628     for (i = 0; i<after; i++)
2629     {
2630         int j, j0 = -1;
2631         const char *mterm = NULL;
2632         const char *tst;
2633         RSET rset;
2634         
2635         for (j = 0; j < ord_no; j++)
2636         {
2637             if (ptr[j] < before+after &&
2638                 (tst=scan_info_array[j].list[ptr[j]].term) &&
2639                 (!mterm || strcmp (tst, mterm) < 0))
2640             {
2641                 j0 = j;
2642                 mterm = tst;
2643             }
2644         }
2645         if (j0 == -1)
2646             break;
2647         scan_term_untrans (zh, stream->mem, reg_id,
2648                            &glist[i+before].term, mterm);
2649         rset = rset_trunc (zh, &scan_info_array[j0].list[ptr[j0]].isam_p, 1,
2650                            glist[i+before].term, strlen(glist[i+before].term),
2651                            NULL, 0, zapt->term->which);
2652
2653         ptr[j0]++;
2654         for (j = j0+1; j<ord_no; j++)
2655         {
2656             if (ptr[j] < before+after &&
2657                 (tst=scan_info_array[j].list[ptr[j]].term) &&
2658                 !strcmp (tst, mterm))
2659             {
2660                 rset_bool_parms bool_parms;
2661                 RSET rset2;
2662
2663                 rset2 =
2664                    rset_trunc (zh, &scan_info_array[j].list[ptr[j]].isam_p, 1,
2665                                glist[i+before].term,
2666                                strlen(glist[i+before].term), NULL, 0,
2667                                zapt->term->which);
2668
2669                 bool_parms.key_size = sizeof(struct it_key);
2670                 bool_parms.cmp = key_compare_it;
2671                 bool_parms.log_item = key_logdump_txt;
2672                 bool_parms.rset_l = rset;
2673                 bool_parms.rset_r = rset2;
2674               
2675                 rset = rset_create (rset_kind_or, &bool_parms);
2676
2677                 ptr[j]++;
2678             }
2679         }
2680         if (limit_set)
2681         {
2682             rset_bool_parms bool_parms;
2683
2684             bool_parms.key_size = sizeof(struct it_key);
2685             bool_parms.cmp = key_compare_it;
2686             bool_parms.log_item = key_logdump_txt;
2687             bool_parms.rset_l = rset;
2688             bool_parms.rset_r = rset_dup(limit_set);
2689
2690             rset = rset_create (rset_kind_and, &bool_parms);
2691         }
2692         count_set (rset, &glist[i+before].occurrences);
2693         rset_delete (rset);
2694     }
2695     if (i < after)
2696     {
2697         *num_entries -= (after-i);
2698         *is_partial = 1;
2699     }
2700
2701     /* consider terms before main term */
2702     for (i = 0; i<ord_no; i++)
2703         ptr[i] = 0;
2704
2705     for (i = 0; i<before; i++)
2706     {
2707         int j, j0 = -1;
2708         const char *mterm = NULL;
2709         const char *tst;
2710         RSET rset;
2711         
2712         for (j = 0; j <ord_no; j++)
2713         {
2714             if (ptr[j] < before &&
2715                 (tst=scan_info_array[j].list[before-1-ptr[j]].term) &&
2716                 (!mterm || strcmp (tst, mterm) > 0))
2717             {
2718                 j0 = j;
2719                 mterm = tst;
2720             }
2721         }
2722         if (j0 == -1)
2723             break;
2724
2725         scan_term_untrans (zh, stream->mem, reg_id,
2726                            &glist[before-1-i].term, mterm);
2727
2728         rset = rset_trunc
2729                (zh, &scan_info_array[j0].list[before-1-ptr[j0]].isam_p, 1,
2730                 glist[before-1-i].term, strlen(glist[before-1-i].term),
2731                 NULL, 0, zapt->term->which);
2732
2733         ptr[j0]++;
2734
2735         for (j = j0+1; j<ord_no; j++)
2736         {
2737             if (ptr[j] < before &&
2738                 (tst=scan_info_array[j].list[before-1-ptr[j]].term) &&
2739                 !strcmp (tst, mterm))
2740             {
2741                 rset_bool_parms bool_parms;
2742                 RSET rset2;
2743
2744                 rset2 = rset_trunc (zh,
2745                          &scan_info_array[j].list[before-1-ptr[j]].isam_p, 1,
2746                                     glist[before-1-i].term,
2747                                     strlen(glist[before-1-i].term), NULL, 0,
2748                                     zapt->term->which);
2749
2750                 bool_parms.key_size = sizeof(struct it_key);
2751                 bool_parms.cmp = key_compare_it;
2752                 bool_parms.log_item = key_logdump_txt;
2753                 bool_parms.rset_l = rset;
2754                 bool_parms.rset_r = rset2;
2755               
2756                 rset = rset_create (rset_kind_or, &bool_parms);
2757
2758                 ptr[j]++;
2759             }
2760         }
2761         if (limit_set)
2762         {
2763             rset_bool_parms bool_parms;
2764
2765             bool_parms.key_size = sizeof(struct it_key);
2766             bool_parms.cmp = key_compare_it;
2767             bool_parms.log_item = key_logdump_txt;
2768             bool_parms.rset_l = rset;
2769             bool_parms.rset_r = rset_dup(limit_set);
2770
2771             rset = rset_create (rset_kind_and, &bool_parms);
2772         }
2773         count_set (rset, &glist[before-1-i].occurrences);
2774         rset_delete (rset);
2775     }
2776     i = before-i;
2777     if (i)
2778     {
2779         *is_partial = 1;
2780         *position -= i;
2781         *num_entries -= i;
2782     }
2783     *list = glist + i;               /* list is set to first 'real' entry */
2784     
2785     yaz_log(LOG_DEBUG, "position = %d, num_entries = %d",
2786           *position, *num_entries);
2787     if (zh->errCode)
2788         yaz_log(LOG_DEBUG, "scan error: %d", zh->errCode);
2789 }
2790