Added two else-statements in seldom used term structure handling code.
[idzebra-moved-to-github.git] / index / zrpn.c
1 /* $Id: zrpn.c,v 1.169 2005-02-25 10:08:44 adam Exp $
2    Copyright (C) 1995-2005
3    Index Data ApS
4
5 This file is part of the Zebra server.
6
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra.  If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA.
21 */
22
23
24 #include <stdio.h>
25 #include <assert.h>
26 #ifdef WIN32
27 #include <io.h>
28 #else
29 #include <unistd.h>
30 #endif
31 #include <ctype.h>
32
33 #include "index.h"
34 #include <zebra_xpath.h>
35
36 #include <charmap.h>
37 #include <rset.h>
38
39 /* maximum number of terms in an and/or/phrase item */
40 #define TERM_LIST_LENGTH_MAX 256
41
42 static const struct key_control it_ctrl =
43
44     sizeof(struct it_key),
45     2, /* we have sysnos and seqnos in this key, nothing more */
46     key_compare_it, 
47     key_logdump_txt,   /* FIXME  - clean up these functions */
48     key_get_seq,
49 };
50
51
52 const struct key_control *key_it_ctrl = &it_ctrl;
53
54 struct rpn_char_map_info
55 {
56     ZebraMaps zm;
57     int reg_type;
58 };
59
60 typedef struct
61 {
62     int type;
63     int major;
64     int minor;
65     Z_AttributesPlusTerm *zapt;
66 } AttrType;
67
68
/* Log level of the "rpn" module, resolved on first use in add_isam_p(). */
static int log_level_set = 0;   /* non-zero once log_level_rpn was looked up */
static int log_level_rpn = 0;   /* level handed to yaz_log() */
71
72 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
73 {
74     struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
75     const char **out = zebra_maps_input(p->zm, p->reg_type, from, len, 0);
76 #if 0
77     if (out && *out)
78     {
79         const char *outp = *out;
80         yaz_log(YLOG_LOG, "---");
81         while (*outp)
82         {
83             yaz_log(YLOG_LOG, "%02X", *outp);
84             outp++;
85         }
86     }
87 #endif
88     return out;
89 }
90
91 static void rpn_char_map_prepare(struct zebra_register *reg, int reg_type,
92                                   struct rpn_char_map_info *map_info)
93 {
94     map_info->zm = reg->zebra_maps;
95     map_info->reg_type = reg_type;
96     dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
97 }
98
99 static int attr_find_ex(AttrType *src, oid_value *attributeSetP,
100                          const char **string_value)
101 {
102     int num_attributes;
103
104     num_attributes = src->zapt->attributes->num_attributes;
105     while (src->major < num_attributes)
106     {
107         Z_AttributeElement *element;
108
109         element = src->zapt->attributes->attributes[src->major];
110         if (src->type == *element->attributeType)
111         {
112             switch (element->which) 
113             {
114             case Z_AttributeValue_numeric:
115                 ++(src->major);
116                 if (element->attributeSet && attributeSetP)
117                 {
118                     oident *attrset;
119
120                     attrset = oid_getentbyoid(element->attributeSet);
121                     *attributeSetP = attrset->value;
122                 }
123                 return *element->value.numeric;
124                 break;
125             case Z_AttributeValue_complex:
126                 if (src->minor >= element->value.complex->num_list)
127                     break;
128                 if (element->attributeSet && attributeSetP)
129                 {
130                     oident *attrset;
131                     
132                     attrset = oid_getentbyoid(element->attributeSet);
133                     *attributeSetP = attrset->value;
134                 }
135                 if (element->value.complex->list[src->minor]->which ==  
136                     Z_StringOrNumeric_numeric)
137                 {
138                     ++(src->minor);
139                     return
140                         *element->value.complex->list[src->minor-1]->u.numeric;
141                 }
142                 else if (element->value.complex->list[src->minor]->which ==  
143                          Z_StringOrNumeric_string)
144                 {
145                     if (!string_value)
146                         break;
147                     ++(src->minor);
148                     *string_value = 
149                         element->value.complex->list[src->minor-1]->u.string;
150                     return -2;
151                 }
152                 else
153                     break;
154             default:
155                 assert(0);
156             }
157         }
158         ++(src->major);
159     }
160     return -1;
161 }
162
163 static int attr_find(AttrType *src, oid_value *attributeSetP)
164 {
165     return attr_find_ex(src, attributeSetP, 0);
166 }
167
168 static void attr_init(AttrType *src, Z_AttributesPlusTerm *zapt,
169                        int type)
170 {
171     src->zapt = zapt;
172     src->type = type;
173     src->major = 0;
174     src->minor = 0;
175 }
176
177 #define TERM_COUNT        
178        
179 struct grep_info {        
180 #ifdef TERM_COUNT        
181     int *term_no;        
182 #endif        
183     ISAMC_P *isam_p_buf;
184     int isam_p_size;        
185     int isam_p_indx;
186     ZebraHandle zh;
187     int reg_type;
188     ZebraSet termset;
189 };        
190
191 static void term_untrans(ZebraHandle zh, int reg_type,
192                            char *dst, const char *src)
193 {
194     int len = 0;
195     while (*src)
196     {
197         const char *cp = zebra_maps_output(zh->reg->zebra_maps,
198                                            reg_type, &src);
199         if (!cp && len < IT_MAX_WORD-1)
200             dst[len++] = *src++;
201         else
202             while (*cp && len < IT_MAX_WORD-1)
203                 dst[len++] = *cp++;
204     }
205     dst[len] = '\0';
206 }
207
208 static void add_isam_p(const char *name, const char *info,
209                         struct grep_info *p)
210 {
211     if (!log_level_set)
212     {
213         log_level_rpn = yaz_log_module_level("rpn");
214         log_level_set = 1;
215     }
216     if (p->isam_p_indx == p->isam_p_size)
217     {
218         ISAMC_P *new_isam_p_buf;
219 #ifdef TERM_COUNT        
220         int *new_term_no;        
221 #endif
222         p->isam_p_size = 2*p->isam_p_size + 100;
223         new_isam_p_buf = (ISAMC_P *) xmalloc(sizeof(*new_isam_p_buf) *
224                                              p->isam_p_size);
225         if (p->isam_p_buf)
226         {
227             memcpy(new_isam_p_buf, p->isam_p_buf,
228                     p->isam_p_indx * sizeof(*p->isam_p_buf));
229             xfree(p->isam_p_buf);
230         }
231         p->isam_p_buf = new_isam_p_buf;
232
233 #ifdef TERM_COUNT
234         new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
235         if (p->term_no)
236         {
237             memcpy(new_term_no, p->isam_p_buf,
238                     p->isam_p_indx * sizeof(*p->term_no));
239             xfree(p->term_no);
240         }
241         p->term_no = new_term_no;
242 #endif
243     }
244     assert(*info == sizeof(*p->isam_p_buf));
245     memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
246
247 #if 1
248     if (p->termset)
249     {
250         const char *db;
251         int set, use;
252         char term_tmp[IT_MAX_WORD];
253         int su_code = 0;
254         int len = key_SU_decode (&su_code, name);
255         
256         term_untrans  (p->zh, p->reg_type, term_tmp, name+len+1);
257         yaz_log(log_level_rpn, "grep: %d %c %s", su_code, name[len], term_tmp);
258         zebraExplain_lookup_ord (p->zh->reg->zei,
259                                  su_code, &db, &set, &use);
260         yaz_log(log_level_rpn, "grep:  set=%d use=%d db=%s", set, use, db);
261         
262         resultSetAddTerm(p->zh, p->termset, name[len], db,
263                          set, use, term_tmp);
264     }
265 #endif
266     (p->isam_p_indx)++;
267 }
268
/*
 * grep_handle: adapter that records one hit via add_isam_p().
 * p is the struct grep_info accumulator passed as opaque client data.
 * Always returns 0.
 */
static int grep_handle(char *name, const char *info, void *p)
{
    struct grep_info *acc = (struct grep_info *) p;

    add_isam_p(name, info, acc);
    return 0;
}
274
275 static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src,
276                      const char *ct1, const char *ct2, int first)
277 {
278     const char *s1, *s0 = *src;
279     const char **map;
280
281     /* skip white space */
282     while (*s0)
283     {
284         if (ct1 && strchr(ct1, *s0))
285             break;
286         if (ct2 && strchr(ct2, *s0))
287             break;
288         s1 = s0;
289         map = zebra_maps_input(zebra_maps, reg_type, &s1, strlen(s1), first);
290         if (**map != *CHR_SPACE)
291             break;
292         s0 = s1;
293     }
294     *src = s0;
295     return *s0;
296 }
297
298 #define REGEX_CHARS " []()|.*+?!"
299
300 /* term_100: handle term, where trunc = none(no operators at all) */
301 static int term_100(ZebraMaps zebra_maps, int reg_type,
302                      const char **src, char *dst, int space_split,
303                      char *dst_term)
304 {
305     const char *s0, *s1;
306     const char **map;
307     int i = 0;
308     int j = 0;
309
310     const char *space_start = 0;
311     const char *space_end = 0;
312
313     if (!term_pre(zebra_maps, reg_type, src, NULL, NULL, !space_split))
314         return 0;
315     s0 = *src;
316     while (*s0)
317     {
318         s1 = s0;
319         map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0);
320         if (space_split)
321         {
322             if (**map == *CHR_SPACE)
323                 break;
324         }
325         else  /* complete subfield only. */
326         {
327             if (**map == *CHR_SPACE)
328             {   /* save space mapping for later  .. */
329                 space_start = s1;
330                 space_end = s0;
331                 continue;
332             }
333             else if (space_start)
334             {   /* reload last space */
335                 while (space_start < space_end)
336                 {
337                     if (strchr(REGEX_CHARS, *space_start))
338                         dst[i++] = '\\';
339                     dst_term[j++] = *space_start;
340                     dst[i++] = *space_start++;
341                 }
342                 /* and reset */
343                 space_start = space_end = 0;
344             }
345         }
346         /* add non-space char */
347         while (s1 < s0)
348         {
349             if (strchr(REGEX_CHARS, *s1))
350                 dst[i++] = '\\';
351             dst_term[j++] = *s1;
352             dst[i++] = *s1++;
353         }
354     }
355     dst[i] = '\0';
356     dst_term[j] = '\0';
357     *src = s0;
358     return i;
359 }
360
361 /* term_101: handle term, where trunc = Process # */
362 static int term_101(ZebraMaps zebra_maps, int reg_type,
363                      const char **src, char *dst, int space_split,
364                      char *dst_term)
365 {
366     const char *s0, *s1;
367     const char **map;
368     int i = 0;
369     int j = 0;
370
371     if (!term_pre(zebra_maps, reg_type, src, "#", "#", !space_split))
372         return 0;
373     s0 = *src;
374     while (*s0)
375     {
376         if (*s0 == '#')
377         {
378             dst[i++] = '.';
379             dst[i++] = '*';
380             dst_term[j++] = *s0++;
381         }
382         else
383         {
384             s1 = s0;
385             map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0);
386             if (space_split && **map == *CHR_SPACE)
387                 break;
388             while (s1 < s0)
389             {
390                 if (strchr(REGEX_CHARS, *s1))
391                     dst[i++] = '\\';
392                 dst_term[j++] = *s1;
393                 dst[i++] = *s1++;
394             }
395         }
396     }
397     dst[i] = '\0';
398     dst_term[j++] = '\0';
399     *src = s0;
400     return i;
401 }
402
403 /* term_103: handle term, where trunc = re-2 (regular expressions) */
404 static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src,
405                      char *dst, int *errors, int space_split,
406                      char *dst_term)
407 {
408     int i = 0;
409     int j = 0;
410     const char *s0, *s1;
411     const char **map;
412
413     if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split))
414         return 0;
415     s0 = *src;
416     if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
417         isdigit(((const unsigned char *)s0)[1]))
418     {
419         *errors = s0[1] - '0';
420         s0 += 3;
421         if (*errors > 3)
422             *errors = 3;
423     }
424     while (*s0)
425     {
426         if (strchr("^\\()[].*+?|-", *s0))
427         {
428             dst_term[j++] = *s0;
429             dst[i++] = *s0++;
430         }
431         else
432         {
433             s1 = s0;
434             map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0);
435             if (**map == *CHR_SPACE)
436                 break;
437             while (s1 < s0)
438             {
439                 if (strchr(REGEX_CHARS, *s1))
440                     dst[i++] = '\\';
441                 dst_term[j++] = *s1;
442                 dst[i++] = *s1++;
443             }
444         }
445     }
446     dst[i] = '\0';
447     dst_term[j] = '\0';
448     *src = s0;
449     return i;
450 }
451
452 /* term_103: handle term, where trunc = re-1 (regular expressions) */
453 static int term_102 (ZebraMaps zebra_maps, int reg_type, const char **src,
454                      char *dst, int space_split, char *dst_term)
455 {
456     return term_103(zebra_maps, reg_type, src, dst, NULL, space_split,
457                      dst_term);
458 }
459
460
461 /* term_104: handle term, where trunc = Process # and ! */
462 static int term_104(ZebraMaps zebra_maps, int reg_type,
463                      const char **src, char *dst, int space_split,
464                      char *dst_term)
465 {
466     const char *s0, *s1;
467     const char **map;
468     int i = 0;
469     int j = 0;
470
471     if (!term_pre(zebra_maps, reg_type, src, "?*#", "?*#", !space_split))
472         return 0;
473     s0 = *src;
474     while (*s0)
475     {
476         if (*s0 == '?')
477         {
478             dst_term[j++] = *s0++;
479             if (*s0 >= '0' && *s0 <= '9')
480             {
481                 int limit = 0;
482                 while (*s0 >= '0' && *s0 <= '9')
483                 {
484                     limit = limit * 10 + (*s0 - '0');
485                     dst_term[j++] = *s0++;
486                 }
487                 if (limit > 20)
488                     limit = 20;
489                 while (--limit >= 0)
490                 {
491                     dst[i++] = '.';
492                     dst[i++] = '?';
493                 }
494             }
495             else
496             {
497                 dst[i++] = '.';
498                 dst[i++] = '*';
499             }
500         }
501         else if (*s0 == '*')
502         {
503             dst[i++] = '.';
504             dst[i++] = '*';
505             dst_term[j++] = *s0++;
506         }
507         else if (*s0 == '#')
508         {
509             dst[i++] = '.';
510             dst_term[j++] = *s0++;
511         }
512         else
513         {
514             s1 = s0;
515             map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0);
516             if (space_split && **map == *CHR_SPACE)
517                 break;
518             while (s1 < s0)
519             {
520                 if (strchr(REGEX_CHARS, *s1))
521                     dst[i++] = '\\';
522                 dst_term[j++] = *s1;
523                 dst[i++] = *s1++;
524             }
525         }
526     }
527     dst[i] = '\0';
528     dst_term[j++] = '\0';
529     *src = s0;
530     return i;
531 }
532
533 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
534 static int term_105 (ZebraMaps zebra_maps, int reg_type,
535                      const char **src, char *dst, int space_split,
536                      char *dst_term, int right_truncate)
537 {
538     const char *s0, *s1;
539     const char **map;
540     int i = 0;
541     int j = 0;
542
543     if (!term_pre(zebra_maps, reg_type, src, "*!", "*!", !space_split))
544         return 0;
545     s0 = *src;
546     while (*s0)
547     {
548         if (*s0 == '*')
549         {
550             dst[i++] = '.';
551             dst[i++] = '*';
552             dst_term[j++] = *s0++;
553         }
554         else if (*s0 == '!')
555         {
556             dst[i++] = '.';
557             dst_term[j++] = *s0++;
558         }
559         else
560         {
561             s1 = s0;
562             map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0);
563             if (space_split && **map == *CHR_SPACE)
564                 break;
565             while (s1 < s0)
566             {
567                 if (strchr(REGEX_CHARS, *s1))
568                     dst[i++] = '\\';
569                 dst_term[j++] = *s1;
570                 dst[i++] = *s1++;
571             }
572         }
573     }
574     if (right_truncate)
575     {
576         dst[i++] = '.';
577         dst[i++] = '*';
578     }
579     dst[i] = '\0';
580     
581     dst_term[j++] = '\0';
582     *src = s0;
583     return i;
584 }
585
586
587 /* gen_regular_rel - generate regular expression from relation
588  *  val:     border value (inclusive)
589  *  islt:    1 if <=; 0 if >=.
590  */
591 static void gen_regular_rel(char *dst, int val, int islt)
592 {
593     int dst_p;
594     int w, d, i;
595     int pos = 0;
596     char numstr[20];
597
598     yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
599     if (val >= 0)
600     {
601         if (islt)
602             strcpy(dst, "(-[0-9]+|(");
603         else
604             strcpy(dst, "((");
605     } 
606     else
607     {
608         if (!islt)
609         {
610             strcpy(dst, "([0-9]+|-(");
611             dst_p = strlen(dst);
612             islt = 1;
613         }
614         else
615         {
616             strcpy(dst, "(-(");
617             islt = 0;
618         }
619         val = -val;
620     }
621     dst_p = strlen(dst);
622     sprintf(numstr, "%d", val);
623     for (w = strlen(numstr); --w >= 0; pos++)
624     {
625         d = numstr[w];
626         if (pos > 0)
627         {
628             if (islt)
629             {
630                 if (d == '0')
631                     continue;
632                 d--;
633             } 
634             else
635             {
636                 if (d == '9')
637                     continue;
638                 d++;
639             }
640         }
641         
642         strcpy(dst + dst_p, numstr);
643         dst_p = strlen(dst) - pos - 1;
644
645         if (islt)
646         {
647             if (d != '0')
648             {
649                 dst[dst_p++] = '[';
650                 dst[dst_p++] = '0';
651                 dst[dst_p++] = '-';
652                 dst[dst_p++] = d;
653                 dst[dst_p++] = ']';
654             }
655             else
656                 dst[dst_p++] = d;
657         }
658         else
659         {
660             if (d != '9')
661             { 
662                 dst[dst_p++] = '[';
663                 dst[dst_p++] = d;
664                 dst[dst_p++] = '-';
665                 dst[dst_p++] = '9';
666                 dst[dst_p++] = ']';
667             }
668             else
669                 dst[dst_p++] = d;
670         }
671         for (i = 0; i<pos; i++)
672         {
673             dst[dst_p++] = '[';
674             dst[dst_p++] = '0';
675             dst[dst_p++] = '-';
676             dst[dst_p++] = '9';
677             dst[dst_p++] = ']';
678         }
679         dst[dst_p++] = '|';
680     }
681     dst[dst_p] = '\0';
682     if (islt)
683     {
684         /* match everything less than 10^(pos-1) */
685         strcat(dst, "0*");
686         for (i = 1; i<pos; i++)
687             strcat(dst, "[0-9]?");
688     }
689     else
690     {
691         /* match everything greater than 10^pos */
692         for (i = 0; i <= pos; i++)
693             strcat(dst, "[0-9]");
694         strcat(dst, "[0-9]*");
695     }
696     strcat(dst, "))");
697 }
698
/*
 * string_rel_add_char: copy one (possibly backslash-escaped) character
 * from src[*indx] to **term_p.  A backslash is copied together with the
 * character that follows it.  Both *term_p and *indx are advanced past
 * what was copied.
 */
void string_rel_add_char(char **term_p, const char *src, int *indx)
{
    char *out = *term_p;
    int pos = *indx;

    if (src[pos] == '\\')
        *out++ = src[pos++];
    *out++ = src[pos++];

    *term_p = out;
    *indx = pos;
}
705
706 /*
707  *   >  abc     ([b-].*|a[c-].*|ab[d-].*|abc.+)
708  *              ([^-a].*|a[^-b].*ab[^-c].*|abc.+)
709  *   >= abc     ([b-].*|a[c-].*|ab[c-].*)
710  *              ([^-a].*|a[^-b].*|ab[c-].*)
711  *   <  abc     ([-0].*|a[-a].*|ab[-b].*)
712  *              ([^a-].*|a[^b-].*|ab[^c-].*)
713  *   <= abc     ([-0].*|a[-a].*|ab[-b].*|abc)
714  *              ([^a-].*|a[^b-].*|ab[^c-].*|abc)
715  */
716 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
717                             const char **term_sub, char *term_dict,
718                             oid_value attributeSet,
719                             int reg_type, int space_split, char *term_dst)
720 {
721     AttrType relation;
722     int relation_value;
723     int i;
724     char *term_tmp = term_dict + strlen(term_dict);
725     char term_component[2*IT_MAX_WORD+20];
726
727     attr_init(&relation, zapt, 2);
728     relation_value = attr_find(&relation, NULL);
729
730     yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
731     switch (relation_value)
732     {
733     case 1:
734         if (!term_100 (zh->reg->zebra_maps, reg_type,
735                        term_sub, term_component,
736                        space_split, term_dst))
737             return 0;
738         yaz_log(log_level_rpn, "Relation <");
739         
740         *term_tmp++ = '(';
741         for (i = 0; term_component[i]; )
742         {
743             int j = 0;
744
745             if (i)
746                 *term_tmp++ = '|';
747             while (j < i)
748                 string_rel_add_char (&term_tmp, term_component, &j);
749
750             *term_tmp++ = '[';
751
752             *term_tmp++ = '^';
753             string_rel_add_char (&term_tmp, term_component, &i);
754             *term_tmp++ = '-';
755
756             *term_tmp++ = ']';
757             *term_tmp++ = '.';
758             *term_tmp++ = '*';
759
760             if ((term_tmp - term_dict) > IT_MAX_WORD)
761                 break;
762         }
763         *term_tmp++ = ')';
764         *term_tmp = '\0';
765         break;
766     case 2:
767         if (!term_100 (zh->reg->zebra_maps, reg_type,
768                        term_sub, term_component,
769                        space_split, term_dst))
770             return 0;
771         yaz_log(log_level_rpn, "Relation <=");
772
773         *term_tmp++ = '(';
774         for (i = 0; term_component[i]; )
775         {
776             int j = 0;
777
778             while (j < i)
779                 string_rel_add_char (&term_tmp, term_component, &j);
780             *term_tmp++ = '[';
781
782             *term_tmp++ = '^';
783             string_rel_add_char (&term_tmp, term_component, &i);
784             *term_tmp++ = '-';
785
786             *term_tmp++ = ']';
787             *term_tmp++ = '.';
788             *term_tmp++ = '*';
789
790             *term_tmp++ = '|';
791
792             if ((term_tmp - term_dict) > IT_MAX_WORD)
793                 break;
794         }
795         for (i = 0; term_component[i]; )
796             string_rel_add_char (&term_tmp, term_component, &i);
797         *term_tmp++ = ')';
798         *term_tmp = '\0';
799         break;
800     case 5:
801         if (!term_100 (zh->reg->zebra_maps, reg_type,
802                        term_sub, term_component, space_split, term_dst))
803             return 0;
804         yaz_log(log_level_rpn, "Relation >");
805
806         *term_tmp++ = '(';
807         for (i = 0; term_component[i];)
808         {
809             int j = 0;
810
811             while (j < i)
812                 string_rel_add_char (&term_tmp, term_component, &j);
813             *term_tmp++ = '[';
814             
815             *term_tmp++ = '^';
816             *term_tmp++ = '-';
817             string_rel_add_char (&term_tmp, term_component, &i);
818
819             *term_tmp++ = ']';
820             *term_tmp++ = '.';
821             *term_tmp++ = '*';
822
823             *term_tmp++ = '|';
824
825             if ((term_tmp - term_dict) > IT_MAX_WORD)
826                 break;
827         }
828         for (i = 0; term_component[i];)
829             string_rel_add_char (&term_tmp, term_component, &i);
830         *term_tmp++ = '.';
831         *term_tmp++ = '+';
832         *term_tmp++ = ')';
833         *term_tmp = '\0';
834         break;
835     case 4:
836         if (!term_100 (zh->reg->zebra_maps, reg_type, term_sub,
837                        term_component, space_split, term_dst))
838             return 0;
839         yaz_log(log_level_rpn, "Relation >=");
840
841         *term_tmp++ = '(';
842         for (i = 0; term_component[i];)
843         {
844             int j = 0;
845
846             if (i)
847                 *term_tmp++ = '|';
848             while (j < i)
849                 string_rel_add_char (&term_tmp, term_component, &j);
850             *term_tmp++ = '[';
851
852             if (term_component[i+1])
853             {
854                 *term_tmp++ = '^';
855                 *term_tmp++ = '-';
856                 string_rel_add_char (&term_tmp, term_component, &i);
857             }
858             else
859             {
860                 string_rel_add_char (&term_tmp, term_component, &i);
861                 *term_tmp++ = '-';
862             }
863             *term_tmp++ = ']';
864             *term_tmp++ = '.';
865             *term_tmp++ = '*';
866
867             if ((term_tmp - term_dict) > IT_MAX_WORD)
868                 break;
869         }
870         *term_tmp++ = ')';
871         *term_tmp = '\0';
872         break;
873     case 3:
874     default:
875         yaz_log(log_level_rpn, "Relation =");
876         if (!term_100 (zh->reg->zebra_maps, reg_type, term_sub,
877                        term_component, space_split, term_dst))
878             return 0;
879         strcat(term_tmp, "(");
880         strcat(term_tmp, term_component);
881         strcat(term_tmp, ")");
882     }
883     return 1;
884 }
885
886 static int string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
887                         const char **term_sub, 
888                         oid_value attributeSet, NMEM stream,
889                         struct grep_info *grep_info,
890                         int reg_type, int complete_flag,
891                         int num_bases, char **basenames,
892                         char *term_dst, int xpath_use);
893
894 static RSET term_trunc(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
895                         const char **term_sub, 
896                         oid_value attributeSet, NMEM stream,
897                         struct grep_info *grep_info,
898                         int reg_type, int complete_flag,
899                         int num_bases, char **basenames,
900                         char *term_dst,
901                         const char *rank_type, int xpath_use,
902                         NMEM rset_nmem)
903 {
904     int r;
905     grep_info->isam_p_indx = 0;
906     r = string_term(zh, zapt, term_sub, attributeSet, stream, grep_info,
907                      reg_type, complete_flag, num_bases, basenames,
908                      term_dst, xpath_use);
909     if (r < 1)
910         return 0;
911     yaz_log(log_level_rpn, "term: %s", term_dst);
912     return rset_trunc(zh, grep_info->isam_p_buf,
913                        grep_info->isam_p_indx, term_dst,
914                        strlen(term_dst), rank_type, 1 /* preserve pos */,
915                        zapt->term->which, rset_nmem,
916                        key_it_ctrl,key_it_ctrl->scope);
917 }
918 static char *nmem_strdup_i(NMEM nmem, int v)
919 {
920     char val_str[64];
921     sprintf (val_str, "%d", v);
922     return nmem_strdup(nmem, val_str);
923 }
924
925 static int string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
926                        const char **term_sub, 
927                        oid_value attributeSet, NMEM stream,
928                        struct grep_info *grep_info,
929                        int reg_type, int complete_flag,
930                        int num_bases, char **basenames,
931                        char *term_dst, int xpath_use)
932 {
933     char term_dict[2*IT_MAX_WORD+4000];
934     int j, r, base_no;
935     AttrType truncation;
936     int truncation_value;
937     AttrType use;
938     int use_value;
939     const char *use_string = 0;
940     oid_value curAttributeSet = attributeSet;
941     const char *termp;
942     struct rpn_char_map_info rcmi;
943     int space_split = complete_flag ? 0 : 1;
944
945     int bases_ok = 0;     /* no of databases with OK attribute */
946     int errCode = 0;      /* err code (if any is not OK) */
947     char *errString = 0;  /* addinfo */
948
949     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
950     attr_init (&use, zapt, 1);
951     use_value = attr_find_ex (&use, &curAttributeSet, &use_string);
952     yaz_log(log_level_rpn, "string_term, use value %d", use_value);
953     attr_init (&truncation, zapt, 5);
954     truncation_value = attr_find (&truncation, NULL);
955     yaz_log(log_level_rpn, "truncation value %d", truncation_value);
956
957     if (use_value == -1)    /* no attribute - assumy "any" */
958         use_value = 1016;
959     for (base_no = 0; base_no < num_bases; base_no++)
960     {
961         int attr_ok = 0;
962         int regex_range = 0;
963         int init_pos = 0;
964         attent attp;
965         data1_local_attribute id_xpath_attr;
966         data1_local_attribute *local_attr;
967         int max_pos, prefix_len = 0;
968
969         termp = *term_sub;
970
971         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
972         {
973             zh->errCode = 109; /* Database unavailable */
974             zh->errString = basenames[base_no];
975             return -1;
976         }
977         if (xpath_use > 0 && use_value == -2) 
978         {
979             use_value = xpath_use;
980             attp.local_attributes = &id_xpath_attr;
981             attp.attset_ordinal = VAL_IDXPATH;
982             id_xpath_attr.next = 0;
983             id_xpath_attr.local = use_value;
984         }
985         else if (curAttributeSet == VAL_IDXPATH)
986         {
987             attp.local_attributes = &id_xpath_attr;
988             attp.attset_ordinal = VAL_IDXPATH;
989             id_xpath_attr.next = 0;
990             id_xpath_attr.local = use_value;
991         }
992         else
993         {
994             if ((r = att_getentbyatt (zh, &attp, curAttributeSet, use_value,
995                                             use_string)))
996             {
997                 yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
998                       curAttributeSet, use_value, r);
999                 if (r == -1)
1000                 {
1001                     /* set was found, but value wasn't defined */
1002                     errCode = 114;
1003                     if (use_string)
1004                         errString = nmem_strdup(stream, use_string);
1005                     else
1006                         errString = nmem_strdup_i (stream, use_value);
1007                 }
1008                 else
1009                 {
1010                     int oid[OID_SIZE];
1011                     struct oident oident;
1012                     
1013                     oident.proto = PROTO_Z3950;
1014                     oident.oclass = CLASS_ATTSET;
1015                     oident.value = curAttributeSet;
1016                     oid_ent_to_oid (&oident, oid);
1017                     
1018                     errCode = 121;
1019                     errString = nmem_strdup (stream, oident.desc);
1020                 }
1021                 continue;
1022             }
1023         }
1024         for (local_attr = attp.local_attributes; local_attr;
1025              local_attr = local_attr->next)
1026         {
1027             int ord;
1028             char ord_buf[32];
1029             int i, ord_len;
1030             
1031             ord = zebraExplain_lookupSU (zh->reg->zei, attp.attset_ordinal,
1032                                          local_attr->local);
1033             if (ord < 0)
1034                 continue;
1035             if (prefix_len)
1036                 term_dict[prefix_len++] = '|';
1037             else
1038                 term_dict[prefix_len++] = '(';
1039             
1040             ord_len = key_SU_encode (ord, ord_buf);
1041             for (i = 0; i<ord_len; i++)
1042             {
1043                 term_dict[prefix_len++] = 1;
1044                 term_dict[prefix_len++] = ord_buf[i];
1045             }
1046         }
1047         if (!prefix_len)
1048         {
1049 #if 1
1050             bases_ok++;
1051 #else
1052             errCode = 114;
1053             errString = nmem_strdup_i(stream, use_value);
1054             continue;
1055 #endif
1056         }
1057         else
1058         {
1059             bases_ok++; /* this has OK attributes */
1060             attr_ok = 1;
1061         }
1062
1063         term_dict[prefix_len++] = ')';
1064         term_dict[prefix_len++] = 1;
1065         term_dict[prefix_len++] = reg_type;
1066         yaz_log(log_level_rpn, "reg_type = %d", term_dict[prefix_len-1]);
1067         term_dict[prefix_len] = '\0';
1068         j = prefix_len;
1069         switch (truncation_value)
1070         {
1071         case -1:         /* not specified */
1072         case 100:        /* do not truncate */
1073             if (!string_relation (zh, zapt, &termp, term_dict,
1074                                   attributeSet,
1075                                   reg_type, space_split, term_dst))
1076                 return 0;
1077             break;
1078         case 1:          /* right truncation */
1079             term_dict[j++] = '(';
1080             if (!term_100(zh->reg->zebra_maps, reg_type,
1081                           &termp, term_dict + j, space_split, term_dst))
1082                 return 0;
1083             strcat(term_dict, ".*)");
1084             break;
1085         case 2:          /* keft truncation */
1086             term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1087             if (!term_100(zh->reg->zebra_maps, reg_type,
1088                           &termp, term_dict + j, space_split, term_dst))
1089                 return 0;
1090             strcat(term_dict, ")");
1091             break;
1092         case 3:          /* left&right truncation */
1093             term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1094             if (!term_100(zh->reg->zebra_maps, reg_type,
1095                           &termp, term_dict + j, space_split, term_dst))
1096                 return 0;
1097             strcat(term_dict, ".*)");
1098             break;
1099         case 101:        /* process # in term */
1100             term_dict[j++] = '(';
1101             if (!term_101(zh->reg->zebra_maps, reg_type,
1102                           &termp, term_dict + j, space_split, term_dst))
1103                 return 0;
1104             strcat(term_dict, ")");
1105             break;
1106         case 102:        /* Regexp-1 */
1107             term_dict[j++] = '(';
1108             if (!term_102(zh->reg->zebra_maps, reg_type,
1109                           &termp, term_dict + j, space_split, term_dst))
1110                 return 0;
1111             strcat(term_dict, ")");
1112             break;
1113         case 103:       /* Regexp-2 */
1114             r = 1;
1115             term_dict[j++] = '(';
1116             init_pos = 2;
1117             if (!term_103 (zh->reg->zebra_maps, reg_type,
1118                            &termp, term_dict + j, &regex_range,
1119                            space_split, term_dst))
1120                 return 0;
1121             strcat(term_dict, ")");
1122         case 104:        /* process # and ! in term */
1123             term_dict[j++] = '(';
1124             if (!term_104 (zh->reg->zebra_maps, reg_type,
1125                            &termp, term_dict + j, space_split, term_dst))
1126                 return 0;
1127             strcat(term_dict, ")");
1128             break;
1129         case 105:        /* process * and ! in term */
1130             term_dict[j++] = '(';
1131             if (!term_105 (zh->reg->zebra_maps, reg_type,
1132                            &termp, term_dict + j, space_split, term_dst, 1))
1133                 return 0;
1134             strcat(term_dict, ")");
1135             break;
1136         case 106:        /* process * and ! in term */
1137             term_dict[j++] = '(';
1138             if (!term_105 (zh->reg->zebra_maps, reg_type,
1139                            &termp, term_dict + j, space_split, term_dst, 0))
1140                 return 0;
1141             strcat(term_dict, ")");
1142             break;
1143         default:
1144             zh->errCode = 120;
1145             zh->errString = nmem_strdup_i(stream, truncation_value);
1146             return -1;
1147         }
1148         if (attr_ok)
1149         {
1150             yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_dict+prefix_len);
1151             r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
1152                                  grep_info, &max_pos, init_pos,
1153                                  grep_handle);
1154             if (r)
1155                 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1156         }
1157     }
1158     if (!bases_ok)
1159     {
1160         zh->errCode = errCode;
1161         zh->errString = errString;
1162         return -1;
1163     }
1164     *term_sub = termp;
1165     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1166     return 1;
1167 }
1168
1169
1170 /* convert APT search term to UTF8 */
1171 static int zapt_term_to_utf8 (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1172                               char *termz)
1173 {
1174     size_t sizez;
1175     Z_Term *term = zapt->term;
1176
1177     switch (term->which)
1178     {
1179     case Z_Term_general:
1180         if (zh->iconv_to_utf8 != 0)
1181         {
1182             char *inbuf = term->u.general->buf;
1183             size_t inleft = term->u.general->len;
1184             char *outbuf = termz;
1185             size_t outleft = IT_MAX_WORD-1;
1186             size_t ret;
1187
1188             ret = yaz_iconv(zh->iconv_to_utf8, &inbuf, &inleft,
1189                         &outbuf, &outleft);
1190             if (ret == (size_t)(-1))
1191             {
1192                 ret = yaz_iconv(zh->iconv_to_utf8, 0, 0, 0, 0);
1193                 zh->errCode = 125;
1194                 return -1;
1195             }
1196             *outbuf = 0;
1197         }
1198         else
1199         {
1200             sizez = term->u.general->len;
1201             if (sizez > IT_MAX_WORD-1)
1202                 sizez = IT_MAX_WORD-1;
1203             memcpy (termz, term->u.general->buf, sizez);
1204             termz[sizez] = '\0';
1205         }
1206         break;
1207     case Z_Term_characterString:
1208         sizez = strlen(term->u.characterString);
1209         if (sizez > IT_MAX_WORD-1)
1210             sizez = IT_MAX_WORD-1;
1211         memcpy (termz, term->u.characterString, sizez);
1212         termz[sizez] = '\0';
1213         break;
1214     default:
1215         zh->errCode = 124;
1216         return -1;
1217     }
1218     return 0;
1219 }
1220
1221 /* convert APT SCAN term to internal cmap */
1222 static int trans_scan_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1223                             char *termz, int reg_type)
1224 {
1225     char termz0[IT_MAX_WORD];
1226
1227     if (zapt_term_to_utf8(zh, zapt, termz0))
1228         return -1;    /* error */
1229     else
1230     {
1231         const char **map;
1232         const char *cp = (const char *) termz0;
1233         const char *cp_end = cp + strlen(cp);
1234         const char *src;
1235         int i = 0;
1236         const char *space_map = NULL;
1237         int len;
1238             
1239         while ((len = (cp_end - cp)) > 0)
1240         {
1241             map = zebra_maps_input (zh->reg->zebra_maps, reg_type, &cp, len, 0);
1242             if (**map == *CHR_SPACE)
1243                 space_map = *map;
1244             else
1245             {
1246                 if (i && space_map)
1247                     for (src = space_map; *src; src++)
1248                         termz[i++] = *src;
1249                 space_map = NULL;
1250                 for (src = *map; *src; src++)
1251                     termz[i++] = *src;
1252             }
1253         }
1254         termz[i] = '\0';
1255     }
1256     return 0;
1257 }
1258
1259 char *normalize_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1260                      const char *termz, NMEM stream, unsigned reg_id)
1261 {
1262     WRBUF wrbuf = 0;
1263     AttrType truncation;
1264     int truncation_value;
1265     char *ex_list = 0;
1266
1267     attr_init (&truncation, zapt, 5);
1268     truncation_value = attr_find (&truncation, NULL);
1269
1270     switch (truncation_value)
1271     {
1272     default:
1273         ex_list = "";
1274         break;
1275     case 101:
1276         ex_list = "#";
1277         break;
1278     case 102:
1279     case 103:
1280         ex_list = 0;
1281         break;
1282     case 104:
1283         ex_list = "!#";
1284         break;
1285     case 105:
1286         ex_list = "!*";
1287         break;
1288     }
1289     if (ex_list)
1290         wrbuf = zebra_replace(zh->reg->zebra_maps, reg_id, ex_list,
1291                               termz, strlen(termz));
1292     if (!wrbuf)
1293         return nmem_strdup(stream, termz);
1294     else
1295     {
1296         char *buf = (char*) nmem_malloc(stream, wrbuf_len(wrbuf)+1);
1297         memcpy (buf, wrbuf_buf(wrbuf), wrbuf_len(wrbuf));
1298         buf[wrbuf_len(wrbuf)] = '\0';
1299         return buf;
1300     }
1301 }
1302
1303 static void grep_info_delete (struct grep_info *grep_info)
1304 {
1305 #ifdef TERM_COUNT
1306     xfree(grep_info->term_no);
1307 #endif
1308     xfree (grep_info->isam_p_buf);
1309 }
1310
1311 static int grep_info_prepare (ZebraHandle zh,
1312                               Z_AttributesPlusTerm *zapt,
1313                               struct grep_info *grep_info,
1314                               int reg_type,
1315                               NMEM stream)
1316 {
1317     AttrType termset;
1318     int termset_value_numeric;
1319     const char *termset_value_string;
1320
1321 #ifdef TERM_COUNT
1322     grep_info->term_no = 0;
1323 #endif
1324     grep_info->isam_p_size = 0;
1325     grep_info->isam_p_buf = NULL;
1326     grep_info->zh = zh;
1327     grep_info->reg_type = reg_type;
1328     grep_info->termset = 0;
1329
1330     if (!zapt)
1331         return 0;
1332     attr_init (&termset, zapt, 8);
1333     termset_value_numeric =
1334         attr_find_ex (&termset, NULL, &termset_value_string);
1335     if (termset_value_numeric != -1)
1336     {
1337         char resname[32];
1338         const char *termset_name = 0;
1339         if (termset_value_numeric != -2)
1340         {
1341     
1342             sprintf (resname, "%d", termset_value_numeric);
1343             termset_name = resname;
1344         }
1345         else
1346             termset_name = termset_value_string;
1347         yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1348         grep_info->termset = resultSetAdd (zh, termset_name, 1);
1349         if (!grep_info->termset)
1350         {
1351             zh->errCode = 128;
1352             zh->errString = nmem_strdup (stream, termset_name);
1353             return -1;
1354         }
1355     }
1356     return 0;
1357 }
1358                                
1359
1360 static RSET rpn_search_APT_phrase (ZebraHandle zh,
1361                                    Z_AttributesPlusTerm *zapt,
1362                                    const char *termz_org,
1363                                    oid_value attributeSet,
1364                                    NMEM stream,
1365                                    int reg_type, int complete_flag,
1366                                    const char *rank_type, int xpath_use,
1367                                    int num_bases, char **basenames, 
1368                                    NMEM rset_nmem)
1369 {
1370     char term_dst[IT_MAX_WORD+1];
1371     RSET rset[TERM_LIST_LENGTH_MAX], result;
1372     size_t rset_no = 0;
1373     struct grep_info grep_info;
1374     char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type);
1375     const char *termp = termz;
1376
1377     *term_dst = 0;
1378     if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream))
1379         return 0;
1380     for (; rset_no < sizeof(rset)/sizeof(*rset); rset_no++)
1381     { 
1382         yaz_log(log_level_rpn, "APT_phrase termp=%s", termp);
1383         rset[rset_no] = term_trunc(zh, zapt, &termp, attributeSet,
1384                                     stream, &grep_info,
1385                                     reg_type, complete_flag,
1386                                     num_bases, basenames,
1387                                     term_dst, rank_type,
1388                                     xpath_use,rset_nmem);
1389         if (!rset[rset_no])
1390             break;
1391     }
1392     grep_info_delete (&grep_info);
1393     if (rset_no == 0)
1394         return rsnull_create (rset_nmem,key_it_ctrl); 
1395     else if (rset_no == 1)
1396         return (rset[0]);
1397     else
1398         result = rsprox_create( rset_nmem, key_it_ctrl, key_it_ctrl->scope,
1399                        rset_no, rset,
1400                        1 /* ordered */, 0 /* exclusion */,
1401                        3 /* relation */, 1 /* distance */);
1402     return result;
1403 }
1404
1405 static RSET rpn_search_APT_or_list (ZebraHandle zh,
1406                                     Z_AttributesPlusTerm *zapt,
1407                                     const char *termz_org,
1408                                     oid_value attributeSet,
1409                                     NMEM stream,
1410                                     int reg_type, int complete_flag,
1411                                     const char *rank_type,
1412                                     int xpath_use,
1413                                     int num_bases, char **basenames,
1414                                     NMEM rset_nmem)
1415 {
1416     char term_dst[IT_MAX_WORD+1];
1417     RSET rset[TERM_LIST_LENGTH_MAX];
1418     size_t rset_no = 0;
1419     struct grep_info grep_info;
1420     char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type);
1421     const char *termp = termz;
1422
1423     if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream))
1424         return 0;
1425     for (; rset_no < sizeof(rset)/sizeof(*rset); rset_no++)
1426     { 
1427         yaz_log(log_level_rpn, "APT_or_list termp=%s", termp);
1428         rset[rset_no] = term_trunc(zh, zapt, &termp, attributeSet,
1429                                     stream, &grep_info,
1430                                     reg_type, complete_flag,
1431                                     num_bases, basenames,
1432                                     term_dst, rank_type,
1433                                     xpath_use,rset_nmem);
1434         if (!rset[rset_no])
1435             break;
1436     }
1437     grep_info_delete (&grep_info);
1438     if (rset_no == 0)
1439         return rsnull_create (rset_nmem,key_it_ctrl);  
1440     return rsmulti_or_create(rset_nmem, key_it_ctrl,key_it_ctrl->scope,
1441                              rset_no, rset);
1442 }
1443
1444 static RSET rpn_search_APT_and_list (ZebraHandle zh,
1445                                      Z_AttributesPlusTerm *zapt,
1446                                      const char *termz_org,
1447                                      oid_value attributeSet,
1448                                      NMEM stream,
1449                                      int reg_type, int complete_flag,
1450                                      const char *rank_type, 
1451                                      int xpath_use,
1452                                      int num_bases, char **basenames,
1453                                      NMEM rset_nmem)
1454 {
1455     char term_dst[IT_MAX_WORD+1];
1456     RSET rset[TERM_LIST_LENGTH_MAX];
1457     size_t rset_no = 0;
1458     struct grep_info grep_info;
1459     char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type);
1460     const char *termp = termz;
1461
1462     if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream))
1463         return 0;
1464     for (; rset_no < sizeof(rset)/sizeof(*rset); rset_no++)
1465     { 
1466         yaz_log(log_level_rpn, "APT_and_list termp=%s", termp);
1467         rset[rset_no] = term_trunc(zh, zapt, &termp, attributeSet,
1468                                     stream, &grep_info,
1469                                     reg_type, complete_flag,
1470                                     num_bases, basenames,
1471                                     term_dst, rank_type,
1472                                     xpath_use, rset_nmem);
1473         if (!rset[rset_no])
1474             break;
1475     }
1476     grep_info_delete (&grep_info);
1477     if (rset_no == 0)
1478         return rsnull_create(rset_nmem,key_it_ctrl); 
1479     
1480     return rsmulti_and_create(rset_nmem, key_it_ctrl, key_it_ctrl->scope,
1481                               rset_no, rset);
1482 }
1483
1484 static int numeric_relation (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1485                              const char **term_sub,
1486                              char *term_dict,
1487                              oid_value attributeSet,
1488                              struct grep_info *grep_info,
1489                              int *max_pos,
1490                              int reg_type,
1491                              char *term_dst)
1492 {
1493     AttrType relation;
1494     int relation_value;
1495     int term_value;
1496     int r;
1497     char *term_tmp = term_dict + strlen(term_dict);
1498
1499     attr_init (&relation, zapt, 2);
1500     relation_value = attr_find (&relation, NULL);
1501
1502     yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1503
1504     if (!term_100 (zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1505                    term_dst))
1506         return 0;
1507     term_value = atoi (term_tmp);
1508     switch (relation_value)
1509     {
1510     case 1:
1511         yaz_log(log_level_rpn, "Relation <");
1512         gen_regular_rel (term_tmp, term_value-1, 1);
1513         break;
1514     case 2:
1515         yaz_log(log_level_rpn, "Relation <=");
1516         gen_regular_rel (term_tmp, term_value, 1);
1517         break;
1518     case 4:
1519         yaz_log(log_level_rpn, "Relation >=");
1520         gen_regular_rel (term_tmp, term_value, 0);
1521         break;
1522     case 5:
1523         yaz_log(log_level_rpn, "Relation >");
1524         gen_regular_rel (term_tmp, term_value+1, 0);
1525         break;
1526     case 3:
1527     default:
1528         yaz_log(log_level_rpn, "Relation =");
1529         sprintf (term_tmp, "(0*%d)", term_value);
1530     }
1531     yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_tmp);
1532     r = dict_lookup_grep(zh->reg->dict, term_dict, 0, grep_info, max_pos,
1533                           0, grep_handle);
1534     if (r)
1535         yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1536     yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1537     return 1;
1538 }
1539
1540 static int numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1541                          const char **term_sub, 
1542                          oid_value attributeSet, struct grep_info *grep_info,
1543                          int reg_type, int complete_flag,
1544                          int num_bases, char **basenames,
1545                          char *term_dst, int xpath_use, NMEM stream)
1546 {
1547     char term_dict[2*IT_MAX_WORD+2];
1548     int r, base_no;
1549     AttrType use;
1550     int use_value;
1551     const char *use_string = 0;
1552     oid_value curAttributeSet = attributeSet;
1553     const char *termp;
1554     struct rpn_char_map_info rcmi;
1555
1556     int bases_ok = 0;     /* no of databases with OK attribute */
1557     int errCode = 0;      /* err code (if any is not OK) */
1558     char *errString = 0;  /* addinfo */
1559
1560     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1561     attr_init (&use, zapt, 1);
1562     use_value = attr_find_ex (&use, &curAttributeSet, &use_string);
1563
1564     if (use_value == -1)
1565         use_value = 1016;
1566
1567     for (base_no = 0; base_no < num_bases; base_no++)
1568     {
1569         attent attp;
1570         data1_local_attribute id_xpath_attr;
1571         data1_local_attribute *local_attr;
1572         int max_pos, prefix_len = 0;
1573
1574         termp = *term_sub;
1575         if (use_value == -2)  /* string attribute (assume IDXPATH/any) */
1576         {
1577             use_value = xpath_use;
1578             attp.local_attributes = &id_xpath_attr;
1579             attp.attset_ordinal = VAL_IDXPATH;
1580             id_xpath_attr.next = 0;
1581             id_xpath_attr.local = use_value;
1582         }
1583         else if (curAttributeSet == VAL_IDXPATH)
1584         {
1585             attp.local_attributes = &id_xpath_attr;
1586             attp.attset_ordinal = VAL_IDXPATH;
1587             id_xpath_attr.next = 0;
1588             id_xpath_attr.local = use_value;
1589         }
1590         else
1591         {
1592             if ((r = att_getentbyatt (zh, &attp, curAttributeSet, use_value,
1593                                             use_string)))
1594             {
1595                 yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
1596                       curAttributeSet, use_value, r);
1597                 if (r == -1)
1598                 {
1599                     errString = nmem_strdup_i(stream, use_value);
1600                     errCode = 114;
1601                 }
1602                 else
1603                     errCode = 121;
1604                 continue;
1605             }
1606         }
1607         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1608         {
1609             zh->errCode = 109; /* Database unavailable */
1610             zh->errString = basenames[base_no];
1611             return -1;
1612         }
1613         for (local_attr = attp.local_attributes; local_attr;
1614              local_attr = local_attr->next)
1615         {
1616             int ord;
1617             char ord_buf[32];
1618             int i, ord_len;
1619
1620             ord = zebraExplain_lookupSU (zh->reg->zei, attp.attset_ordinal,
1621                                           local_attr->local);
1622             if (ord < 0)
1623                 continue;
1624             if (prefix_len)
1625                 term_dict[prefix_len++] = '|';
1626             else
1627                 term_dict[prefix_len++] = '(';
1628
1629             ord_len = key_SU_encode (ord, ord_buf);
1630             for (i = 0; i<ord_len; i++)
1631             {
1632                 term_dict[prefix_len++] = 1;
1633                 term_dict[prefix_len++] = ord_buf[i];
1634             }
1635         }
1636         if (!prefix_len)
1637         {
1638             errCode = 114;
1639             errString = nmem_strdup_i(stream, use_value);
1640             continue;
1641         }
1642         bases_ok++;
1643         term_dict[prefix_len++] = ')';        
1644         term_dict[prefix_len++] = 1;
1645         term_dict[prefix_len++] = reg_type;
1646         yaz_log(YLOG_DEBUG, "reg_type = %d", term_dict[prefix_len-1]);
1647         term_dict[prefix_len] = '\0';
1648         if (!numeric_relation (zh, zapt, &termp, term_dict,
1649                                attributeSet, grep_info, &max_pos, reg_type,
1650                                term_dst))
1651             return 0;
1652     }
1653     if (!bases_ok)
1654     {
1655         zh->errCode = errCode;
1656         zh->errString = errString;
1657         return -1;
1658     }
1659     *term_sub = termp;
1660     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1661     return 1;
1662 }
1663
1664 static RSET rpn_search_APT_numeric (ZebraHandle zh,
1665                                     Z_AttributesPlusTerm *zapt,
1666                                     const char *termz,
1667                                     oid_value attributeSet,
1668                                     NMEM stream,
1669                                     int reg_type, int complete_flag,
1670                                     const char *rank_type, int xpath_use,
1671                                     int num_bases, char **basenames,
1672                                     NMEM rset_nmem)
1673 {
1674     char term_dst[IT_MAX_WORD+1];
1675     const char *termp = termz;
1676     RSET rset[TERM_LIST_LENGTH_MAX];
1677     int  r;
1678     size_t rset_no = 0;
1679     struct grep_info grep_info;
1680
1681     yaz_log(log_level_rpn, "APT_numeric t='%s'",termz);
1682     if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream))
1683         return 0;
1684     for (; rset_no < sizeof(rset)/sizeof(*rset); rset_no++)
1685     { 
1686         yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1687         grep_info.isam_p_indx = 0;
1688         r = numeric_term(zh, zapt, &termp, attributeSet, &grep_info,
1689                           reg_type, complete_flag, num_bases, basenames,
1690                           term_dst, xpath_use,
1691                           stream);
1692         if (r < 1)
1693             break;
1694         yaz_log(YLOG_DEBUG, "term: %s", term_dst);
1695         rset[rset_no] = rset_trunc(zh, grep_info.isam_p_buf,
1696                                     grep_info.isam_p_indx, term_dst,
1697                                     strlen(term_dst), rank_type,
1698                                     0 /* preserve position */,
1699                                     zapt->term->which, rset_nmem, 
1700                                     key_it_ctrl,key_it_ctrl->scope);
1701         if (!rset[rset_no])
1702             break;
1703     }
1704     grep_info_delete (&grep_info);
1705     if (rset_no == 0)
1706         return rsnull_create(rset_nmem,key_it_ctrl);
1707     if (rset_no == 1)
1708         return rset[0];
1709     return rsmulti_and_create(rset_nmem,key_it_ctrl,key_it_ctrl->scope,
1710                               rset_no, rset);
1711 }
1712
1713 static RSET rpn_search_APT_local (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1714                                   const char *termz,
1715                                   oid_value attributeSet,
1716                                   NMEM stream,
1717                                   const char *rank_type, NMEM rset_nmem)
1718 {
1719     RSET result;
1720     RSFD rsfd;
1721     struct it_key key;
1722     int sys;
1723     result = rstemp_create( rset_nmem,key_it_ctrl,key_it_ctrl->scope,
1724                      res_get (zh->res, "setTmpDir"),0 );
1725     rsfd = rset_open (result, RSETF_WRITE);
1726
1727     sys = atoi(termz);
1728     if (sys <= 0)
1729         sys = 1;
1730     key.mem[0] = sys;
1731     key.mem[1] = 1;
1732     key.len = 2;
1733     rset_write (rsfd, &key);
1734     rset_close (rsfd);
1735     return result;
1736 }
1737
1738 static RSET rpn_sort_spec (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1739                            oid_value attributeSet, NMEM stream,
1740                            Z_SortKeySpecList *sort_sequence,
1741                            const char *rank_type)
1742 {
1743     int i;
1744     int sort_relation_value;
1745     AttrType sort_relation_type;
1746     int use_value;
1747     AttrType use_type;
1748     Z_SortKeySpec *sks;
1749     Z_SortKey *sk;
1750     Z_AttributeElement *ae;
1751     int oid[OID_SIZE];
1752     oident oe;
1753     char termz[20];
1754     
1755     attr_init (&sort_relation_type, zapt, 7);
1756     sort_relation_value = attr_find (&sort_relation_type, &attributeSet);
1757
1758     attr_init (&use_type, zapt, 1);
1759     use_value = attr_find (&use_type, &attributeSet);
1760
1761     if (!sort_sequence->specs)
1762     {
1763         sort_sequence->num_specs = 10;
1764         sort_sequence->specs = (Z_SortKeySpec **)
1765             nmem_malloc(stream, sort_sequence->num_specs *
1766                          sizeof(*sort_sequence->specs));
1767         for (i = 0; i<sort_sequence->num_specs; i++)
1768             sort_sequence->specs[i] = 0;
1769     }
1770     if (zapt->term->which != Z_Term_general)
1771         i = 0;
1772     else
1773         i = atoi_n ((char *) zapt->term->u.general->buf,
1774                     zapt->term->u.general->len);
1775     if (i >= sort_sequence->num_specs)
1776         i = 0;
1777     sprintf (termz, "%d", i);
1778
1779     oe.proto = PROTO_Z3950;
1780     oe.oclass = CLASS_ATTSET;
1781     oe.value = attributeSet;
1782     if (!oid_ent_to_oid (&oe, oid))
1783         return 0;
1784
1785     sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1786     sks->sortElement = (Z_SortElement *)
1787         nmem_malloc(stream, sizeof(*sks->sortElement));
1788     sks->sortElement->which = Z_SortElement_generic;
1789     sk = sks->sortElement->u.generic = (Z_SortKey *)
1790         nmem_malloc(stream, sizeof(*sk));
1791     sk->which = Z_SortKey_sortAttributes;
1792     sk->u.sortAttributes = (Z_SortAttributes *)
1793         nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
1794
1795     sk->u.sortAttributes->id = oid;
1796     sk->u.sortAttributes->list = (Z_AttributeList *)
1797         nmem_malloc(stream, sizeof(*sk->u.sortAttributes->list));
1798     sk->u.sortAttributes->list->num_attributes = 1;
1799     sk->u.sortAttributes->list->attributes = (Z_AttributeElement **)
1800         nmem_malloc(stream, sizeof(*sk->u.sortAttributes->list->attributes));
1801     ae = *sk->u.sortAttributes->list->attributes = (Z_AttributeElement *)
1802         nmem_malloc(stream, sizeof(**sk->u.sortAttributes->list->attributes));
1803     ae->attributeSet = 0;
1804     ae->attributeType = (int *)
1805         nmem_malloc(stream, sizeof(*ae->attributeType));
1806     *ae->attributeType = 1;
1807     ae->which = Z_AttributeValue_numeric;
1808     ae->value.numeric = (int *)
1809         nmem_malloc(stream, sizeof(*ae->value.numeric));
1810     *ae->value.numeric = use_value;
1811
1812     sks->sortRelation = (int *)
1813         nmem_malloc(stream, sizeof(*sks->sortRelation));
1814     if (sort_relation_value == 1)
1815         *sks->sortRelation = Z_SortKeySpec_ascending;
1816     else if (sort_relation_value == 2)
1817         *sks->sortRelation = Z_SortKeySpec_descending;
1818     else 
1819         *sks->sortRelation = Z_SortKeySpec_ascending;
1820
1821     sks->caseSensitivity = (int *)
1822         nmem_malloc(stream, sizeof(*sks->caseSensitivity));
1823     *sks->caseSensitivity = 0;
1824
1825     sks->which = Z_SortKeySpec_null;
1826     sks->u.null = odr_nullval ();
1827     sort_sequence->specs[i] = sks;
1828     return rsnull_create (NULL,key_it_ctrl);
1829         /* FIXME - nmem?? */
1830 }
1831
1832
1833 static int parse_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1834                        oid_value attributeSet,
1835                        struct xpath_location_step *xpath, int max, NMEM mem)
1836 {
1837     oid_value curAttributeSet = attributeSet;
1838     AttrType use;
1839     const char *use_string = 0;
1840     
1841     attr_init (&use, zapt, 1);
1842     attr_find_ex (&use, &curAttributeSet, &use_string);
1843
1844     if (!use_string || *use_string != '/')
1845         return -1;
1846
1847     return zebra_parse_xpath_str(use_string, xpath, max, mem);
1848 }
1849  
1850                
1851
1852 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
1853                         int reg_type, const char *term, int use,
1854                         oid_value curAttributeSet, NMEM rset_nmem)
1855 {
1856     RSET rset;
1857     struct grep_info grep_info;
1858     char term_dict[2048];
1859     char ord_buf[32];
1860     int prefix_len = 0;
1861     int ord = zebraExplain_lookupSU (zh->reg->zei, curAttributeSet, use);
1862     int ord_len, i, r, max_pos;
1863     int term_type = Z_Term_characterString;
1864     const char *flags = "void";
1865
1866     if (grep_info_prepare (zh, 0 /* zapt */, &grep_info, '0', stream))
1867         return rsnull_create (rset_nmem,key_it_ctrl);
1868
1869     if (ord < 0)
1870         return rsnull_create (rset_nmem,key_it_ctrl);
1871     if (prefix_len)
1872         term_dict[prefix_len++] = '|';
1873     else
1874         term_dict[prefix_len++] = '(';
1875     
1876     ord_len = key_SU_encode (ord, ord_buf);
1877     for (i = 0; i<ord_len; i++)
1878     {
1879         term_dict[prefix_len++] = 1;
1880         term_dict[prefix_len++] = ord_buf[i];
1881     }
1882     term_dict[prefix_len++] = ')';
1883     term_dict[prefix_len++] = 1;
1884     term_dict[prefix_len++] = reg_type;
1885     
1886     strcpy(term_dict+prefix_len, term);
1887     
1888     grep_info.isam_p_indx = 0;
1889     r = dict_lookup_grep(zh->reg->dict, term_dict, 0,
1890                           &grep_info, &max_pos, 0, grep_handle);
1891     yaz_log (YLOG_LOG, "%s %d positions", term,
1892              grep_info.isam_p_indx);
1893     rset = rset_trunc(zh, grep_info.isam_p_buf,
1894                        grep_info.isam_p_indx, term, strlen(term),
1895                        flags, 1, term_type,rset_nmem,
1896                        key_it_ctrl, key_it_ctrl->scope);
1897     grep_info_delete (&grep_info);
1898     return rset;
1899 }
1900
1901 static RSET rpn_search_xpath (ZebraHandle zh,
1902                               oid_value attributeSet,
1903                               int num_bases, char **basenames,
1904                               NMEM stream, const char *rank_type, RSET rset,
1905                               int xpath_len, struct xpath_location_step *xpath,
1906                               NMEM rset_nmem)
1907 {
1908     oid_value curAttributeSet = attributeSet;
1909     int base_no;
1910     int i;
1911
1912     if (xpath_len < 0)
1913         return rset;
1914
1915     yaz_log (YLOG_DEBUG, "xpath len=%d", xpath_len);
1916     for (i = 0; i<xpath_len; i++)
1917     {
1918         yaz_log (log_level_rpn, "XPATH %d %s", i, xpath[i].part);
1919
1920     }
1921
1922     curAttributeSet = VAL_IDXPATH;
1923
1924     /*
1925       //a    ->    a/.*
1926       //a/b  ->    b/a/.*
1927       /a     ->    a/
1928       /a/b   ->    b/a/
1929
1930       /      ->    none
1931
1932    a[@attr = value]/b[@other = othervalue]
1933
1934  /e/@a val      range(e/,range(@a,freetext(w,1015,val),@a),e/)
1935  /a/b val       range(b/a/,freetext(w,1016,val),b/a/)
1936  /a/b/@c val    range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
1937  /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
1938  /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
1939  /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
1940       
1941     */
1942
1943     dict_grep_cmap (zh->reg->dict, 0, 0);
1944
1945     for (base_no = 0; base_no < num_bases; base_no++)
1946     {
1947         int level = xpath_len;
1948         int first_path = 1;
1949         
1950         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1951         {
1952             zh->errCode = 109; /* Database unavailable */
1953             zh->errString = basenames[base_no];
1954             return rset;
1955         }
1956         while (--level >= 0)
1957         {
1958             char xpath_rev[128];
1959             int i, len;
1960             RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
1961
1962             *xpath_rev = 0;
1963             len = 0;
1964             for (i = level; i >= 1; --i)
1965             {
1966                 const char *cp = xpath[i].part;
1967                 if (*cp)
1968                 {
1969                     for (;*cp; cp++)
1970                         if (*cp == '*')
1971                         {
1972                             memcpy (xpath_rev + len, "[^/]*", 5);
1973                             len += 5;
1974                         }
1975                         else if (*cp == ' ')
1976                         {
1977
1978                             xpath_rev[len++] = 1;
1979                             xpath_rev[len++] = ' ';
1980                         }
1981
1982                         else
1983                             xpath_rev[len++] = *cp;
1984                     xpath_rev[len++] = '/';
1985                 }
1986                 else if (i == 1)  /* // case */
1987                 {
1988                     xpath_rev[len++] = '.';
1989                     xpath_rev[len++] = '*';
1990                 }
1991             }
1992             xpath_rev[len] = 0;
1993
1994             if (xpath[level].predicate &&
1995                 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
1996                 xpath[level].predicate->u.relation.name[0])
1997             {
1998                 WRBUF wbuf = wrbuf_alloc();
1999                 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2000                 if (xpath[level].predicate->u.relation.value)
2001                 {
2002                     const char *cp = xpath[level].predicate->u.relation.value;
2003                     wrbuf_putc(wbuf, '=');
2004                     
2005                     while (*cp)
2006                     {
2007                         if (strchr(REGEX_CHARS, *cp))
2008                             wrbuf_putc(wbuf, '\\');
2009                         wrbuf_putc(wbuf, *cp);
2010                         cp++;
2011                     }
2012                 }
2013                 wrbuf_puts(wbuf, "");
2014                 rset_attr = xpath_trunc(
2015                     zh, stream, '0', wrbuf_buf(wbuf), 3, 
2016                     curAttributeSet,rset_nmem);
2017                 wrbuf_free(wbuf, 1);
2018             } 
2019             else 
2020             {
2021                 if (!first_path)
2022                     continue;
2023             }
2024             yaz_log (log_level_rpn, "xpath_rev (%d) = %s", level, xpath_rev);
2025             if (strlen(xpath_rev))
2026             {
2027                 rset_start_tag = xpath_trunc(zh, stream, '0', 
2028                         xpath_rev, 1, curAttributeSet, rset_nmem);
2029             
2030                 rset_end_tag = xpath_trunc(zh, stream, '0', 
2031                         xpath_rev, 2, curAttributeSet, rset_nmem);
2032
2033                 rset = rsbetween_create(rset_nmem, key_it_ctrl,
2034                                         key_it_ctrl->scope,
2035                                         rset_start_tag, rset,
2036                                         rset_end_tag, rset_attr);
2037             }
2038             first_path = 0;
2039         }
2040     }
2041
2042     return rset;
2043 }
2044
2045
2046
2047 static RSET rpn_search_APT (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2048                             oid_value attributeSet, NMEM stream,
2049                             Z_SortKeySpecList *sort_sequence,
2050                             int num_bases, char **basenames, 
2051                             NMEM rset_nmem)
2052 {
2053     unsigned reg_id;
2054     char *search_type = NULL;
2055     char rank_type[128];
2056     int complete_flag;
2057     int sort_flag;
2058     char termz[IT_MAX_WORD+1];
2059     RSET rset = 0;
2060     int xpath_len;
2061     int xpath_use = 0;
2062     struct xpath_location_step xpath[10];
2063
2064     if (!log_level_set)
2065     {
2066         log_level_rpn = yaz_log_module_level("rpn");
2067         log_level_set = 1;
2068     }
2069     zebra_maps_attr (zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2070                      rank_type, &complete_flag, &sort_flag);
2071     
2072     yaz_log(YLOG_DEBUG, "reg_id=%c", reg_id);
2073     yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2074     yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2075     yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2076
2077     if (zapt_term_to_utf8(zh, zapt, termz))
2078         return 0;
2079
2080     if (sort_flag)
2081         return rpn_sort_spec (zh, zapt, attributeSet, stream, sort_sequence,
2082                               rank_type);
2083     xpath_len = parse_xpath(zh, zapt, attributeSet, xpath, 10, stream);
2084     if (xpath_len >= 0)
2085     {
2086         xpath_use = 1016;
2087         if (xpath[xpath_len-1].part[0] == '@')
2088             xpath_use = 1015;
2089     }
2090
2091     if (!strcmp (search_type, "phrase"))
2092     {
2093         rset = rpn_search_APT_phrase (zh, zapt, termz, attributeSet, stream,
2094                                       reg_id, complete_flag, rank_type,
2095                                       xpath_use,
2096                                       num_bases, basenames, rset_nmem);
2097     }
2098     else if (!strcmp (search_type, "and-list"))
2099     {
2100         rset = rpn_search_APT_and_list (zh, zapt, termz, attributeSet, stream,
2101                                         reg_id, complete_flag, rank_type,
2102                                         xpath_use,
2103                                         num_bases, basenames, rset_nmem);
2104     }
2105     else if (!strcmp (search_type, "or-list"))
2106     {
2107         rset = rpn_search_APT_or_list (zh, zapt, termz, attributeSet, stream,
2108                                        reg_id, complete_flag, rank_type,
2109                                        xpath_use,
2110                                        num_bases, basenames, rset_nmem);
2111     }
2112     else if (!strcmp (search_type, "local"))
2113     {
2114         rset = rpn_search_APT_local (zh, zapt, termz, attributeSet, stream,
2115                                      rank_type, rset_nmem);
2116     }
2117     else if (!strcmp (search_type, "numeric"))
2118     {
2119         rset = rpn_search_APT_numeric (zh, zapt, termz, attributeSet, stream,
2120                                        reg_id, complete_flag, rank_type,
2121                                        xpath_use,
2122                                        num_bases, basenames, rset_nmem);
2123     }
2124     else if (!strcmp (search_type, "always"))
2125     {
2126         rset = 0;
2127     }
2128     else
2129         zh->errCode = 118;
2130     return rpn_search_xpath (zh, attributeSet, num_bases, basenames,
2131                              stream, rank_type, rset, 
2132                              xpath_len, xpath, rset_nmem);
2133 }
2134
2135 static RSET rpn_search_structure (ZebraHandle zh, Z_RPNStructure *zs,
2136                                   oid_value attributeSet, 
2137                                   NMEM stream, NMEM rset_nmem,
2138                                   Z_SortKeySpecList *sort_sequence,
2139                                   int num_bases, char **basenames)
2140 {
2141     RSET r = NULL;
2142     if (zs->which == Z_RPNStructure_complex)
2143     {
2144         Z_Operator *zop = zs->u.complex->roperator;
2145         RSET rsets[2]; /* l and r argument */
2146
2147         rsets[0]=rpn_search_structure (zh, zs->u.complex->s1,
2148                                        attributeSet, stream, rset_nmem,
2149                                        sort_sequence,
2150                                        num_bases, basenames);
2151         if (rsets[0] == NULL)
2152             return NULL;
2153         rsets[1]=rpn_search_structure (zh, zs->u.complex->s2,
2154                                        attributeSet, stream, rset_nmem,
2155                                        sort_sequence,
2156                                        num_bases, basenames);
2157         if (rsets[1] == NULL)
2158         {
2159             rset_delete (rsets[0]);
2160             return NULL;
2161         }
2162
2163         switch (zop->which)
2164         {
2165         case Z_Operator_and:
2166             r = rsmulti_and_create(rset_nmem, key_it_ctrl, key_it_ctrl->scope,
2167                                    2, rsets);
2168             break;
2169         case Z_Operator_or:
2170             r = rsmulti_or_create(rset_nmem, key_it_ctrl, key_it_ctrl->scope,
2171                                   2, rsets);
2172             break;
2173         case Z_Operator_and_not:
2174             r = rsbool_create_not(rset_nmem,key_it_ctrl, key_it_ctrl->scope,
2175                     rsets[0],rsets[1]);
2176             break;
2177         case Z_Operator_prox:
2178             if (zop->u.prox->which != Z_ProximityOperator_known)
2179             {
2180                 zh->errCode = 132;
2181                 return NULL;
2182             }
2183             if (*zop->u.prox->u.known != Z_ProxUnit_word)
2184             {
2185                 char *val = (char *) nmem_malloc(stream, 16);
2186                 zh->errCode = 132;
2187                 zh->errString = val;
2188                 sprintf (val, "%d", *zop->u.prox->u.known);
2189                 return NULL;
2190             }
2191             else
2192             {
2193                 /* new / old prox */
2194                 r = rsprox_create(rset_nmem,key_it_ctrl,key_it_ctrl->scope,
2195                          2, rsets, 
2196                          *zop->u.prox->ordered,
2197                          (!zop->u.prox->exclusion ? 
2198                               0 : *zop->u.prox->exclusion),
2199                          *zop->u.prox->relationType,
2200                          *zop->u.prox->distance );
2201             }
2202             break;
2203         default:
2204             zh->errCode = 110;
2205             return NULL;
2206         }
2207     }
2208     else if (zs->which == Z_RPNStructure_simple)
2209     {
2210         if (zs->u.simple->which == Z_Operand_APT)
2211         {
2212             yaz_log(YLOG_DEBUG, "rpn_search_APT");
2213             r = rpn_search_APT (zh, zs->u.simple->u.attributesPlusTerm,
2214                                 attributeSet, stream, sort_sequence,
2215                                 num_bases, basenames,rset_nmem);
2216         }
2217         else if (zs->u.simple->which == Z_Operand_resultSetId)
2218         {
2219             yaz_log(YLOG_DEBUG, "rpn_search_ref");
2220             r = resultSetRef (zh, zs->u.simple->u.resultSetId);
2221             if (!r)
2222             {
2223                 r = rsnull_create (rset_nmem,key_it_ctrl);
2224                 zh->errCode = 30;
2225                 zh->errString =
2226                     nmem_strdup (stream, zs->u.simple->u.resultSetId);
2227                 return 0;
2228             }
2229             else
2230                 rset_dup(r);
2231         }
2232         else
2233         {
2234             zh->errCode = 3;
2235             return 0;
2236         }
2237     }
2238     else
2239     {
2240         zh->errCode = 3;
2241         return 0;
2242     }
2243     return r;
2244 }
2245
2246
2247 RSET rpn_search(ZebraHandle zh, NMEM nmem, NMEM rset_nmem,
2248                 Z_RPNQuery *rpn, int num_bases, char **basenames, 
2249                 const char *setname,
2250                 ZebraSet sset)
2251 {
2252     RSET rset;
2253     oident *attrset;
2254     oid_value attributeSet;
2255     Z_SortKeySpecList *sort_sequence;
2256     int sort_status, i;
2257
2258     zh->errCode = 0;
2259     zh->errString = NULL;
2260     zh->hits = 0;
2261
2262     sort_sequence = (Z_SortKeySpecList *)
2263         nmem_malloc(nmem, sizeof(*sort_sequence));
2264     sort_sequence->num_specs = 10; /* FIXME - Hard-coded number */
2265     sort_sequence->specs = (Z_SortKeySpec **)
2266         nmem_malloc(nmem, sort_sequence->num_specs *
2267                      sizeof(*sort_sequence->specs));
2268     for (i = 0; i<sort_sequence->num_specs; i++)
2269         sort_sequence->specs[i] = 0;
2270     
2271     attrset = oid_getentbyoid (rpn->attributeSetId);
2272     attributeSet = attrset->value;
2273     rset = rpn_search_structure (zh, rpn->RPNStructure, attributeSet,
2274                                  nmem, rset_nmem,
2275                                  sort_sequence, num_bases, basenames);
2276     if (!rset)
2277         return 0;
2278
2279     if (zh->errCode)
2280         yaz_log(YLOG_DEBUG, "search error: %d", zh->errCode);
2281     
2282     for (i = 0; sort_sequence->specs[i]; i++)
2283         ;
2284     sort_sequence->num_specs = i;
2285     if (!i)
2286         resultSetRank (zh, sset, rset, rset_nmem);
2287     else
2288     {
2289         yaz_log(YLOG_DEBUG, "resultSetSortSingle in rpn_search");
2290         resultSetSortSingle (zh, nmem, sset, rset,
2291                              sort_sequence, &sort_status);
2292         if (zh->errCode)
2293         {
2294             yaz_log(YLOG_DEBUG, "resultSetSortSingle status = %d", zh->errCode);
2295         }
2296     }
2297     return rset;
2298 }
2299
2300 struct scan_info_entry {
2301     char *term;
2302     ISAMC_P isam_p;
2303 };
2304
2305 struct scan_info {
2306     struct scan_info_entry *list;
2307     ODR odr;
2308     int before, after;
2309     char prefix[20];
2310 };
2311
2312 static int scan_handle (char *name, const char *info, int pos, void *client)
2313 {
2314     int len_prefix, idx;
2315     struct scan_info *scan_info = (struct scan_info *) client;
2316
2317     len_prefix = strlen(scan_info->prefix);
2318     if (memcmp (name, scan_info->prefix, len_prefix))
2319         return 1;
2320     if (pos > 0)        idx = scan_info->after - pos + scan_info->before;
2321     else
2322         idx = - pos - 1;
2323     scan_info->list[idx].term = (char *)
2324         odr_malloc(scan_info->odr, strlen(name + len_prefix)+1);
2325     strcpy(scan_info->list[idx].term, name + len_prefix);
2326     assert (*info == sizeof(ISAMC_P));
2327     memcpy (&scan_info->list[idx].isam_p, info+1, sizeof(ISAMC_P));
2328     return 0;
2329 }
2330
2331 static void scan_term_untrans (ZebraHandle zh, NMEM stream, int reg_type,
2332                                char **dst, const char *src)
2333 {
2334     char term_src[IT_MAX_WORD];
2335     char term_dst[IT_MAX_WORD];
2336     
2337     term_untrans (zh, reg_type, term_src, src);
2338
2339     if (zh->iconv_from_utf8 != 0)
2340     {
2341         int len;
2342         char *inbuf = term_src;
2343         size_t inleft = strlen(term_src);
2344         char *outbuf = term_dst;
2345         size_t outleft = sizeof(term_dst)-1;
2346         size_t ret;
2347         
2348         ret = yaz_iconv (zh->iconv_from_utf8, &inbuf, &inleft,
2349                          &outbuf, &outleft);
2350         if (ret == (size_t)(-1))
2351             len = 0;
2352         else
2353             len = outbuf - term_dst;
2354         *dst = nmem_malloc(stream, len + 1);
2355         if (len > 0)
2356             memcpy (*dst, term_dst, len);
2357         (*dst)[len] = '\0';
2358     }
2359     else
2360         *dst = nmem_strdup(stream, term_src);
2361 }
2362
2363 static void count_set (RSET r, int *count)
2364 {
2365     zint psysno = 0;
2366     int kno = 0;
2367     struct it_key key;
2368     RSFD rfd;
2369
2370     yaz_log(YLOG_DEBUG, "count_set");
2371
2372     *count = 0;
2373     rfd = rset_open (r, RSETF_READ);
2374     while (rset_read (rfd, &key,0 /* never mind terms */))
2375     {
2376         if (key.mem[0] != psysno)
2377         {
2378             psysno = key.mem[0];
2379             (*count)++;
2380         }
2381         kno++;
2382     }
2383     rset_close (rfd);
2384     yaz_log(YLOG_DEBUG, "%d keys, %d records", kno, *count);
2385 }
2386
2387 void rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
2388                oid_value attributeset,
2389                int num_bases, char **basenames,
2390                int *position, int *num_entries, ZebraScanEntry **list,
2391                int *is_partial, RSET limit_set, int return_zero)
2392 {
2393     int i;
2394     int pos = *position;
2395     int num = *num_entries;
2396     int before;
2397     int after;
2398     int base_no;
2399     char termz[IT_MAX_WORD+20];
2400     AttrType use;
2401     int use_value;
2402     const char *use_string = 0;
2403     struct scan_info *scan_info_array;
2404     ZebraScanEntry *glist;
2405     int ords[32], ord_no = 0;
2406     int ptr[32];
2407
2408     int bases_ok = 0;     /* no of databases with OK attribute */
2409     int errCode = 0;      /* err code (if any is not OK) */
2410     char *errString = 0;  /* addinfo */
2411
2412     unsigned reg_id;
2413     char *search_type = NULL;
2414     char rank_type[128];
2415     int complete_flag;
2416     int sort_flag;
2417     NMEM rset_nmem = NULL; 
2418
2419     *list = 0;
2420
2421     if (attributeset == VAL_NONE)
2422         attributeset = VAL_BIB1;
2423
2424     if (!limit_set)
2425     {
2426         AttrType termset;
2427         int termset_value_numeric;
2428         const char *termset_value_string;
2429         attr_init (&termset, zapt, 8);
2430         termset_value_numeric =
2431             attr_find_ex (&termset, NULL, &termset_value_string);
2432         if (termset_value_numeric != -1)
2433         {
2434             char resname[32];
2435             const char *termset_name = 0;
2436             
2437             if (termset_value_numeric != -2)
2438             {
2439                 
2440                 sprintf (resname, "%d", termset_value_numeric);
2441                 termset_name = resname;
2442             }
2443             else
2444                 termset_name = termset_value_string;
2445             
2446             limit_set = resultSetRef (zh, termset_name);
2447         }
2448     }
2449         
2450     yaz_log (YLOG_DEBUG, "position = %d, num = %d set=%d",
2451              pos, num, attributeset);
2452         
2453     attr_init (&use, zapt, 1);
2454     use_value = attr_find_ex (&use, &attributeset, &use_string);
2455
2456     if (zebra_maps_attr (zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2457                          rank_type, &complete_flag, &sort_flag))
2458     {
2459         *num_entries = 0;
2460         zh->errCode = 113;
2461         return ;
2462     }
2463     yaz_log (YLOG_DEBUG, "use_value = %d", use_value);
2464
2465     if (use_value == -1)
2466         use_value = 1016;
2467     for (base_no = 0; base_no < num_bases && ord_no < 32; base_no++)
2468     {
2469         int r;
2470         attent attp;
2471         data1_local_attribute *local_attr;
2472
2473         if ((r = att_getentbyatt (zh, &attp, attributeset, use_value,
2474                                 use_string)))
2475         {
2476             yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d",
2477                   attributeset, use_value);
2478             if (r == -1)
2479             {
2480                 char val_str[32];
2481                 sprintf (val_str, "%d", use_value);
2482                 errCode = 114;
2483                 errString = odr_strdup (stream, val_str);
2484             }   
2485             else
2486                 errCode = 121;
2487             continue;
2488         }
2489         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2490         {
2491             zh->errString = basenames[base_no];
2492             zh->errCode = 109; /* Database unavailable */
2493             *num_entries = 0;
2494             return;
2495         }
2496         bases_ok++;
2497         for (local_attr = attp.local_attributes; local_attr && ord_no < 32;
2498              local_attr = local_attr->next)
2499         {
2500             int ord;
2501
2502             ord = zebraExplain_lookupSU (zh->reg->zei, attp.attset_ordinal,
2503                                          local_attr->local);
2504             if (ord > 0)
2505                 ords[ord_no++] = ord;
2506         }
2507     }
2508     if (!bases_ok && errCode)
2509     {
2510         zh->errCode = errCode;
2511         zh->errString = errString;
2512         *num_entries = 0;
2513     }
2514     if (ord_no == 0)
2515     {
2516         *num_entries = 0;
2517         return;
2518     }
2519     /* prepare dictionary scanning */
2520     before = pos-1;
2521     after = 1+num-pos;
2522     scan_info_array = (struct scan_info *)
2523         odr_malloc(stream, ord_no * sizeof(*scan_info_array));
2524     for (i = 0; i < ord_no; i++)
2525     {
2526         int j, prefix_len = 0;
2527         int before_tmp = before, after_tmp = after;
2528         struct scan_info *scan_info = scan_info_array + i;
2529         struct rpn_char_map_info rcmi;
2530
2531         rpn_char_map_prepare (zh->reg, reg_id, &rcmi);
2532
2533         scan_info->before = before;
2534         scan_info->after = after;
2535         scan_info->odr = stream;
2536
2537         scan_info->list = (struct scan_info_entry *)
2538             odr_malloc(stream, (before+after) * sizeof(*scan_info->list));
2539         for (j = 0; j<before+after; j++)
2540             scan_info->list[j].term = NULL;
2541
2542         prefix_len += key_SU_encode (ords[i], termz + prefix_len);
2543         termz[prefix_len++] = reg_id;
2544         termz[prefix_len] = 0;
2545         strcpy(scan_info->prefix, termz);
2546
2547         if (trans_scan_term(zh, zapt, termz+prefix_len, reg_id))
2548             return ;
2549         
2550         dict_scan(zh->reg->dict, termz, &before_tmp, &after_tmp,
2551                   scan_info, scan_handle);
2552     }
2553     glist = (ZebraScanEntry *)
2554         odr_malloc(stream, (before+after)*sizeof(*glist));
2555
2556     rset_nmem = nmem_create();
2557
2558     /* consider terms after main term */
2559     for (i = 0; i < ord_no; i++)
2560         ptr[i] = before;
2561     
2562     *is_partial = 0;
2563     for (i = 0; i<after; i++)
2564     {
2565         int j, j0 = -1;
2566         const char *mterm = NULL;
2567         const char *tst;
2568         RSET rset;
2569         
2570         for (j = 0; j < ord_no; j++)
2571         {
2572             if (ptr[j] < before+after &&
2573                 (tst = scan_info_array[j].list[ptr[j]].term) &&
2574                 (!mterm || strcmp (tst, mterm) < 0))
2575             {
2576                 j0 = j;
2577                 mterm = tst;
2578             }
2579         }
2580         if (j0 == -1)
2581             break;
2582         scan_term_untrans (zh, stream->mem, reg_id,
2583                            &glist[i+before].term, mterm);
2584         rset = rset_trunc(zh, &scan_info_array[j0].list[ptr[j0]].isam_p, 1,
2585                           glist[i+before].term, strlen(glist[i+before].term),
2586                           NULL, 0, zapt->term->which, rset_nmem, 
2587                           key_it_ctrl,key_it_ctrl->scope);
2588         ptr[j0]++;
2589         for (j = j0+1; j<ord_no; j++)
2590         {
2591             if (ptr[j] < before+after &&
2592                 (tst = scan_info_array[j].list[ptr[j]].term) &&
2593                 !strcmp (tst, mterm))
2594             {
2595                 RSET rsets[2];
2596                 
2597                 rsets[0] = rset;
2598                 rsets[1] =
2599                     rset_trunc(zh, &scan_info_array[j].list[ptr[j]].isam_p, 1,
2600                                glist[i+before].term,
2601                                strlen(glist[i+before].term), NULL, 0,
2602                                zapt->term->which,rset_nmem,
2603                                key_it_ctrl, key_it_ctrl->scope);
2604                 rset = rsmulti_or_create(rset_nmem, key_it_ctrl,
2605                                          2, key_it_ctrl->scope, rsets);
2606                 ptr[j]++;
2607             }
2608         }
2609         if (limit_set)
2610         {
2611             RSET rsets[2];
2612             rsets[0] = rset;
2613             rsets[1] = rset_dup(limit_set);
2614             
2615             rset = rsmulti_and_create(rset_nmem, key_it_ctrl,
2616                                       key_it_ctrl->scope, 2, rsets);
2617         }
2618         count_set(rset, &glist[i+before].occurrences);
2619         rset_delete(rset);
2620     }
2621     if (i < after)
2622     {
2623         *num_entries -= (after-i);
2624         *is_partial = 1;
2625     }
2626     
2627     /* consider terms before main term */
2628     for (i = 0; i<ord_no; i++)
2629         ptr[i] = 0;
2630     
2631     for (i = 0; i<before; i++)
2632     {
2633         int j, j0 = -1;
2634         const char *mterm = NULL;
2635         const char *tst;
2636         RSET rset;
2637         
2638         for (j = 0; j <ord_no; j++)
2639         {
2640             if (ptr[j] < before &&
2641                 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
2642                 (!mterm || strcmp (tst, mterm) > 0))
2643             {
2644                 j0 = j;
2645                     mterm = tst;
2646             }
2647         }
2648         if (j0 == -1)
2649             break;
2650         
2651         scan_term_untrans (zh, stream->mem, reg_id,
2652                            &glist[before-1-i].term, mterm);
2653         
2654         rset = rset_trunc
2655             (zh, &scan_info_array[j0].list[before-1-ptr[j0]].isam_p, 1,
2656              glist[before-1-i].term, strlen(glist[before-1-i].term),
2657              NULL, 0, zapt->term->which,rset_nmem,
2658              key_it_ctrl,key_it_ctrl->scope);
2659         
2660         ptr[j0]++;
2661         
2662         for (j = j0+1; j<ord_no; j++)
2663         {
2664             if (ptr[j] < before &&
2665                 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
2666                 !strcmp (tst, mterm))
2667             {
2668                 RSET rsets[2];
2669                 
2670                 rsets[0] = rset;
2671                 rsets[1] = rset_trunc(
2672                     zh,
2673                     &scan_info_array[j].list[before-1-ptr[j]].isam_p, 1,
2674                     glist[before-1-i].term,
2675                     strlen(glist[before-1-i].term), NULL, 0,
2676                     zapt->term->which, rset_nmem,
2677                     key_it_ctrl, key_it_ctrl->scope);
2678                 rset = rsmulti_or_create(rset_nmem, key_it_ctrl,
2679                                          2, key_it_ctrl->scope, rsets);
2680                 
2681                 ptr[j]++;
2682             }
2683         }
2684         if (limit_set)
2685         {
2686             RSET rsets[2];
2687             rsets[0] = rset;
2688             rsets[1] = rset_dup(limit_set);
2689             
2690             rset = rsmulti_and_create(rset_nmem, key_it_ctrl,
2691                                       key_it_ctrl->scope, 2, rsets);
2692         }
2693         count_set (rset, &glist[before-1-i].occurrences);
2694         rset_delete (rset);
2695     }
2696     i = before-i;
2697     if (i)
2698     {
2699         *is_partial = 1;
2700         *position -= i;
2701         *num_entries -= i;
2702     }
2703     
2704     nmem_destroy(rset_nmem);
2705     *list = glist + i;               /* list is set to first 'real' entry */
2706     
2707     yaz_log(YLOG_DEBUG, "position = %d, num_entries = %d",
2708             *position, *num_entries);
2709     if (zh->errCode)
2710         yaz_log(YLOG_DEBUG, "scan error: %d", zh->errCode);
2711 }
2712