String attribute support for record filter interface.
[idzebra-moved-to-github.git] / index / zrpn.c
1 /* $Id: zrpn.c,v 1.170 2005-03-05 09:19:15 adam Exp $
2    Copyright (C) 1995-2005
3    Index Data ApS
4
5 This file is part of the Zebra server.
6
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra.  If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA.
21 */
22
23
24 #include <stdio.h>
25 #include <assert.h>
26 #ifdef WIN32
27 #include <io.h>
28 #else
29 #include <unistd.h>
30 #endif
31 #include <ctype.h>
32
33 #include "index.h"
34 #include <zebra_xpath.h>
35
36 #include <charmap.h>
37 #include <rset.h>
38
39 /* maximum number of terms in an and/or/phrase item */
40 #define TERM_LIST_LENGTH_MAX 256
41
42 static const struct key_control it_ctrl =
43
44     sizeof(struct it_key),
45     2, /* we have sysnos and seqnos in this key, nothing more */
46     key_compare_it, 
47     key_logdump_txt,   /* FIXME  - clean up these functions */
48     key_get_seq,
49 };
50
51
52 const struct key_control *key_it_ctrl = &it_ctrl;
53
54 struct rpn_char_map_info
55 {
56     ZebraMaps zm;
57     int reg_type;
58 };
59
60 typedef struct
61 {
62     int type;
63     int major;
64     int minor;
65     Z_AttributesPlusTerm *zapt;
66 } AttrType;
67
68
/* Log level of the "rpn" log module; looked up on first use in
   add_isam_p. */
static int log_level_set = 0;   /* non-zero once log_level_rpn is set */
static int log_level_rpn = 0;   /* result of yaz_log_module_level("rpn") */
71
72 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
73 {
74     struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
75     const char **out = zebra_maps_input(p->zm, p->reg_type, from, len, 0);
76 #if 0
77     if (out && *out)
78     {
79         const char *outp = *out;
80         yaz_log(YLOG_LOG, "---");
81         while (*outp)
82         {
83             yaz_log(YLOG_LOG, "%02X", *outp);
84             outp++;
85         }
86     }
87 #endif
88     return out;
89 }
90
91 static void rpn_char_map_prepare(struct zebra_register *reg, int reg_type,
92                                   struct rpn_char_map_info *map_info)
93 {
94     map_info->zm = reg->zebra_maps;
95     map_info->reg_type = reg_type;
96     dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
97 }
98
99 static int attr_find_ex(AttrType *src, oid_value *attributeSetP,
100                          const char **string_value)
101 {
102     int num_attributes;
103
104     num_attributes = src->zapt->attributes->num_attributes;
105     while (src->major < num_attributes)
106     {
107         Z_AttributeElement *element;
108
109         element = src->zapt->attributes->attributes[src->major];
110         if (src->type == *element->attributeType)
111         {
112             switch (element->which) 
113             {
114             case Z_AttributeValue_numeric:
115                 ++(src->major);
116                 if (element->attributeSet && attributeSetP)
117                 {
118                     oident *attrset;
119
120                     attrset = oid_getentbyoid(element->attributeSet);
121                     *attributeSetP = attrset->value;
122                 }
123                 return *element->value.numeric;
124                 break;
125             case Z_AttributeValue_complex:
126                 if (src->minor >= element->value.complex->num_list)
127                     break;
128                 if (element->attributeSet && attributeSetP)
129                 {
130                     oident *attrset;
131                     
132                     attrset = oid_getentbyoid(element->attributeSet);
133                     *attributeSetP = attrset->value;
134                 }
135                 if (element->value.complex->list[src->minor]->which ==  
136                     Z_StringOrNumeric_numeric)
137                 {
138                     ++(src->minor);
139                     return
140                         *element->value.complex->list[src->minor-1]->u.numeric;
141                 }
142                 else if (element->value.complex->list[src->minor]->which ==  
143                          Z_StringOrNumeric_string)
144                 {
145                     if (!string_value)
146                         break;
147                     ++(src->minor);
148                     *string_value = 
149                         element->value.complex->list[src->minor-1]->u.string;
150                     return -2;
151                 }
152                 else
153                     break;
154             default:
155                 assert(0);
156             }
157         }
158         ++(src->major);
159     }
160     return -1;
161 }
162
163 static int attr_find(AttrType *src, oid_value *attributeSetP)
164 {
165     return attr_find_ex(src, attributeSetP, 0);
166 }
167
168 static void attr_init(AttrType *src, Z_AttributesPlusTerm *zapt,
169                        int type)
170 {
171     src->zapt = zapt;
172     src->type = type;
173     src->major = 0;
174     src->minor = 0;
175 }
176
177 #define TERM_COUNT        
178        
179 struct grep_info {        
180 #ifdef TERM_COUNT        
181     int *term_no;        
182 #endif        
183     ISAMC_P *isam_p_buf;
184     int isam_p_size;        
185     int isam_p_indx;
186     ZebraHandle zh;
187     int reg_type;
188     ZebraSet termset;
189 };        
190
191 static void term_untrans(ZebraHandle zh, int reg_type,
192                            char *dst, const char *src)
193 {
194     int len = 0;
195     while (*src)
196     {
197         const char *cp = zebra_maps_output(zh->reg->zebra_maps,
198                                            reg_type, &src);
199         if (!cp && len < IT_MAX_WORD-1)
200             dst[len++] = *src++;
201         else
202             while (*cp && len < IT_MAX_WORD-1)
203                 dst[len++] = *cp++;
204     }
205     dst[len] = '\0';
206 }
207
208 static void add_isam_p(const char *name, const char *info,
209                        struct grep_info *p)
210 {
211     if (!log_level_set)
212     {
213         log_level_rpn = yaz_log_module_level("rpn");
214         log_level_set = 1;
215     }
216     if (p->isam_p_indx == p->isam_p_size)
217     {
218         ISAMC_P *new_isam_p_buf;
219 #ifdef TERM_COUNT        
220         int *new_term_no;        
221 #endif
222         p->isam_p_size = 2*p->isam_p_size + 100;
223         new_isam_p_buf = (ISAMC_P *) xmalloc(sizeof(*new_isam_p_buf) *
224                                              p->isam_p_size);
225         if (p->isam_p_buf)
226         {
227             memcpy(new_isam_p_buf, p->isam_p_buf,
228                     p->isam_p_indx * sizeof(*p->isam_p_buf));
229             xfree(p->isam_p_buf);
230         }
231         p->isam_p_buf = new_isam_p_buf;
232
233 #ifdef TERM_COUNT
234         new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
235         if (p->term_no)
236         {
237             memcpy(new_term_no, p->isam_p_buf,
238                     p->isam_p_indx * sizeof(*p->term_no));
239             xfree(p->term_no);
240         }
241         p->term_no = new_term_no;
242 #endif
243     }
244     assert(*info == sizeof(*p->isam_p_buf));
245     memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
246
247 #if 1
248     if (p->termset)
249     {
250         const char *db;
251         int set, use;
252         char term_tmp[IT_MAX_WORD];
253         int su_code = 0;
254         int len = key_SU_decode (&su_code, name);
255         
256         term_untrans  (p->zh, p->reg_type, term_tmp, name+len+1);
257         yaz_log(log_level_rpn, "grep: %d %c %s", su_code, name[len], term_tmp);
258         zebraExplain_lookup_ord (p->zh->reg->zei,
259                                  su_code, &db, &set, &use);
260         yaz_log(log_level_rpn, "grep:  set=%d use=%d db=%s", set, use, db);
261         
262         resultSetAddTerm(p->zh, p->termset, name[len], db,
263                          set, use, term_tmp);
264     }
265 #endif
266     (p->isam_p_indx)++;
267 }
268
/* grep_handle - callback adapter: forwards a hit to add_isam_p with p
 * interpreted as the struct grep_info accumulator.  Always returns 0.
 */
static int grep_handle(char *name, const char *info, void *p)
{
    struct grep_info *gi = p;

    add_isam_p(name, info, gi);
    return 0;
}
274
275 static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src,
276                      const char *ct1, const char *ct2, int first)
277 {
278     const char *s1, *s0 = *src;
279     const char **map;
280
281     /* skip white space */
282     while (*s0)
283     {
284         if (ct1 && strchr(ct1, *s0))
285             break;
286         if (ct2 && strchr(ct2, *s0))
287             break;
288         s1 = s0;
289         map = zebra_maps_input(zebra_maps, reg_type, &s1, strlen(s1), first);
290         if (**map != *CHR_SPACE)
291             break;
292         s0 = s1;
293     }
294     *src = s0;
295     return *s0;
296 }
297
298 #define REGEX_CHARS " []()|.*+?!"
299
300 /* term_100: handle term, where trunc = none(no operators at all) */
301 static int term_100(ZebraMaps zebra_maps, int reg_type,
302                      const char **src, char *dst, int space_split,
303                      char *dst_term)
304 {
305     const char *s0, *s1;
306     const char **map;
307     int i = 0;
308     int j = 0;
309
310     const char *space_start = 0;
311     const char *space_end = 0;
312
313     if (!term_pre(zebra_maps, reg_type, src, NULL, NULL, !space_split))
314         return 0;
315     s0 = *src;
316     while (*s0)
317     {
318         s1 = s0;
319         map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0);
320         if (space_split)
321         {
322             if (**map == *CHR_SPACE)
323                 break;
324         }
325         else  /* complete subfield only. */
326         {
327             if (**map == *CHR_SPACE)
328             {   /* save space mapping for later  .. */
329                 space_start = s1;
330                 space_end = s0;
331                 continue;
332             }
333             else if (space_start)
334             {   /* reload last space */
335                 while (space_start < space_end)
336                 {
337                     if (strchr(REGEX_CHARS, *space_start))
338                         dst[i++] = '\\';
339                     dst_term[j++] = *space_start;
340                     dst[i++] = *space_start++;
341                 }
342                 /* and reset */
343                 space_start = space_end = 0;
344             }
345         }
346         /* add non-space char */
347         while (s1 < s0)
348         {
349             if (strchr(REGEX_CHARS, *s1))
350                 dst[i++] = '\\';
351             dst_term[j++] = *s1;
352             dst[i++] = *s1++;
353         }
354     }
355     dst[i] = '\0';
356     dst_term[j] = '\0';
357     *src = s0;
358     return i;
359 }
360
361 /* term_101: handle term, where trunc = Process # */
362 static int term_101(ZebraMaps zebra_maps, int reg_type,
363                      const char **src, char *dst, int space_split,
364                      char *dst_term)
365 {
366     const char *s0, *s1;
367     const char **map;
368     int i = 0;
369     int j = 0;
370
371     if (!term_pre(zebra_maps, reg_type, src, "#", "#", !space_split))
372         return 0;
373     s0 = *src;
374     while (*s0)
375     {
376         if (*s0 == '#')
377         {
378             dst[i++] = '.';
379             dst[i++] = '*';
380             dst_term[j++] = *s0++;
381         }
382         else
383         {
384             s1 = s0;
385             map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0);
386             if (space_split && **map == *CHR_SPACE)
387                 break;
388             while (s1 < s0)
389             {
390                 if (strchr(REGEX_CHARS, *s1))
391                     dst[i++] = '\\';
392                 dst_term[j++] = *s1;
393                 dst[i++] = *s1++;
394             }
395         }
396     }
397     dst[i] = '\0';
398     dst_term[j++] = '\0';
399     *src = s0;
400     return i;
401 }
402
403 /* term_103: handle term, where trunc = re-2 (regular expressions) */
404 static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src,
405                      char *dst, int *errors, int space_split,
406                      char *dst_term)
407 {
408     int i = 0;
409     int j = 0;
410     const char *s0, *s1;
411     const char **map;
412
413     if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split))
414         return 0;
415     s0 = *src;
416     if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
417         isdigit(((const unsigned char *)s0)[1]))
418     {
419         *errors = s0[1] - '0';
420         s0 += 3;
421         if (*errors > 3)
422             *errors = 3;
423     }
424     while (*s0)
425     {
426         if (strchr("^\\()[].*+?|-", *s0))
427         {
428             dst_term[j++] = *s0;
429             dst[i++] = *s0++;
430         }
431         else
432         {
433             s1 = s0;
434             map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0);
435             if (**map == *CHR_SPACE)
436                 break;
437             while (s1 < s0)
438             {
439                 if (strchr(REGEX_CHARS, *s1))
440                     dst[i++] = '\\';
441                 dst_term[j++] = *s1;
442                 dst[i++] = *s1++;
443             }
444         }
445     }
446     dst[i] = '\0';
447     dst_term[j] = '\0';
448     *src = s0;
449     return i;
450 }
451
452 /* term_103: handle term, where trunc = re-1 (regular expressions) */
453 static int term_102 (ZebraMaps zebra_maps, int reg_type, const char **src,
454                      char *dst, int space_split, char *dst_term)
455 {
456     return term_103(zebra_maps, reg_type, src, dst, NULL, space_split,
457                      dst_term);
458 }
459
460
461 /* term_104: handle term, where trunc = Process # and ! */
462 static int term_104(ZebraMaps zebra_maps, int reg_type,
463                      const char **src, char *dst, int space_split,
464                      char *dst_term)
465 {
466     const char *s0, *s1;
467     const char **map;
468     int i = 0;
469     int j = 0;
470
471     if (!term_pre(zebra_maps, reg_type, src, "?*#", "?*#", !space_split))
472         return 0;
473     s0 = *src;
474     while (*s0)
475     {
476         if (*s0 == '?')
477         {
478             dst_term[j++] = *s0++;
479             if (*s0 >= '0' && *s0 <= '9')
480             {
481                 int limit = 0;
482                 while (*s0 >= '0' && *s0 <= '9')
483                 {
484                     limit = limit * 10 + (*s0 - '0');
485                     dst_term[j++] = *s0++;
486                 }
487                 if (limit > 20)
488                     limit = 20;
489                 while (--limit >= 0)
490                 {
491                     dst[i++] = '.';
492                     dst[i++] = '?';
493                 }
494             }
495             else
496             {
497                 dst[i++] = '.';
498                 dst[i++] = '*';
499             }
500         }
501         else if (*s0 == '*')
502         {
503             dst[i++] = '.';
504             dst[i++] = '*';
505             dst_term[j++] = *s0++;
506         }
507         else if (*s0 == '#')
508         {
509             dst[i++] = '.';
510             dst_term[j++] = *s0++;
511         }
512         else
513         {
514             s1 = s0;
515             map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0);
516             if (space_split && **map == *CHR_SPACE)
517                 break;
518             while (s1 < s0)
519             {
520                 if (strchr(REGEX_CHARS, *s1))
521                     dst[i++] = '\\';
522                 dst_term[j++] = *s1;
523                 dst[i++] = *s1++;
524             }
525         }
526     }
527     dst[i] = '\0';
528     dst_term[j++] = '\0';
529     *src = s0;
530     return i;
531 }
532
533 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
534 static int term_105 (ZebraMaps zebra_maps, int reg_type,
535                      const char **src, char *dst, int space_split,
536                      char *dst_term, int right_truncate)
537 {
538     const char *s0, *s1;
539     const char **map;
540     int i = 0;
541     int j = 0;
542
543     if (!term_pre(zebra_maps, reg_type, src, "*!", "*!", !space_split))
544         return 0;
545     s0 = *src;
546     while (*s0)
547     {
548         if (*s0 == '*')
549         {
550             dst[i++] = '.';
551             dst[i++] = '*';
552             dst_term[j++] = *s0++;
553         }
554         else if (*s0 == '!')
555         {
556             dst[i++] = '.';
557             dst_term[j++] = *s0++;
558         }
559         else
560         {
561             s1 = s0;
562             map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0);
563             if (space_split && **map == *CHR_SPACE)
564                 break;
565             while (s1 < s0)
566             {
567                 if (strchr(REGEX_CHARS, *s1))
568                     dst[i++] = '\\';
569                 dst_term[j++] = *s1;
570                 dst[i++] = *s1++;
571             }
572         }
573     }
574     if (right_truncate)
575     {
576         dst[i++] = '.';
577         dst[i++] = '*';
578     }
579     dst[i] = '\0';
580     
581     dst_term[j++] = '\0';
582     *src = s0;
583     return i;
584 }
585
586
587 /* gen_regular_rel - generate regular expression from relation
588  *  val:     border value (inclusive)
589  *  islt:    1 if <=; 0 if >=.
590  */
591 static void gen_regular_rel(char *dst, int val, int islt)
592 {
593     int dst_p;
594     int w, d, i;
595     int pos = 0;
596     char numstr[20];
597
598     yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
599     if (val >= 0)
600     {
601         if (islt)
602             strcpy(dst, "(-[0-9]+|(");
603         else
604             strcpy(dst, "((");
605     } 
606     else
607     {
608         if (!islt)
609         {
610             strcpy(dst, "([0-9]+|-(");
611             dst_p = strlen(dst);
612             islt = 1;
613         }
614         else
615         {
616             strcpy(dst, "(-(");
617             islt = 0;
618         }
619         val = -val;
620     }
621     dst_p = strlen(dst);
622     sprintf(numstr, "%d", val);
623     for (w = strlen(numstr); --w >= 0; pos++)
624     {
625         d = numstr[w];
626         if (pos > 0)
627         {
628             if (islt)
629             {
630                 if (d == '0')
631                     continue;
632                 d--;
633             } 
634             else
635             {
636                 if (d == '9')
637                     continue;
638                 d++;
639             }
640         }
641         
642         strcpy(dst + dst_p, numstr);
643         dst_p = strlen(dst) - pos - 1;
644
645         if (islt)
646         {
647             if (d != '0')
648             {
649                 dst[dst_p++] = '[';
650                 dst[dst_p++] = '0';
651                 dst[dst_p++] = '-';
652                 dst[dst_p++] = d;
653                 dst[dst_p++] = ']';
654             }
655             else
656                 dst[dst_p++] = d;
657         }
658         else
659         {
660             if (d != '9')
661             { 
662                 dst[dst_p++] = '[';
663                 dst[dst_p++] = d;
664                 dst[dst_p++] = '-';
665                 dst[dst_p++] = '9';
666                 dst[dst_p++] = ']';
667             }
668             else
669                 dst[dst_p++] = d;
670         }
671         for (i = 0; i<pos; i++)
672         {
673             dst[dst_p++] = '[';
674             dst[dst_p++] = '0';
675             dst[dst_p++] = '-';
676             dst[dst_p++] = '9';
677             dst[dst_p++] = ']';
678         }
679         dst[dst_p++] = '|';
680     }
681     dst[dst_p] = '\0';
682     if (islt)
683     {
684         /* match everything less than 10^(pos-1) */
685         strcat(dst, "0*");
686         for (i = 1; i<pos; i++)
687             strcat(dst, "[0-9]?");
688     }
689     else
690     {
691         /* match everything greater than 10^pos */
692         for (i = 0; i <= pos; i++)
693             strcat(dst, "[0-9]");
694         strcat(dst, "[0-9]*");
695     }
696     strcat(dst, "))");
697 }
698
/* string_rel_add_char - copy one (possibly escaped) character
 *  term_p:  in/out write position in the destination
 *  src:     source string in which regex characters are escaped with
 *           a backslash
 *  indx:    in/out read index into src
 *
 * A backslash followed by another character is copied as a pair so an
 * escape is never split.  A backslash that is the last character of
 * src is copied on its own, so *indx stops at the terminating NUL
 * instead of moving past it.
 */
void string_rel_add_char(char **term_p, const char *src, int *indx)
{
    if (src[*indx] == '\\' && src[*indx + 1] != '\0')
        *(*term_p)++ = src[(*indx)++];
    *(*term_p)++ = src[(*indx)++];
}
705
706 /*
707  *   >  abc     ([b-].*|a[c-].*|ab[d-].*|abc.+)
708  *              ([^-a].*|a[^-b].*ab[^-c].*|abc.+)
709  *   >= abc     ([b-].*|a[c-].*|ab[c-].*)
710  *              ([^-a].*|a[^-b].*|ab[c-].*)
711  *   <  abc     ([-0].*|a[-a].*|ab[-b].*)
712  *              ([^a-].*|a[^b-].*|ab[^c-].*)
713  *   <= abc     ([-0].*|a[-a].*|ab[-b].*|abc)
714  *              ([^a-].*|a[^b-].*|ab[^c-].*|abc)
715  */
716 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
717                             const char **term_sub, char *term_dict,
718                             oid_value attributeSet,
719                             int reg_type, int space_split, char *term_dst)
720 {
721     AttrType relation;
722     int relation_value;
723     int i;
724     char *term_tmp = term_dict + strlen(term_dict);
725     char term_component[2*IT_MAX_WORD+20];
726
727     attr_init(&relation, zapt, 2);
728     relation_value = attr_find(&relation, NULL);
729
730     yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
731     switch (relation_value)
732     {
733     case 1:
734         if (!term_100 (zh->reg->zebra_maps, reg_type,
735                        term_sub, term_component,
736                        space_split, term_dst))
737             return 0;
738         yaz_log(log_level_rpn, "Relation <");
739         
740         *term_tmp++ = '(';
741         for (i = 0; term_component[i]; )
742         {
743             int j = 0;
744
745             if (i)
746                 *term_tmp++ = '|';
747             while (j < i)
748                 string_rel_add_char (&term_tmp, term_component, &j);
749
750             *term_tmp++ = '[';
751
752             *term_tmp++ = '^';
753             string_rel_add_char (&term_tmp, term_component, &i);
754             *term_tmp++ = '-';
755
756             *term_tmp++ = ']';
757             *term_tmp++ = '.';
758             *term_tmp++ = '*';
759
760             if ((term_tmp - term_dict) > IT_MAX_WORD)
761                 break;
762         }
763         *term_tmp++ = ')';
764         *term_tmp = '\0';
765         break;
766     case 2:
767         if (!term_100 (zh->reg->zebra_maps, reg_type,
768                        term_sub, term_component,
769                        space_split, term_dst))
770             return 0;
771         yaz_log(log_level_rpn, "Relation <=");
772
773         *term_tmp++ = '(';
774         for (i = 0; term_component[i]; )
775         {
776             int j = 0;
777
778             while (j < i)
779                 string_rel_add_char (&term_tmp, term_component, &j);
780             *term_tmp++ = '[';
781
782             *term_tmp++ = '^';
783             string_rel_add_char (&term_tmp, term_component, &i);
784             *term_tmp++ = '-';
785
786             *term_tmp++ = ']';
787             *term_tmp++ = '.';
788             *term_tmp++ = '*';
789
790             *term_tmp++ = '|';
791
792             if ((term_tmp - term_dict) > IT_MAX_WORD)
793                 break;
794         }
795         for (i = 0; term_component[i]; )
796             string_rel_add_char (&term_tmp, term_component, &i);
797         *term_tmp++ = ')';
798         *term_tmp = '\0';
799         break;
800     case 5:
801         if (!term_100 (zh->reg->zebra_maps, reg_type,
802                        term_sub, term_component, space_split, term_dst))
803             return 0;
804         yaz_log(log_level_rpn, "Relation >");
805
806         *term_tmp++ = '(';
807         for (i = 0; term_component[i];)
808         {
809             int j = 0;
810
811             while (j < i)
812                 string_rel_add_char (&term_tmp, term_component, &j);
813             *term_tmp++ = '[';
814             
815             *term_tmp++ = '^';
816             *term_tmp++ = '-';
817             string_rel_add_char (&term_tmp, term_component, &i);
818
819             *term_tmp++ = ']';
820             *term_tmp++ = '.';
821             *term_tmp++ = '*';
822
823             *term_tmp++ = '|';
824
825             if ((term_tmp - term_dict) > IT_MAX_WORD)
826                 break;
827         }
828         for (i = 0; term_component[i];)
829             string_rel_add_char (&term_tmp, term_component, &i);
830         *term_tmp++ = '.';
831         *term_tmp++ = '+';
832         *term_tmp++ = ')';
833         *term_tmp = '\0';
834         break;
835     case 4:
836         if (!term_100 (zh->reg->zebra_maps, reg_type, term_sub,
837                        term_component, space_split, term_dst))
838             return 0;
839         yaz_log(log_level_rpn, "Relation >=");
840
841         *term_tmp++ = '(';
842         for (i = 0; term_component[i];)
843         {
844             int j = 0;
845
846             if (i)
847                 *term_tmp++ = '|';
848             while (j < i)
849                 string_rel_add_char (&term_tmp, term_component, &j);
850             *term_tmp++ = '[';
851
852             if (term_component[i+1])
853             {
854                 *term_tmp++ = '^';
855                 *term_tmp++ = '-';
856                 string_rel_add_char (&term_tmp, term_component, &i);
857             }
858             else
859             {
860                 string_rel_add_char (&term_tmp, term_component, &i);
861                 *term_tmp++ = '-';
862             }
863             *term_tmp++ = ']';
864             *term_tmp++ = '.';
865             *term_tmp++ = '*';
866
867             if ((term_tmp - term_dict) > IT_MAX_WORD)
868                 break;
869         }
870         *term_tmp++ = ')';
871         *term_tmp = '\0';
872         break;
873     case 3:
874     default:
875         yaz_log(log_level_rpn, "Relation =");
876         if (!term_100 (zh->reg->zebra_maps, reg_type, term_sub,
877                        term_component, space_split, term_dst))
878             return 0;
879         strcat(term_tmp, "(");
880         strcat(term_tmp, term_component);
881         strcat(term_tmp, ")");
882     }
883     return 1;
884 }
885
886 static int string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
887                         const char **term_sub, 
888                         oid_value attributeSet, NMEM stream,
889                         struct grep_info *grep_info,
890                         int reg_type, int complete_flag,
891                         int num_bases, char **basenames,
892                         char *term_dst, int xpath_use);
893
894 static RSET term_trunc(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
895                         const char **term_sub, 
896                         oid_value attributeSet, NMEM stream,
897                         struct grep_info *grep_info,
898                         int reg_type, int complete_flag,
899                         int num_bases, char **basenames,
900                         char *term_dst,
901                         const char *rank_type, int xpath_use,
902                         NMEM rset_nmem)
903 {
904     int r;
905     grep_info->isam_p_indx = 0;
906     r = string_term(zh, zapt, term_sub, attributeSet, stream, grep_info,
907                      reg_type, complete_flag, num_bases, basenames,
908                      term_dst, xpath_use);
909     if (r < 1)
910         return 0;
911     yaz_log(log_level_rpn, "term: %s", term_dst);
912     return rset_trunc(zh, grep_info->isam_p_buf,
913                        grep_info->isam_p_indx, term_dst,
914                        strlen(term_dst), rank_type, 1 /* preserve pos */,
915                        zapt->term->which, rset_nmem,
916                        key_it_ctrl,key_it_ctrl->scope);
917 }
918 static char *nmem_strdup_i(NMEM nmem, int v)
919 {
920     char val_str[64];
921     sprintf (val_str, "%d", v);
922     return nmem_strdup(nmem, val_str);
923 }
924
925 static int string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
926                        const char **term_sub, 
927                        oid_value attributeSet, NMEM stream,
928                        struct grep_info *grep_info,
929                        int reg_type, int complete_flag,
930                        int num_bases, char **basenames,
931                        char *term_dst, int xpath_use)
932 {
933     char term_dict[2*IT_MAX_WORD+4000];
934     int j, r, base_no;
935     AttrType truncation;
936     int truncation_value;
937     AttrType use;
938     int use_value;
939     const char *use_string = 0;
940     oid_value curAttributeSet = attributeSet;
941     const char *termp;
942     struct rpn_char_map_info rcmi;
943     int space_split = complete_flag ? 0 : 1;
944
945     int bases_ok = 0;     /* no of databases with OK attribute */
946     int errCode = 0;      /* err code (if any is not OK) */
947     char *errString = 0;  /* addinfo */
948
949     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
950     attr_init (&use, zapt, 1);
951     use_value = attr_find_ex (&use, &curAttributeSet, &use_string);
952     yaz_log(log_level_rpn, "string_term, use value %d", use_value);
953     attr_init (&truncation, zapt, 5);
954     truncation_value = attr_find (&truncation, NULL);
955     yaz_log(log_level_rpn, "truncation value %d", truncation_value);
956
957     if (use_value == -1)    /* no attribute - assumy "any" */
958         use_value = 1016;
959     for (base_no = 0; base_no < num_bases; base_no++)
960     {
961         int ord = -1;
962         int attr_ok = 0;
963         int regex_range = 0;
964         int init_pos = 0;
965         attent attp;
966         data1_local_attribute id_xpath_attr;
967         data1_local_attribute *local_attr;
968         int max_pos, prefix_len = 0;
969
970         termp = *term_sub;
971
972         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
973         {
974             zh->errCode = 109; /* Database unavailable */
975             zh->errString = basenames[base_no];
976             return -1;
977         }
978         if (xpath_use > 0 && use_value == -2) 
979         {
980             /* xpath mode and we have a string attribute */
981             attp.local_attributes = &id_xpath_attr;
982             attp.attset_ordinal = VAL_IDXPATH;
983             id_xpath_attr.next = 0;
984
985             use_value = xpath_use;  /* xpath_use as use-attribute now */
986             id_xpath_attr.local = use_value;
987         }
988         else if (curAttributeSet == VAL_IDXPATH && use_value >= 0)
989         {
990             /* X-Path attribute, use numeric value directly */
991             attp.local_attributes = &id_xpath_attr;
992             attp.attset_ordinal = VAL_IDXPATH;
993             id_xpath_attr.next = 0;
994             id_xpath_attr.local = use_value;
995         }
996         else if (use_string &&
997                  (ord = zebraExplain_lookup_attr_str(zh->reg->zei,
998                                                      use_string)) >= 0)
999         {
1000             /* we have a match for a raw string attribute */
1001             char ord_buf[32];
1002             int i, ord_len;
1003
1004             if (prefix_len)
1005                 term_dict[prefix_len++] = '|';
1006             else
1007                 term_dict[prefix_len++] = '(';
1008             
1009             ord_len = key_SU_encode (ord, ord_buf);
1010             for (i = 0; i<ord_len; i++)
1011             {
1012                 term_dict[prefix_len++] = 1;
1013                 term_dict[prefix_len++] = ord_buf[i];
1014             }
1015             attp.local_attributes = 0;  /* no more attributes */
1016         }
1017         else 
1018         {
1019             /* lookup in the .att files . Allow string as well */
1020             if ((r = att_getentbyatt (zh, &attp, curAttributeSet, use_value,
1021                                       use_string)))
1022             {
1023                 yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
1024                       curAttributeSet, use_value, r);
1025                 if (r == -1)
1026                 {
1027                     /* set was found, but value wasn't defined */
1028                     errCode = 114;
1029                     if (use_string)
1030                         errString = nmem_strdup(stream, use_string);
1031                     else
1032                         errString = nmem_strdup_i (stream, use_value);
1033                 }
1034                 else
1035                 {
1036                     int oid[OID_SIZE];
1037                     struct oident oident;
1038                     
1039                     oident.proto = PROTO_Z3950;
1040                     oident.oclass = CLASS_ATTSET;
1041                     oident.value = curAttributeSet;
1042                     oid_ent_to_oid (&oident, oid);
1043                     
1044                     errCode = 121;
1045                     errString = nmem_strdup (stream, oident.desc);
1046                 }
1047                 continue;
1048             }
1049         }
1050         for (local_attr = attp.local_attributes; local_attr;
1051              local_attr = local_attr->next)
1052         {
1053             char ord_buf[32];
1054             int i, ord_len;
1055             
1056             ord = zebraExplain_lookup_attr_su(zh->reg->zei,
1057                                               attp.attset_ordinal,
1058                                               local_attr->local);
1059             if (ord < 0)
1060                 continue;
1061             if (prefix_len)
1062                 term_dict[prefix_len++] = '|';
1063             else
1064                 term_dict[prefix_len++] = '(';
1065             
1066             ord_len = key_SU_encode (ord, ord_buf);
1067             for (i = 0; i<ord_len; i++)
1068             {
1069                 term_dict[prefix_len++] = 1;
1070                 term_dict[prefix_len++] = ord_buf[i];
1071             }
1072         }
1073         if (!prefix_len)
1074         {
1075 #if 1
1076             bases_ok++;
1077 #else
1078             errCode = 114;
1079             errString = nmem_strdup_i(stream, use_value);
1080             continue;
1081 #endif
1082         }
1083         else
1084         {
1085             bases_ok++; /* this has OK attributes */
1086             attr_ok = 1;
1087         }
1088
1089         term_dict[prefix_len++] = ')';
1090         term_dict[prefix_len++] = 1;
1091         term_dict[prefix_len++] = reg_type;
1092         yaz_log(log_level_rpn, "reg_type = %d", term_dict[prefix_len-1]);
1093         term_dict[prefix_len] = '\0';
1094         j = prefix_len;
1095         switch (truncation_value)
1096         {
1097         case -1:         /* not specified */
1098         case 100:        /* do not truncate */
1099             if (!string_relation (zh, zapt, &termp, term_dict,
1100                                   attributeSet,
1101                                   reg_type, space_split, term_dst))
1102                 return 0;
1103             break;
1104         case 1:          /* right truncation */
1105             term_dict[j++] = '(';
1106             if (!term_100(zh->reg->zebra_maps, reg_type,
1107                           &termp, term_dict + j, space_split, term_dst))
1108                 return 0;
1109             strcat(term_dict, ".*)");
1110             break;
1111         case 2:          /* keft truncation */
1112             term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1113             if (!term_100(zh->reg->zebra_maps, reg_type,
1114                           &termp, term_dict + j, space_split, term_dst))
1115                 return 0;
1116             strcat(term_dict, ")");
1117             break;
1118         case 3:          /* left&right truncation */
1119             term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1120             if (!term_100(zh->reg->zebra_maps, reg_type,
1121                           &termp, term_dict + j, space_split, term_dst))
1122                 return 0;
1123             strcat(term_dict, ".*)");
1124             break;
1125         case 101:        /* process # in term */
1126             term_dict[j++] = '(';
1127             if (!term_101(zh->reg->zebra_maps, reg_type,
1128                           &termp, term_dict + j, space_split, term_dst))
1129                 return 0;
1130             strcat(term_dict, ")");
1131             break;
1132         case 102:        /* Regexp-1 */
1133             term_dict[j++] = '(';
1134             if (!term_102(zh->reg->zebra_maps, reg_type,
1135                           &termp, term_dict + j, space_split, term_dst))
1136                 return 0;
1137             strcat(term_dict, ")");
1138             break;
1139         case 103:       /* Regexp-2 */
1140             r = 1;
1141             term_dict[j++] = '(';
1142             init_pos = 2;
1143             if (!term_103 (zh->reg->zebra_maps, reg_type,
1144                            &termp, term_dict + j, &regex_range,
1145                            space_split, term_dst))
1146                 return 0;
1147             strcat(term_dict, ")");
1148         case 104:        /* process # and ! in term */
1149             term_dict[j++] = '(';
1150             if (!term_104 (zh->reg->zebra_maps, reg_type,
1151                            &termp, term_dict + j, space_split, term_dst))
1152                 return 0;
1153             strcat(term_dict, ")");
1154             break;
1155         case 105:        /* process * and ! in term */
1156             term_dict[j++] = '(';
1157             if (!term_105 (zh->reg->zebra_maps, reg_type,
1158                            &termp, term_dict + j, space_split, term_dst, 1))
1159                 return 0;
1160             strcat(term_dict, ")");
1161             break;
1162         case 106:        /* process * and ! in term */
1163             term_dict[j++] = '(';
1164             if (!term_105 (zh->reg->zebra_maps, reg_type,
1165                            &termp, term_dict + j, space_split, term_dst, 0))
1166                 return 0;
1167             strcat(term_dict, ")");
1168             break;
1169         default:
1170             zh->errCode = 120;
1171             zh->errString = nmem_strdup_i(stream, truncation_value);
1172             return -1;
1173         }
1174         if (attr_ok)
1175         {
1176             yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_dict+prefix_len);
1177             r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
1178                                  grep_info, &max_pos, init_pos,
1179                                  grep_handle);
1180             if (r)
1181                 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1182         }
1183     }
1184     if (!bases_ok)
1185     {
1186         zh->errCode = errCode;
1187         zh->errString = errString;
1188         return -1;
1189     }
1190     *term_sub = termp;
1191     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1192     return 1;
1193 }
1194
1195
1196 /* convert APT search term to UTF8 */
1197 static int zapt_term_to_utf8 (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1198                               char *termz)
1199 {
1200     size_t sizez;
1201     Z_Term *term = zapt->term;
1202
1203     switch (term->which)
1204     {
1205     case Z_Term_general:
1206         if (zh->iconv_to_utf8 != 0)
1207         {
1208             char *inbuf = term->u.general->buf;
1209             size_t inleft = term->u.general->len;
1210             char *outbuf = termz;
1211             size_t outleft = IT_MAX_WORD-1;
1212             size_t ret;
1213
1214             ret = yaz_iconv(zh->iconv_to_utf8, &inbuf, &inleft,
1215                         &outbuf, &outleft);
1216             if (ret == (size_t)(-1))
1217             {
1218                 ret = yaz_iconv(zh->iconv_to_utf8, 0, 0, 0, 0);
1219                 zh->errCode = 125;
1220                 return -1;
1221             }
1222             *outbuf = 0;
1223         }
1224         else
1225         {
1226             sizez = term->u.general->len;
1227             if (sizez > IT_MAX_WORD-1)
1228                 sizez = IT_MAX_WORD-1;
1229             memcpy (termz, term->u.general->buf, sizez);
1230             termz[sizez] = '\0';
1231         }
1232         break;
1233     case Z_Term_characterString:
1234         sizez = strlen(term->u.characterString);
1235         if (sizez > IT_MAX_WORD-1)
1236             sizez = IT_MAX_WORD-1;
1237         memcpy (termz, term->u.characterString, sizez);
1238         termz[sizez] = '\0';
1239         break;
1240     default:
1241         zh->errCode = 124;
1242         return -1;
1243     }
1244     return 0;
1245 }
1246
1247 /* convert APT SCAN term to internal cmap */
1248 static int trans_scan_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1249                             char *termz, int reg_type)
1250 {
1251     char termz0[IT_MAX_WORD];
1252
1253     if (zapt_term_to_utf8(zh, zapt, termz0))
1254         return -1;    /* error */
1255     else
1256     {
1257         const char **map;
1258         const char *cp = (const char *) termz0;
1259         const char *cp_end = cp + strlen(cp);
1260         const char *src;
1261         int i = 0;
1262         const char *space_map = NULL;
1263         int len;
1264             
1265         while ((len = (cp_end - cp)) > 0)
1266         {
1267             map = zebra_maps_input (zh->reg->zebra_maps, reg_type, &cp, len, 0);
1268             if (**map == *CHR_SPACE)
1269                 space_map = *map;
1270             else
1271             {
1272                 if (i && space_map)
1273                     for (src = space_map; *src; src++)
1274                         termz[i++] = *src;
1275                 space_map = NULL;
1276                 for (src = *map; *src; src++)
1277                     termz[i++] = *src;
1278             }
1279         }
1280         termz[i] = '\0';
1281     }
1282     return 0;
1283 }
1284
1285 char *normalize_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1286                      const char *termz, NMEM stream, unsigned reg_id)
1287 {
1288     WRBUF wrbuf = 0;
1289     AttrType truncation;
1290     int truncation_value;
1291     char *ex_list = 0;
1292
1293     attr_init (&truncation, zapt, 5);
1294     truncation_value = attr_find (&truncation, NULL);
1295
1296     switch (truncation_value)
1297     {
1298     default:
1299         ex_list = "";
1300         break;
1301     case 101:
1302         ex_list = "#";
1303         break;
1304     case 102:
1305     case 103:
1306         ex_list = 0;
1307         break;
1308     case 104:
1309         ex_list = "!#";
1310         break;
1311     case 105:
1312         ex_list = "!*";
1313         break;
1314     }
1315     if (ex_list)
1316         wrbuf = zebra_replace(zh->reg->zebra_maps, reg_id, ex_list,
1317                               termz, strlen(termz));
1318     if (!wrbuf)
1319         return nmem_strdup(stream, termz);
1320     else
1321     {
1322         char *buf = (char*) nmem_malloc(stream, wrbuf_len(wrbuf)+1);
1323         memcpy (buf, wrbuf_buf(wrbuf), wrbuf_len(wrbuf));
1324         buf[wrbuf_len(wrbuf)] = '\0';
1325         return buf;
1326     }
1327 }
1328
1329 static void grep_info_delete (struct grep_info *grep_info)
1330 {
1331 #ifdef TERM_COUNT
1332     xfree(grep_info->term_no);
1333 #endif
1334     xfree (grep_info->isam_p_buf);
1335 }
1336
1337 static int grep_info_prepare (ZebraHandle zh,
1338                               Z_AttributesPlusTerm *zapt,
1339                               struct grep_info *grep_info,
1340                               int reg_type,
1341                               NMEM stream)
1342 {
1343     AttrType termset;
1344     int termset_value_numeric;
1345     const char *termset_value_string;
1346
1347 #ifdef TERM_COUNT
1348     grep_info->term_no = 0;
1349 #endif
1350     grep_info->isam_p_size = 0;
1351     grep_info->isam_p_buf = NULL;
1352     grep_info->zh = zh;
1353     grep_info->reg_type = reg_type;
1354     grep_info->termset = 0;
1355
1356     if (!zapt)
1357         return 0;
1358     attr_init (&termset, zapt, 8);
1359     termset_value_numeric =
1360         attr_find_ex (&termset, NULL, &termset_value_string);
1361     if (termset_value_numeric != -1)
1362     {
1363         char resname[32];
1364         const char *termset_name = 0;
1365         if (termset_value_numeric != -2)
1366         {
1367     
1368             sprintf (resname, "%d", termset_value_numeric);
1369             termset_name = resname;
1370         }
1371         else
1372             termset_name = termset_value_string;
1373         yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1374         grep_info->termset = resultSetAdd (zh, termset_name, 1);
1375         if (!grep_info->termset)
1376         {
1377             zh->errCode = 128;
1378             zh->errString = nmem_strdup (stream, termset_name);
1379             return -1;
1380         }
1381     }
1382     return 0;
1383 }
1384                                
1385
1386 static RSET rpn_search_APT_phrase (ZebraHandle zh,
1387                                    Z_AttributesPlusTerm *zapt,
1388                                    const char *termz_org,
1389                                    oid_value attributeSet,
1390                                    NMEM stream,
1391                                    int reg_type, int complete_flag,
1392                                    const char *rank_type, int xpath_use,
1393                                    int num_bases, char **basenames, 
1394                                    NMEM rset_nmem)
1395 {
1396     char term_dst[IT_MAX_WORD+1];
1397     RSET rset[TERM_LIST_LENGTH_MAX], result;
1398     size_t rset_no = 0;
1399     struct grep_info grep_info;
1400     char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type);
1401     const char *termp = termz;
1402
1403     *term_dst = 0;
1404     if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream))
1405         return 0;
1406     for (; rset_no < sizeof(rset)/sizeof(*rset); rset_no++)
1407     { 
1408         yaz_log(log_level_rpn, "APT_phrase termp=%s", termp);
1409         rset[rset_no] = term_trunc(zh, zapt, &termp, attributeSet,
1410                                     stream, &grep_info,
1411                                     reg_type, complete_flag,
1412                                     num_bases, basenames,
1413                                     term_dst, rank_type,
1414                                     xpath_use,rset_nmem);
1415         if (!rset[rset_no])
1416             break;
1417     }
1418     grep_info_delete (&grep_info);
1419     if (rset_no == 0)
1420         return rsnull_create (rset_nmem,key_it_ctrl); 
1421     else if (rset_no == 1)
1422         return (rset[0]);
1423     else
1424         result = rsprox_create( rset_nmem, key_it_ctrl, key_it_ctrl->scope,
1425                        rset_no, rset,
1426                        1 /* ordered */, 0 /* exclusion */,
1427                        3 /* relation */, 1 /* distance */);
1428     return result;
1429 }
1430
1431 static RSET rpn_search_APT_or_list (ZebraHandle zh,
1432                                     Z_AttributesPlusTerm *zapt,
1433                                     const char *termz_org,
1434                                     oid_value attributeSet,
1435                                     NMEM stream,
1436                                     int reg_type, int complete_flag,
1437                                     const char *rank_type,
1438                                     int xpath_use,
1439                                     int num_bases, char **basenames,
1440                                     NMEM rset_nmem)
1441 {
1442     char term_dst[IT_MAX_WORD+1];
1443     RSET rset[TERM_LIST_LENGTH_MAX];
1444     size_t rset_no = 0;
1445     struct grep_info grep_info;
1446     char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type);
1447     const char *termp = termz;
1448
1449     if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream))
1450         return 0;
1451     for (; rset_no < sizeof(rset)/sizeof(*rset); rset_no++)
1452     { 
1453         yaz_log(log_level_rpn, "APT_or_list termp=%s", termp);
1454         rset[rset_no] = term_trunc(zh, zapt, &termp, attributeSet,
1455                                     stream, &grep_info,
1456                                     reg_type, complete_flag,
1457                                     num_bases, basenames,
1458                                     term_dst, rank_type,
1459                                     xpath_use,rset_nmem);
1460         if (!rset[rset_no])
1461             break;
1462     }
1463     grep_info_delete (&grep_info);
1464     if (rset_no == 0)
1465         return rsnull_create (rset_nmem,key_it_ctrl);  
1466     return rsmulti_or_create(rset_nmem, key_it_ctrl,key_it_ctrl->scope,
1467                              rset_no, rset);
1468 }
1469
1470 static RSET rpn_search_APT_and_list (ZebraHandle zh,
1471                                      Z_AttributesPlusTerm *zapt,
1472                                      const char *termz_org,
1473                                      oid_value attributeSet,
1474                                      NMEM stream,
1475                                      int reg_type, int complete_flag,
1476                                      const char *rank_type, 
1477                                      int xpath_use,
1478                                      int num_bases, char **basenames,
1479                                      NMEM rset_nmem)
1480 {
1481     char term_dst[IT_MAX_WORD+1];
1482     RSET rset[TERM_LIST_LENGTH_MAX];
1483     size_t rset_no = 0;
1484     struct grep_info grep_info;
1485     char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type);
1486     const char *termp = termz;
1487
1488     if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream))
1489         return 0;
1490     for (; rset_no < sizeof(rset)/sizeof(*rset); rset_no++)
1491     { 
1492         yaz_log(log_level_rpn, "APT_and_list termp=%s", termp);
1493         rset[rset_no] = term_trunc(zh, zapt, &termp, attributeSet,
1494                                     stream, &grep_info,
1495                                     reg_type, complete_flag,
1496                                     num_bases, basenames,
1497                                     term_dst, rank_type,
1498                                     xpath_use, rset_nmem);
1499         if (!rset[rset_no])
1500             break;
1501     }
1502     grep_info_delete (&grep_info);
1503     if (rset_no == 0)
1504         return rsnull_create(rset_nmem,key_it_ctrl); 
1505     
1506     return rsmulti_and_create(rset_nmem, key_it_ctrl, key_it_ctrl->scope,
1507                               rset_no, rset);
1508 }
1509
1510 static int numeric_relation (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1511                              const char **term_sub,
1512                              char *term_dict,
1513                              oid_value attributeSet,
1514                              struct grep_info *grep_info,
1515                              int *max_pos,
1516                              int reg_type,
1517                              char *term_dst)
1518 {
1519     AttrType relation;
1520     int relation_value;
1521     int term_value;
1522     int r;
1523     char *term_tmp = term_dict + strlen(term_dict);
1524
1525     attr_init (&relation, zapt, 2);
1526     relation_value = attr_find (&relation, NULL);
1527
1528     yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1529
1530     if (!term_100 (zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1531                    term_dst))
1532         return 0;
1533     term_value = atoi (term_tmp);
1534     switch (relation_value)
1535     {
1536     case 1:
1537         yaz_log(log_level_rpn, "Relation <");
1538         gen_regular_rel (term_tmp, term_value-1, 1);
1539         break;
1540     case 2:
1541         yaz_log(log_level_rpn, "Relation <=");
1542         gen_regular_rel (term_tmp, term_value, 1);
1543         break;
1544     case 4:
1545         yaz_log(log_level_rpn, "Relation >=");
1546         gen_regular_rel (term_tmp, term_value, 0);
1547         break;
1548     case 5:
1549         yaz_log(log_level_rpn, "Relation >");
1550         gen_regular_rel (term_tmp, term_value+1, 0);
1551         break;
1552     case 3:
1553     default:
1554         yaz_log(log_level_rpn, "Relation =");
1555         sprintf (term_tmp, "(0*%d)", term_value);
1556     }
1557     yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_tmp);
1558     r = dict_lookup_grep(zh->reg->dict, term_dict, 0, grep_info, max_pos,
1559                           0, grep_handle);
1560     if (r)
1561         yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1562     yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1563     return 1;
1564 }
1565
1566 static int numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1567                          const char **term_sub, 
1568                          oid_value attributeSet, struct grep_info *grep_info,
1569                          int reg_type, int complete_flag,
1570                          int num_bases, char **basenames,
1571                          char *term_dst, int xpath_use, NMEM stream)
1572 {
1573     char term_dict[2*IT_MAX_WORD+2];
1574     int r, base_no;
1575     AttrType use;
1576     int use_value;
1577     const char *use_string = 0;
1578     oid_value curAttributeSet = attributeSet;
1579     const char *termp;
1580     struct rpn_char_map_info rcmi;
1581
1582     int bases_ok = 0;     /* no of databases with OK attribute */
1583     int errCode = 0;      /* err code (if any is not OK) */
1584     char *errString = 0;  /* addinfo */
1585
1586     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1587     attr_init (&use, zapt, 1);
1588     use_value = attr_find_ex (&use, &curAttributeSet, &use_string);
1589
1590     if (use_value == -1)
1591         use_value = 1016;
1592
1593     for (base_no = 0; base_no < num_bases; base_no++)
1594     {
1595         attent attp;
1596         data1_local_attribute id_xpath_attr;
1597         data1_local_attribute *local_attr;
1598         int max_pos, prefix_len = 0;
1599
1600         termp = *term_sub;
1601         if (use_value == -2)  /* string attribute (assume IDXPATH/any) */
1602         {
1603             use_value = xpath_use;
1604             attp.local_attributes = &id_xpath_attr;
1605             attp.attset_ordinal = VAL_IDXPATH;
1606             id_xpath_attr.next = 0;
1607             id_xpath_attr.local = use_value;
1608         }
1609         else if (curAttributeSet == VAL_IDXPATH)
1610         {
1611             attp.local_attributes = &id_xpath_attr;
1612             attp.attset_ordinal = VAL_IDXPATH;
1613             id_xpath_attr.next = 0;
1614             id_xpath_attr.local = use_value;
1615         }
1616         else
1617         {
1618             if ((r = att_getentbyatt (zh, &attp, curAttributeSet, use_value,
1619                                             use_string)))
1620             {
1621                 yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
1622                       curAttributeSet, use_value, r);
1623                 if (r == -1)
1624                 {
1625                     errString = nmem_strdup_i(stream, use_value);
1626                     errCode = 114;
1627                 }
1628                 else
1629                     errCode = 121;
1630                 continue;
1631             }
1632         }
1633         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1634         {
1635             zh->errCode = 109; /* Database unavailable */
1636             zh->errString = basenames[base_no];
1637             return -1;
1638         }
1639         for (local_attr = attp.local_attributes; local_attr;
1640              local_attr = local_attr->next)
1641         {
1642             int ord;
1643             char ord_buf[32];
1644             int i, ord_len;
1645
1646             ord = zebraExplain_lookup_attr_su(zh->reg->zei,
1647                                               attp.attset_ordinal,
1648                                               local_attr->local);
1649             if (ord < 0)
1650                 continue;
1651             if (prefix_len)
1652                 term_dict[prefix_len++] = '|';
1653             else
1654                 term_dict[prefix_len++] = '(';
1655
1656             ord_len = key_SU_encode (ord, ord_buf);
1657             for (i = 0; i<ord_len; i++)
1658             {
1659                 term_dict[prefix_len++] = 1;
1660                 term_dict[prefix_len++] = ord_buf[i];
1661             }
1662         }
1663         if (!prefix_len)
1664         {
1665             errCode = 114;
1666             errString = nmem_strdup_i(stream, use_value);
1667             continue;
1668         }
1669         bases_ok++;
1670         term_dict[prefix_len++] = ')';        
1671         term_dict[prefix_len++] = 1;
1672         term_dict[prefix_len++] = reg_type;
1673         yaz_log(YLOG_DEBUG, "reg_type = %d", term_dict[prefix_len-1]);
1674         term_dict[prefix_len] = '\0';
1675         if (!numeric_relation (zh, zapt, &termp, term_dict,
1676                                attributeSet, grep_info, &max_pos, reg_type,
1677                                term_dst))
1678             return 0;
1679     }
1680     if (!bases_ok)
1681     {
1682         zh->errCode = errCode;
1683         zh->errString = errString;
1684         return -1;
1685     }
1686     *term_sub = termp;
1687     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1688     return 1;
1689 }
1690
1691 static RSET rpn_search_APT_numeric (ZebraHandle zh,
1692                                     Z_AttributesPlusTerm *zapt,
1693                                     const char *termz,
1694                                     oid_value attributeSet,
1695                                     NMEM stream,
1696                                     int reg_type, int complete_flag,
1697                                     const char *rank_type, int xpath_use,
1698                                     int num_bases, char **basenames,
1699                                     NMEM rset_nmem)
1700 {
1701     char term_dst[IT_MAX_WORD+1];
1702     const char *termp = termz;
1703     RSET rset[TERM_LIST_LENGTH_MAX];
1704     int  r;
1705     size_t rset_no = 0;
1706     struct grep_info grep_info;
1707
1708     yaz_log(log_level_rpn, "APT_numeric t='%s'",termz);
1709     if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream))
1710         return 0;
1711     for (; rset_no < sizeof(rset)/sizeof(*rset); rset_no++)
1712     { 
1713         yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1714         grep_info.isam_p_indx = 0;
1715         r = numeric_term(zh, zapt, &termp, attributeSet, &grep_info,
1716                           reg_type, complete_flag, num_bases, basenames,
1717                           term_dst, xpath_use,
1718                           stream);
1719         if (r < 1)
1720             break;
1721         yaz_log(YLOG_DEBUG, "term: %s", term_dst);
1722         rset[rset_no] = rset_trunc(zh, grep_info.isam_p_buf,
1723                                     grep_info.isam_p_indx, term_dst,
1724                                     strlen(term_dst), rank_type,
1725                                     0 /* preserve position */,
1726                                     zapt->term->which, rset_nmem, 
1727                                     key_it_ctrl,key_it_ctrl->scope);
1728         if (!rset[rset_no])
1729             break;
1730     }
1731     grep_info_delete (&grep_info);
1732     if (rset_no == 0)
1733         return rsnull_create(rset_nmem,key_it_ctrl);
1734     if (rset_no == 1)
1735         return rset[0];
1736     return rsmulti_and_create(rset_nmem,key_it_ctrl,key_it_ctrl->scope,
1737                               rset_no, rset);
1738 }
1739
1740 static RSET rpn_search_APT_local (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1741                                   const char *termz,
1742                                   oid_value attributeSet,
1743                                   NMEM stream,
1744                                   const char *rank_type, NMEM rset_nmem)
1745 {
1746     RSET result;
1747     RSFD rsfd;
1748     struct it_key key;
1749     int sys;
1750     result = rstemp_create( rset_nmem,key_it_ctrl,key_it_ctrl->scope,
1751                      res_get (zh->res, "setTmpDir"),0 );
1752     rsfd = rset_open (result, RSETF_WRITE);
1753
1754     sys = atoi(termz);
1755     if (sys <= 0)
1756         sys = 1;
1757     key.mem[0] = sys;
1758     key.mem[1] = 1;
1759     key.len = 2;
1760     rset_write (rsfd, &key);
1761     rset_close (rsfd);
1762     return result;
1763 }
1764
1765 static RSET rpn_sort_spec (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1766                            oid_value attributeSet, NMEM stream,
1767                            Z_SortKeySpecList *sort_sequence,
1768                            const char *rank_type)
1769 {
1770     int i;
1771     int sort_relation_value;
1772     AttrType sort_relation_type;
1773     int use_value;
1774     AttrType use_type;
1775     Z_SortKeySpec *sks;
1776     Z_SortKey *sk;
1777     Z_AttributeElement *ae;
1778     int oid[OID_SIZE];
1779     oident oe;
1780     char termz[20];
1781     
1782     attr_init (&sort_relation_type, zapt, 7);
1783     sort_relation_value = attr_find (&sort_relation_type, &attributeSet);
1784
1785     attr_init (&use_type, zapt, 1);
1786     use_value = attr_find (&use_type, &attributeSet);
1787
1788     if (!sort_sequence->specs)
1789     {
1790         sort_sequence->num_specs = 10;
1791         sort_sequence->specs = (Z_SortKeySpec **)
1792             nmem_malloc(stream, sort_sequence->num_specs *
1793                          sizeof(*sort_sequence->specs));
1794         for (i = 0; i<sort_sequence->num_specs; i++)
1795             sort_sequence->specs[i] = 0;
1796     }
1797     if (zapt->term->which != Z_Term_general)
1798         i = 0;
1799     else
1800         i = atoi_n ((char *) zapt->term->u.general->buf,
1801                     zapt->term->u.general->len);
1802     if (i >= sort_sequence->num_specs)
1803         i = 0;
1804     sprintf (termz, "%d", i);
1805
1806     oe.proto = PROTO_Z3950;
1807     oe.oclass = CLASS_ATTSET;
1808     oe.value = attributeSet;
1809     if (!oid_ent_to_oid (&oe, oid))
1810         return 0;
1811
1812     sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1813     sks->sortElement = (Z_SortElement *)
1814         nmem_malloc(stream, sizeof(*sks->sortElement));
1815     sks->sortElement->which = Z_SortElement_generic;
1816     sk = sks->sortElement->u.generic = (Z_SortKey *)
1817         nmem_malloc(stream, sizeof(*sk));
1818     sk->which = Z_SortKey_sortAttributes;
1819     sk->u.sortAttributes = (Z_SortAttributes *)
1820         nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
1821
1822     sk->u.sortAttributes->id = oid;
1823     sk->u.sortAttributes->list = (Z_AttributeList *)
1824         nmem_malloc(stream, sizeof(*sk->u.sortAttributes->list));
1825     sk->u.sortAttributes->list->num_attributes = 1;
1826     sk->u.sortAttributes->list->attributes = (Z_AttributeElement **)
1827         nmem_malloc(stream, sizeof(*sk->u.sortAttributes->list->attributes));
1828     ae = *sk->u.sortAttributes->list->attributes = (Z_AttributeElement *)
1829         nmem_malloc(stream, sizeof(**sk->u.sortAttributes->list->attributes));
1830     ae->attributeSet = 0;
1831     ae->attributeType = (int *)
1832         nmem_malloc(stream, sizeof(*ae->attributeType));
1833     *ae->attributeType = 1;
1834     ae->which = Z_AttributeValue_numeric;
1835     ae->value.numeric = (int *)
1836         nmem_malloc(stream, sizeof(*ae->value.numeric));
1837     *ae->value.numeric = use_value;
1838
1839     sks->sortRelation = (int *)
1840         nmem_malloc(stream, sizeof(*sks->sortRelation));
1841     if (sort_relation_value == 1)
1842         *sks->sortRelation = Z_SortKeySpec_ascending;
1843     else if (sort_relation_value == 2)
1844         *sks->sortRelation = Z_SortKeySpec_descending;
1845     else 
1846         *sks->sortRelation = Z_SortKeySpec_ascending;
1847
1848     sks->caseSensitivity = (int *)
1849         nmem_malloc(stream, sizeof(*sks->caseSensitivity));
1850     *sks->caseSensitivity = 0;
1851
1852     sks->which = Z_SortKeySpec_null;
1853     sks->u.null = odr_nullval ();
1854     sort_sequence->specs[i] = sks;
1855     return rsnull_create (NULL,key_it_ctrl);
1856         /* FIXME - nmem?? */
1857 }
1858
1859
1860 static int parse_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1861                        oid_value attributeSet,
1862                        struct xpath_location_step *xpath, int max, NMEM mem)
1863 {
1864     oid_value curAttributeSet = attributeSet;
1865     AttrType use;
1866     const char *use_string = 0;
1867     
1868     attr_init (&use, zapt, 1);
1869     attr_find_ex (&use, &curAttributeSet, &use_string);
1870
1871     if (!use_string || *use_string != '/')
1872         return -1;
1873
1874     return zebra_parse_xpath_str(use_string, xpath, max, mem);
1875 }
1876  
1877                
1878
1879 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
1880                         int reg_type, const char *term, int use,
1881                         oid_value curAttributeSet, NMEM rset_nmem)
1882 {
1883     RSET rset;
1884     struct grep_info grep_info;
1885     char term_dict[2048];
1886     char ord_buf[32];
1887     int prefix_len = 0;
1888     int ord = zebraExplain_lookup_attr_su(zh->reg->zei, curAttributeSet, use);
1889     int ord_len, i, r, max_pos;
1890     int term_type = Z_Term_characterString;
1891     const char *flags = "void";
1892
1893     if (grep_info_prepare (zh, 0 /* zapt */, &grep_info, '0', stream))
1894         return rsnull_create (rset_nmem,key_it_ctrl);
1895
1896     if (ord < 0)
1897         return rsnull_create (rset_nmem,key_it_ctrl);
1898     if (prefix_len)
1899         term_dict[prefix_len++] = '|';
1900     else
1901         term_dict[prefix_len++] = '(';
1902     
1903     ord_len = key_SU_encode (ord, ord_buf);
1904     for (i = 0; i<ord_len; i++)
1905     {
1906         term_dict[prefix_len++] = 1;
1907         term_dict[prefix_len++] = ord_buf[i];
1908     }
1909     term_dict[prefix_len++] = ')';
1910     term_dict[prefix_len++] = 1;
1911     term_dict[prefix_len++] = reg_type;
1912     
1913     strcpy(term_dict+prefix_len, term);
1914     
1915     grep_info.isam_p_indx = 0;
1916     r = dict_lookup_grep(zh->reg->dict, term_dict, 0,
1917                           &grep_info, &max_pos, 0, grep_handle);
1918     yaz_log (YLOG_LOG, "%s %d positions", term,
1919              grep_info.isam_p_indx);
1920     rset = rset_trunc(zh, grep_info.isam_p_buf,
1921                        grep_info.isam_p_indx, term, strlen(term),
1922                        flags, 1, term_type,rset_nmem,
1923                        key_it_ctrl, key_it_ctrl->scope);
1924     grep_info_delete (&grep_info);
1925     return rset;
1926 }
1927
1928 static RSET rpn_search_xpath (ZebraHandle zh,
1929                               oid_value attributeSet,
1930                               int num_bases, char **basenames,
1931                               NMEM stream, const char *rank_type, RSET rset,
1932                               int xpath_len, struct xpath_location_step *xpath,
1933                               NMEM rset_nmem)
1934 {
1935     oid_value curAttributeSet = attributeSet;
1936     int base_no;
1937     int i;
1938
1939     if (xpath_len < 0)
1940         return rset;
1941
1942     yaz_log (YLOG_DEBUG, "xpath len=%d", xpath_len);
1943     for (i = 0; i<xpath_len; i++)
1944     {
1945         yaz_log (log_level_rpn, "XPATH %d %s", i, xpath[i].part);
1946
1947     }
1948
1949     curAttributeSet = VAL_IDXPATH;
1950
1951     /*
1952       //a    ->    a/.*
1953       //a/b  ->    b/a/.*
1954       /a     ->    a/
1955       /a/b   ->    b/a/
1956
1957       /      ->    none
1958
1959    a[@attr = value]/b[@other = othervalue]
1960
1961  /e/@a val      range(e/,range(@a,freetext(w,1015,val),@a),e/)
1962  /a/b val       range(b/a/,freetext(w,1016,val),b/a/)
1963  /a/b/@c val    range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
1964  /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
1965  /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
1966  /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
1967       
1968     */
1969
1970     dict_grep_cmap (zh->reg->dict, 0, 0);
1971
1972     for (base_no = 0; base_no < num_bases; base_no++)
1973     {
1974         int level = xpath_len;
1975         int first_path = 1;
1976         
1977         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1978         {
1979             zh->errCode = 109; /* Database unavailable */
1980             zh->errString = basenames[base_no];
1981             return rset;
1982         }
1983         while (--level >= 0)
1984         {
1985             char xpath_rev[128];
1986             int i, len;
1987             RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
1988
1989             *xpath_rev = 0;
1990             len = 0;
1991             for (i = level; i >= 1; --i)
1992             {
1993                 const char *cp = xpath[i].part;
1994                 if (*cp)
1995                 {
1996                     for (;*cp; cp++)
1997                         if (*cp == '*')
1998                         {
1999                             memcpy (xpath_rev + len, "[^/]*", 5);
2000                             len += 5;
2001                         }
2002                         else if (*cp == ' ')
2003                         {
2004
2005                             xpath_rev[len++] = 1;
2006                             xpath_rev[len++] = ' ';
2007                         }
2008
2009                         else
2010                             xpath_rev[len++] = *cp;
2011                     xpath_rev[len++] = '/';
2012                 }
2013                 else if (i == 1)  /* // case */
2014                 {
2015                     xpath_rev[len++] = '.';
2016                     xpath_rev[len++] = '*';
2017                 }
2018             }
2019             xpath_rev[len] = 0;
2020
2021             if (xpath[level].predicate &&
2022                 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2023                 xpath[level].predicate->u.relation.name[0])
2024             {
2025                 WRBUF wbuf = wrbuf_alloc();
2026                 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2027                 if (xpath[level].predicate->u.relation.value)
2028                 {
2029                     const char *cp = xpath[level].predicate->u.relation.value;
2030                     wrbuf_putc(wbuf, '=');
2031                     
2032                     while (*cp)
2033                     {
2034                         if (strchr(REGEX_CHARS, *cp))
2035                             wrbuf_putc(wbuf, '\\');
2036                         wrbuf_putc(wbuf, *cp);
2037                         cp++;
2038                     }
2039                 }
2040                 wrbuf_puts(wbuf, "");
2041                 rset_attr = xpath_trunc(
2042                     zh, stream, '0', wrbuf_buf(wbuf), 3, 
2043                     curAttributeSet,rset_nmem);
2044                 wrbuf_free(wbuf, 1);
2045             } 
2046             else 
2047             {
2048                 if (!first_path)
2049                     continue;
2050             }
2051             yaz_log (log_level_rpn, "xpath_rev (%d) = %s", level, xpath_rev);
2052             if (strlen(xpath_rev))
2053             {
2054                 rset_start_tag = xpath_trunc(zh, stream, '0', 
2055                         xpath_rev, 1, curAttributeSet, rset_nmem);
2056             
2057                 rset_end_tag = xpath_trunc(zh, stream, '0', 
2058                         xpath_rev, 2, curAttributeSet, rset_nmem);
2059
2060                 rset = rsbetween_create(rset_nmem, key_it_ctrl,
2061                                         key_it_ctrl->scope,
2062                                         rset_start_tag, rset,
2063                                         rset_end_tag, rset_attr);
2064             }
2065             first_path = 0;
2066         }
2067     }
2068
2069     return rset;
2070 }
2071
2072
2073
2074 static RSET rpn_search_APT (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2075                             oid_value attributeSet, NMEM stream,
2076                             Z_SortKeySpecList *sort_sequence,
2077                             int num_bases, char **basenames, 
2078                             NMEM rset_nmem)
2079 {
2080     unsigned reg_id;
2081     char *search_type = NULL;
2082     char rank_type[128];
2083     int complete_flag;
2084     int sort_flag;
2085     char termz[IT_MAX_WORD+1];
2086     RSET rset = 0;
2087     int xpath_len;
2088     int xpath_use = 0;
2089     struct xpath_location_step xpath[10];
2090
2091     if (!log_level_set)
2092     {
2093         log_level_rpn = yaz_log_module_level("rpn");
2094         log_level_set = 1;
2095     }
2096     zebra_maps_attr (zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2097                      rank_type, &complete_flag, &sort_flag);
2098     
2099     yaz_log(YLOG_DEBUG, "reg_id=%c", reg_id);
2100     yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2101     yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2102     yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2103
2104     if (zapt_term_to_utf8(zh, zapt, termz))
2105         return 0;
2106
2107     if (sort_flag)
2108         return rpn_sort_spec (zh, zapt, attributeSet, stream, sort_sequence,
2109                               rank_type);
2110     xpath_len = parse_xpath(zh, zapt, attributeSet, xpath, 10, stream);
2111     if (xpath_len >= 0)
2112     {
2113         xpath_use = 1016;
2114         if (xpath[xpath_len-1].part[0] == '@')
2115             xpath_use = 1015;
2116     }
2117
2118     if (!strcmp (search_type, "phrase"))
2119     {
2120         rset = rpn_search_APT_phrase (zh, zapt, termz, attributeSet, stream,
2121                                       reg_id, complete_flag, rank_type,
2122                                       xpath_use,
2123                                       num_bases, basenames, rset_nmem);
2124     }
2125     else if (!strcmp (search_type, "and-list"))
2126     {
2127         rset = rpn_search_APT_and_list (zh, zapt, termz, attributeSet, stream,
2128                                         reg_id, complete_flag, rank_type,
2129                                         xpath_use,
2130                                         num_bases, basenames, rset_nmem);
2131     }
2132     else if (!strcmp (search_type, "or-list"))
2133     {
2134         rset = rpn_search_APT_or_list (zh, zapt, termz, attributeSet, stream,
2135                                        reg_id, complete_flag, rank_type,
2136                                        xpath_use,
2137                                        num_bases, basenames, rset_nmem);
2138     }
2139     else if (!strcmp (search_type, "local"))
2140     {
2141         rset = rpn_search_APT_local (zh, zapt, termz, attributeSet, stream,
2142                                      rank_type, rset_nmem);
2143     }
2144     else if (!strcmp (search_type, "numeric"))
2145     {
2146         rset = rpn_search_APT_numeric (zh, zapt, termz, attributeSet, stream,
2147                                        reg_id, complete_flag, rank_type,
2148                                        xpath_use,
2149                                        num_bases, basenames, rset_nmem);
2150     }
2151     else if (!strcmp (search_type, "always"))
2152     {
2153         rset = 0;
2154     }
2155     else
2156         zh->errCode = 118;
2157     return rpn_search_xpath (zh, attributeSet, num_bases, basenames,
2158                              stream, rank_type, rset, 
2159                              xpath_len, xpath, rset_nmem);
2160 }
2161
2162 static RSET rpn_search_structure (ZebraHandle zh, Z_RPNStructure *zs,
2163                                   oid_value attributeSet, 
2164                                   NMEM stream, NMEM rset_nmem,
2165                                   Z_SortKeySpecList *sort_sequence,
2166                                   int num_bases, char **basenames)
2167 {
2168     RSET r = NULL;
2169     if (zs->which == Z_RPNStructure_complex)
2170     {
2171         Z_Operator *zop = zs->u.complex->roperator;
2172         RSET rsets[2]; /* l and r argument */
2173
2174         rsets[0]=rpn_search_structure (zh, zs->u.complex->s1,
2175                                        attributeSet, stream, rset_nmem,
2176                                        sort_sequence,
2177                                        num_bases, basenames);
2178         if (rsets[0] == NULL)
2179             return NULL;
2180         rsets[1]=rpn_search_structure (zh, zs->u.complex->s2,
2181                                        attributeSet, stream, rset_nmem,
2182                                        sort_sequence,
2183                                        num_bases, basenames);
2184         if (rsets[1] == NULL)
2185         {
2186             rset_delete (rsets[0]);
2187             return NULL;
2188         }
2189
2190         switch (zop->which)
2191         {
2192         case Z_Operator_and:
2193             r = rsmulti_and_create(rset_nmem, key_it_ctrl, key_it_ctrl->scope,
2194                                    2, rsets);
2195             break;
2196         case Z_Operator_or:
2197             r = rsmulti_or_create(rset_nmem, key_it_ctrl, key_it_ctrl->scope,
2198                                   2, rsets);
2199             break;
2200         case Z_Operator_and_not:
2201             r = rsbool_create_not(rset_nmem,key_it_ctrl, key_it_ctrl->scope,
2202                     rsets[0],rsets[1]);
2203             break;
2204         case Z_Operator_prox:
2205             if (zop->u.prox->which != Z_ProximityOperator_known)
2206             {
2207                 zh->errCode = 132;
2208                 return NULL;
2209             }
2210             if (*zop->u.prox->u.known != Z_ProxUnit_word)
2211             {
2212                 char *val = (char *) nmem_malloc(stream, 16);
2213                 zh->errCode = 132;
2214                 zh->errString = val;
2215                 sprintf (val, "%d", *zop->u.prox->u.known);
2216                 return NULL;
2217             }
2218             else
2219             {
2220                 /* new / old prox */
2221                 r = rsprox_create(rset_nmem,key_it_ctrl,key_it_ctrl->scope,
2222                          2, rsets, 
2223                          *zop->u.prox->ordered,
2224                          (!zop->u.prox->exclusion ? 
2225                               0 : *zop->u.prox->exclusion),
2226                          *zop->u.prox->relationType,
2227                          *zop->u.prox->distance );
2228             }
2229             break;
2230         default:
2231             zh->errCode = 110;
2232             return NULL;
2233         }
2234     }
2235     else if (zs->which == Z_RPNStructure_simple)
2236     {
2237         if (zs->u.simple->which == Z_Operand_APT)
2238         {
2239             yaz_log(YLOG_DEBUG, "rpn_search_APT");
2240             r = rpn_search_APT (zh, zs->u.simple->u.attributesPlusTerm,
2241                                 attributeSet, stream, sort_sequence,
2242                                 num_bases, basenames,rset_nmem);
2243         }
2244         else if (zs->u.simple->which == Z_Operand_resultSetId)
2245         {
2246             yaz_log(YLOG_DEBUG, "rpn_search_ref");
2247             r = resultSetRef (zh, zs->u.simple->u.resultSetId);
2248             if (!r)
2249             {
2250                 r = rsnull_create (rset_nmem,key_it_ctrl);
2251                 zh->errCode = 30;
2252                 zh->errString =
2253                     nmem_strdup (stream, zs->u.simple->u.resultSetId);
2254                 return 0;
2255             }
2256             else
2257                 rset_dup(r);
2258         }
2259         else
2260         {
2261             zh->errCode = 3;
2262             return 0;
2263         }
2264     }
2265     else
2266     {
2267         zh->errCode = 3;
2268         return 0;
2269     }
2270     return r;
2271 }
2272
2273
2274 RSET rpn_search(ZebraHandle zh, NMEM nmem, NMEM rset_nmem,
2275                 Z_RPNQuery *rpn, int num_bases, char **basenames, 
2276                 const char *setname,
2277                 ZebraSet sset)
2278 {
2279     RSET rset;
2280     oident *attrset;
2281     oid_value attributeSet;
2282     Z_SortKeySpecList *sort_sequence;
2283     int sort_status, i;
2284
2285     zh->errCode = 0;
2286     zh->errString = NULL;
2287     zh->hits = 0;
2288
2289     sort_sequence = (Z_SortKeySpecList *)
2290         nmem_malloc(nmem, sizeof(*sort_sequence));
2291     sort_sequence->num_specs = 10; /* FIXME - Hard-coded number */
2292     sort_sequence->specs = (Z_SortKeySpec **)
2293         nmem_malloc(nmem, sort_sequence->num_specs *
2294                      sizeof(*sort_sequence->specs));
2295     for (i = 0; i<sort_sequence->num_specs; i++)
2296         sort_sequence->specs[i] = 0;
2297     
2298     attrset = oid_getentbyoid (rpn->attributeSetId);
2299     attributeSet = attrset->value;
2300     rset = rpn_search_structure (zh, rpn->RPNStructure, attributeSet,
2301                                  nmem, rset_nmem,
2302                                  sort_sequence, num_bases, basenames);
2303     if (!rset)
2304         return 0;
2305
2306     if (zh->errCode)
2307         yaz_log(YLOG_DEBUG, "search error: %d", zh->errCode);
2308     
2309     for (i = 0; sort_sequence->specs[i]; i++)
2310         ;
2311     sort_sequence->num_specs = i;
2312     if (!i)
2313         resultSetRank (zh, sset, rset, rset_nmem);
2314     else
2315     {
2316         yaz_log(YLOG_DEBUG, "resultSetSortSingle in rpn_search");
2317         resultSetSortSingle (zh, nmem, sset, rset,
2318                              sort_sequence, &sort_status);
2319         if (zh->errCode)
2320         {
2321             yaz_log(YLOG_DEBUG, "resultSetSortSingle status = %d", zh->errCode);
2322         }
2323     }
2324     return rset;
2325 }
2326
2327 struct scan_info_entry {
2328     char *term;
2329     ISAMC_P isam_p;
2330 };
2331
2332 struct scan_info {
2333     struct scan_info_entry *list;
2334     ODR odr;
2335     int before, after;
2336     char prefix[20];
2337 };
2338
2339 static int scan_handle (char *name, const char *info, int pos, void *client)
2340 {
2341     int len_prefix, idx;
2342     struct scan_info *scan_info = (struct scan_info *) client;
2343
2344     len_prefix = strlen(scan_info->prefix);
2345     if (memcmp (name, scan_info->prefix, len_prefix))
2346         return 1;
2347     if (pos > 0)        idx = scan_info->after - pos + scan_info->before;
2348     else
2349         idx = - pos - 1;
2350     scan_info->list[idx].term = (char *)
2351         odr_malloc(scan_info->odr, strlen(name + len_prefix)+1);
2352     strcpy(scan_info->list[idx].term, name + len_prefix);
2353     assert (*info == sizeof(ISAMC_P));
2354     memcpy (&scan_info->list[idx].isam_p, info+1, sizeof(ISAMC_P));
2355     return 0;
2356 }
2357
2358 static void scan_term_untrans (ZebraHandle zh, NMEM stream, int reg_type,
2359                                char **dst, const char *src)
2360 {
2361     char term_src[IT_MAX_WORD];
2362     char term_dst[IT_MAX_WORD];
2363     
2364     term_untrans (zh, reg_type, term_src, src);
2365
2366     if (zh->iconv_from_utf8 != 0)
2367     {
2368         int len;
2369         char *inbuf = term_src;
2370         size_t inleft = strlen(term_src);
2371         char *outbuf = term_dst;
2372         size_t outleft = sizeof(term_dst)-1;
2373         size_t ret;
2374         
2375         ret = yaz_iconv (zh->iconv_from_utf8, &inbuf, &inleft,
2376                          &outbuf, &outleft);
2377         if (ret == (size_t)(-1))
2378             len = 0;
2379         else
2380             len = outbuf - term_dst;
2381         *dst = nmem_malloc(stream, len + 1);
2382         if (len > 0)
2383             memcpy (*dst, term_dst, len);
2384         (*dst)[len] = '\0';
2385     }
2386     else
2387         *dst = nmem_strdup(stream, term_src);
2388 }
2389
2390 static void count_set (RSET r, int *count)
2391 {
2392     zint psysno = 0;
2393     int kno = 0;
2394     struct it_key key;
2395     RSFD rfd;
2396
2397     yaz_log(YLOG_DEBUG, "count_set");
2398
2399     *count = 0;
2400     rfd = rset_open (r, RSETF_READ);
2401     while (rset_read (rfd, &key,0 /* never mind terms */))
2402     {
2403         if (key.mem[0] != psysno)
2404         {
2405             psysno = key.mem[0];
2406             (*count)++;
2407         }
2408         kno++;
2409     }
2410     rset_close (rfd);
2411     yaz_log(YLOG_DEBUG, "%d keys, %d records", kno, *count);
2412 }
2413
2414 void rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
2415                oid_value attributeset,
2416                int num_bases, char **basenames,
2417                int *position, int *num_entries, ZebraScanEntry **list,
2418                int *is_partial, RSET limit_set, int return_zero)
2419 {
2420     int i;
2421     int pos = *position;
2422     int num = *num_entries;
2423     int before;
2424     int after;
2425     int base_no;
2426     char termz[IT_MAX_WORD+20];
2427     AttrType use;
2428     int use_value;
2429     const char *use_string = 0;
2430     struct scan_info *scan_info_array;
2431     ZebraScanEntry *glist;
2432     int ords[32], ord_no = 0;
2433     int ptr[32];
2434
2435     int bases_ok = 0;     /* no of databases with OK attribute */
2436     int errCode = 0;      /* err code (if any is not OK) */
2437     char *errString = 0;  /* addinfo */
2438
2439     unsigned reg_id;
2440     char *search_type = NULL;
2441     char rank_type[128];
2442     int complete_flag;
2443     int sort_flag;
2444     NMEM rset_nmem = NULL; 
2445
2446     *list = 0;
2447
2448     if (attributeset == VAL_NONE)
2449         attributeset = VAL_BIB1;
2450
2451     if (!limit_set)
2452     {
2453         AttrType termset;
2454         int termset_value_numeric;
2455         const char *termset_value_string;
2456         attr_init (&termset, zapt, 8);
2457         termset_value_numeric =
2458             attr_find_ex (&termset, NULL, &termset_value_string);
2459         if (termset_value_numeric != -1)
2460         {
2461             char resname[32];
2462             const char *termset_name = 0;
2463             
2464             if (termset_value_numeric != -2)
2465             {
2466                 
2467                 sprintf (resname, "%d", termset_value_numeric);
2468                 termset_name = resname;
2469             }
2470             else
2471                 termset_name = termset_value_string;
2472             
2473             limit_set = resultSetRef (zh, termset_name);
2474         }
2475     }
2476         
2477     yaz_log (YLOG_DEBUG, "position = %d, num = %d set=%d",
2478              pos, num, attributeset);
2479         
2480     attr_init (&use, zapt, 1);
2481     use_value = attr_find_ex (&use, &attributeset, &use_string);
2482
2483     if (zebra_maps_attr (zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2484                          rank_type, &complete_flag, &sort_flag))
2485     {
2486         *num_entries = 0;
2487         zh->errCode = 113;
2488         return ;
2489     }
2490     yaz_log (YLOG_DEBUG, "use_value = %d", use_value);
2491
2492     if (use_value == -1)
2493         use_value = 1016;
2494     for (base_no = 0; base_no < num_bases && ord_no < 32; base_no++)
2495     {
2496         int r;
2497         attent attp;
2498         data1_local_attribute *local_attr;
2499
2500         if ((r = att_getentbyatt (zh, &attp, attributeset, use_value,
2501                                 use_string)))
2502         {
2503             yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d",
2504                   attributeset, use_value);
2505             if (r == -1)
2506             {
2507                 char val_str[32];
2508                 sprintf (val_str, "%d", use_value);
2509                 errCode = 114;
2510                 errString = odr_strdup (stream, val_str);
2511             }   
2512             else
2513                 errCode = 121;
2514             continue;
2515         }
2516         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2517         {
2518             zh->errString = basenames[base_no];
2519             zh->errCode = 109; /* Database unavailable */
2520             *num_entries = 0;
2521             return;
2522         }
2523         bases_ok++;
2524         for (local_attr = attp.local_attributes; local_attr && ord_no < 32;
2525              local_attr = local_attr->next)
2526         {
2527             int ord;
2528
2529             ord = zebraExplain_lookup_attr_su(zh->reg->zei,
2530                                               attp.attset_ordinal,
2531                                               local_attr->local);
2532             if (ord > 0)
2533                 ords[ord_no++] = ord;
2534         }
2535     }
2536     if (!bases_ok && errCode)
2537     {
2538         zh->errCode = errCode;
2539         zh->errString = errString;
2540         *num_entries = 0;
2541     }
2542     if (ord_no == 0)
2543     {
2544         *num_entries = 0;
2545         return;
2546     }
2547     /* prepare dictionary scanning */
2548     before = pos-1;
2549     after = 1+num-pos;
2550     scan_info_array = (struct scan_info *)
2551         odr_malloc(stream, ord_no * sizeof(*scan_info_array));
2552     for (i = 0; i < ord_no; i++)
2553     {
2554         int j, prefix_len = 0;
2555         int before_tmp = before, after_tmp = after;
2556         struct scan_info *scan_info = scan_info_array + i;
2557         struct rpn_char_map_info rcmi;
2558
2559         rpn_char_map_prepare (zh->reg, reg_id, &rcmi);
2560
2561         scan_info->before = before;
2562         scan_info->after = after;
2563         scan_info->odr = stream;
2564
2565         scan_info->list = (struct scan_info_entry *)
2566             odr_malloc(stream, (before+after) * sizeof(*scan_info->list));
2567         for (j = 0; j<before+after; j++)
2568             scan_info->list[j].term = NULL;
2569
2570         prefix_len += key_SU_encode (ords[i], termz + prefix_len);
2571         termz[prefix_len++] = reg_id;
2572         termz[prefix_len] = 0;
2573         strcpy(scan_info->prefix, termz);
2574
2575         if (trans_scan_term(zh, zapt, termz+prefix_len, reg_id))
2576             return ;
2577         
2578         dict_scan(zh->reg->dict, termz, &before_tmp, &after_tmp,
2579                   scan_info, scan_handle);
2580     }
2581     glist = (ZebraScanEntry *)
2582         odr_malloc(stream, (before+after)*sizeof(*glist));
2583
2584     rset_nmem = nmem_create();
2585
2586     /* consider terms after main term */
2587     for (i = 0; i < ord_no; i++)
2588         ptr[i] = before;
2589     
2590     *is_partial = 0;
2591     for (i = 0; i<after; i++)
2592     {
2593         int j, j0 = -1;
2594         const char *mterm = NULL;
2595         const char *tst;
2596         RSET rset;
2597         
2598         for (j = 0; j < ord_no; j++)
2599         {
2600             if (ptr[j] < before+after &&
2601                 (tst = scan_info_array[j].list[ptr[j]].term) &&
2602                 (!mterm || strcmp (tst, mterm) < 0))
2603             {
2604                 j0 = j;
2605                 mterm = tst;
2606             }
2607         }
2608         if (j0 == -1)
2609             break;
2610         scan_term_untrans (zh, stream->mem, reg_id,
2611                            &glist[i+before].term, mterm);
2612         rset = rset_trunc(zh, &scan_info_array[j0].list[ptr[j0]].isam_p, 1,
2613                           glist[i+before].term, strlen(glist[i+before].term),
2614                           NULL, 0, zapt->term->which, rset_nmem, 
2615                           key_it_ctrl,key_it_ctrl->scope);
2616         ptr[j0]++;
2617         for (j = j0+1; j<ord_no; j++)
2618         {
2619             if (ptr[j] < before+after &&
2620                 (tst = scan_info_array[j].list[ptr[j]].term) &&
2621                 !strcmp (tst, mterm))
2622             {
2623                 RSET rsets[2];
2624                 
2625                 rsets[0] = rset;
2626                 rsets[1] =
2627                     rset_trunc(zh, &scan_info_array[j].list[ptr[j]].isam_p, 1,
2628                                glist[i+before].term,
2629                                strlen(glist[i+before].term), NULL, 0,
2630                                zapt->term->which,rset_nmem,
2631                                key_it_ctrl, key_it_ctrl->scope);
2632                 rset = rsmulti_or_create(rset_nmem, key_it_ctrl,
2633                                          2, key_it_ctrl->scope, rsets);
2634                 ptr[j]++;
2635             }
2636         }
2637         if (limit_set)
2638         {
2639             RSET rsets[2];
2640             rsets[0] = rset;
2641             rsets[1] = rset_dup(limit_set);
2642             
2643             rset = rsmulti_and_create(rset_nmem, key_it_ctrl,
2644                                       key_it_ctrl->scope, 2, rsets);
2645         }
2646         count_set(rset, &glist[i+before].occurrences);
2647         rset_delete(rset);
2648     }
2649     if (i < after)
2650     {
2651         *num_entries -= (after-i);
2652         *is_partial = 1;
2653     }
2654     
2655     /* consider terms before main term */
2656     for (i = 0; i<ord_no; i++)
2657         ptr[i] = 0;
2658     
2659     for (i = 0; i<before; i++)
2660     {
2661         int j, j0 = -1;
2662         const char *mterm = NULL;
2663         const char *tst;
2664         RSET rset;
2665         
2666         for (j = 0; j <ord_no; j++)
2667         {
2668             if (ptr[j] < before &&
2669                 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
2670                 (!mterm || strcmp (tst, mterm) > 0))
2671             {
2672                 j0 = j;
2673                     mterm = tst;
2674             }
2675         }
2676         if (j0 == -1)
2677             break;
2678         
2679         scan_term_untrans (zh, stream->mem, reg_id,
2680                            &glist[before-1-i].term, mterm);
2681         
2682         rset = rset_trunc
2683             (zh, &scan_info_array[j0].list[before-1-ptr[j0]].isam_p, 1,
2684              glist[before-1-i].term, strlen(glist[before-1-i].term),
2685              NULL, 0, zapt->term->which,rset_nmem,
2686              key_it_ctrl,key_it_ctrl->scope);
2687         
2688         ptr[j0]++;
2689         
2690         for (j = j0+1; j<ord_no; j++)
2691         {
2692             if (ptr[j] < before &&
2693                 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
2694                 !strcmp (tst, mterm))
2695             {
2696                 RSET rsets[2];
2697                 
2698                 rsets[0] = rset;
2699                 rsets[1] = rset_trunc(
2700                     zh,
2701                     &scan_info_array[j].list[before-1-ptr[j]].isam_p, 1,
2702                     glist[before-1-i].term,
2703                     strlen(glist[before-1-i].term), NULL, 0,
2704                     zapt->term->which, rset_nmem,
2705                     key_it_ctrl, key_it_ctrl->scope);
2706                 rset = rsmulti_or_create(rset_nmem, key_it_ctrl,
2707                                          2, key_it_ctrl->scope, rsets);
2708                 
2709                 ptr[j]++;
2710             }
2711         }
2712         if (limit_set)
2713         {
2714             RSET rsets[2];
2715             rsets[0] = rset;
2716             rsets[1] = rset_dup(limit_set);
2717             
2718             rset = rsmulti_and_create(rset_nmem, key_it_ctrl,
2719                                       key_it_ctrl->scope, 2, rsets);
2720         }
2721         count_set (rset, &glist[before-1-i].occurrences);
2722         rset_delete (rset);
2723     }
2724     i = before-i;
2725     if (i)
2726     {
2727         *is_partial = 1;
2728         *position -= i;
2729         *num_entries -= i;
2730     }
2731     
2732     nmem_destroy(rset_nmem);
2733     *list = glist + i;               /* list is set to first 'real' entry */
2734     
2735     yaz_log(YLOG_DEBUG, "position = %d, num_entries = %d",
2736             *position, *num_entries);
2737     if (zh->errCode)
2738         yaz_log(YLOG_DEBUG, "scan error: %d", zh->errCode);
2739 }
2740