Bump year. Change Aps->ApS
[idzebra-moved-to-github.git] / index / zrpn.c
1 /* $Id: zrpn.c,v 1.167 2005-01-15 19:38:29 adam Exp $
2    Copyright (C) 1995-2005
3    Index Data ApS
4
5 This file is part of the Zebra server.
6
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra.  If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA.
21 */
22
23
24 #include <stdio.h>
25 #include <assert.h>
26 #ifdef WIN32
27 #include <io.h>
28 #else
29 #include <unistd.h>
30 #endif
31 #include <ctype.h>
32
33 #include "index.h"
34 #include <zebra_xpath.h>
35
36 #include <charmap.h>
37 #include <rset.h>
38
39 /* maximum number of terms in an and/or/phrase item */
40 #define TERM_LIST_LENGTH_MAX 256
41
42 static const struct key_control it_ctrl =
43
44     sizeof(struct it_key),
45     2, /* we have sysnos and seqnos in this key, nothing more */
46     key_compare_it, 
47     key_logdump_txt,   /* FIXME  - clean up these functions */
48     key_get_seq,
49 };
50
51
52 const struct key_control *key_it_ctrl = &it_ctrl;
53
54 struct rpn_char_map_info
55 {
56     ZebraMaps zm;
57     int reg_type;
58 };
59
60 typedef struct
61 {
62     int type;
63     int major;
64     int minor;
65     Z_AttributesPlusTerm *zapt;
66 } AttrType;
67
68
/* Log level of the "rpn" module; looked up lazily in add_isam_p. */
static int log_level_set = 0;   /* non-zero once log_level_rpn has been fetched */
static int log_level_rpn = 0;
71
72 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
73 {
74     struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
75     const char **out = zebra_maps_input(p->zm, p->reg_type, from, len, 0);
76 #if 0
77     if (out && *out)
78     {
79         const char *outp = *out;
80         yaz_log(YLOG_LOG, "---");
81         while (*outp)
82         {
83             yaz_log(YLOG_LOG, "%02X", *outp);
84             outp++;
85         }
86     }
87 #endif
88     return out;
89 }
90
91 static void rpn_char_map_prepare(struct zebra_register *reg, int reg_type,
92                                   struct rpn_char_map_info *map_info)
93 {
94     map_info->zm = reg->zebra_maps;
95     map_info->reg_type = reg_type;
96     dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
97 }
98
99 static int attr_find_ex(AttrType *src, oid_value *attributeSetP,
100                          const char **string_value)
101 {
102     int num_attributes;
103
104     num_attributes = src->zapt->attributes->num_attributes;
105     while (src->major < num_attributes)
106     {
107         Z_AttributeElement *element;
108
109         element = src->zapt->attributes->attributes[src->major];
110         if (src->type == *element->attributeType)
111         {
112             switch (element->which) 
113             {
114             case Z_AttributeValue_numeric:
115                 ++(src->major);
116                 if (element->attributeSet && attributeSetP)
117                 {
118                     oident *attrset;
119
120                     attrset = oid_getentbyoid(element->attributeSet);
121                     *attributeSetP = attrset->value;
122                 }
123                 return *element->value.numeric;
124                 break;
125             case Z_AttributeValue_complex:
126                 if (src->minor >= element->value.complex->num_list)
127                     break;
128                 if (element->attributeSet && attributeSetP)
129                 {
130                     oident *attrset;
131                     
132                     attrset = oid_getentbyoid(element->attributeSet);
133                     *attributeSetP = attrset->value;
134                 }
135                 if (element->value.complex->list[src->minor]->which ==  
136                     Z_StringOrNumeric_numeric)
137                 {
138                     ++(src->minor);
139                     return
140                         *element->value.complex->list[src->minor-1]->u.numeric;
141                 }
142                 else if (element->value.complex->list[src->minor]->which ==  
143                          Z_StringOrNumeric_string)
144                 {
145                     if (!string_value)
146                         break;
147                     ++(src->minor);
148                     *string_value = 
149                         element->value.complex->list[src->minor-1]->u.string;
150                     return -2;
151                 }
152                 else
153                     break;
154             default:
155                 assert(0);
156             }
157         }
158         ++(src->major);
159     }
160     return -1;
161 }
162
163 static int attr_find(AttrType *src, oid_value *attributeSetP)
164 {
165     return attr_find_ex(src, attributeSetP, 0);
166 }
167
168 static void attr_init(AttrType *src, Z_AttributesPlusTerm *zapt,
169                        int type)
170 {
171     src->zapt = zapt;
172     src->type = type;
173     src->major = 0;
174     src->minor = 0;
175 }
176
177 #define TERM_COUNT        
178        
179 struct grep_info {        
180 #ifdef TERM_COUNT        
181     int *term_no;        
182 #endif        
183     ISAMC_P *isam_p_buf;
184     int isam_p_size;        
185     int isam_p_indx;
186     ZebraHandle zh;
187     int reg_type;
188     ZebraSet termset;
189 };        
190
191 static void term_untrans(ZebraHandle zh, int reg_type,
192                            char *dst, const char *src)
193 {
194     int len = 0;
195     while (*src)
196     {
197         const char *cp = zebra_maps_output(zh->reg->zebra_maps,
198                                             reg_type, &src);
199         if (!cp && len < IT_MAX_WORD-1)
200             dst[len++] = *src++;
201         else
202             while (*cp && len < IT_MAX_WORD-1)
203                 dst[len++] = *cp++;
204     }
205     dst[len] = '\0';
206 }
207
208 static void add_isam_p(const char *name, const char *info,
209                         struct grep_info *p)
210 {
211     if (!log_level_set)
212     {
213         log_level_rpn = yaz_log_module_level("rpn");
214         log_level_set=1;
215     }
216     if (p->isam_p_indx == p->isam_p_size)
217     {
218         ISAMC_P *new_isam_p_buf;
219 #ifdef TERM_COUNT        
220         int *new_term_no;        
221 #endif
222         p->isam_p_size = 2*p->isam_p_size + 100;
223         new_isam_p_buf = (ISAMC_P *) xmalloc(sizeof(*new_isam_p_buf) *
224                                              p->isam_p_size);
225         if (p->isam_p_buf)
226         {
227             memcpy(new_isam_p_buf, p->isam_p_buf,
228                     p->isam_p_indx * sizeof(*p->isam_p_buf));
229             xfree(p->isam_p_buf);
230         }
231         p->isam_p_buf = new_isam_p_buf;
232
233 #ifdef TERM_COUNT
234         new_term_no = (int *) xmalloc(sizeof(*new_term_no) *
235                                        p->isam_p_size);
236         if (p->term_no)
237         {
238             memcpy(new_term_no, p->isam_p_buf,
239                     p->isam_p_indx * sizeof(*p->term_no));
240             xfree(p->term_no);
241         }
242         p->term_no = new_term_no;
243 #endif
244     }
245     assert(*info == sizeof(*p->isam_p_buf));
246     memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
247
248 #if 1
249     if (p->termset)
250     {
251         const char *db;
252         int set, use;
253         char term_tmp[IT_MAX_WORD];
254         int su_code = 0;
255         int len = key_SU_decode (&su_code, name);
256         
257         term_untrans  (p->zh, p->reg_type, term_tmp, name+len+1);
258         yaz_log(log_level_rpn, "grep: %d %c %s", su_code, name[len], term_tmp);
259         zebraExplain_lookup_ord (p->zh->reg->zei,
260                                  su_code, &db, &set, &use);
261         yaz_log(log_level_rpn, "grep:  set=%d use=%d db=%s", set, use, db);
262         
263         resultSetAddTerm(p->zh, p->termset, name[len], db,
264                          set, use, term_tmp);
265     }
266 #endif
267     (p->isam_p_indx)++;
268 }
269
/* Dictionary grep callback: stores the hit in the struct grep_info
   passed as client data and returns 0. */
static int grep_handle(char *name, const char *info, void *p)
{
    struct grep_info *acc = (struct grep_info *) p;

    add_isam_p(name, info, acc);
    return 0;
}
275
276 static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src,
277                      const char *ct1, const char *ct2, int first)
278 {
279     const char *s1, *s0 = *src;
280     const char **map;
281
282     /* skip white space */
283     while (*s0)
284     {
285         if (ct1 && strchr(ct1, *s0))
286             break;
287         if (ct2 && strchr(ct2, *s0))
288             break;
289         s1 = s0;
290         map = zebra_maps_input(zebra_maps, reg_type, &s1, strlen(s1), first);
291         if (**map != *CHR_SPACE)
292             break;
293         s0 = s1;
294     }
295     *src = s0;
296     return *s0;
297 }
298
299 #define REGEX_CHARS " []()|.*+?!"
300
301 /* term_100: handle term, where trunc=none(no operators at all) */
302 static int term_100(ZebraMaps zebra_maps, int reg_type,
303                      const char **src, char *dst, int space_split,
304                      char *dst_term)
305 {
306     const char *s0, *s1;
307     const char **map;
308     int i = 0;
309     int j = 0;
310
311     const char *space_start = 0;
312     const char *space_end = 0;
313
314     if (!term_pre(zebra_maps, reg_type, src, NULL, NULL, !space_split))
315         return 0;
316     s0 = *src;
317     while (*s0)
318     {
319         s1 = s0;
320         map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0);
321         if (space_split)
322         {
323             if (**map == *CHR_SPACE)
324                 break;
325         }
326         else  /* complete subfield only. */
327         {
328             if (**map == *CHR_SPACE)
329             {   /* save space mapping for later  .. */
330                 space_start = s1;
331                 space_end = s0;
332                 continue;
333             }
334             else if (space_start)
335             {   /* reload last space */
336                 while (space_start < space_end)
337                 {
338                     if (strchr(REGEX_CHARS, *space_start))
339                         dst[i++] = '\\';
340                     dst_term[j++] = *space_start;
341                     dst[i++] = *space_start++;
342                 }
343                 /* and reset */
344                 space_start = space_end = 0;
345             }
346         }
347         /* add non-space char */
348         while (s1 < s0)
349         {
350             if (strchr(REGEX_CHARS, *s1))
351                 dst[i++] = '\\';
352             dst_term[j++] = *s1;
353             dst[i++] = *s1++;
354         }
355     }
356     dst[i] = '\0';
357     dst_term[j] = '\0';
358     *src = s0;
359     return i;
360 }
361
362 /* term_101: handle term, where trunc=Process # */
363 static int term_101(ZebraMaps zebra_maps, int reg_type,
364                      const char **src, char *dst, int space_split,
365                      char *dst_term)
366 {
367     const char *s0, *s1;
368     const char **map;
369     int i = 0;
370     int j = 0;
371
372     if (!term_pre(zebra_maps, reg_type, src, "#", "#", !space_split))
373         return 0;
374     s0 = *src;
375     while (*s0)
376     {
377         if (*s0 == '#')
378         {
379             dst[i++] = '.';
380             dst[i++] = '*';
381             dst_term[j++] = *s0++;
382         }
383         else
384         {
385             s1 = s0;
386             map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0);
387             if (space_split && **map == *CHR_SPACE)
388                 break;
389             while (s1 < s0)
390             {
391                 if (strchr(REGEX_CHARS, *s1))
392                     dst[i++] = '\\';
393                 dst_term[j++] = *s1;
394                 dst[i++] = *s1++;
395             }
396         }
397     }
398     dst[i] = '\0';
399     dst_term[j++] = '\0';
400     *src = s0;
401     return i;
402 }
403
404 /* term_103: handle term, where trunc=re-2 (regular expressions) */
405 static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src,
406                      char *dst, int *errors, int space_split,
407                      char *dst_term)
408 {
409     int i = 0;
410     int j = 0;
411     const char *s0, *s1;
412     const char **map;
413
414     if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split))
415         return 0;
416     s0 = *src;
417     if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
418         isdigit(((const unsigned char *)s0)[1]))
419     {
420         *errors = s0[1] - '0';
421         s0 += 3;
422         if (*errors > 3)
423             *errors = 3;
424     }
425     while (*s0)
426     {
427         if (strchr("^\\()[].*+?|-", *s0))
428         {
429             dst_term[j++] = *s0;
430             dst[i++] = *s0++;
431         }
432         else
433         {
434             s1 = s0;
435             map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0);
436             if (**map == *CHR_SPACE)
437                 break;
438             while (s1 < s0)
439             {
440                 if (strchr(REGEX_CHARS, *s1))
441                     dst[i++] = '\\';
442                 dst_term[j++] = *s1;
443                 dst[i++] = *s1++;
444             }
445         }
446     }
447     dst[i] = '\0';
448     dst_term[j] = '\0';
449     *src = s0;
450     return i;
451 }
452
453 /* term_103: handle term, where trunc=re-1 (regular expressions) */
454 static int term_102 (ZebraMaps zebra_maps, int reg_type, const char **src,
455                      char *dst, int space_split, char *dst_term)
456 {
457     return term_103(zebra_maps, reg_type, src, dst, NULL, space_split,
458                      dst_term);
459 }
460
461
462 /* term_104: handle term, where trunc=Process # and ! */
463 static int term_104(ZebraMaps zebra_maps, int reg_type,
464                      const char **src, char *dst, int space_split,
465                      char *dst_term)
466 {
467     const char *s0, *s1;
468     const char **map;
469     int i = 0;
470     int j = 0;
471
472     if (!term_pre(zebra_maps, reg_type, src, "?*#", "?*#", !space_split))
473         return 0;
474     s0 = *src;
475     while (*s0)
476     {
477         if (*s0 == '?')
478         {
479             dst_term[j++] = *s0++;
480             if (*s0 >= '0' && *s0 <= '9')
481             {
482                 int limit = 0;
483                 while (*s0 >= '0' && *s0 <= '9')
484                 {
485                     limit = limit * 10 + (*s0 - '0');
486                     dst_term[j++] = *s0++;
487                 }
488                 if (limit > 20)
489                     limit = 20;
490                 while (--limit >= 0)
491                 {
492                     dst[i++] = '.';
493                     dst[i++] = '?';
494                 }
495             }
496             else
497             {
498                 dst[i++] = '.';
499                 dst[i++] = '*';
500             }
501         }
502         else if (*s0 == '*')
503         {
504             dst[i++] = '.';
505             dst[i++] = '*';
506             dst_term[j++] = *s0++;
507         }
508         else if (*s0 == '#')
509         {
510             dst[i++] = '.';
511             dst_term[j++] = *s0++;
512         }
513         {
514             s1 = s0;
515             map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0);
516             if (space_split && **map == *CHR_SPACE)
517                 break;
518             while (s1 < s0)
519             {
520                 if (strchr(REGEX_CHARS, *s1))
521                     dst[i++] = '\\';
522                 dst_term[j++] = *s1;
523                 dst[i++] = *s1++;
524             }
525         }
526     }
527     dst[i] = '\0';
528     dst_term[j++] = '\0';
529     *src = s0;
530     return i;
531 }
532
533 /* term_105/106: handle term, where trunc=Process * and ! and right trunc */
534 static int term_105 (ZebraMaps zebra_maps, int reg_type,
535                      const char **src, char *dst, int space_split,
536                      char *dst_term, int right_truncate)
537 {
538     const char *s0, *s1;
539     const char **map;
540     int i = 0;
541     int j = 0;
542
543     if (!term_pre(zebra_maps, reg_type, src, "*!", "*!", !space_split))
544         return 0;
545     s0 = *src;
546     while (*s0)
547     {
548         if (*s0 == '*')
549         {
550             dst[i++] = '.';
551             dst[i++] = '*';
552             dst_term[j++] = *s0++;
553         }
554         else if (*s0 == '!')
555         {
556             dst[i++] = '.';
557             dst_term[j++] = *s0++;
558         }
559         {
560             s1 = s0;
561             map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0);
562             if (space_split && **map == *CHR_SPACE)
563                 break;
564             while (s1 < s0)
565             {
566                 if (strchr(REGEX_CHARS, *s1))
567                     dst[i++] = '\\';
568                 dst_term[j++] = *s1;
569                 dst[i++] = *s1++;
570             }
571         }
572     }
573     if (right_truncate)
574     {
575         dst[i++] = '.';
576         dst[i++] = '*';
577     }
578     dst[i] = '\0';
579     
580     dst_term[j++] = '\0';
581     *src = s0;
582     return i;
583 }
584
585
586 /* gen_regular_rel - generate regular expression from relation
587  *  val:     border value (inclusive)
588  *  islt:    1 if <=; 0 if >=.
589  */
590 static void gen_regular_rel(char *dst, int val, int islt)
591 {
592     int dst_p;
593     int w, d, i;
594     int pos = 0;
595     char numstr[20];
596
597     yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
598     if (val >= 0)
599     {
600         if (islt)
601             strcpy(dst, "(-[0-9]+|(");
602         else
603             strcpy(dst, "((");
604     } 
605     else
606     {
607         if (!islt)
608         {
609             strcpy(dst, "([0-9]+|-(");
610             dst_p = strlen(dst);
611             islt = 1;
612         }
613         else
614         {
615             strcpy(dst, "(-(");
616             islt = 0;
617         }
618         val = -val;
619     }
620     dst_p = strlen(dst);
621     sprintf(numstr, "%d", val);
622     for (w = strlen(numstr); --w >= 0; pos++)
623     {
624         d = numstr[w];
625         if (pos > 0)
626         {
627             if (islt)
628             {
629                 if (d == '0')
630                     continue;
631                 d--;
632             } 
633             else
634             {
635                 if (d == '9')
636                     continue;
637                 d++;
638             }
639         }
640         
641         strcpy(dst + dst_p, numstr);
642         dst_p = strlen(dst) - pos - 1;
643
644         if (islt)
645         {
646             if (d != '0')
647             {
648                 dst[dst_p++] = '[';
649                 dst[dst_p++] = '0';
650                 dst[dst_p++] = '-';
651                 dst[dst_p++] = d;
652                 dst[dst_p++] = ']';
653             }
654             else
655                 dst[dst_p++] = d;
656         }
657         else
658         {
659             if (d != '9')
660             { 
661                 dst[dst_p++] = '[';
662                 dst[dst_p++] = d;
663                 dst[dst_p++] = '-';
664                 dst[dst_p++] = '9';
665                 dst[dst_p++] = ']';
666             }
667             else
668                 dst[dst_p++] = d;
669         }
670         for (i = 0; i<pos; i++)
671         {
672             dst[dst_p++] = '[';
673             dst[dst_p++] = '0';
674             dst[dst_p++] = '-';
675             dst[dst_p++] = '9';
676             dst[dst_p++] = ']';
677         }
678         dst[dst_p++] = '|';
679     }
680     dst[dst_p] = '\0';
681     if (islt)
682     {
683         /* match everything less than 10^(pos-1) */
684         strcat(dst, "0*");
685         for (i=1; i<pos; i++)
686             strcat(dst, "[0-9]?");
687     }
688     else
689     {
690         /* match everything greater than 10^pos */
691         for (i = 0; i <= pos; i++)
692             strcat(dst, "[0-9]");
693         strcat(dst, "[0-9]*");
694     }
695     strcat(dst, "))");
696 }
697
/* Copy one logical character from src[*indx] to **term_p, advancing
   both.  A backslash is copied together with the byte that follows it. */
void string_rel_add_char(char **term_p, const char *src, int *indx)
{
    char *out = *term_p;
    int pos = *indx;

    if (src[pos] == '\\')
        *out++ = src[pos++];
    *out++ = src[pos++];

    *term_p = out;
    *indx = pos;
}
704
705 /*
706  *   >  abc     ([b-].*|a[c-].*|ab[d-].*|abc.+)
707  *              ([^-a].*|a[^-b].*ab[^-c].*|abc.+)
708  *   >= abc     ([b-].*|a[c-].*|ab[c-].*)
709  *              ([^-a].*|a[^-b].*|ab[c-].*)
710  *   <  abc     ([-0].*|a[-a].*|ab[-b].*)
711  *              ([^a-].*|a[^b-].*|ab[^c-].*)
712  *   <= abc     ([-0].*|a[-a].*|ab[-b].*|abc)
713  *              ([^a-].*|a[^b-].*|ab[^c-].*|abc)
714  */
715 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
716                             const char **term_sub, char *term_dict,
717                             oid_value attributeSet,
718                             int reg_type, int space_split, char *term_dst)
719 {
720     AttrType relation;
721     int relation_value;
722     int i;
723     char *term_tmp = term_dict + strlen(term_dict);
724     char term_component[2*IT_MAX_WORD+20];
725
726     attr_init(&relation, zapt, 2);
727     relation_value = attr_find(&relation, NULL);
728
729     yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
730     switch (relation_value)
731     {
732     case 1:
733         if (!term_100 (zh->reg->zebra_maps, reg_type,
734                        term_sub, term_component,
735                        space_split, term_dst))
736             return 0;
737         yaz_log(log_level_rpn, "Relation <");
738         
739         *term_tmp++ = '(';
740         for (i = 0; term_component[i]; )
741         {
742             int j = 0;
743
744             if (i)
745                 *term_tmp++ = '|';
746             while (j < i)
747                 string_rel_add_char (&term_tmp, term_component, &j);
748
749             *term_tmp++ = '[';
750
751             *term_tmp++ = '^';
752             string_rel_add_char (&term_tmp, term_component, &i);
753             *term_tmp++ = '-';
754
755             *term_tmp++ = ']';
756             *term_tmp++ = '.';
757             *term_tmp++ = '*';
758
759             if ((term_tmp - term_dict) > IT_MAX_WORD)
760                 break;
761         }
762         *term_tmp++ = ')';
763         *term_tmp = '\0';
764         break;
765     case 2:
766         if (!term_100 (zh->reg->zebra_maps, reg_type,
767                        term_sub, term_component,
768                        space_split, term_dst))
769             return 0;
770         yaz_log(log_level_rpn, "Relation <=");
771
772         *term_tmp++ = '(';
773         for (i = 0; term_component[i]; )
774         {
775             int j = 0;
776
777             while (j < i)
778                 string_rel_add_char (&term_tmp, term_component, &j);
779             *term_tmp++ = '[';
780
781             *term_tmp++ = '^';
782             string_rel_add_char (&term_tmp, term_component, &i);
783             *term_tmp++ = '-';
784
785             *term_tmp++ = ']';
786             *term_tmp++ = '.';
787             *term_tmp++ = '*';
788
789             *term_tmp++ = '|';
790
791             if ((term_tmp - term_dict) > IT_MAX_WORD)
792                 break;
793         }
794         for (i = 0; term_component[i]; )
795             string_rel_add_char (&term_tmp, term_component, &i);
796         *term_tmp++ = ')';
797         *term_tmp = '\0';
798         break;
799     case 5:
800         if (!term_100 (zh->reg->zebra_maps, reg_type,
801                        term_sub, term_component, space_split, term_dst))
802             return 0;
803         yaz_log(log_level_rpn, "Relation >");
804
805         *term_tmp++ = '(';
806         for (i = 0; term_component[i];)
807         {
808             int j = 0;
809
810             while (j < i)
811                 string_rel_add_char (&term_tmp, term_component, &j);
812             *term_tmp++ = '[';
813             
814             *term_tmp++ = '^';
815             *term_tmp++ = '-';
816             string_rel_add_char (&term_tmp, term_component, &i);
817
818             *term_tmp++ = ']';
819             *term_tmp++ = '.';
820             *term_tmp++ = '*';
821
822             *term_tmp++ = '|';
823
824             if ((term_tmp - term_dict) > IT_MAX_WORD)
825                 break;
826         }
827         for (i = 0; term_component[i];)
828             string_rel_add_char (&term_tmp, term_component, &i);
829         *term_tmp++ = '.';
830         *term_tmp++ = '+';
831         *term_tmp++ = ')';
832         *term_tmp = '\0';
833         break;
834     case 4:
835         if (!term_100 (zh->reg->zebra_maps, reg_type, term_sub,
836                        term_component, space_split, term_dst))
837             return 0;
838         yaz_log(log_level_rpn, "Relation >=");
839
840         *term_tmp++ = '(';
841         for (i = 0; term_component[i];)
842         {
843             int j = 0;
844
845             if (i)
846                 *term_tmp++ = '|';
847             while (j < i)
848                 string_rel_add_char (&term_tmp, term_component, &j);
849             *term_tmp++ = '[';
850
851             if (term_component[i+1])
852             {
853                 *term_tmp++ = '^';
854                 *term_tmp++ = '-';
855                 string_rel_add_char (&term_tmp, term_component, &i);
856             }
857             else
858             {
859                 string_rel_add_char (&term_tmp, term_component, &i);
860                 *term_tmp++ = '-';
861             }
862             *term_tmp++ = ']';
863             *term_tmp++ = '.';
864             *term_tmp++ = '*';
865
866             if ((term_tmp - term_dict) > IT_MAX_WORD)
867                 break;
868         }
869         *term_tmp++ = ')';
870         *term_tmp = '\0';
871         break;
872     case 3:
873     default:
874         yaz_log(log_level_rpn, "Relation =");
875         if (!term_100 (zh->reg->zebra_maps, reg_type, term_sub,
876                        term_component, space_split, term_dst))
877             return 0;
878         strcat(term_tmp, "(");
879         strcat(term_tmp, term_component);
880         strcat(term_tmp, ")");
881     }
882     return 1;
883 }
884
885 static int string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
886                         const char **term_sub, 
887                         oid_value attributeSet, NMEM stream,
888                         struct grep_info *grep_info,
889                         int reg_type, int complete_flag,
890                         int num_bases, char **basenames,
891                         char *term_dst, int xpath_use);
892
893 static RSET term_trunc(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
894                         const char **term_sub, 
895                         oid_value attributeSet, NMEM stream,
896                         struct grep_info *grep_info,
897                         int reg_type, int complete_flag,
898                         int num_bases, char **basenames,
899                         char *term_dst,
900                         const char *rank_type, int xpath_use,
901                         NMEM rset_nmem)
902 {
903     int r;
904     grep_info->isam_p_indx = 0;
905     r = string_term(zh, zapt, term_sub, attributeSet, stream, grep_info,
906                      reg_type, complete_flag, num_bases, basenames,
907                      term_dst, xpath_use);
908     if (r < 1)
909         return 0;
910     yaz_log(log_level_rpn, "term: %s", term_dst);
911     return rset_trunc(zh, grep_info->isam_p_buf,
912                        grep_info->isam_p_indx, term_dst,
913                        strlen(term_dst), rank_type, 1 /* preserve pos */,
914                        zapt->term->which, rset_nmem,
915                        key_it_ctrl,key_it_ctrl->scope);
916 }
917 static char *nmem_strdup_i(NMEM nmem, int v)
918 {
919     char val_str[64];
920     sprintf (val_str, "%d", v);
921     return nmem_strdup(nmem, val_str);
922 }
923
924 static int string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
925                        const char **term_sub, 
926                        oid_value attributeSet, NMEM stream,
927                        struct grep_info *grep_info,
928                        int reg_type, int complete_flag,
929                        int num_bases, char **basenames,
930                        char *term_dst, int xpath_use)
931 {
932     char term_dict[2*IT_MAX_WORD+4000];
933     int j, r, base_no;
934     AttrType truncation;
935     int truncation_value;
936     AttrType use;
937     int use_value;
938     const char *use_string = 0;
939     oid_value curAttributeSet = attributeSet;
940     const char *termp;
941     struct rpn_char_map_info rcmi;
942     int space_split = complete_flag ? 0 : 1;
943
944     int bases_ok = 0;     /* no of databases with OK attribute */
945     int errCode = 0;      /* err code (if any is not OK) */
946     char *errString = 0;  /* addinfo */
947
948     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
949     attr_init (&use, zapt, 1);
950     use_value = attr_find_ex (&use, &curAttributeSet, &use_string);
951     yaz_log(log_level_rpn, "string_term, use value %d", use_value);
952     attr_init (&truncation, zapt, 5);
953     truncation_value = attr_find (&truncation, NULL);
954     yaz_log(log_level_rpn, "truncation value %d", truncation_value);
955
956     if (use_value == -1)    /* no attribute - assumy "any" */
957         use_value = 1016;
958     for (base_no = 0; base_no < num_bases; base_no++)
959     {
960         int attr_ok = 0;
961         int regex_range = 0;
962         int init_pos = 0;
963         attent attp;
964         data1_local_attribute id_xpath_attr;
965         data1_local_attribute *local_attr;
966         int max_pos, prefix_len = 0;
967
968         termp = *term_sub;
969
970         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
971         {
972             zh->errCode = 109; /* Database unavailable */
973             zh->errString = basenames[base_no];
974             return -1;
975         }
976         if (xpath_use > 0 && use_value == -2) 
977         {
978             use_value = xpath_use;
979             attp.local_attributes = &id_xpath_attr;
980             attp.attset_ordinal = VAL_IDXPATH;
981             id_xpath_attr.next = 0;
982             id_xpath_attr.local = use_value;
983         }
984         else if (curAttributeSet == VAL_IDXPATH)
985         {
986             attp.local_attributes = &id_xpath_attr;
987             attp.attset_ordinal = VAL_IDXPATH;
988             id_xpath_attr.next = 0;
989             id_xpath_attr.local = use_value;
990         }
991         else
992         {
993             if ((r=att_getentbyatt (zh, &attp, curAttributeSet, use_value,
994                                             use_string)))
995             {
996                 yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
997                       curAttributeSet, use_value, r);
998                 if (r == -1)
999                 {
1000                     /* set was found, but value wasn't defined */
1001                     errCode = 114;
1002                     if (use_string)
1003                         errString = nmem_strdup(stream, use_string);
1004                     else
1005                         errString = nmem_strdup_i (stream, use_value);
1006                 }
1007                 else
1008                 {
1009                     int oid[OID_SIZE];
1010                     struct oident oident;
1011                     
1012                     oident.proto = PROTO_Z3950;
1013                     oident.oclass = CLASS_ATTSET;
1014                     oident.value = curAttributeSet;
1015                     oid_ent_to_oid (&oident, oid);
1016                     
1017                     errCode = 121;
1018                     errString = nmem_strdup (stream, oident.desc);
1019                 }
1020                 continue;
1021             }
1022         }
1023         for (local_attr = attp.local_attributes; local_attr;
1024              local_attr = local_attr->next)
1025         {
1026             int ord;
1027             char ord_buf[32];
1028             int i, ord_len;
1029             
1030             ord = zebraExplain_lookupSU (zh->reg->zei, attp.attset_ordinal,
1031                                          local_attr->local);
1032             if (ord < 0)
1033                 continue;
1034             if (prefix_len)
1035                 term_dict[prefix_len++] = '|';
1036             else
1037                 term_dict[prefix_len++] = '(';
1038             
1039             ord_len = key_SU_encode (ord, ord_buf);
1040             for (i = 0; i<ord_len; i++)
1041             {
1042                 term_dict[prefix_len++] = 1;
1043                 term_dict[prefix_len++] = ord_buf[i];
1044             }
1045         }
1046         if (!prefix_len)
1047         {
1048 #if 1
1049             bases_ok++;
1050 #else
1051             errCode = 114;
1052             errString = nmem_strdup_i(stream, use_value);
1053             continue;
1054 #endif
1055         }
1056         else
1057         {
1058             bases_ok++; /* this has OK attributes */
1059             attr_ok = 1;
1060         }
1061
1062         term_dict[prefix_len++] = ')';
1063         term_dict[prefix_len++] = 1;
1064         term_dict[prefix_len++] = reg_type;
1065         yaz_log(log_level_rpn, "reg_type = %d", term_dict[prefix_len-1]);
1066         term_dict[prefix_len] = '\0';
1067         j = prefix_len;
1068         switch (truncation_value)
1069         {
1070         case -1:         /* not specified */
1071         case 100:        /* do not truncate */
1072             if (!string_relation (zh, zapt, &termp, term_dict,
1073                                   attributeSet,
1074                                   reg_type, space_split, term_dst))
1075                 return 0;
1076             break;
1077         case 1:          /* right truncation */
1078             term_dict[j++] = '(';
1079             if (!term_100(zh->reg->zebra_maps, reg_type,
1080                           &termp, term_dict + j, space_split, term_dst))
1081                 return 0;
1082             strcat(term_dict, ".*)");
1083             break;
1084         case 2:          /* keft truncation */
1085             term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1086             if (!term_100(zh->reg->zebra_maps, reg_type,
1087                           &termp, term_dict + j, space_split, term_dst))
1088                 return 0;
1089             strcat(term_dict, ")");
1090             break;
1091         case 3:          /* left&right truncation */
1092             term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1093             if (!term_100(zh->reg->zebra_maps, reg_type,
1094                           &termp, term_dict + j, space_split, term_dst))
1095                 return 0;
1096             strcat(term_dict, ".*)");
1097             break;
1098         case 101:        /* process # in term */
1099             term_dict[j++] = '(';
1100             if (!term_101(zh->reg->zebra_maps, reg_type,
1101                           &termp, term_dict + j, space_split, term_dst))
1102                 return 0;
1103             strcat(term_dict, ")");
1104             break;
1105         case 102:        /* Regexp-1 */
1106             term_dict[j++] = '(';
1107             if (!term_102(zh->reg->zebra_maps, reg_type,
1108                           &termp, term_dict + j, space_split, term_dst))
1109                 return 0;
1110             strcat(term_dict, ")");
1111             break;
1112         case 103:       /* Regexp-2 */
1113             r = 1;
1114             term_dict[j++] = '(';
1115             init_pos = 2;
1116             if (!term_103 (zh->reg->zebra_maps, reg_type,
1117                            &termp, term_dict + j, &regex_range,
1118                            space_split, term_dst))
1119                 return 0;
1120             strcat(term_dict, ")");
1121         case 104:        /* process # and ! in term */
1122             term_dict[j++] = '(';
1123             if (!term_104 (zh->reg->zebra_maps, reg_type,
1124                            &termp, term_dict + j, space_split, term_dst))
1125                 return 0;
1126             strcat(term_dict, ")");
1127             break;
1128         case 105:        /* process * and ! in term */
1129             term_dict[j++] = '(';
1130             if (!term_105 (zh->reg->zebra_maps, reg_type,
1131                            &termp, term_dict + j, space_split, term_dst, 1))
1132                 return 0;
1133             strcat(term_dict, ")");
1134             break;
1135         case 106:        /* process * and ! in term */
1136             term_dict[j++] = '(';
1137             if (!term_105 (zh->reg->zebra_maps, reg_type,
1138                            &termp, term_dict + j, space_split, term_dst, 0))
1139                 return 0;
1140             strcat(term_dict, ")");
1141             break;
1142         default:
1143             zh->errCode = 120;
1144             zh->errString = nmem_strdup_i(stream, truncation_value);
1145             return -1;
1146         }
1147         if (attr_ok)
1148         {
1149             yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_dict+prefix_len);
1150             r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
1151                                  grep_info, &max_pos, init_pos,
1152                                  grep_handle);
1153             if (r)
1154                 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1155         }
1156     }
1157     if (!bases_ok)
1158     {
1159         zh->errCode = errCode;
1160         zh->errString = errString;
1161         return -1;
1162     }
1163     *term_sub = termp;
1164     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1165     return 1;
1166 }
1167
1168
1169 /* convert APT search term to UTF8 */
1170 static int zapt_term_to_utf8 (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1171                               char *termz)
1172 {
1173     size_t sizez;
1174     Z_Term *term = zapt->term;
1175
1176     switch (term->which)
1177     {
1178     case Z_Term_general:
1179         if (zh->iconv_to_utf8 != 0)
1180         {
1181             char *inbuf = term->u.general->buf;
1182             size_t inleft = term->u.general->len;
1183             char *outbuf = termz;
1184             size_t outleft = IT_MAX_WORD-1;
1185             size_t ret;
1186
1187             ret = yaz_iconv(zh->iconv_to_utf8, &inbuf, &inleft,
1188                         &outbuf, &outleft);
1189             if (ret == (size_t)(-1))
1190             {
1191                 ret = yaz_iconv(zh->iconv_to_utf8, 0, 0, 0, 0);
1192                 zh->errCode = 125;
1193                 return -1;
1194             }
1195             *outbuf = 0;
1196         }
1197         else
1198         {
1199             sizez = term->u.general->len;
1200             if (sizez > IT_MAX_WORD-1)
1201                 sizez = IT_MAX_WORD-1;
1202             memcpy (termz, term->u.general->buf, sizez);
1203             termz[sizez] = '\0';
1204         }
1205         break;
1206     case Z_Term_characterString:
1207         sizez = strlen(term->u.characterString);
1208         if (sizez > IT_MAX_WORD-1)
1209             sizez = IT_MAX_WORD-1;
1210         memcpy (termz, term->u.characterString, sizez);
1211         termz[sizez] = '\0';
1212         break;
1213     default:
1214         zh->errCode = 124;
1215         return -1;
1216     }
1217     return 0;
1218 }
1219
1220 /* convert APT SCAN term to internal cmap */
1221 static int trans_scan_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1222                             char *termz, int reg_type)
1223 {
1224     char termz0[IT_MAX_WORD];
1225
1226     if (zapt_term_to_utf8(zh, zapt, termz0))
1227         return -1;    /* error */
1228     else
1229     {
1230         const char **map;
1231         const char *cp = (const char *) termz0;
1232         const char *cp_end = cp + strlen(cp);
1233         const char *src;
1234         int i = 0;
1235         const char *space_map = NULL;
1236         int len;
1237             
1238         while ((len = (cp_end - cp)) > 0)
1239         {
1240             map = zebra_maps_input (zh->reg->zebra_maps, reg_type, &cp, len, 0);
1241             if (**map == *CHR_SPACE)
1242                 space_map = *map;
1243             else
1244             {
1245                 if (i && space_map)
1246                     for (src = space_map; *src; src++)
1247                         termz[i++] = *src;
1248                 space_map = NULL;
1249                 for (src = *map; *src; src++)
1250                     termz[i++] = *src;
1251             }
1252         }
1253         termz[i] = '\0';
1254     }
1255     return 0;
1256 }
1257
1258 char *normalize_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1259                      const char *termz, NMEM stream, unsigned reg_id)
1260 {
1261     WRBUF wrbuf = 0;
1262     AttrType truncation;
1263     int truncation_value;
1264     char *ex_list = 0;
1265
1266     attr_init (&truncation, zapt, 5);
1267     truncation_value = attr_find (&truncation, NULL);
1268
1269     switch (truncation_value)
1270     {
1271     default:
1272         ex_list = "";
1273         break;
1274     case 101:
1275         ex_list = "#";
1276         break;
1277     case 102:
1278     case 103:
1279         ex_list = 0;
1280         break;
1281     case 104:
1282         ex_list = "!#";
1283         break;
1284     case 105:
1285         ex_list = "!*";
1286         break;
1287     }
1288     if (ex_list)
1289         wrbuf = zebra_replace(zh->reg->zebra_maps, reg_id, ex_list,
1290                               termz, strlen(termz));
1291     if (!wrbuf)
1292         return nmem_strdup(stream, termz);
1293     else
1294     {
1295         char *buf = (char*) nmem_malloc(stream, wrbuf_len(wrbuf)+1);
1296         memcpy (buf, wrbuf_buf(wrbuf), wrbuf_len(wrbuf));
1297         buf[wrbuf_len(wrbuf)] = '\0';
1298         return buf;
1299     }
1300 }
1301
1302 static void grep_info_delete (struct grep_info *grep_info)
1303 {
1304 #ifdef TERM_COUNT
1305     xfree(grep_info->term_no);
1306 #endif
1307     xfree (grep_info->isam_p_buf);
1308 }
1309
1310 static int grep_info_prepare (ZebraHandle zh,
1311                               Z_AttributesPlusTerm *zapt,
1312                               struct grep_info *grep_info,
1313                               int reg_type,
1314                               NMEM stream)
1315 {
1316     AttrType termset;
1317     int termset_value_numeric;
1318     const char *termset_value_string;
1319
1320 #ifdef TERM_COUNT
1321     grep_info->term_no = 0;
1322 #endif
1323     grep_info->isam_p_size = 0;
1324     grep_info->isam_p_buf = NULL;
1325     grep_info->zh = zh;
1326     grep_info->reg_type = reg_type;
1327     grep_info->termset = 0;
1328
1329     if (!zapt)
1330         return 0;
1331     attr_init (&termset, zapt, 8);
1332     termset_value_numeric =
1333         attr_find_ex (&termset, NULL, &termset_value_string);
1334     if (termset_value_numeric != -1)
1335     {
1336         char resname[32];
1337         const char *termset_name = 0;
1338         if (termset_value_numeric != -2)
1339         {
1340     
1341             sprintf (resname, "%d", termset_value_numeric);
1342             termset_name = resname;
1343         }
1344         else
1345             termset_name = termset_value_string;
1346         yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1347         grep_info->termset = resultSetAdd (zh, termset_name, 1);
1348         if (!grep_info->termset)
1349         {
1350             zh->errCode = 128;
1351             zh->errString = nmem_strdup (stream, termset_name);
1352             return -1;
1353         }
1354     }
1355     return 0;
1356 }
1357                                
1358
1359 static RSET rpn_search_APT_phrase (ZebraHandle zh,
1360                                    Z_AttributesPlusTerm *zapt,
1361                                    const char *termz_org,
1362                                    oid_value attributeSet,
1363                                    NMEM stream,
1364                                    int reg_type, int complete_flag,
1365                                    const char *rank_type, int xpath_use,
1366                                    int num_bases, char **basenames, 
1367                                    NMEM rset_nmem)
1368 {
1369     char term_dst[IT_MAX_WORD+1];
1370     RSET rset[TERM_LIST_LENGTH_MAX], result;
1371     size_t rset_no = 0;
1372     struct grep_info grep_info;
1373     char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type);
1374     const char *termp = termz;
1375
1376     *term_dst = 0;
1377     if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream))
1378         return 0;
1379     for (; rset_no < sizeof(rset)/sizeof(*rset); rset_no++)
1380     { 
1381         yaz_log(log_level_rpn, "APT_phrase termp=%s", termp);
1382         rset[rset_no] = term_trunc(zh, zapt, &termp, attributeSet,
1383                                     stream, &grep_info,
1384                                     reg_type, complete_flag,
1385                                     num_bases, basenames,
1386                                     term_dst, rank_type,
1387                                     xpath_use,rset_nmem);
1388         if (!rset[rset_no])
1389             break;
1390     }
1391     grep_info_delete (&grep_info);
1392     if (rset_no == 0)
1393         return rsnull_create (rset_nmem,key_it_ctrl); 
1394     else if (rset_no == 1)
1395         return (rset[0]);
1396     else
1397         result = rsprox_create( rset_nmem, key_it_ctrl, key_it_ctrl->scope,
1398                        rset_no, rset,
1399                        1 /* ordered */, 0 /* exclusion */,
1400                        3 /* relation */, 1 /* distance */);
1401     return result;
1402 }
1403
1404 static RSET rpn_search_APT_or_list (ZebraHandle zh,
1405                                     Z_AttributesPlusTerm *zapt,
1406                                     const char *termz_org,
1407                                     oid_value attributeSet,
1408                                     NMEM stream,
1409                                     int reg_type, int complete_flag,
1410                                     const char *rank_type,
1411                                     int xpath_use,
1412                                     int num_bases, char **basenames,
1413                                     NMEM rset_nmem)
1414 {
1415     char term_dst[IT_MAX_WORD+1];
1416     RSET rset[TERM_LIST_LENGTH_MAX];
1417     size_t rset_no = 0;
1418     struct grep_info grep_info;
1419     char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type);
1420     const char *termp = termz;
1421
1422     if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream))
1423         return 0;
1424     for (; rset_no < sizeof(rset)/sizeof(*rset); rset_no++)
1425     { 
1426         yaz_log(log_level_rpn, "APT_or_list termp=%s", termp);
1427         rset[rset_no] = term_trunc(zh, zapt, &termp, attributeSet,
1428                                     stream, &grep_info,
1429                                     reg_type, complete_flag,
1430                                     num_bases, basenames,
1431                                     term_dst, rank_type,
1432                                     xpath_use,rset_nmem);
1433         if (!rset[rset_no])
1434             break;
1435     }
1436     grep_info_delete (&grep_info);
1437     if (rset_no == 0)
1438         return rsnull_create (rset_nmem,key_it_ctrl);  
1439     return rsmultior_create(rset_nmem, key_it_ctrl,key_it_ctrl->scope,
1440                             rset_no, rset);
1441 }
1442
1443 static RSET rpn_search_APT_and_list (ZebraHandle zh,
1444                                      Z_AttributesPlusTerm *zapt,
1445                                      const char *termz_org,
1446                                      oid_value attributeSet,
1447                                      NMEM stream,
1448                                      int reg_type, int complete_flag,
1449                                      const char *rank_type, 
1450                                      int xpath_use,
1451                                      int num_bases, char **basenames,
1452                                      NMEM rset_nmem)
1453 {
1454     char term_dst[IT_MAX_WORD+1];
1455     RSET rset[TERM_LIST_LENGTH_MAX];
1456     size_t rset_no = 0;
1457     struct grep_info grep_info;
1458     char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type);
1459     const char *termp = termz;
1460
1461     if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream))
1462         return 0;
1463     for (; rset_no < sizeof(rset)/sizeof(*rset); rset_no++)
1464     { 
1465         yaz_log(log_level_rpn, "APT_and_list termp=%s", termp);
1466         rset[rset_no] = term_trunc(zh, zapt, &termp, attributeSet,
1467                                     stream, &grep_info,
1468                                     reg_type, complete_flag,
1469                                     num_bases, basenames,
1470                                     term_dst, rank_type,
1471                                     xpath_use, rset_nmem);
1472         if (!rset[rset_no])
1473             break;
1474     }
1475     grep_info_delete (&grep_info);
1476     if (rset_no == 0)
1477         return rsnull_create (rset_nmem,key_it_ctrl); 
1478
1479     return rsmultiand_create( rset_nmem, key_it_ctrl, key_it_ctrl->scope,
1480                               rset_no, rset);
1481 }
1482
1483 static int numeric_relation (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1484                              const char **term_sub,
1485                              char *term_dict,
1486                              oid_value attributeSet,
1487                              struct grep_info *grep_info,
1488                              int *max_pos,
1489                              int reg_type,
1490                              char *term_dst)
1491 {
1492     AttrType relation;
1493     int relation_value;
1494     int term_value;
1495     int r;
1496     char *term_tmp = term_dict + strlen(term_dict);
1497
1498     attr_init (&relation, zapt, 2);
1499     relation_value = attr_find (&relation, NULL);
1500
1501     yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1502
1503     if (!term_100 (zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1504                    term_dst))
1505         return 0;
1506     term_value = atoi (term_tmp);
1507     switch (relation_value)
1508     {
1509     case 1:
1510         yaz_log(log_level_rpn, "Relation <");
1511         gen_regular_rel (term_tmp, term_value-1, 1);
1512         break;
1513     case 2:
1514         yaz_log(log_level_rpn, "Relation <=");
1515         gen_regular_rel (term_tmp, term_value, 1);
1516         break;
1517     case 4:
1518         yaz_log(log_level_rpn, "Relation >=");
1519         gen_regular_rel (term_tmp, term_value, 0);
1520         break;
1521     case 5:
1522         yaz_log(log_level_rpn, "Relation >");
1523         gen_regular_rel (term_tmp, term_value+1, 0);
1524         break;
1525     case 3:
1526     default:
1527         yaz_log(log_level_rpn, "Relation =");
1528         sprintf (term_tmp, "(0*%d)", term_value);
1529     }
1530     yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_tmp);
1531     r = dict_lookup_grep(zh->reg->dict, term_dict, 0, grep_info, max_pos,
1532                           0, grep_handle);
1533     if (r)
1534         yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel=gt: %d", r);
1535     yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1536     return 1;
1537 }
1538
1539 static int numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1540                          const char **term_sub, 
1541                          oid_value attributeSet, struct grep_info *grep_info,
1542                          int reg_type, int complete_flag,
1543                          int num_bases, char **basenames,
1544                          char *term_dst, int xpath_use, NMEM stream)
1545 {
1546     char term_dict[2*IT_MAX_WORD+2];
1547     int r, base_no;
1548     AttrType use;
1549     int use_value;
1550     const char *use_string = 0;
1551     oid_value curAttributeSet = attributeSet;
1552     const char *termp;
1553     struct rpn_char_map_info rcmi;
1554
1555     int bases_ok = 0;     /* no of databases with OK attribute */
1556     int errCode = 0;      /* err code (if any is not OK) */
1557     char *errString = 0;  /* addinfo */
1558
1559     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1560     attr_init (&use, zapt, 1);
1561     use_value = attr_find_ex (&use, &curAttributeSet, &use_string);
1562
1563     if (use_value == -1)
1564         use_value = 1016;
1565
1566     for (base_no = 0; base_no < num_bases; base_no++)
1567     {
1568         attent attp;
1569         data1_local_attribute id_xpath_attr;
1570         data1_local_attribute *local_attr;
1571         int max_pos, prefix_len = 0;
1572
1573         termp = *term_sub;
1574         if (use_value == -2)  /* string attribute (assume IDXPATH/any) */
1575         {
1576             use_value = xpath_use;
1577             attp.local_attributes = &id_xpath_attr;
1578             attp.attset_ordinal = VAL_IDXPATH;
1579             id_xpath_attr.next = 0;
1580             id_xpath_attr.local = use_value;
1581         }
1582         else if (curAttributeSet == VAL_IDXPATH)
1583         {
1584             attp.local_attributes = &id_xpath_attr;
1585             attp.attset_ordinal = VAL_IDXPATH;
1586             id_xpath_attr.next = 0;
1587             id_xpath_attr.local = use_value;
1588         }
1589         else
1590         {
1591             if ((r=att_getentbyatt (zh, &attp, curAttributeSet, use_value,
1592                                             use_string)))
1593             {
1594                 yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
1595                       curAttributeSet, use_value, r);
1596                 if (r == -1)
1597                 {
1598                     errString = nmem_strdup_i(stream, use_value);
1599                     errCode = 114;
1600                 }
1601                 else
1602                     errCode = 121;
1603                 continue;
1604             }
1605         }
1606         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1607         {
1608             zh->errCode = 109; /* Database unavailable */
1609             zh->errString = basenames[base_no];
1610             return -1;
1611         }
1612         for (local_attr = attp.local_attributes; local_attr;
1613              local_attr = local_attr->next)
1614         {
1615             int ord;
1616             char ord_buf[32];
1617             int i, ord_len;
1618
1619             ord = zebraExplain_lookupSU (zh->reg->zei, attp.attset_ordinal,
1620                                           local_attr->local);
1621             if (ord < 0)
1622                 continue;
1623             if (prefix_len)
1624                 term_dict[prefix_len++] = '|';
1625             else
1626                 term_dict[prefix_len++] = '(';
1627
1628             ord_len = key_SU_encode (ord, ord_buf);
1629             for (i = 0; i<ord_len; i++)
1630             {
1631                 term_dict[prefix_len++] = 1;
1632                 term_dict[prefix_len++] = ord_buf[i];
1633             }
1634         }
1635         if (!prefix_len)
1636         {
1637             errCode = 114;
1638             errString = nmem_strdup_i(stream, use_value);
1639             continue;
1640         }
1641         bases_ok++;
1642         term_dict[prefix_len++] = ')';        
1643         term_dict[prefix_len++] = 1;
1644         term_dict[prefix_len++] = reg_type;
1645         yaz_log(YLOG_DEBUG, "reg_type = %d", term_dict[prefix_len-1]);
1646         term_dict[prefix_len] = '\0';
1647         if (!numeric_relation (zh, zapt, &termp, term_dict,
1648                                attributeSet, grep_info, &max_pos, reg_type,
1649                                term_dst))
1650             return 0;
1651     }
1652     if (!bases_ok)
1653     {
1654         zh->errCode = errCode;
1655         zh->errString = errString;
1656         return -1;
1657     }
1658     *term_sub = termp;
1659     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1660     return 1;
1661 }
1662
1663 static RSET rpn_search_APT_numeric (ZebraHandle zh,
1664                                     Z_AttributesPlusTerm *zapt,
1665                                     const char *termz,
1666                                     oid_value attributeSet,
1667                                     NMEM stream,
1668                                     int reg_type, int complete_flag,
1669                                     const char *rank_type, int xpath_use,
1670                                     int num_bases, char **basenames,
1671                                     NMEM rset_nmem)
1672 {
1673     char term_dst[IT_MAX_WORD+1];
1674     const char *termp = termz;
1675     RSET rset[TERM_LIST_LENGTH_MAX];
1676     int  r;
1677     size_t rset_no = 0;
1678     struct grep_info grep_info;
1679
1680     yaz_log(log_level_rpn, "APT_numeric t='%s'",termz);
1681     if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream))
1682         return 0;
1683     for (; rset_no < sizeof(rset)/sizeof(*rset); rset_no++)
1684     { 
1685         yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1686         grep_info.isam_p_indx = 0;
1687         r = numeric_term(zh, zapt, &termp, attributeSet, &grep_info,
1688                           reg_type, complete_flag, num_bases, basenames,
1689                           term_dst, xpath_use,
1690                           stream);
1691         if (r < 1)
1692             break;
1693         yaz_log(YLOG_DEBUG, "term: %s", term_dst);
1694         rset[rset_no] = rset_trunc(zh, grep_info.isam_p_buf,
1695                                     grep_info.isam_p_indx, term_dst,
1696                                     strlen(term_dst), rank_type,
1697                                     0 /* preserve position */,
1698                                     zapt->term->which, rset_nmem, 
1699                                     key_it_ctrl,key_it_ctrl->scope);
1700         if (!rset[rset_no])
1701             break;
1702     }
1703     grep_info_delete (&grep_info);
1704     if (rset_no == 0)
1705         return rsnull_create (rset_nmem,key_it_ctrl);
1706     if (rset_no == 1)
1707         return rset[0];
1708     return rsmultiand_create(rset_nmem,key_it_ctrl,key_it_ctrl->scope,
1709                rset_no, rset);
1710 }
1711
1712 static RSET rpn_search_APT_local (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1713                                   const char *termz,
1714                                   oid_value attributeSet,
1715                                   NMEM stream,
1716                                   const char *rank_type, NMEM rset_nmem)
1717 {
1718     RSET result;
1719     RSFD rsfd;
1720     struct it_key key;
1721     int sys;
1722     result = rstemp_create( rset_nmem,key_it_ctrl,key_it_ctrl->scope,
1723                      res_get (zh->res, "setTmpDir"),0 );
1724     rsfd = rset_open (result, RSETF_WRITE);
1725
1726     sys = atoi(termz);
1727     if (sys <= 0)
1728         sys = 1;
1729     key.mem[0] = sys;
1730     key.mem[1] = 1;
1731     key.len = 2;
1732     rset_write (rsfd, &key);
1733     rset_close (rsfd);
1734     return result;
1735 }
1736
1737 static RSET rpn_sort_spec (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1738                            oid_value attributeSet, NMEM stream,
1739                            Z_SortKeySpecList *sort_sequence,
1740                            const char *rank_type)
1741 {
1742     int i;
1743     int sort_relation_value;
1744     AttrType sort_relation_type;
1745     int use_value;
1746     AttrType use_type;
1747     Z_SortKeySpec *sks;
1748     Z_SortKey *sk;
1749     Z_AttributeElement *ae;
1750     int oid[OID_SIZE];
1751     oident oe;
1752     char termz[20];
1753     
1754     attr_init (&sort_relation_type, zapt, 7);
1755     sort_relation_value = attr_find (&sort_relation_type, &attributeSet);
1756
1757     attr_init (&use_type, zapt, 1);
1758     use_value = attr_find (&use_type, &attributeSet);
1759
1760     if (!sort_sequence->specs)
1761     {
1762         sort_sequence->num_specs = 10;
1763         sort_sequence->specs = (Z_SortKeySpec **)
1764             nmem_malloc(stream, sort_sequence->num_specs *
1765                          sizeof(*sort_sequence->specs));
1766         for (i = 0; i<sort_sequence->num_specs; i++)
1767             sort_sequence->specs[i] = 0;
1768     }
1769     if (zapt->term->which != Z_Term_general)
1770         i = 0;
1771     else
1772         i = atoi_n ((char *) zapt->term->u.general->buf,
1773                     zapt->term->u.general->len);
1774     if (i >= sort_sequence->num_specs)
1775         i = 0;
1776     sprintf (termz, "%d", i);
1777
1778     oe.proto = PROTO_Z3950;
1779     oe.oclass = CLASS_ATTSET;
1780     oe.value = attributeSet;
1781     if (!oid_ent_to_oid (&oe, oid))
1782         return 0;
1783
1784     sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1785     sks->sortElement = (Z_SortElement *)
1786         nmem_malloc(stream, sizeof(*sks->sortElement));
1787     sks->sortElement->which = Z_SortElement_generic;
1788     sk = sks->sortElement->u.generic = (Z_SortKey *)
1789         nmem_malloc(stream, sizeof(*sk));
1790     sk->which = Z_SortKey_sortAttributes;
1791     sk->u.sortAttributes = (Z_SortAttributes *)
1792         nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
1793
1794     sk->u.sortAttributes->id = oid;
1795     sk->u.sortAttributes->list = (Z_AttributeList *)
1796         nmem_malloc(stream, sizeof(*sk->u.sortAttributes->list));
1797     sk->u.sortAttributes->list->num_attributes = 1;
1798     sk->u.sortAttributes->list->attributes = (Z_AttributeElement **)
1799         nmem_malloc(stream, sizeof(*sk->u.sortAttributes->list->attributes));
1800     ae = *sk->u.sortAttributes->list->attributes = (Z_AttributeElement *)
1801         nmem_malloc(stream, sizeof(**sk->u.sortAttributes->list->attributes));
1802     ae->attributeSet = 0;
1803     ae->attributeType = (int *)
1804         nmem_malloc(stream, sizeof(*ae->attributeType));
1805     *ae->attributeType = 1;
1806     ae->which = Z_AttributeValue_numeric;
1807     ae->value.numeric = (int *)
1808         nmem_malloc(stream, sizeof(*ae->value.numeric));
1809     *ae->value.numeric = use_value;
1810
1811     sks->sortRelation = (int *)
1812         nmem_malloc(stream, sizeof(*sks->sortRelation));
1813     if (sort_relation_value == 1)
1814         *sks->sortRelation = Z_SortKeySpec_ascending;
1815     else if (sort_relation_value == 2)
1816         *sks->sortRelation = Z_SortKeySpec_descending;
1817     else 
1818         *sks->sortRelation = Z_SortKeySpec_ascending;
1819
1820     sks->caseSensitivity = (int *)
1821         nmem_malloc(stream, sizeof(*sks->caseSensitivity));
1822     *sks->caseSensitivity = 0;
1823
1824     sks->which = Z_SortKeySpec_null;
1825     sks->u.null = odr_nullval ();
1826     sort_sequence->specs[i] = sks;
1827     return rsnull_create (NULL,key_it_ctrl);
1828         /* FIXME - nmem?? */
1829 }
1830
1831
1832 static int parse_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1833                        oid_value attributeSet,
1834                        struct xpath_location_step *xpath, int max, NMEM mem)
1835 {
1836     oid_value curAttributeSet = attributeSet;
1837     AttrType use;
1838     const char *use_string = 0;
1839     
1840     attr_init (&use, zapt, 1);
1841     attr_find_ex (&use, &curAttributeSet, &use_string);
1842
1843     if (!use_string || *use_string != '/')
1844         return -1;
1845
1846     return zebra_parse_xpath_str(use_string, xpath, max, mem);
1847 }
1848  
1849                
1850
1851 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
1852                         int reg_type, const char *term, int use,
1853                         oid_value curAttributeSet, NMEM rset_nmem)
1854 {
1855     RSET rset;
1856     struct grep_info grep_info;
1857     char term_dict[2048];
1858     char ord_buf[32];
1859     int prefix_len = 0;
1860     int ord = zebraExplain_lookupSU (zh->reg->zei, curAttributeSet, use);
1861     int ord_len, i, r, max_pos;
1862     int term_type = Z_Term_characterString;
1863     const char *flags = "void";
1864
1865     if (grep_info_prepare (zh, 0 /* zapt */, &grep_info, '0', stream))
1866         return rsnull_create (rset_nmem,key_it_ctrl);
1867
1868     if (ord < 0)
1869         return rsnull_create (rset_nmem,key_it_ctrl);
1870     if (prefix_len)
1871         term_dict[prefix_len++] = '|';
1872     else
1873         term_dict[prefix_len++] = '(';
1874     
1875     ord_len = key_SU_encode (ord, ord_buf);
1876     for (i = 0; i<ord_len; i++)
1877     {
1878         term_dict[prefix_len++] = 1;
1879         term_dict[prefix_len++] = ord_buf[i];
1880     }
1881     term_dict[prefix_len++] = ')';
1882     term_dict[prefix_len++] = 1;
1883     term_dict[prefix_len++] = reg_type;
1884     
1885     strcpy(term_dict+prefix_len, term);
1886     
1887     grep_info.isam_p_indx = 0;
1888     r = dict_lookup_grep(zh->reg->dict, term_dict, 0,
1889                           &grep_info, &max_pos, 0, grep_handle);
1890     yaz_log (YLOG_LOG, "%s %d positions", term,
1891              grep_info.isam_p_indx);
1892     rset = rset_trunc(zh, grep_info.isam_p_buf,
1893                        grep_info.isam_p_indx, term, strlen(term),
1894                        flags, 1, term_type,rset_nmem,
1895                        key_it_ctrl, key_it_ctrl->scope);
1896     grep_info_delete (&grep_info);
1897     return rset;
1898 }
1899
1900 static RSET rpn_search_xpath (ZebraHandle zh,
1901                               oid_value attributeSet,
1902                               int num_bases, char **basenames,
1903                               NMEM stream, const char *rank_type, RSET rset,
1904                               int xpath_len, struct xpath_location_step *xpath,
1905                               NMEM rset_nmem)
1906 {
1907     oid_value curAttributeSet = attributeSet;
1908     int base_no;
1909     int i;
1910
1911     if (xpath_len < 0)
1912         return rset;
1913
1914     yaz_log (YLOG_DEBUG, "xpath len=%d", xpath_len);
1915     for (i = 0; i<xpath_len; i++)
1916     {
1917         yaz_log (log_level_rpn, "XPATH %d %s", i, xpath[i].part);
1918
1919     }
1920
1921     curAttributeSet = VAL_IDXPATH;
1922
1923     /*
1924       //a    ->    a/.*
1925       //a/b  ->    b/a/.*
1926       /a     ->    a/
1927       /a/b   ->    b/a/
1928
1929       /      ->    none
1930
1931    a[@attr=value]/b[@other=othervalue]
1932
1933  /e/@a val      range(e/,range(@a,freetext(w,1015,val),@a),e/)
1934  /a/b val       range(b/a/,freetext(w,1016,val),b/a/)
1935  /a/b/@c val    range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
1936  /a/b[@c=y] val range(b/a/,freetext(w,1016,val),b/a/,@c=y)
1937  /a[@c=y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c=y)
1938  /a[@c=x]/b[@c=y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c=y),a/,@c=x)
1939       
1940     */
1941
1942     dict_grep_cmap (zh->reg->dict, 0, 0);
1943
1944     for (base_no = 0; base_no < num_bases; base_no++)
1945     {
1946         int level = xpath_len;
1947         int first_path = 1;
1948         
1949         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1950         {
1951             zh->errCode = 109; /* Database unavailable */
1952             zh->errString = basenames[base_no];
1953             return rset;
1954         }
1955         while (--level >= 0)
1956         {
1957             char xpath_rev[128];
1958             int i, len;
1959             RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
1960
1961             *xpath_rev = 0;
1962             len = 0;
1963             for (i = level; i >= 1; --i)
1964             {
1965                 const char *cp = xpath[i].part;
1966                 if (*cp)
1967                 {
1968                     for (;*cp; cp++)
1969                         if (*cp == '*')
1970                         {
1971                             memcpy (xpath_rev + len, "[^/]*", 5);
1972                             len += 5;
1973                         }
1974                         else if (*cp == ' ')
1975                         {
1976
1977                             xpath_rev[len++] = 1;
1978                             xpath_rev[len++] = ' ';
1979                         }
1980
1981                         else
1982                             xpath_rev[len++] = *cp;
1983                     xpath_rev[len++] = '/';
1984                 }
1985                 else if (i == 1)  /* // case */
1986                 {
1987                     xpath_rev[len++] = '.';
1988                     xpath_rev[len++] = '*';
1989                 }
1990             }
1991             xpath_rev[len] = 0;
1992
1993             if (xpath[level].predicate &&
1994                 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
1995                 xpath[level].predicate->u.relation.name[0])
1996             {
1997                 WRBUF wbuf = wrbuf_alloc();
1998                 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
1999                 if (xpath[level].predicate->u.relation.value)
2000                 {
2001                     const char *cp = xpath[level].predicate->u.relation.value;
2002                     wrbuf_putc(wbuf, '=');
2003                     
2004                     while (*cp)
2005                     {
2006                         if (strchr(REGEX_CHARS, *cp))
2007                             wrbuf_putc(wbuf, '\\');
2008                         wrbuf_putc(wbuf, *cp);
2009                         cp++;
2010                     }
2011                 }
2012                 wrbuf_puts(wbuf, "");
2013                 rset_attr = xpath_trunc(
2014                     zh, stream, '0', wrbuf_buf(wbuf), 3, 
2015                     curAttributeSet,rset_nmem);
2016                 wrbuf_free(wbuf, 1);
2017             } 
2018             else 
2019             {
2020                 if (!first_path)
2021                     continue;
2022             }
2023             yaz_log (log_level_rpn, "xpath_rev (%d) = %s", level, xpath_rev);
2024             if (strlen(xpath_rev))
2025             {
2026                 rset_start_tag = xpath_trunc(zh, stream, '0', 
2027                         xpath_rev, 1, curAttributeSet, rset_nmem);
2028             
2029                 rset_end_tag = xpath_trunc(zh, stream, '0', 
2030                         xpath_rev, 2, curAttributeSet, rset_nmem);
2031
2032                 /*
2033                 parms.key_size = sizeof(struct it_key);
2034                 parms.cmp = key_compare_it;
2035                 parms.rset_l = rset_start_tag;
2036                 parms.rset_m = rset;
2037                 parms.rset_r = rset_end_tag;
2038                 parms.rset_attr = rset_attr;
2039                 parms.printer = key_print_it;
2040                 rset = rset_create (rset_kind_between, &parms);
2041                 */
2042                 rset=rsbetween_create( rset_nmem,key_it_ctrl,key_it_ctrl->scope,
2043                         rset_start_tag, rset, rset_end_tag, rset_attr);
2044             }
2045             first_path = 0;
2046         }
2047     }
2048
2049     return rset;
2050 }
2051
2052
2053
2054 static RSET rpn_search_APT (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2055                             oid_value attributeSet, NMEM stream,
2056                             Z_SortKeySpecList *sort_sequence,
2057                             int num_bases, char **basenames, 
2058                             NMEM rset_nmem)
2059 {
2060     unsigned reg_id;
2061     char *search_type = NULL;
2062     char rank_type[128];
2063     int complete_flag;
2064     int sort_flag;
2065     char termz[IT_MAX_WORD+1];
2066     RSET rset = 0;
2067     int xpath_len;
2068     int xpath_use = 0;
2069     struct xpath_location_step xpath[10];
2070
2071     if (!log_level_set)
2072     {
2073         log_level_rpn = yaz_log_module_level("rpn");
2074         log_level_set=1;
2075     }
2076     zebra_maps_attr (zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2077                      rank_type, &complete_flag, &sort_flag);
2078     
2079     yaz_log(YLOG_DEBUG, "reg_id=%c", reg_id);
2080     yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2081     yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2082     yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2083
2084     if (zapt_term_to_utf8(zh, zapt, termz))
2085         return 0;
2086
2087     if (sort_flag)
2088         return rpn_sort_spec (zh, zapt, attributeSet, stream, sort_sequence,
2089                               rank_type);
2090     xpath_len = parse_xpath(zh, zapt, attributeSet, xpath, 10, stream);
2091     if (xpath_len >= 0)
2092     {
2093         xpath_use = 1016;
2094         if (xpath[xpath_len-1].part[0] == '@')
2095             xpath_use = 1015;
2096     }
2097
2098     if (!strcmp (search_type, "phrase"))
2099     {
2100         rset = rpn_search_APT_phrase (zh, zapt, termz, attributeSet, stream,
2101                                       reg_id, complete_flag, rank_type,
2102                                       xpath_use,
2103                                       num_bases, basenames, rset_nmem);
2104     }
2105     else if (!strcmp (search_type, "and-list"))
2106     {
2107         rset = rpn_search_APT_and_list (zh, zapt, termz, attributeSet, stream,
2108                                         reg_id, complete_flag, rank_type,
2109                                         xpath_use,
2110                                         num_bases, basenames, rset_nmem);
2111     }
2112     else if (!strcmp (search_type, "or-list"))
2113     {
2114         rset = rpn_search_APT_or_list (zh, zapt, termz, attributeSet, stream,
2115                                        reg_id, complete_flag, rank_type,
2116                                        xpath_use,
2117                                        num_bases, basenames, rset_nmem);
2118     }
2119     else if (!strcmp (search_type, "local"))
2120     {
2121         rset = rpn_search_APT_local (zh, zapt, termz, attributeSet, stream,
2122                                      rank_type, rset_nmem);
2123     }
2124     else if (!strcmp (search_type, "numeric"))
2125     {
2126         rset = rpn_search_APT_numeric (zh, zapt, termz, attributeSet, stream,
2127                                        reg_id, complete_flag, rank_type,
2128                                        xpath_use,
2129                                        num_bases, basenames, rset_nmem);
2130     }
2131     else if (!strcmp (search_type, "always"))
2132     {
2133         rset = 0;
2134     }
2135     else
2136         zh->errCode = 118;
2137     return rpn_search_xpath (zh, attributeSet, num_bases, basenames,
2138                              stream, rank_type, rset, 
2139                              xpath_len, xpath, rset_nmem);
2140 }
2141
2142 static RSET rpn_search_structure (ZebraHandle zh, Z_RPNStructure *zs,
2143                                   oid_value attributeSet, 
2144                                   NMEM stream, NMEM rset_nmem,
2145                                   Z_SortKeySpecList *sort_sequence,
2146                                   int num_bases, char **basenames)
2147 {
2148     RSET r = NULL;
2149     if (zs->which == Z_RPNStructure_complex)
2150     {
2151         Z_Operator *zop = zs->u.complex->roperator;
2152         RSET rsets[2]; /* l and r argument */
2153
2154         rsets[0]=rpn_search_structure (zh, zs->u.complex->s1,
2155                                        attributeSet, stream, rset_nmem,
2156                                        sort_sequence,
2157                                        num_bases, basenames);
2158         if (rsets[0] == NULL)
2159             return NULL;
2160         rsets[1]=rpn_search_structure (zh, zs->u.complex->s2,
2161                                        attributeSet, stream, rset_nmem,
2162                                        sort_sequence,
2163                                        num_bases, basenames);
2164         if (rsets[1] == NULL)
2165         {
2166             rset_delete (rsets[0]);
2167             return NULL;
2168         }
2169
2170         switch (zop->which)
2171         {
2172         case Z_Operator_and:
2173             r=rsmultiand_create(rset_nmem, key_it_ctrl, key_it_ctrl->scope,
2174                         2, rsets);
2175             break;
2176         case Z_Operator_or:
2177             r=rsmultior_create(rset_nmem, key_it_ctrl, key_it_ctrl->scope,
2178                         2, rsets);
2179             break;
2180         case Z_Operator_and_not:
2181             r = rsbool_create_not(rset_nmem,key_it_ctrl, key_it_ctrl->scope,
2182                     rsets[0],rsets[1]);
2183             break;
2184         case Z_Operator_prox:
2185             if (zop->u.prox->which != Z_ProximityOperator_known)
2186             {
2187                 zh->errCode = 132;
2188                 return NULL;
2189             }
2190             if (*zop->u.prox->u.known != Z_ProxUnit_word)
2191             {
2192                 char *val = (char *) nmem_malloc(stream, 16);
2193                 zh->errCode = 132;
2194                 zh->errString = val;
2195                 sprintf (val, "%d", *zop->u.prox->u.known);
2196                 return NULL;
2197             }
2198             else
2199             {
2200                 /* new / old prox */
2201                 r=rsprox_create(rset_nmem,key_it_ctrl,key_it_ctrl->scope,
2202                          2, rsets, 
2203                          *zop->u.prox->ordered,
2204                          (!zop->u.prox->exclusion ? 
2205                               0 : *zop->u.prox->exclusion),
2206                          *zop->u.prox->relationType,
2207                          *zop->u.prox->distance );
2208             }
2209             break;
2210         default:
2211             zh->errCode = 110;
2212             return NULL;
2213         }
2214     }
2215     else if (zs->which == Z_RPNStructure_simple)
2216     {
2217         if (zs->u.simple->which == Z_Operand_APT)
2218         {
2219             yaz_log(YLOG_DEBUG, "rpn_search_APT");
2220             r = rpn_search_APT (zh, zs->u.simple->u.attributesPlusTerm,
2221                                 attributeSet, stream, sort_sequence,
2222                                 num_bases, basenames,rset_nmem);
2223         }
2224         else if (zs->u.simple->which == Z_Operand_resultSetId)
2225         {
2226             yaz_log(YLOG_DEBUG, "rpn_search_ref");
2227             r = resultSetRef (zh, zs->u.simple->u.resultSetId);
2228             if (!r)
2229             {
2230                 r = rsnull_create (rset_nmem,key_it_ctrl);
2231                 zh->errCode = 30;
2232                 zh->errString =
2233                     nmem_strdup (stream, zs->u.simple->u.resultSetId);
2234                 return 0;
2235             }
2236             else
2237                 rset_dup(r);
2238         }
2239         else
2240         {
2241             zh->errCode = 3;
2242             return 0;
2243         }
2244     }
2245     else
2246     {
2247         zh->errCode = 3;
2248         return 0;
2249     }
2250     return r;
2251 }
2252
2253
2254 RSET rpn_search(ZebraHandle zh, NMEM nmem, NMEM rset_nmem,
2255                 Z_RPNQuery *rpn, int num_bases, char **basenames, 
2256                 const char *setname,
2257                 ZebraSet sset)
2258 {
2259     RSET rset;
2260     oident *attrset;
2261     oid_value attributeSet;
2262     Z_SortKeySpecList *sort_sequence;
2263     int sort_status, i;
2264
2265     zh->errCode = 0;
2266     zh->errString = NULL;
2267     zh->hits = 0;
2268
2269     sort_sequence = (Z_SortKeySpecList *)
2270         nmem_malloc(nmem, sizeof(*sort_sequence));
2271     sort_sequence->num_specs = 10; /* FIXME - Hard-coded number */
2272     sort_sequence->specs = (Z_SortKeySpec **)
2273         nmem_malloc(nmem, sort_sequence->num_specs *
2274                      sizeof(*sort_sequence->specs));
2275     for (i = 0; i<sort_sequence->num_specs; i++)
2276         sort_sequence->specs[i] = 0;
2277     
2278     attrset = oid_getentbyoid (rpn->attributeSetId);
2279     attributeSet = attrset->value;
2280     rset = rpn_search_structure (zh, rpn->RPNStructure, attributeSet,
2281                                  nmem, rset_nmem,
2282                                  sort_sequence, num_bases, basenames);
2283     if (!rset)
2284         return 0;
2285
2286     if (zh->errCode)
2287         yaz_log(YLOG_DEBUG, "search error: %d", zh->errCode);
2288     
2289     for (i = 0; sort_sequence->specs[i]; i++)
2290         ;
2291     sort_sequence->num_specs = i;
2292     if (!i)
2293         resultSetRank (zh, sset, rset, rset_nmem);
2294     else
2295     {
2296         yaz_log(YLOG_DEBUG, "resultSetSortSingle in rpn_search");
2297         resultSetSortSingle (zh, nmem, sset, rset,
2298                              sort_sequence, &sort_status);
2299         if (zh->errCode)
2300         {
2301             yaz_log(YLOG_DEBUG, "resultSetSortSingle status = %d", zh->errCode);
2302         }
2303     }
2304     return rset;
2305 }
2306
2307 struct scan_info_entry {
2308     char *term;
2309     ISAMC_P isam_p;
2310 };
2311
2312 struct scan_info {
2313     struct scan_info_entry *list;
2314     ODR odr;
2315     int before, after;
2316     char prefix[20];
2317 };
2318
2319 static int scan_handle (char *name, const char *info, int pos, void *client)
2320 {
2321     int len_prefix, idx;
2322     struct scan_info *scan_info = (struct scan_info *) client;
2323
2324     len_prefix = strlen(scan_info->prefix);
2325     if (memcmp (name, scan_info->prefix, len_prefix))
2326         return 1;
2327     if (pos > 0)        idx = scan_info->after - pos + scan_info->before;
2328     else
2329         idx = - pos - 1;
2330     scan_info->list[idx].term = (char *)
2331         odr_malloc(scan_info->odr, strlen(name + len_prefix)+1);
2332     strcpy(scan_info->list[idx].term, name + len_prefix);
2333     assert (*info == sizeof(ISAMC_P));
2334     memcpy (&scan_info->list[idx].isam_p, info+1, sizeof(ISAMC_P));
2335     return 0;
2336 }
2337
2338 static void scan_term_untrans (ZebraHandle zh, NMEM stream, int reg_type,
2339                                char **dst, const char *src)
2340 {
2341     char term_src[IT_MAX_WORD];
2342     char term_dst[IT_MAX_WORD];
2343     
2344     term_untrans (zh, reg_type, term_src, src);
2345
2346     if (zh->iconv_from_utf8 != 0)
2347     {
2348         int len;
2349         char *inbuf = term_src;
2350         size_t inleft = strlen(term_src);
2351         char *outbuf = term_dst;
2352         size_t outleft = sizeof(term_dst)-1;
2353         size_t ret;
2354         
2355         ret = yaz_iconv (zh->iconv_from_utf8, &inbuf, &inleft,
2356                          &outbuf, &outleft);
2357         if (ret == (size_t)(-1))
2358             len = 0;
2359         else
2360             len = outbuf - term_dst;
2361         *dst = nmem_malloc(stream, len + 1);
2362         if (len > 0)
2363             memcpy (*dst, term_dst, len);
2364         (*dst)[len] = '\0';
2365     }
2366     else
2367         *dst = nmem_strdup(stream, term_src);
2368 }
2369
2370 static void count_set (RSET r, int *count)
2371 {
2372     zint psysno = 0;
2373     int kno = 0;
2374     struct it_key key;
2375     RSFD rfd;
2376
2377     yaz_log(YLOG_DEBUG, "count_set");
2378
2379     *count = 0;
2380     rfd = rset_open (r, RSETF_READ);
2381     while (rset_read (rfd, &key,0 /* never mind terms */))
2382     {
2383         if (key.mem[0] != psysno)
2384         {
2385             psysno = key.mem[0];
2386             (*count)++;
2387         }
2388         kno++;
2389     }
2390     rset_close (rfd);
2391     yaz_log(YLOG_DEBUG, "%d keys, %d records", kno, *count);
2392 }
2393
2394 void rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
2395                oid_value attributeset,
2396                int num_bases, char **basenames,
2397                int *position, int *num_entries, ZebraScanEntry **list,
2398                int *is_partial, RSET limit_set, int return_zero)
2399 {
2400     int i;
2401     int pos = *position;
2402     int num = *num_entries;
2403     int before;
2404     int after;
2405     int base_no;
2406     char termz[IT_MAX_WORD+20];
2407     AttrType use;
2408     int use_value;
2409     const char *use_string = 0;
2410     struct scan_info *scan_info_array;
2411     ZebraScanEntry *glist;
2412     int ords[32], ord_no = 0;
2413     int ptr[32];
2414
2415     int bases_ok = 0;     /* no of databases with OK attribute */
2416     int errCode = 0;      /* err code (if any is not OK) */
2417     char *errString = 0;  /* addinfo */
2418
2419     unsigned reg_id;
2420     char *search_type = NULL;
2421     char rank_type[128];
2422     int complete_flag;
2423     int sort_flag;
2424     NMEM rset_nmem=NULL; 
2425
2426     *list = 0;
2427
2428     if (attributeset == VAL_NONE)
2429         attributeset = VAL_BIB1;
2430
2431     if (!limit_set)
2432     {
2433         AttrType termset;
2434         int termset_value_numeric;
2435         const char *termset_value_string;
2436         attr_init (&termset, zapt, 8);
2437         termset_value_numeric =
2438             attr_find_ex (&termset, NULL, &termset_value_string);
2439         if (termset_value_numeric != -1)
2440         {
2441             char resname[32];
2442             const char *termset_name = 0;
2443             
2444             if (termset_value_numeric != -2)
2445             {
2446                 
2447                 sprintf (resname, "%d", termset_value_numeric);
2448                 termset_name = resname;
2449             }
2450             else
2451                 termset_name = termset_value_string;
2452             
2453             limit_set = resultSetRef (zh, termset_name);
2454         }
2455     }
2456         
2457     yaz_log (YLOG_DEBUG, "position = %d, num = %d set=%d",
2458              pos, num, attributeset);
2459         
2460     attr_init (&use, zapt, 1);
2461     use_value = attr_find_ex (&use, &attributeset, &use_string);
2462
2463     if (zebra_maps_attr (zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2464                          rank_type, &complete_flag, &sort_flag))
2465     {
2466         *num_entries = 0;
2467         zh->errCode = 113;
2468         return ;
2469     }
2470     yaz_log (YLOG_DEBUG, "use_value = %d", use_value);
2471
2472     if (use_value == -1)
2473         use_value = 1016;
2474     for (base_no = 0; base_no < num_bases && ord_no < 32; base_no++)
2475     {
2476         int r;
2477         attent attp;
2478         data1_local_attribute *local_attr;
2479
2480         if ((r=att_getentbyatt (zh, &attp, attributeset, use_value,
2481                                 use_string)))
2482         {
2483             yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d",
2484                   attributeset, use_value);
2485             if (r == -1)
2486             {
2487                 char val_str[32];
2488                 sprintf (val_str, "%d", use_value);
2489                 errCode = 114;
2490                 errString = odr_strdup (stream, val_str);
2491             }   
2492             else
2493                 errCode = 121;
2494             continue;
2495         }
2496         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2497         {
2498             zh->errString = basenames[base_no];
2499             zh->errCode = 109; /* Database unavailable */
2500             *num_entries = 0;
2501             return;
2502         }
2503         bases_ok++;
2504         for (local_attr = attp.local_attributes; local_attr && ord_no < 32;
2505              local_attr = local_attr->next)
2506         {
2507             int ord;
2508
2509             ord = zebraExplain_lookupSU (zh->reg->zei, attp.attset_ordinal,
2510                                          local_attr->local);
2511             if (ord > 0)
2512                 ords[ord_no++] = ord;
2513         }
2514     }
2515     if (!bases_ok && errCode)
2516     {
2517         zh->errCode = errCode;
2518         zh->errString = errString;
2519         *num_entries = 0;
2520     }
2521     if (ord_no == 0)
2522     {
2523         *num_entries = 0;
2524         return;
2525     }
2526     /* prepare dictionary scanning */
2527     before = pos-1;
2528     after = 1+num-pos;
2529     scan_info_array = (struct scan_info *)
2530         odr_malloc(stream, ord_no * sizeof(*scan_info_array));
2531     for (i = 0; i < ord_no; i++)
2532     {
2533         int j, prefix_len = 0;
2534         int before_tmp = before, after_tmp = after;
2535         struct scan_info *scan_info = scan_info_array + i;
2536         struct rpn_char_map_info rcmi;
2537
2538         rpn_char_map_prepare (zh->reg, reg_id, &rcmi);
2539
2540         scan_info->before = before;
2541         scan_info->after = after;
2542         scan_info->odr = stream;
2543
2544         scan_info->list = (struct scan_info_entry *)
2545             odr_malloc(stream, (before+after) * sizeof(*scan_info->list));
2546         for (j = 0; j<before+after; j++)
2547             scan_info->list[j].term = NULL;
2548
2549         prefix_len += key_SU_encode (ords[i], termz + prefix_len);
2550         termz[prefix_len++] = reg_id;
2551         termz[prefix_len] = 0;
2552         strcpy(scan_info->prefix, termz);
2553
2554         if (trans_scan_term(zh, zapt, termz+prefix_len, reg_id))
2555             return ;
2556                     
2557         dict_scan(zh->reg->dict, termz, &before_tmp, &after_tmp,
2558                   scan_info, scan_handle);
2559     }
2560     glist = (ZebraScanEntry *)
2561         odr_malloc(stream, (before+after)*sizeof(*glist));
2562
2563     rset_nmem = nmem_create();
2564
2565     /* consider terms after main term */
2566     for (i = 0; i < ord_no; i++)
2567         ptr[i] = before;
2568     
2569     *is_partial = 0;
2570     for (i = 0; i<after; i++)
2571     {
2572         int j, j0 = -1;
2573         const char *mterm = NULL;
2574         const char *tst;
2575         RSET rset;
2576         
2577         for (j = 0; j < ord_no; j++)
2578         {
2579             if (ptr[j] < before+after &&
2580                 (tst=scan_info_array[j].list[ptr[j]].term) &&
2581                 (!mterm || strcmp (tst, mterm) < 0))
2582             {
2583                 j0 = j;
2584                 mterm = tst;
2585             }
2586         }
2587         if (j0 == -1)
2588             break;
2589         scan_term_untrans (zh, stream->mem, reg_id,
2590                            &glist[i+before].term, mterm);
2591         rset = rset_trunc(zh, &scan_info_array[j0].list[ptr[j0]].isam_p, 1,
2592                            glist[i+before].term, strlen(glist[i+before].term),
2593                            NULL, 0, zapt->term->which, rset_nmem, 
2594                            key_it_ctrl,key_it_ctrl->scope);
2595         ptr[j0]++;
2596         for (j = j0+1; j<ord_no; j++)
2597         {
2598             if (ptr[j] < before+after &&
2599                 (tst=scan_info_array[j].list[ptr[j]].term) &&
2600                 !strcmp (tst, mterm))
2601             {
2602                 RSET rset2;
2603
2604                 rset2 =
2605                    rset_trunc(zh, &scan_info_array[j].list[ptr[j]].isam_p, 1,
2606                                glist[i+before].term,
2607                                strlen(glist[i+before].term), NULL, 0,
2608                                zapt->term->which,rset_nmem,
2609                                key_it_ctrl, key_it_ctrl->scope);
2610                 rset = rsbool_create_or(rset_nmem,key_it_ctrl,
2611                                key_it_ctrl->scope, rset, rset2);
2612                 /* FIXME - Use a proper multi-or */
2613
2614                 ptr[j]++;
2615             }
2616         }
2617         if (limit_set)
2618             rset = rsbool_create_and(rset_nmem,key_it_ctrl,key_it_ctrl->scope,
2619                             rset, rset_dup(limit_set));
2620         count_set (rset, &glist[i+before].occurrences);
2621         rset_delete (rset);
2622     }
2623     if (i < after)
2624     {
2625         *num_entries -= (after-i);
2626         *is_partial = 1;
2627     }
2628
2629     /* consider terms before main term */
2630     for (i = 0; i<ord_no; i++)
2631         ptr[i] = 0;
2632
2633     for (i = 0; i<before; i++)
2634     {
2635         int j, j0 = -1;
2636         const char *mterm = NULL;
2637         const char *tst;
2638         RSET rset;
2639         
2640         for (j = 0; j <ord_no; j++)
2641         {
2642             if (ptr[j] < before &&
2643                 (tst=scan_info_array[j].list[before-1-ptr[j]].term) &&
2644                 (!mterm || strcmp (tst, mterm) > 0))
2645             {
2646                 j0 = j;
2647                 mterm = tst;
2648             }
2649         }
2650         if (j0 == -1)
2651             break;
2652
2653         scan_term_untrans (zh, stream->mem, reg_id,
2654                            &glist[before-1-i].term, mterm);
2655
2656         rset = rset_trunc
2657                (zh, &scan_info_array[j0].list[before-1-ptr[j0]].isam_p, 1,
2658                 glist[before-1-i].term, strlen(glist[before-1-i].term),
2659                 NULL, 0, zapt->term->which,rset_nmem,
2660                 key_it_ctrl,key_it_ctrl->scope);
2661
2662         ptr[j0]++;
2663
2664         for (j = j0+1; j<ord_no; j++)
2665         {
2666             if (ptr[j] < before &&
2667                 (tst=scan_info_array[j].list[before-1-ptr[j]].term) &&
2668                 !strcmp (tst, mterm))
2669             {
2670                 RSET rset2;
2671
2672                 rset2 = rset_trunc(
2673                     zh,
2674                     &scan_info_array[j].list[before-1-ptr[j]].isam_p, 1,
2675                     glist[before-1-i].term,
2676                     strlen(glist[before-1-i].term), NULL, 0,
2677                     zapt->term->which, rset_nmem,
2678                     key_it_ctrl, key_it_ctrl->scope);
2679                 rset = rsbool_create_and(rset_nmem,key_it_ctrl,
2680                                          key_it_ctrl->scope, rset, rset2);
2681                 /* FIXME - multi-and ?? */
2682                 ptr[j]++;
2683             }
2684         }
2685         if (limit_set)
2686             rset = rsbool_create_and(rset_nmem,key_it_ctrl,key_it_ctrl->scope,
2687                             rset, rset_dup(limit_set));
2688         count_set (rset, &glist[before-1-i].occurrences);
2689         rset_delete (rset);
2690     }
2691     i = before-i;
2692     if (i)
2693     {
2694         *is_partial = 1;
2695         *position -= i;
2696         *num_entries -= i;
2697     }
2698
2699     nmem_destroy(rset_nmem);
2700     *list = glist + i;               /* list is set to first 'real' entry */
2701     
2702     yaz_log(YLOG_DEBUG, "position = %d, num_entries = %d",
2703           *position, *num_entries);
2704     if (zh->errCode)
2705         yaz_log(YLOG_DEBUG, "scan error: %d", zh->errCode);
2706 }
2707