Removed old struct it_key handling. 580 lines removed.
[idzebra-moved-to-github.git] / index / zrpn.c
1 /* $Id: zrpn.c,v 1.153 2004-09-15 08:13:51 adam Exp $
2    Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004
3    Index Data Aps
4
5 This file is part of the Zebra server.
6
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra.  If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA.
21 */
22
23
24 #include <stdio.h>
25 #include <assert.h>
26 #ifdef WIN32
27 #include <io.h>
28 #else
29 #include <unistd.h>
30 #endif
31 #include <ctype.h>
32
33 #include "index.h"
34 #include <zebra_xpath.h>
35
36 #include <charmap.h>
37 #include <rset.h>
38
39
/* Key-control descriptor for struct it_key entries.  It is published to
   the rest of the file through the key_it_ctrl pointer defined below.
   NOTE(review): the member layout comes from struct key_control, declared
   in a header not shown here; the second entry is presumably the `scope'
   member that term_trunc passes to rset_trunc -- confirm. */
40 static const struct key_control it_ctrl = { 
41     sizeof(struct it_key),
42     2, /* we have sysnos and seqnos in this key, nothing more */
43     key_compare_it, 
44     key_logdump_txt,   /* FIXME  - clean up these functions */
45     key_get_seq,
46 };
47
48
/* Externally visible handle to the descriptor above. */
49 const struct key_control *key_it_ctrl = &it_ctrl;
50
/* Context handed to rpn_char_map_handler through dict_grep_cmap:
   the character maps to use (zm) and the register type to map for. */
51 struct rpn_char_map_info {
52     ZebraMaps zm;
53     int reg_type;
54 };
55
/* Cursor used by attr_init / attr_find_ex to iterate over the attributes
   of a term:
     type   attribute type to look for (compared with *attributeType)
     major  index of the next element in zapt->attributes->attributes
     minor  index of the next entry inside a complex attribute value list
     zapt   the term whose attribute list is being scanned */
56 typedef struct {
57     int type;
58     int major;
59     int minor;
60     Z_AttributesPlusTerm *zapt;
61 } AttrType;
62
63
64 static const char **rpn_char_map_handler (void *vp, const char **from, int len)
65 {
66     struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
67     const char **out = zebra_maps_input (p->zm, p->reg_type, from, len, 0);
68 #if 0
69     if (out && *out)
70     {
71         const char *outp = *out;
72         yaz_log (LOG_LOG, "---");
73         while (*outp)
74         {
75             yaz_log (LOG_LOG, "%02X", *outp);
76             outp++;
77         }
78     }
79 #endif
80     return out;
81 }
82
83 static void rpn_char_map_prepare (struct zebra_register *reg, int reg_type,
84                                   struct rpn_char_map_info *map_info)
85 {
86     map_info->zm = reg->zebra_maps;
87     map_info->reg_type = reg_type;
88     dict_grep_cmap (reg->dict, map_info, rpn_char_map_handler);
89 }
90
91 static int attr_find_ex (AttrType *src, oid_value *attributeSetP,
92                          const char **string_value)
93 {
94     int num_attributes;
95
96     num_attributes = src->zapt->attributes->num_attributes;
97     while (src->major < num_attributes)
98     {
99         Z_AttributeElement *element;
100
101         element = src->zapt->attributes->attributes[src->major];
102         if (src->type == *element->attributeType)
103         {
104             switch (element->which) 
105             {
106             case Z_AttributeValue_numeric:
107                 ++(src->major);
108                 if (element->attributeSet && attributeSetP)
109                 {
110                     oident *attrset;
111
112                     attrset = oid_getentbyoid (element->attributeSet);
113                     *attributeSetP = attrset->value;
114                 }
115                 return *element->value.numeric;
116                 break;
117             case Z_AttributeValue_complex:
118                 if (src->minor >= element->value.complex->num_list)
119                     break;
120                 if (element->attributeSet && attributeSetP)
121                 {
122                     oident *attrset;
123                     
124                     attrset = oid_getentbyoid (element->attributeSet);
125                     *attributeSetP = attrset->value;
126                 }
127                 if (element->value.complex->list[src->minor]->which ==  
128                     Z_StringOrNumeric_numeric)
129                 {
130                     ++(src->minor);
131                     return
132                         *element->value.complex->list[src->minor-1]->u.numeric;
133                 }
134                 else if (element->value.complex->list[src->minor]->which ==  
135                          Z_StringOrNumeric_string)
136                 {
137                     if (!string_value)
138                         break;
139                     ++(src->minor);
140                     *string_value = 
141                         element->value.complex->list[src->minor-1]->u.string;
142                     return -2;
143                 }
144                 else
145                     break;
146             default:
147                 assert (0);
148             }
149         }
150         ++(src->major);
151     }
152     return -1;
153 }
154
155 static int attr_find (AttrType *src, oid_value *attributeSetP)
156 {
157     return attr_find_ex (src, attributeSetP, 0);
158 }
159
160 static void attr_init (AttrType *src, Z_AttributesPlusTerm *zapt,
161                        int type)
162 {
163     src->zapt = zapt;
164     src->type = type;
165     src->major = 0;
166     src->minor = 0;
167 }
168
/* TERM_COUNT enables the term_no array that add_isam_p grows in parallel
   with isam_p_buf. */
169 #define TERM_COUNT        
170        
/* State shared between a dictionary grep and its callback (grep_handle /
   add_isam_p). */
171 struct grep_info {        
172 #ifdef TERM_COUNT        
/* allocated with the same capacity as isam_p_buf; NOTE(review): no code
   in view stores values into it -- confirm its intended content */
173     int *term_no;        
174 #endif        
/* ISAM positions collected so far (copied from the dictionary info) */
175     ISAMC_P *isam_p_buf;
/* number of entries allocated in isam_p_buf (and term_no) */
176     int isam_p_size;        
/* number of entries in use; reset to 0 by term_trunc before each grep */
177     int isam_p_indx;
/* handle and register type, used by add_isam_p when reporting terms */
178     ZebraHandle zh;
179     int reg_type;
/* when non-NULL, every dictionary hit is also added to this term set */
180     ZebraSet termset;
181 };        
182
183 static void term_untrans  (ZebraHandle zh, int reg_type,
184                            char *dst, const char *src)
185 {
186     int len = 0;
187     while (*src)
188     {
189         const char *cp = zebra_maps_output (zh->reg->zebra_maps,
190                                             reg_type, &src);
191         if (!cp && len < IT_MAX_WORD-1)
192             dst[len++] = *src++;
193         else
194             while (*cp && len < IT_MAX_WORD-1)
195                 dst[len++] = *cp++;
196     }
197     dst[len] = '\0';
198 }
199
200 static void add_isam_p (const char *name, const char *info,
201                         struct grep_info *p)
202 {
203     if (p->isam_p_indx == p->isam_p_size)
204     {
205         ISAMC_P *new_isam_p_buf;
206 #ifdef TERM_COUNT        
207         int *new_term_no;        
208 #endif
209         p->isam_p_size = 2*p->isam_p_size + 100;
210         new_isam_p_buf = (ISAMC_P *) xmalloc (sizeof(*new_isam_p_buf) *
211                                              p->isam_p_size);
212         if (p->isam_p_buf)
213         {
214             memcpy (new_isam_p_buf, p->isam_p_buf,
215                     p->isam_p_indx * sizeof(*p->isam_p_buf));
216             xfree (p->isam_p_buf);
217         }
218         p->isam_p_buf = new_isam_p_buf;
219
220 #ifdef TERM_COUNT
221         new_term_no = (int *) xmalloc (sizeof(*new_term_no) *
222                                        p->isam_p_size);
223         if (p->term_no)
224         {
225             memcpy (new_term_no, p->isam_p_buf,
226                     p->isam_p_indx * sizeof(*p->term_no));
227             xfree (p->term_no);
228         }
229         p->term_no = new_term_no;
230 #endif
231     }
232     assert (*info == sizeof(*p->isam_p_buf));
233     memcpy (p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
234
235 #if 1
236     if (p->termset)
237     {
238         const char *db;
239         int set, use;
240         char term_tmp[IT_MAX_WORD];
241         int su_code = 0;
242         int len = key_SU_decode (&su_code, name);
243         
244         term_untrans  (p->zh, p->reg_type, term_tmp, name+len+1);
245         logf (LOG_LOG, "grep: %d %c %s", su_code, name[len], term_tmp);
246         zebraExplain_lookup_ord (p->zh->reg->zei,
247                                  su_code, &db, &set, &use);
248         logf (LOG_LOG, "grep:  set=%d use=%d db=%s", set, use, db);
249         
250         resultSetAddTerm (p->zh, p->termset, name[len], db,
251                           set, use, term_tmp);
252     }
253 #endif
254     (p->isam_p_indx)++;
255 }
256
/* grep_handle: dictionary grep callback; stores the hit via add_isam_p.
 * p is the struct grep_info accumulator.  Always returns 0. */
static int grep_handle (char *name, const char *info, void *p)
{
    struct grep_info *gi = (struct grep_info *) p;

    add_isam_p (name, info, gi);
    return 0;
}
262
263 static int term_pre (ZebraMaps zebra_maps, int reg_type, const char **src,
264                      const char *ct1, const char *ct2, int first)
265 {
266     const char *s1, *s0 = *src;
267     const char **map;
268
269     /* skip white space */
270     while (*s0)
271     {
272         if (ct1 && strchr (ct1, *s0))
273             break;
274         if (ct2 && strchr (ct2, *s0))
275             break;
276         s1 = s0;
277         map = zebra_maps_input (zebra_maps, reg_type, &s1, strlen(s1), first);
278         if (**map != *CHR_SPACE)
279             break;
280         s0 = s1;
281     }
282     *src = s0;
283     return *s0;
284 }
285
286 #define REGEX_CHARS " []()|.*+?!"
287
288 /* term_100: handle term, where trunc=none (no operators at all) */
289 static int term_100 (ZebraMaps zebra_maps, int reg_type,
290                      const char **src, char *dst, int space_split,
291                      char *dst_term)
292 {
293     const char *s0, *s1;
294     const char **map;
295     int i = 0;
296     int j = 0;
297
298     const char *space_start = 0;
299     const char *space_end = 0;
300
301     if (!term_pre (zebra_maps, reg_type, src, NULL, NULL, !space_split))
302         return 0;
303     s0 = *src;
304     while (*s0)
305     {
306         s1 = s0;
307         map = zebra_maps_input (zebra_maps, reg_type, &s0, strlen(s0), 0);
308         if (space_split)
309         {
310             if (**map == *CHR_SPACE)
311                 break;
312         }
313         else  /* complete subfield only. */
314         {
315             if (**map == *CHR_SPACE)
316             {   /* save space mapping for later  .. */
317                 space_start = s1;
318                 space_end = s0;
319                 continue;
320             }
321             else if (space_start)
322             {   /* reload last space */
323                 while (space_start < space_end)
324                 {
325                     if (strchr (REGEX_CHARS, *space_start))
326                         dst[i++] = '\\';
327                     dst_term[j++] = *space_start;
328                     dst[i++] = *space_start++;
329                 }
330                 /* and reset */
331                 space_start = space_end = 0;
332             }
333         }
334         /* add non-space char */
335         while (s1 < s0)
336         {
337             if (strchr(REGEX_CHARS, *s1))
338                 dst[i++] = '\\';
339             dst_term[j++] = *s1;
340             dst[i++] = *s1++;
341         }
342     }
343     dst[i] = '\0';
344     dst_term[j] = '\0';
345     *src = s0;
346     return i;
347 }
348
349 /* term_101: handle term, where trunc=Process # */
350 static int term_101 (ZebraMaps zebra_maps, int reg_type,
351                      const char **src, char *dst, int space_split,
352                      char *dst_term)
353 {
354     const char *s0, *s1;
355     const char **map;
356     int i = 0;
357     int j = 0;
358
359     if (!term_pre (zebra_maps, reg_type, src, "#", "#", !space_split))
360         return 0;
361     s0 = *src;
362     while (*s0)
363     {
364         if (*s0 == '#')
365         {
366             dst[i++] = '.';
367             dst[i++] = '*';
368             dst_term[j++] = *s0++;
369         }
370         else
371         {
372             s1 = s0;
373             map = zebra_maps_input (zebra_maps, reg_type, &s0, strlen(s0), 0);
374             if (space_split && **map == *CHR_SPACE)
375                 break;
376             while (s1 < s0)
377             {
378                 if (strchr(REGEX_CHARS, *s1))
379                     dst[i++] = '\\';
380                 dst_term[j++] = *s1;
381                 dst[i++] = *s1++;
382             }
383         }
384     }
385     dst[i] = '\0';
386     dst_term[j++] = '\0';
387     *src = s0;
388     return i;
389 }
390
391 /* term_103: handle term, where trunc=re-2 (regular expressions) */
392 static int term_103 (ZebraMaps zebra_maps, int reg_type, const char **src,
393                      char *dst, int *errors, int space_split,
394                      char *dst_term)
395 {
396     int i = 0;
397     int j = 0;
398     const char *s0, *s1;
399     const char **map;
400
401     if (!term_pre (zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split))
402         return 0;
403     s0 = *src;
404     if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
405         isdigit (s0[1]))
406     {
407         *errors = s0[1] - '0';
408         s0 += 3;
409         if (*errors > 3)
410             *errors = 3;
411     }
412     while (*s0)
413     {
414         if (strchr ("^\\()[].*+?|-", *s0))
415         {
416             dst_term[j++] = *s0;
417             dst[i++] = *s0++;
418         }
419         else
420         {
421             s1 = s0;
422             map = zebra_maps_input (zebra_maps, reg_type, &s0, strlen(s0), 0);
423             if (**map == *CHR_SPACE)
424                 break;
425             while (s1 < s0)
426             {
427                 if (strchr(REGEX_CHARS, *s1))
428                     dst[i++] = '\\';
429                 dst_term[j++] = *s1;
430                 dst[i++] = *s1++;
431             }
432         }
433     }
434     dst[i] = '\0';
435     dst_term[j] = '\0';
436     *src = s0;
437     return i;
438 }
439
440 /* term_103: handle term, where trunc=re-1 (regular expressions) */
441 static int term_102 (ZebraMaps zebra_maps, int reg_type, const char **src,
442                      char *dst, int space_split, char *dst_term)
443 {
444     return term_103 (zebra_maps, reg_type, src, dst, NULL, space_split,
445                      dst_term);
446 }
447
448
449 /* term_104: handle term, where trunc=Process # and ! */
450 static int term_104 (ZebraMaps zebra_maps, int reg_type,
451                      const char **src, char *dst, int space_split,
452                      char *dst_term)
453 {
454     const char *s0, *s1;
455     const char **map;
456     int i = 0;
457     int j = 0;
458
459     if (!term_pre (zebra_maps, reg_type, src, "?*#", "?*#", !space_split))
460         return 0;
461     s0 = *src;
462     while (*s0)
463     {
464         if (*s0 == '?')
465         {
466             dst_term[j++] = *s0++;
467             if (*s0 >= '0' && *s0 <= '9')
468             {
469                 int limit = 0;
470                 while (*s0 >= '0' && *s0 <= '9')
471                 {
472                     limit = limit * 10 + (*s0 - '0');
473                     dst_term[j++] = *s0++;
474                 }
475                 if (limit > 20)
476                     limit = 20;
477                 while (--limit >= 0)
478                 {
479                     dst[i++] = '.';
480                     dst[i++] = '?';
481                 }
482             }
483             else
484             {
485                 dst[i++] = '.';
486                 dst[i++] = '*';
487             }
488         }
489         else if (*s0 == '*')
490         {
491             dst[i++] = '.';
492             dst[i++] = '*';
493             dst_term[j++] = *s0++;
494         }
495         else if (*s0 == '#')
496         {
497             dst[i++] = '.';
498             dst_term[j++] = *s0++;
499         }
500         {
501             s1 = s0;
502             map = zebra_maps_input (zebra_maps, reg_type, &s0, strlen(s0), 0);
503             if (space_split && **map == *CHR_SPACE)
504                 break;
505             while (s1 < s0)
506             {
507                 if (strchr(REGEX_CHARS, *s1))
508                     dst[i++] = '\\';
509                 dst_term[j++] = *s1;
510                 dst[i++] = *s1++;
511             }
512         }
513     }
514     dst[i] = '\0';
515     dst_term[j++] = '\0';
516     *src = s0;
517     return i;
518 }
519
520 /* term_105/106: handle term, where trunc=Process * and ! and right trunc */
521 static int term_105 (ZebraMaps zebra_maps, int reg_type,
522                      const char **src, char *dst, int space_split,
523                      char *dst_term, int right_truncate)
524 {
525     const char *s0, *s1;
526     const char **map;
527     int i = 0;
528     int j = 0;
529
530     if (!term_pre (zebra_maps, reg_type, src, "*!", "*!", !space_split))
531         return 0;
532     s0 = *src;
533     while (*s0)
534     {
535         if (*s0 == '*')
536         {
537             dst[i++] = '.';
538             dst[i++] = '*';
539             dst_term[j++] = *s0++;
540         }
541         else if (*s0 == '!')
542         {
543             dst[i++] = '.';
544             dst_term[j++] = *s0++;
545         }
546         {
547             s1 = s0;
548             map = zebra_maps_input (zebra_maps, reg_type, &s0, strlen(s0), 0);
549             if (space_split && **map == *CHR_SPACE)
550                 break;
551             while (s1 < s0)
552             {
553                 if (strchr(REGEX_CHARS, *s1))
554                     dst[i++] = '\\';
555                 dst_term[j++] = *s1;
556                 dst[i++] = *s1++;
557             }
558         }
559     }
560     if (right_truncate)
561     {
562         dst[i++] = '.';
563         dst[i++] = '*';
564     }
565     dst[i] = '\0';
566     
567     dst_term[j++] = '\0';
568     *src = s0;
569     return i;
570 }
571
572
573 /* gen_regular_rel - generate regular expression from relation
574  *  val:     border value (inclusive)
575  *  islt:    1 if <=; 0 if >=.
576  */
577 static void gen_regular_rel (char *dst, int val, int islt)
578 {
579     int dst_p;
580     int w, d, i;
581     int pos = 0;
582     char numstr[20];
583
584     logf (LOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
585     if (val >= 0)
586     {
587         if (islt)
588             strcpy (dst, "(-[0-9]+|(");
589         else
590             strcpy (dst, "((");
591     } 
592     else
593     {
594         if (!islt)
595         {
596             strcpy (dst, "([0-9]+|-(");
597             dst_p = strlen (dst);
598             islt = 1;
599         }
600         else
601         {
602             strcpy (dst, "(-(");
603             islt = 0;
604         }
605         val = -val;
606     }
607     dst_p = strlen (dst);
608     sprintf (numstr, "%d", val);
609     for (w = strlen(numstr); --w >= 0; pos++)
610     {
611         d = numstr[w];
612         if (pos > 0)
613         {
614             if (islt)
615             {
616                 if (d == '0')
617                     continue;
618                 d--;
619             } 
620             else
621             {
622                 if (d == '9')
623                     continue;
624                 d++;
625             }
626         }
627         
628         strcpy (dst + dst_p, numstr);
629         dst_p = strlen(dst) - pos - 1;
630
631         if (islt)
632         {
633             if (d != '0')
634             {
635                 dst[dst_p++] = '[';
636                 dst[dst_p++] = '0';
637                 dst[dst_p++] = '-';
638                 dst[dst_p++] = d;
639                 dst[dst_p++] = ']';
640             }
641             else
642                 dst[dst_p++] = d;
643         }
644         else
645         {
646             if (d != '9')
647             { 
648                 dst[dst_p++] = '[';
649                 dst[dst_p++] = d;
650                 dst[dst_p++] = '-';
651                 dst[dst_p++] = '9';
652                 dst[dst_p++] = ']';
653             }
654             else
655                 dst[dst_p++] = d;
656         }
657         for (i = 0; i<pos; i++)
658         {
659             dst[dst_p++] = '[';
660             dst[dst_p++] = '0';
661             dst[dst_p++] = '-';
662             dst[dst_p++] = '9';
663             dst[dst_p++] = ']';
664         }
665         dst[dst_p++] = '|';
666     }
667     dst[dst_p] = '\0';
668     if (islt)
669     {
670         /* match everything less than 10^(pos-1) */
671         strcat (dst, "0*");
672         for (i=1; i<pos; i++)
673             strcat (dst, "[0-9]?");
674     }
675     else
676     {
677         /* match everything greater than 10^pos */
678         for (i = 0; i <= pos; i++)
679             strcat (dst, "[0-9]");
680         strcat (dst, "[0-9]*");
681     }
682     strcat (dst, "))");
683 }
684
/* string_rel_add_char: copy one (possibly backslash-escaped) character
 * from src[*indx] to **term_p, advancing both *indx and *term_p.  An
 * escape is copied together with the character that follows it. */
void string_rel_add_char (char **term_p, const char *src, int *indx)
{
    char *out = *term_p;
    int at = *indx;

    if (src[at] == '\\')
        *out++ = src[at++];
    *out++ = src[at++];

    *term_p = out;
    *indx = at;
}
691
692 /*
693  *   >  abc     ([b-].*|a[c-].*|ab[d-].*|abc.+)
694  *              ([^-a].*|a[^-b].*ab[^-c].*|abc.+)
695  *   >= abc     ([b-].*|a[c-].*|ab[c-].*)
696  *              ([^-a].*|a[^-b].*|ab[c-].*)
697  *   <  abc     ([-0].*|a[-a].*|ab[-b].*)
698  *              ([^a-].*|a[^b-].*|ab[^c-].*)
699  *   <= abc     ([-0].*|a[-a].*|ab[-b].*|abc)
700  *              ([^a-].*|a[^b-].*|ab[^c-].*|abc)
701  */
702 static int string_relation (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
703                             const char **term_sub, char *term_dict,
704                             oid_value attributeSet,
705                             int reg_type, int space_split, char *term_dst)
706 {
707     AttrType relation;
708     int relation_value;
709     int i;
710     char *term_tmp = term_dict + strlen(term_dict);
711     char term_component[2*IT_MAX_WORD+20];
712
713     attr_init (&relation, zapt, 2);
714     relation_value = attr_find (&relation, NULL);
715
716     logf (LOG_DEBUG, "string relation value=%d", relation_value);
717     switch (relation_value)
718     {
719     case 1:
720         if (!term_100 (zh->reg->zebra_maps, reg_type,
721                        term_sub, term_component,
722                        space_split, term_dst))
723             return 0;
724         logf (LOG_DEBUG, "Relation <");
725         
726         *term_tmp++ = '(';
727         for (i = 0; term_component[i]; )
728         {
729             int j = 0;
730
731             if (i)
732                 *term_tmp++ = '|';
733             while (j < i)
734                 string_rel_add_char (&term_tmp, term_component, &j);
735
736             *term_tmp++ = '[';
737
738             *term_tmp++ = '^';
739             string_rel_add_char (&term_tmp, term_component, &i);
740             *term_tmp++ = '-';
741
742             *term_tmp++ = ']';
743             *term_tmp++ = '.';
744             *term_tmp++ = '*';
745
746             if ((term_tmp - term_dict) > IT_MAX_WORD)
747                 break;
748         }
749         *term_tmp++ = ')';
750         *term_tmp = '\0';
751         break;
752     case 2:
753         if (!term_100 (zh->reg->zebra_maps, reg_type,
754                        term_sub, term_component,
755                        space_split, term_dst))
756             return 0;
757         logf (LOG_DEBUG, "Relation <=");
758
759         *term_tmp++ = '(';
760         for (i = 0; term_component[i]; )
761         {
762             int j = 0;
763
764             while (j < i)
765                 string_rel_add_char (&term_tmp, term_component, &j);
766             *term_tmp++ = '[';
767
768             *term_tmp++ = '^';
769             string_rel_add_char (&term_tmp, term_component, &i);
770             *term_tmp++ = '-';
771
772             *term_tmp++ = ']';
773             *term_tmp++ = '.';
774             *term_tmp++ = '*';
775
776             *term_tmp++ = '|';
777
778             if ((term_tmp - term_dict) > IT_MAX_WORD)
779                 break;
780         }
781         for (i = 0; term_component[i]; )
782             string_rel_add_char (&term_tmp, term_component, &i);
783         *term_tmp++ = ')';
784         *term_tmp = '\0';
785         break;
786     case 5:
787         if (!term_100 (zh->reg->zebra_maps, reg_type,
788                        term_sub, term_component, space_split, term_dst))
789             return 0;
790         logf (LOG_DEBUG, "Relation >");
791
792         *term_tmp++ = '(';
793         for (i = 0; term_component[i];)
794         {
795             int j = 0;
796
797             while (j < i)
798                 string_rel_add_char (&term_tmp, term_component, &j);
799             *term_tmp++ = '[';
800             
801             *term_tmp++ = '^';
802             *term_tmp++ = '-';
803             string_rel_add_char (&term_tmp, term_component, &i);
804
805             *term_tmp++ = ']';
806             *term_tmp++ = '.';
807             *term_tmp++ = '*';
808
809             *term_tmp++ = '|';
810
811             if ((term_tmp - term_dict) > IT_MAX_WORD)
812                 break;
813         }
814         for (i = 0; term_component[i];)
815             string_rel_add_char (&term_tmp, term_component, &i);
816         *term_tmp++ = '.';
817         *term_tmp++ = '+';
818         *term_tmp++ = ')';
819         *term_tmp = '\0';
820         break;
821     case 4:
822         if (!term_100 (zh->reg->zebra_maps, reg_type, term_sub,
823                        term_component, space_split, term_dst))
824             return 0;
825         logf (LOG_DEBUG, "Relation >=");
826
827         *term_tmp++ = '(';
828         for (i = 0; term_component[i];)
829         {
830             int j = 0;
831
832             if (i)
833                 *term_tmp++ = '|';
834             while (j < i)
835                 string_rel_add_char (&term_tmp, term_component, &j);
836             *term_tmp++ = '[';
837
838             if (term_component[i+1])
839             {
840                 *term_tmp++ = '^';
841                 *term_tmp++ = '-';
842                 string_rel_add_char (&term_tmp, term_component, &i);
843             }
844             else
845             {
846                 string_rel_add_char (&term_tmp, term_component, &i);
847                 *term_tmp++ = '-';
848             }
849             *term_tmp++ = ']';
850             *term_tmp++ = '.';
851             *term_tmp++ = '*';
852
853             if ((term_tmp - term_dict) > IT_MAX_WORD)
854                 break;
855         }
856         *term_tmp++ = ')';
857         *term_tmp = '\0';
858         break;
859     case 3:
860     default:
861         logf (LOG_DEBUG, "Relation =");
862         if (!term_100 (zh->reg->zebra_maps, reg_type, term_sub,
863                        term_component, space_split, term_dst))
864             return 0;
865         strcat (term_tmp, "(");
866         strcat (term_tmp, term_component);
867         strcat (term_tmp, ")");
868     }
869     return 1;
870 }
871
/* Forward declaration: term_trunc (below) calls string_term before its
   definition.  term_trunc treats a return value below 1 as "no result". */
872 static int string_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
873                         const char **term_sub, 
874                         oid_value attributeSet, NMEM stream,
875                         struct grep_info *grep_info,
876                         int reg_type, int complete_flag,
877                         int num_bases, char **basenames,
878                         char *term_dst, int xpath_use);
879
880 static RSET term_trunc (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
881                         const char **term_sub, 
882                         oid_value attributeSet, NMEM stream,
883                         struct grep_info *grep_info,
884                         int reg_type, int complete_flag,
885                         int num_bases, char **basenames,
886                         char *term_dst,
887                         const char *rank_type, int xpath_use,
888                         NMEM rset_nmem)
889 {
890     int r;
891     grep_info->isam_p_indx = 0;
892     r = string_term (zh, zapt, term_sub, attributeSet, stream, grep_info,
893                      reg_type, complete_flag, num_bases, basenames,
894                      term_dst, xpath_use);
895     if (r < 1)
896         return 0;
897     logf (LOG_DEBUG, "term: %s", term_dst);
898     return rset_trunc (zh, grep_info->isam_p_buf,
899                        grep_info->isam_p_indx, term_dst,
900                        strlen(term_dst), rank_type, 1 /* preserve pos */,
901                        zapt->term->which, rset_nmem,
902                        key_it_ctrl,key_it_ctrl->scope);
903 }
904
905
906 static int string_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
907                         const char **term_sub, 
908                         oid_value attributeSet, NMEM stream,
909                         struct grep_info *grep_info,
910                         int reg_type, int complete_flag,
911                         int num_bases, char **basenames,
912                         char *term_dst, int xpath_use)
913 {
914     char term_dict[2*IT_MAX_WORD+4000];
915     int j, r, base_no;
916     AttrType truncation;
917     int truncation_value;
918     AttrType use;
919     int use_value;
920     const char *use_string = 0;
921     oid_value curAttributeSet = attributeSet;
922     const char *termp;
923     struct rpn_char_map_info rcmi;
924     int space_split = complete_flag ? 0 : 1;
925
926     int bases_ok = 0;     /* no of databases with OK attribute */
927     int errCode = 0;      /* err code (if any is not OK) */
928     char *errString = 0;  /* addinfo */
929
930     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
931     attr_init (&use, zapt, 1);
932     use_value = attr_find_ex (&use, &curAttributeSet, &use_string);
933     logf (LOG_DEBUG, "string_term, use value %d", use_value);
934     attr_init (&truncation, zapt, 5);
935     truncation_value = attr_find (&truncation, NULL);
936     logf (LOG_DEBUG, "truncation value %d", truncation_value);
937
938     if (use_value == -1)    /* no attribute - assumy "any" */
939         use_value = 1016;
940     for (base_no = 0; base_no < num_bases; base_no++)
941     {
942         attent attp;
943         data1_local_attribute id_xpath_attr;
944         data1_local_attribute *local_attr;
945         int max_pos, prefix_len = 0;
946
947         termp = *term_sub;
948
949         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
950         {
951             zh->errCode = 109; /* Database unavailable */
952             zh->errString = basenames[base_no];
953             return -1;
954         }
955         if (xpath_use > 0 && use_value == -2) 
956         {
957             use_value = xpath_use;
958             attp.local_attributes = &id_xpath_attr;
959             attp.attset_ordinal = VAL_IDXPATH;
960             id_xpath_attr.next = 0;
961             id_xpath_attr.local = use_value;
962         }
963         else if (curAttributeSet == VAL_IDXPATH)
964         {
965             attp.local_attributes = &id_xpath_attr;
966             attp.attset_ordinal = VAL_IDXPATH;
967             id_xpath_attr.next = 0;
968             id_xpath_attr.local = use_value;
969         }
970         else
971         {
972             if ((r=att_getentbyatt (zh, &attp, curAttributeSet, use_value,
973                                             use_string)))
974             {
975                 logf (LOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
976                       curAttributeSet, use_value, r);
977                 if (r == -1)
978                 {
979                     /* set was found, but value wasn't defined */
980                     errCode = 114;
981                     if (use_string)
982                         errString = nmem_strdup(stream, use_string);
983                     else
984                     {
985                         char val_str[32];
986                         sprintf (val_str, "%d", use_value);
987                         errString = nmem_strdup (stream, val_str);
988                     }
989                 }
990                 else
991                 {
992                     int oid[OID_SIZE];
993                     struct oident oident;
994                     
995                     oident.proto = PROTO_Z3950;
996                     oident.oclass = CLASS_ATTSET;
997                     oident.value = curAttributeSet;
998                     oid_ent_to_oid (&oident, oid);
999                     
1000                     errCode = 121;
1001                     errString = nmem_strdup (stream, oident.desc);
1002                 }
1003                 continue;
1004             }
1005         }
1006         for (local_attr = attp.local_attributes; local_attr;
1007              local_attr = local_attr->next)
1008         {
1009             int ord;
1010             char ord_buf[32];
1011             int i, ord_len;
1012             
1013             ord = zebraExplain_lookupSU (zh->reg->zei, attp.attset_ordinal,
1014                                          local_attr->local);
1015             if (ord < 0)
1016                 continue;
1017             if (prefix_len)
1018                 term_dict[prefix_len++] = '|';
1019             else
1020                 term_dict[prefix_len++] = '(';
1021             
1022             ord_len = key_SU_encode (ord, ord_buf);
1023             for (i = 0; i<ord_len; i++)
1024             {
1025                 term_dict[prefix_len++] = 1;
1026                 term_dict[prefix_len++] = ord_buf[i];
1027             }
1028         }
1029         if (!prefix_len)
1030         {
1031 #if 1
1032             bases_ok++;
1033 #else
1034             char val_str[32];
1035             sprintf (val_str, "%d", use_value);
1036             errCode = 114;
1037             errString = nmem_strdup (stream, val_str);
1038 #endif
1039             continue;
1040         }
1041         bases_ok++; /* this has OK attributes */
1042
1043         term_dict[prefix_len++] = ')';
1044         term_dict[prefix_len++] = 1;
1045         term_dict[prefix_len++] = reg_type;
1046         logf (LOG_DEBUG, "reg_type = %d", term_dict[prefix_len-1]);
1047         term_dict[prefix_len] = '\0';
1048         j = prefix_len;
1049         switch (truncation_value)
1050         {
1051         case -1:         /* not specified */
1052         case 100:        /* do not truncate */
1053             if (!string_relation (zh, zapt, &termp, term_dict,
1054                                   attributeSet,
1055                                   reg_type, space_split, term_dst))
1056                 return 0;
1057             logf (LOG_DEBUG, "dict_lookup_grep: %s", term_dict+prefix_len);
1058             r = dict_lookup_grep (zh->reg->dict, term_dict, 0,
1059                                   grep_info, &max_pos, 0, grep_handle);
1060             if (r)
1061                 logf (LOG_WARN, "dict_lookup_grep fail %d", r);
1062             break;
1063         case 1:          /* right truncation */
1064             term_dict[j++] = '(';
1065             if (!term_100 (zh->reg->zebra_maps, reg_type,
1066                            &termp, term_dict + j, space_split, term_dst))
1067                 return 0;
1068             strcat (term_dict, ".*)");
1069             dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info,
1070                               &max_pos, 0, grep_handle);
1071             break;
1072         case 2:          /* keft truncation */
1073             term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1074             if (!term_100 (zh->reg->zebra_maps, reg_type,
1075                            &termp, term_dict + j, space_split, term_dst))
1076                 return 0;
1077             strcat (term_dict, ")");
1078             dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info,
1079                               &max_pos, 0, grep_handle);
1080             break;
1081         case 3:          /* left&right truncation */
1082             term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1083             if (!term_100 (zh->reg->zebra_maps, reg_type,
1084                            &termp, term_dict + j, space_split, term_dst))
1085                 return 0;
1086             strcat (term_dict, ".*)");
1087             dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info,
1088                               &max_pos, 0, grep_handle);
1089             break;
1090             zh->errCode = 120;
1091             return -1;
1092         case 101:        /* process # in term */
1093             term_dict[j++] = '(';
1094             if (!term_101 (zh->reg->zebra_maps, reg_type,
1095                            &termp, term_dict + j, space_split, term_dst))
1096                 return 0;
1097             strcat (term_dict, ")");
1098             r = dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info,
1099                                   &max_pos, 0, grep_handle);
1100             if (r)
1101                 logf (LOG_WARN, "dict_lookup_grep err, trunc=#: %d", r);
1102             break;
1103         case 102:        /* Regexp-1 */
1104             term_dict[j++] = '(';
1105             if (!term_102 (zh->reg->zebra_maps, reg_type,
1106                            &termp, term_dict + j, space_split, term_dst))
1107                 return 0;
1108             strcat (term_dict, ")");
1109             logf (LOG_DEBUG, "Regexp-1 tolerance=%d", r);
1110             r = dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info,
1111                                   &max_pos, 0, grep_handle);
1112             if (r)
1113                 logf (LOG_WARN, "dict_lookup_grep err, trunc=regular: %d",
1114                       r);
1115             break;
1116         case 103:       /* Regexp-2 */
1117             r = 1;
1118             term_dict[j++] = '(';
1119             if (!term_103 (zh->reg->zebra_maps, reg_type,
1120                            &termp, term_dict + j, &r, space_split, term_dst))
1121                 return 0;
1122             strcat (term_dict, ")");
1123             logf (LOG_DEBUG, "Regexp-2 tolerance=%d", r);
1124             r = dict_lookup_grep (zh->reg->dict, term_dict, r, grep_info,
1125                                   &max_pos, 2, grep_handle);
1126             if (r)
1127                 logf (LOG_WARN, "dict_lookup_grep err, trunc=eregular: %d",
1128                       r);
1129             break;
1130         case 104:        /* process # and ! in term */
1131             term_dict[j++] = '(';
1132             if (!term_104 (zh->reg->zebra_maps, reg_type,
1133                            &termp, term_dict + j, space_split, term_dst))
1134                 return 0;
1135             strcat (term_dict, ")");
1136             r = dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info,
1137                                   &max_pos, 0, grep_handle);
1138             if (r)
1139                 logf (LOG_WARN, "dict_lookup_grep err, trunc=#/!: %d", r);
1140             break;
1141         case 105:        /* process * and ! in term */
1142             term_dict[j++] = '(';
1143             if (!term_105 (zh->reg->zebra_maps, reg_type,
1144                            &termp, term_dict + j, space_split, term_dst, 1))
1145                 return 0;
1146             strcat (term_dict, ")");
1147             r = dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info,
1148                                   &max_pos, 0, grep_handle);
1149             if (r)
1150                 logf (LOG_WARN, "dict_lookup_grep err, trunc=*/!: %d", r);
1151             break;
1152         case 106:        /* process * and ! in term */
1153             term_dict[j++] = '(';
1154             if (!term_105 (zh->reg->zebra_maps, reg_type,
1155                            &termp, term_dict + j, space_split, term_dst, 0))
1156                 return 0;
1157             strcat (term_dict, ")");
1158             r = dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info,
1159                                   &max_pos, 0, grep_handle);
1160             if (r)
1161                 logf (LOG_WARN, "dict_lookup_grep err, trunc=*/!: %d", r);
1162             break;
1163         }
1164     }
1165     if (!bases_ok)
1166     {
1167         zh->errCode = errCode;
1168         zh->errString = errString;
1169         return -1;
1170     }
1171     *term_sub = termp;
1172     logf (LOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1173     return 1;
1174 }
1175
1176
1177 /* convert APT search term to UTF8 */
1178 static int zapt_term_to_utf8 (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1179                               char *termz)
1180 {
1181     size_t sizez;
1182     Z_Term *term = zapt->term;
1183
1184     switch (term->which)
1185     {
1186     case Z_Term_general:
1187         if (zh->iconv_to_utf8 != 0)
1188         {
1189             char *inbuf = term->u.general->buf;
1190             size_t inleft = term->u.general->len;
1191             char *outbuf = termz;
1192             size_t outleft = IT_MAX_WORD-1;
1193             size_t ret;
1194
1195             ret = yaz_iconv(zh->iconv_to_utf8, &inbuf, &inleft,
1196                         &outbuf, &outleft);
1197             if (ret == (size_t)(-1))
1198             {
1199                 ret = yaz_iconv(zh->iconv_to_utf8, 0, 0, 0, 0);
1200                 zh->errCode = 125;
1201                 return -1;
1202             }
1203             *outbuf = 0;
1204         }
1205         else
1206         {
1207             sizez = term->u.general->len;
1208             if (sizez > IT_MAX_WORD-1)
1209                 sizez = IT_MAX_WORD-1;
1210             memcpy (termz, term->u.general->buf, sizez);
1211             termz[sizez] = '\0';
1212         }
1213         break;
1214     case Z_Term_characterString:
1215         sizez = strlen(term->u.characterString);
1216         if (sizez > IT_MAX_WORD-1)
1217             sizez = IT_MAX_WORD-1;
1218         memcpy (termz, term->u.characterString, sizez);
1219         termz[sizez] = '\0';
1220         break;
1221     default:
1222         zh->errCode = 124;
1223         return -1;
1224     }
1225     return 0;
1226 }
1227
1228 /* convert APT SCAN term to internal cmap */
1229 static int trans_scan_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1230                             char *termz, int reg_type)
1231 {
1232     char termz0[IT_MAX_WORD];
1233
1234     if (zapt_term_to_utf8(zh, zapt, termz0))
1235         return -1;    /* error */
1236     else
1237     {
1238         const char **map;
1239         const char *cp = (const char *) termz0;
1240         const char *cp_end = cp + strlen(cp);
1241         const char *src;
1242         int i = 0;
1243         const char *space_map = NULL;
1244         int len;
1245             
1246         while ((len = (cp_end - cp)) > 0)
1247         {
1248             map = zebra_maps_input (zh->reg->zebra_maps, reg_type, &cp, len, 0);
1249             if (**map == *CHR_SPACE)
1250                 space_map = *map;
1251             else
1252             {
1253                 if (i && space_map)
1254                     for (src = space_map; *src; src++)
1255                         termz[i++] = *src;
1256                 space_map = NULL;
1257                 for (src = *map; *src; src++)
1258                     termz[i++] = *src;
1259             }
1260         }
1261         termz[i] = '\0';
1262     }
1263     return 0;
1264 }
1265
1266 char *normalize_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1267                      const char *termz, NMEM stream, unsigned reg_id)
1268 {
1269     WRBUF wrbuf = 0;
1270     AttrType truncation;
1271     int truncation_value;
1272     char *ex_list = 0;
1273
1274     attr_init (&truncation, zapt, 5);
1275     truncation_value = attr_find (&truncation, NULL);
1276
1277     switch (truncation_value)
1278     {
1279     default:
1280         ex_list = "";
1281         break;
1282     case 101:
1283         ex_list = "#";
1284         break;
1285     case 102:
1286     case 103:
1287         ex_list = 0;
1288         break;
1289     case 104:
1290         ex_list = "!#";
1291         break;
1292     case 105:
1293         ex_list = "!*";
1294         break;
1295     }
1296     if (ex_list)
1297         wrbuf = zebra_replace(zh->reg->zebra_maps, reg_id, ex_list,
1298                               termz, strlen(termz));
1299     if (!wrbuf)
1300         return nmem_strdup(stream, termz);
1301     else
1302     {
1303         char *buf = (char*) nmem_malloc (stream, wrbuf_len(wrbuf)+1);
1304         memcpy (buf, wrbuf_buf(wrbuf), wrbuf_len(wrbuf));
1305         buf[wrbuf_len(wrbuf)] = '\0';
1306         return buf;
1307     }
1308 }
1309
1310 static void grep_info_delete (struct grep_info *grep_info)
1311 {
1312 #ifdef TERM_COUNT
1313     xfree(grep_info->term_no);
1314 #endif
1315     xfree (grep_info->isam_p_buf);
1316 }
1317
1318 static int grep_info_prepare (ZebraHandle zh,
1319                               Z_AttributesPlusTerm *zapt,
1320                               struct grep_info *grep_info,
1321                               int reg_type,
1322                               NMEM stream)
1323 {
1324     AttrType termset;
1325     int termset_value_numeric;
1326     const char *termset_value_string;
1327
1328 #ifdef TERM_COUNT
1329     grep_info->term_no = 0;
1330 #endif
1331     grep_info->isam_p_size = 0;
1332     grep_info->isam_p_buf = NULL;
1333     grep_info->zh = zh;
1334     grep_info->reg_type = reg_type;
1335     grep_info->termset = 0;
1336
1337     if (!zapt)
1338         return 0;
1339     attr_init (&termset, zapt, 8);
1340     termset_value_numeric =
1341         attr_find_ex (&termset, NULL, &termset_value_string);
1342     if (termset_value_numeric != -1)
1343     {
1344         char resname[32];
1345         const char *termset_name = 0;
1346         if (termset_value_numeric != -2)
1347         {
1348     
1349             sprintf (resname, "%d", termset_value_numeric);
1350             termset_name = resname;
1351         }
1352         else
1353             termset_name = termset_value_string;
1354         logf (LOG_LOG, "creating termset set %s", termset_name);
1355         grep_info->termset = resultSetAdd (zh, termset_name, 1);
1356         if (!grep_info->termset)
1357         {
1358             zh->errCode = 128;
1359             zh->errString = nmem_strdup (stream, termset_name);
1360             return -1;
1361         }
1362     }
1363     return 0;
1364 }
1365                                
1366
1367 static RSET rpn_search_APT_phrase (ZebraHandle zh,
1368                                    Z_AttributesPlusTerm *zapt,
1369                                    const char *termz_org,
1370                                    oid_value attributeSet,
1371                                    NMEM stream,
1372                                    int reg_type, int complete_flag,
1373                                    const char *rank_type, int xpath_use,
1374                                    int num_bases, char **basenames, 
1375                                    NMEM rset_nmem)
1376 {
1377     char term_dst[IT_MAX_WORD+1];
1378     RSET rset[60], result;
1379     int rset_no = 0;
1380     struct grep_info grep_info;
1381     char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type);
1382     const char *termp = termz;
1383
1384     *term_dst = 0;
1385     if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream))
1386         return 0;
1387     while (1)
1388     { 
1389         logf (LOG_DEBUG, "APT_phrase termp=%s", termp);
1390         rset[rset_no] = term_trunc (zh, zapt, &termp, attributeSet,
1391                                     stream, &grep_info,
1392                                     reg_type, complete_flag,
1393                                     num_bases, basenames,
1394                                     term_dst, rank_type,
1395                                     xpath_use,rset_nmem);
1396         if (!rset[rset_no])
1397             break;
1398         if (++rset_no >= (int) (sizeof(rset)/sizeof(*rset)))
1399             break;
1400     }
1401     grep_info_delete (&grep_info);
1402     if (rset_no == 0)
1403         return rsnull_create (rset_nmem,key_it_ctrl); 
1404     else if (rset_no == 1)
1405         return (rset[0]);
1406     else
1407         result = rsprox_create( rset_nmem, key_it_ctrl, key_it_ctrl->scope,
1408                        rset_no, rset,
1409                        1 /* ordered */, 0 /* exclusion */,
1410                        3 /* relation */, 1 /* distance */);
1411     return result;
1412 }
1413
1414 static RSET rpn_search_APT_or_list (ZebraHandle zh,
1415                                     Z_AttributesPlusTerm *zapt,
1416                                     const char *termz_org,
1417                                     oid_value attributeSet,
1418                                     NMEM stream,
1419                                     int reg_type, int complete_flag,
1420                                     const char *rank_type,
1421                                     int xpath_use,
1422                                     int num_bases, char **basenames,
1423                                     NMEM rset_nmem)
1424 {
1425     char term_dst[IT_MAX_WORD+1];
1426     RSET rset[60];
1427     int rset_no = 0;
1428     struct grep_info grep_info;
1429     char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type);
1430     const char *termp = termz;
1431
1432     if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream))
1433         return 0;
1434     while (1)
1435     { 
1436         logf (LOG_DEBUG, "APT_or_list termp=%s", termp);
1437         rset[rset_no] = term_trunc (zh, zapt, &termp, attributeSet,
1438                                     stream, &grep_info,
1439                                     reg_type, complete_flag,
1440                                     num_bases, basenames,
1441                                     term_dst, rank_type,
1442                                     xpath_use,rset_nmem);
1443         if (!rset[rset_no])
1444             break;
1445         if (++rset_no >= (int) (sizeof(rset)/sizeof(*rset)))
1446             break;
1447     }
1448     grep_info_delete (&grep_info);
1449     if (rset_no == 0)
1450         return rsnull_create (rset_nmem,key_it_ctrl);  
1451     return rsmultior_create(rset_nmem, key_it_ctrl,key_it_ctrl->scope,
1452                             rset_no, rset);
1453 }
1454
1455 static RSET rpn_search_APT_and_list (ZebraHandle zh,
1456                                      Z_AttributesPlusTerm *zapt,
1457                                      const char *termz_org,
1458                                      oid_value attributeSet,
1459                                      NMEM stream,
1460                                      int reg_type, int complete_flag,
1461                                      const char *rank_type, 
1462                                      int xpath_use,
1463                                      int num_bases, char **basenames,
1464                                      NMEM rset_nmem)
1465 {
1466     char term_dst[IT_MAX_WORD+1];
1467     RSET rset[60], result;
1468     int i, rset_no = 0;
1469     struct grep_info grep_info;
1470     char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type);
1471     const char *termp = termz;
1472
1473     if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream))
1474         return 0;
1475     while (1)
1476     { 
1477         logf (LOG_DEBUG, "APT_and_list termp=%s", termp);
1478         rset[rset_no] = term_trunc (zh, zapt, &termp, attributeSet,
1479                                     stream, &grep_info,
1480                                     reg_type, complete_flag,
1481                                     num_bases, basenames,
1482                                     term_dst, rank_type,
1483                                     xpath_use, rset_nmem);
1484         if (!rset[rset_no])
1485             break;
1486         assert (rset[rset_no]);
1487         if (++rset_no >= (int) (sizeof(rset)/sizeof(*rset)))
1488             break;
1489     }
1490     grep_info_delete (&grep_info);
1491     if (rset_no == 0)
1492         return rsnull_create (rset_nmem,key_it_ctrl); 
1493     result = rset[0];
1494     /* FIXME - Use a proper rsmultiand */
1495     for (i = 1; i<rset_no; i++)
1496         result= rsbool_create_and(rset_nmem,key_it_ctrl,key_it_ctrl->scope,
1497                 result, rset[i] );
1498     return result;
1499 }
1500
1501 static int numeric_relation (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1502                              const char **term_sub,
1503                              char *term_dict,
1504                              oid_value attributeSet,
1505                              struct grep_info *grep_info,
1506                              int *max_pos,
1507                              int reg_type,
1508                              char *term_dst)
1509 {
1510     AttrType relation;
1511     int relation_value;
1512     int term_value;
1513     int r;
1514     char *term_tmp = term_dict + strlen(term_dict);
1515
1516     attr_init (&relation, zapt, 2);
1517     relation_value = attr_find (&relation, NULL);
1518
1519     logf (LOG_DEBUG, "numeric relation value=%d", relation_value);
1520
1521     if (!term_100 (zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1522                    term_dst))
1523         return 0;
1524     term_value = atoi (term_tmp);
1525     switch (relation_value)
1526     {
1527     case 1:
1528         logf (LOG_DEBUG, "Relation <");
1529         gen_regular_rel (term_tmp, term_value-1, 1);
1530         break;
1531     case 2:
1532         logf (LOG_DEBUG, "Relation <=");
1533         gen_regular_rel (term_tmp, term_value, 1);
1534         break;
1535     case 4:
1536         logf (LOG_DEBUG, "Relation >=");
1537         gen_regular_rel (term_tmp, term_value, 0);
1538         break;
1539     case 5:
1540         logf (LOG_DEBUG, "Relation >");
1541         gen_regular_rel (term_tmp, term_value+1, 0);
1542         break;
1543     case 3:
1544     default:
1545         logf (LOG_DEBUG, "Relation =");
1546         sprintf (term_tmp, "(0*%d)", term_value);
1547     }
1548     logf (LOG_DEBUG, "dict_lookup_grep: %s", term_tmp);
1549     r = dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info, max_pos,
1550                           0, grep_handle);
1551     if (r)
1552         logf (LOG_WARN, "dict_lookup_grep fail, rel=gt: %d", r);
1553     logf (LOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1554     return 1;
1555 }
1556
1557 static int numeric_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1558                          const char **term_sub, 
1559                          oid_value attributeSet, struct grep_info *grep_info,
1560                          int reg_type, int complete_flag,
1561                          int num_bases, char **basenames,
1562                          char *term_dst, int xpath_use, NMEM stream)
1563 {
1564     char term_dict[2*IT_MAX_WORD+2];
1565     int r, base_no;
1566     AttrType use;
1567     int use_value;
1568     const char *use_string = 0;
1569     oid_value curAttributeSet = attributeSet;
1570     const char *termp;
1571     struct rpn_char_map_info rcmi;
1572
1573     int bases_ok = 0;     /* no of databases with OK attribute */
1574     int errCode = 0;      /* err code (if any is not OK) */
1575     char *errString = 0;  /* addinfo */
1576
1577     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1578     attr_init (&use, zapt, 1);
1579     use_value = attr_find_ex (&use, &curAttributeSet, &use_string);
1580
1581     if (use_value == -1)
1582         use_value = 1016;
1583
1584     for (base_no = 0; base_no < num_bases; base_no++)
1585     {
1586         attent attp;
1587         data1_local_attribute id_xpath_attr;
1588         data1_local_attribute *local_attr;
1589         int max_pos, prefix_len = 0;
1590
1591         termp = *term_sub;
1592         if (use_value == -2)  /* string attribute (assume IDXPATH/any) */
1593         {
1594             use_value = xpath_use;
1595             attp.local_attributes = &id_xpath_attr;
1596             attp.attset_ordinal = VAL_IDXPATH;
1597             id_xpath_attr.next = 0;
1598             id_xpath_attr.local = use_value;
1599         }
1600         else if (curAttributeSet == VAL_IDXPATH)
1601         {
1602             attp.local_attributes = &id_xpath_attr;
1603             attp.attset_ordinal = VAL_IDXPATH;
1604             id_xpath_attr.next = 0;
1605             id_xpath_attr.local = use_value;
1606         }
1607         else
1608         {
1609             if ((r=att_getentbyatt (zh, &attp, curAttributeSet, use_value,
1610                                             use_string)))
1611             {
1612                 logf (LOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
1613                       curAttributeSet, use_value, r);
1614                 if (r == -1)
1615                 {
1616                     char val_str[32];
1617                     sprintf (val_str, "%d", use_value);
1618                     errString = nmem_strdup (stream, val_str);
1619                     errCode = 114;
1620                 }
1621                 else
1622                     errCode = 121;
1623                 continue;
1624             }
1625         }
1626         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1627         {
1628             zh->errCode = 109; /* Database unavailable */
1629             zh->errString = basenames[base_no];
1630             return -1;
1631         }
1632         for (local_attr = attp.local_attributes; local_attr;
1633              local_attr = local_attr->next)
1634         {
1635             int ord;
1636             char ord_buf[32];
1637             int i, ord_len;
1638
1639             ord = zebraExplain_lookupSU (zh->reg->zei, attp.attset_ordinal,
1640                                           local_attr->local);
1641             if (ord < 0)
1642                 continue;
1643             if (prefix_len)
1644                 term_dict[prefix_len++] = '|';
1645             else
1646                 term_dict[prefix_len++] = '(';
1647
1648             ord_len = key_SU_encode (ord, ord_buf);
1649             for (i = 0; i<ord_len; i++)
1650             {
1651                 term_dict[prefix_len++] = 1;
1652                 term_dict[prefix_len++] = ord_buf[i];
1653             }
1654         }
1655         if (!prefix_len)
1656         {
1657             char val_str[32];
1658             sprintf (val_str, "%d", use_value);
1659             errCode = 114;
1660             errString = nmem_strdup (stream, val_str);
1661             continue;
1662         }
1663         bases_ok++;
1664         term_dict[prefix_len++] = ')';        
1665         term_dict[prefix_len++] = 1;
1666         term_dict[prefix_len++] = reg_type;
1667         logf (LOG_DEBUG, "reg_type = %d", term_dict[prefix_len-1]);
1668         term_dict[prefix_len] = '\0';
1669         if (!numeric_relation (zh, zapt, &termp, term_dict,
1670                                attributeSet, grep_info, &max_pos, reg_type,
1671                                term_dst))
1672             return 0;
1673     }
1674     if (!bases_ok)
1675     {
1676         zh->errCode = errCode;
1677         zh->errString = errString;
1678         return -1;
1679     }
1680     *term_sub = termp;
1681     logf (LOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1682     return 1;
1683 }
1684
1685 static RSET rpn_search_APT_numeric (ZebraHandle zh,
1686                                     Z_AttributesPlusTerm *zapt,
1687                                     const char *termz,
1688                                     oid_value attributeSet,
1689                                     NMEM stream,
1690                                     int reg_type, int complete_flag,
1691                                     const char *rank_type, int xpath_use,
1692                                     int num_bases, char **basenames,
1693                                     NMEM rset_nmem)
1694 {
1695     char term_dst[IT_MAX_WORD+1];
1696     const char *termp = termz;
1697     RSET rset[60], result;
1698     int i, r, rset_no = 0;
1699     struct grep_info grep_info;
1700
1701     if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream))
1702         return 0;
1703     while (1)
1704     { 
1705         logf (LOG_DEBUG, "APT_numeric termp=%s", termp);
1706         grep_info.isam_p_indx = 0;
1707         r = numeric_term (zh, zapt, &termp, attributeSet, &grep_info,
1708                           reg_type, complete_flag, num_bases, basenames,
1709                           term_dst, xpath_use,
1710                           stream);
1711         if (r < 1)
1712             break;
1713         logf (LOG_DEBUG, "term: %s", term_dst);
1714         rset[rset_no] = rset_trunc (zh, grep_info.isam_p_buf,
1715                                     grep_info.isam_p_indx, term_dst,
1716                                     strlen(term_dst), rank_type,
1717                                     0 /* preserve position */,
1718                                     zapt->term->which, rset_nmem, 
1719                                     key_it_ctrl,key_it_ctrl->scope);
1720         assert (rset[rset_no]);
1721         if (++rset_no >= (int) (sizeof(rset)/sizeof(*rset)))
1722             break;
1723     }
1724     grep_info_delete (&grep_info);
1725     if (rset_no == 0)
1726         return rsnull_create (rset_nmem,key_it_ctrl);
1727     result = rset[0];
1728     for (i = 1; i<rset_no; i++)
1729     {
1730         /* FIXME - Use a proper multi-and */
1731         result= rsbool_create_and(rset_nmem,key_it_ctrl,key_it_ctrl->scope,
1732                 result, rset[i] );
1733     }
1734     return result;
1735 }
1736
1737 static RSET rpn_search_APT_local (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1738                                   const char *termz,
1739                                   oid_value attributeSet,
1740                                   NMEM stream,
1741                                   const char *rank_type, NMEM rset_nmem)
1742 {
1743     RSET result;
1744     RSFD rsfd;
1745     struct it_key key;
1746     int sys;
1747     /*
1748     rset_temp_parms parms;
1749
1750     parms.cmp = key_compare_it;
1751     parms.key_size = sizeof (struct it_key);
1752     parms.temp_path = res_get (zh->res, "setTmpDir");
1753     result = rset_create (rset_kind_temp, &parms);
1754     */
1755     result = rstemp_create( rset_nmem,key_it_ctrl,key_it_ctrl->scope,
1756                      res_get (zh->res, "setTmpDir") );
1757     rsfd = rset_open (result, RSETF_WRITE);
1758
1759     sys = atoi(termz);
1760     if (sys <= 0)
1761         sys = 1;
1762     key.mem[0] = sys;
1763     key.mem[1] = 1;
1764     key.len = 2;
1765     rset_write (rsfd, &key);
1766     rset_close (rsfd);
1767     return result;
1768 }
1769
1770 static RSET rpn_sort_spec (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1771                            oid_value attributeSet, NMEM stream,
1772                            Z_SortKeySpecList *sort_sequence,
1773                            const char *rank_type)
1774 {
1775     int i;
1776     int sort_relation_value;
1777     AttrType sort_relation_type;
1778     int use_value;
1779     AttrType use_type;
1780     Z_SortKeySpec *sks;
1781     Z_SortKey *sk;
1782     Z_AttributeElement *ae;
1783     int oid[OID_SIZE];
1784     oident oe;
1785     char termz[20];
1786     
1787     attr_init (&sort_relation_type, zapt, 7);
1788     sort_relation_value = attr_find (&sort_relation_type, &attributeSet);
1789
1790     attr_init (&use_type, zapt, 1);
1791     use_value = attr_find (&use_type, &attributeSet);
1792
1793     if (!sort_sequence->specs)
1794     {
1795         sort_sequence->num_specs = 10;
1796         sort_sequence->specs = (Z_SortKeySpec **)
1797             nmem_malloc (stream, sort_sequence->num_specs *
1798                          sizeof(*sort_sequence->specs));
1799         for (i = 0; i<sort_sequence->num_specs; i++)
1800             sort_sequence->specs[i] = 0;
1801     }
1802     if (zapt->term->which != Z_Term_general)
1803         i = 0;
1804     else
1805         i = atoi_n ((char *) zapt->term->u.general->buf,
1806                     zapt->term->u.general->len);
1807     if (i >= sort_sequence->num_specs)
1808         i = 0;
1809     sprintf (termz, "%d", i);
1810
1811     oe.proto = PROTO_Z3950;
1812     oe.oclass = CLASS_ATTSET;
1813     oe.value = attributeSet;
1814     if (!oid_ent_to_oid (&oe, oid))
1815         return 0;
1816
1817     sks = (Z_SortKeySpec *) nmem_malloc (stream, sizeof(*sks));
1818     sks->sortElement = (Z_SortElement *)
1819         nmem_malloc (stream, sizeof(*sks->sortElement));
1820     sks->sortElement->which = Z_SortElement_generic;
1821     sk = sks->sortElement->u.generic = (Z_SortKey *)
1822         nmem_malloc (stream, sizeof(*sk));
1823     sk->which = Z_SortKey_sortAttributes;
1824     sk->u.sortAttributes = (Z_SortAttributes *)
1825         nmem_malloc (stream, sizeof(*sk->u.sortAttributes));
1826
1827     sk->u.sortAttributes->id = oid;
1828     sk->u.sortAttributes->list = (Z_AttributeList *)
1829         nmem_malloc (stream, sizeof(*sk->u.sortAttributes->list));
1830     sk->u.sortAttributes->list->num_attributes = 1;
1831     sk->u.sortAttributes->list->attributes = (Z_AttributeElement **)
1832         nmem_malloc (stream, sizeof(*sk->u.sortAttributes->list->attributes));
1833     ae = *sk->u.sortAttributes->list->attributes = (Z_AttributeElement *)
1834         nmem_malloc (stream, sizeof(**sk->u.sortAttributes->list->attributes));
1835     ae->attributeSet = 0;
1836     ae->attributeType = (int *)
1837         nmem_malloc (stream, sizeof(*ae->attributeType));
1838     *ae->attributeType = 1;
1839     ae->which = Z_AttributeValue_numeric;
1840     ae->value.numeric = (int *)
1841         nmem_malloc (stream, sizeof(*ae->value.numeric));
1842     *ae->value.numeric = use_value;
1843
1844     sks->sortRelation = (int *)
1845         nmem_malloc (stream, sizeof(*sks->sortRelation));
1846     if (sort_relation_value == 1)
1847         *sks->sortRelation = Z_SortKeySpec_ascending;
1848     else if (sort_relation_value == 2)
1849         *sks->sortRelation = Z_SortKeySpec_descending;
1850     else 
1851         *sks->sortRelation = Z_SortKeySpec_ascending;
1852
1853     sks->caseSensitivity = (int *)
1854         nmem_malloc (stream, sizeof(*sks->caseSensitivity));
1855     *sks->caseSensitivity = 0;
1856
1857     sks->which = Z_SortKeySpec_null;
1858     sks->u.null = odr_nullval ();
1859     sort_sequence->specs[i] = sks;
1860     return rsnull_create (NULL,key_it_ctrl);
1861         /* FIXME - nmem?? */
1862 }
1863
1864
1865 static int parse_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1866                        oid_value attributeSet,
1867                        struct xpath_location_step *xpath, int max, NMEM mem)
1868 {
1869     oid_value curAttributeSet = attributeSet;
1870     AttrType use;
1871     const char *use_string = 0;
1872     
1873     attr_init (&use, zapt, 1);
1874     attr_find_ex (&use, &curAttributeSet, &use_string);
1875
1876     if (!use_string || *use_string != '/')
1877         return -1;
1878
1879     return zebra_parse_xpath_str(use_string, xpath, max, mem);
1880 }
1881  
1882                
1883
1884 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
1885                         int reg_type, const char *term, int use,
1886                         oid_value curAttributeSet, NMEM rset_nmem)
1887 {
1888     RSET rset;
1889     struct grep_info grep_info;
1890     char term_dict[2048];
1891     char ord_buf[32];
1892     int prefix_len = 0;
1893     int ord = zebraExplain_lookupSU (zh->reg->zei, curAttributeSet, use);
1894     int ord_len, i, r, max_pos;
1895     int term_type = Z_Term_characterString;
1896     const char *flags = "void";
1897
1898     if (grep_info_prepare (zh, 0 /* zapt */, &grep_info, '0', stream))
1899         return rsnull_create (rset_nmem,key_it_ctrl);
1900
1901     if (ord < 0)
1902         return rsnull_create (rset_nmem,key_it_ctrl);
1903     if (prefix_len)
1904         term_dict[prefix_len++] = '|';
1905     else
1906         term_dict[prefix_len++] = '(';
1907     
1908     ord_len = key_SU_encode (ord, ord_buf);
1909     for (i = 0; i<ord_len; i++)
1910     {
1911         term_dict[prefix_len++] = 1;
1912         term_dict[prefix_len++] = ord_buf[i];
1913     }
1914     term_dict[prefix_len++] = ')';
1915     term_dict[prefix_len++] = 1;
1916     term_dict[prefix_len++] = reg_type;
1917     
1918     strcpy (term_dict+prefix_len, term);
1919     
1920     grep_info.isam_p_indx = 0;
1921     r = dict_lookup_grep (zh->reg->dict, term_dict, 0,
1922                           &grep_info, &max_pos, 0, grep_handle);
1923     yaz_log (LOG_LOG, "%s %d positions", term,
1924              grep_info.isam_p_indx);
1925     rset = rset_trunc (zh, grep_info.isam_p_buf,
1926                        grep_info.isam_p_indx, term, strlen(term),
1927                        flags, 1, term_type,rset_nmem,
1928                        key_it_ctrl, key_it_ctrl->scope);
1929     grep_info_delete (&grep_info);
1930     return rset;
1931 }
1932
1933 static RSET rpn_search_xpath (ZebraHandle zh,
1934                               oid_value attributeSet,
1935                               int num_bases, char **basenames,
1936                               NMEM stream, const char *rank_type, RSET rset,
1937                               int xpath_len, struct xpath_location_step *xpath,
1938                               NMEM rset_nmem)
1939 {
1940     oid_value curAttributeSet = attributeSet;
1941     int base_no;
1942     int i;
1943
1944     if (xpath_len < 0)
1945         return rset;
1946
1947     yaz_log (LOG_LOG, "len=%d", xpath_len);
1948     for (i = 0; i<xpath_len; i++)
1949     {
1950         yaz_log (LOG_LOG, "XPATH %d %s", i, xpath[i].part);
1951
1952     }
1953
1954     curAttributeSet = VAL_IDXPATH;
1955
1956     /*
1957       //a    ->    a/.*
1958       //a/b  ->    b/a/.*
1959       /a     ->    a/
1960       /a/b   ->    b/a/
1961
1962       /      ->    none
1963
1964    a[@attr=value]/b[@other=othervalue]
1965
1966  /e/@a val      range(e/,range(@a,freetext(w,1015,val),@a),e/)
1967  /a/b val       range(b/a/,freetext(w,1016,val),b/a/)
1968  /a/b/@c val    range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
1969  /a/b[@c=y] val range(b/a/,freetext(w,1016,val),b/a/,@c=y)
1970  /a[@c=y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c=y)
1971  /a[@c=x]/b[@c=y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c=y),a/,@c=x)
1972       
1973     */
1974
1975     dict_grep_cmap (zh->reg->dict, 0, 0);
1976
1977     for (base_no = 0; base_no < num_bases; base_no++)
1978     {
1979         int level = xpath_len;
1980         int first_path = 1;
1981         
1982         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1983         {
1984             zh->errCode = 109; /* Database unavailable */
1985             zh->errString = basenames[base_no];
1986             return rset;
1987         }
1988         while (--level >= 0)
1989         {
1990             char xpath_rev[128];
1991             int i, len;
1992             RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
1993
1994             *xpath_rev = 0;
1995             len = 0;
1996             for (i = level; i >= 1; --i)
1997             {
1998                 const char *cp = xpath[i].part;
1999                 if (*cp)
2000                 {
2001                     for (;*cp; cp++)
2002                         if (*cp == '*')
2003                         {
2004                             memcpy (xpath_rev + len, "[^/]*", 5);
2005                             len += 5;
2006                         }
2007                         else if (*cp == ' ')
2008                         {
2009
2010                             xpath_rev[len++] = 1;
2011                             xpath_rev[len++] = ' ';
2012                         }
2013
2014                         else
2015                             xpath_rev[len++] = *cp;
2016                     xpath_rev[len++] = '/';
2017                 }
2018                 else if (i == 1)  /* // case */
2019                 {
2020                     xpath_rev[len++] = '.';
2021                     xpath_rev[len++] = '*';
2022                 }
2023             }
2024             xpath_rev[len] = 0;
2025
2026             if (xpath[level].predicate &&
2027                 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2028                 xpath[level].predicate->u.relation.name[0])
2029             {
2030                 WRBUF wbuf = wrbuf_alloc();
2031                 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2032                 if (xpath[level].predicate->u.relation.value)
2033                 {
2034                     const char *cp = xpath[level].predicate->u.relation.value;
2035                     wrbuf_putc(wbuf, '=');
2036                     
2037                     while (*cp)
2038                     {
2039                         if (strchr(REGEX_CHARS, *cp))
2040                             wrbuf_putc(wbuf, '\\');
2041                         wrbuf_putc(wbuf, *cp);
2042                         cp++;
2043                     }
2044                 }
2045                 wrbuf_puts(wbuf, "");
2046                 rset_attr = xpath_trunc (
2047                     zh, stream, '0', wrbuf_buf(wbuf), 3, 
2048                     curAttributeSet,rset_nmem);
2049                 wrbuf_free(wbuf, 1);
2050             } 
2051             else 
2052             {
2053                 if (!first_path)
2054                     continue;
2055             }
2056             yaz_log (LOG_LOG, "xpath_rev (%d) = %s", level, xpath_rev);
2057             if (strlen(xpath_rev))
2058             {
2059                 rset_start_tag = xpath_trunc(zh, stream, '0', 
2060                         xpath_rev, 1, curAttributeSet, rset_nmem);
2061             
2062                 rset_end_tag = xpath_trunc(zh, stream, '0', 
2063                         xpath_rev, 2, curAttributeSet, rset_nmem);
2064
2065                 /*
2066                 parms.key_size = sizeof(struct it_key);
2067                 parms.cmp = key_compare_it;
2068                 parms.rset_l = rset_start_tag;
2069                 parms.rset_m = rset;
2070                 parms.rset_r = rset_end_tag;
2071                 parms.rset_attr = rset_attr;
2072                 parms.printer = key_print_it;
2073                 rset = rset_create (rset_kind_between, &parms);
2074                 */
2075                 rset=rsbetween_create( rset_nmem,key_it_ctrl,key_it_ctrl->scope,
2076                         rset_start_tag, rset, rset_end_tag, rset_attr);
2077             }
2078             first_path = 0;
2079         }
2080     }
2081
2082     return rset;
2083 }
2084
2085
2086
2087 static RSET rpn_search_APT (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2088                             oid_value attributeSet, NMEM stream,
2089                             Z_SortKeySpecList *sort_sequence,
2090                             int num_bases, char **basenames, 
2091                             NMEM rset_nmem)
2092 {
2093     unsigned reg_id;
2094     char *search_type = NULL;
2095     char rank_type[128];
2096     int complete_flag;
2097     int sort_flag;
2098     char termz[IT_MAX_WORD+1];
2099     RSET rset = 0;
2100     int xpath_len;
2101     int xpath_use = 0;
2102     struct xpath_location_step xpath[10];
2103
2104     zebra_maps_attr (zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2105                      rank_type, &complete_flag, &sort_flag);
2106     
2107     logf (LOG_DEBUG, "reg_id=%c", reg_id);
2108     logf (LOG_DEBUG, "complete_flag=%d", complete_flag);
2109     logf (LOG_DEBUG, "search_type=%s", search_type);
2110     logf (LOG_DEBUG, "rank_type=%s", rank_type);
2111
2112     if (zapt_term_to_utf8(zh, zapt, termz))
2113         return 0;
2114
2115     if (sort_flag)
2116         return rpn_sort_spec (zh, zapt, attributeSet, stream, sort_sequence,
2117                               rank_type);
2118     xpath_len = parse_xpath(zh, zapt, attributeSet, xpath, 10, stream);
2119     if (xpath_len >= 0)
2120     {
2121         xpath_use = 1016;
2122         if (xpath[xpath_len-1].part[0] == '@')
2123             xpath_use = 1015;
2124     }
2125
2126     if (!strcmp (search_type, "phrase"))
2127     {
2128         rset = rpn_search_APT_phrase (zh, zapt, termz, attributeSet, stream,
2129                                       reg_id, complete_flag, rank_type,
2130                                       xpath_use,
2131                                       num_bases, basenames, rset_nmem);
2132     }
2133     else if (!strcmp (search_type, "and-list"))
2134     {
2135         rset = rpn_search_APT_and_list (zh, zapt, termz, attributeSet, stream,
2136                                         reg_id, complete_flag, rank_type,
2137                                         xpath_use,
2138                                         num_bases, basenames, rset_nmem);
2139     }
2140     else if (!strcmp (search_type, "or-list"))
2141     {
2142         rset = rpn_search_APT_or_list (zh, zapt, termz, attributeSet, stream,
2143                                        reg_id, complete_flag, rank_type,
2144                                        xpath_use,
2145                                        num_bases, basenames, rset_nmem);
2146     }
2147     else if (!strcmp (search_type, "local"))
2148     {
2149         rset = rpn_search_APT_local (zh, zapt, termz, attributeSet, stream,
2150                                      rank_type, rset_nmem);
2151     }
2152     else if (!strcmp (search_type, "numeric"))
2153     {
2154         rset = rpn_search_APT_numeric (zh, zapt, termz, attributeSet, stream,
2155                                        reg_id, complete_flag, rank_type,
2156                                        xpath_use,
2157                                        num_bases, basenames, rset_nmem);
2158     }
2159     else if (!strcmp (search_type, "always"))
2160     {
2161         rset = 0;
2162     }
2163     else
2164         zh->errCode = 118;
2165     return rpn_search_xpath (zh, attributeSet, num_bases, basenames,
2166                              stream, rank_type, rset, 
2167                              xpath_len, xpath, rset_nmem);
2168 }
2169
2170 static RSET rpn_search_structure (ZebraHandle zh, Z_RPNStructure *zs,
2171                                   oid_value attributeSet, 
2172                                   NMEM stream, NMEM rset_nmem,
2173                                   Z_SortKeySpecList *sort_sequence,
2174                                   int num_bases, char **basenames)
2175 {
2176     RSET r = NULL;
2177     if (zs->which == Z_RPNStructure_complex)
2178     {
2179         Z_Operator *zop = zs->u.complex->roperator;
2180         RSET rset_l;
2181         RSET rset_r;
2182
2183         rset_l = rpn_search_structure (zh, zs->u.complex->s1,
2184                                        attributeSet, stream, rset_nmem,
2185                                        sort_sequence,
2186                                        num_bases, basenames);
2187         if (rset_l == NULL)
2188             return NULL;
2189         rset_r = rpn_search_structure (zh, zs->u.complex->s2,
2190                                        attributeSet, stream, rset_nmem,
2191                                        sort_sequence,
2192                                        num_bases, basenames);
2193         if (rset_r == NULL)
2194         {
2195             rset_delete (rset_l);
2196             return NULL;
2197         }
2198
2199         switch (zop->which)
2200         {
2201         case Z_Operator_and:
2202             r = rsbool_create_and(rset_nmem,key_it_ctrl, key_it_ctrl->scope,
2203                     rset_l,rset_r );
2204             break;
2205         case Z_Operator_or:
2206             r = rsbool_create_or(rset_nmem,key_it_ctrl, key_it_ctrl->scope,
2207                     rset_l,rset_r );
2208             break;
2209         case Z_Operator_and_not:
2210             r = rsbool_create_not(rset_nmem,key_it_ctrl, key_it_ctrl->scope,
2211                     rset_l,rset_r );
2212             break;
2213         case Z_Operator_prox:
2214             if (zop->u.prox->which != Z_ProximityOperator_known)
2215             {
2216                 zh->errCode = 132;
2217                 return NULL;
2218             }
2219             if (*zop->u.prox->u.known != Z_ProxUnit_word)
2220             {
2221                 char *val = (char *) nmem_malloc (stream, 16);
2222                 zh->errCode = 132;
2223                 zh->errString = val;
2224                 sprintf (val, "%d", *zop->u.prox->u.known);
2225                 return NULL;
2226             }
2227             else
2228             {
2229                 /* new / old prox */
2230                 RSET twosets[2];
2231                 
2232                 twosets[0] = rset_l;
2233                 twosets[1] = rset_r;
2234                 r=rsprox_create(rset_nmem,key_it_ctrl,key_it_ctrl->scope,
2235                          2, twosets, 
2236                          *zop->u.prox->ordered,
2237                          (!zop->u.prox->exclusion ? 
2238                               0 : *zop->u.prox->exclusion),
2239                          *zop->u.prox->relationType,
2240                          *zop->u.prox->distance );
2241             }
2242             break;
2243         default:
2244             zh->errCode = 110;
2245             return NULL;
2246         }
2247     }
2248     else if (zs->which == Z_RPNStructure_simple)
2249     {
2250         if (zs->u.simple->which == Z_Operand_APT)
2251         {
2252             logf (LOG_DEBUG, "rpn_search_APT");
2253             r = rpn_search_APT (zh, zs->u.simple->u.attributesPlusTerm,
2254                                 attributeSet, stream, sort_sequence,
2255                                 num_bases, basenames,rset_nmem);
2256         }
2257         else if (zs->u.simple->which == Z_Operand_resultSetId)
2258         {
2259             logf (LOG_DEBUG, "rpn_search_ref");
2260             r = resultSetRef (zh, zs->u.simple->u.resultSetId);
2261             if (!r)
2262             {
2263                 r = rsnull_create (rset_nmem,key_it_ctrl);
2264                 zh->errCode = 30;
2265                 zh->errString =
2266                     nmem_strdup (stream, zs->u.simple->u.resultSetId);
2267                 return 0;
2268             }
2269             else
2270                 rset_dup(r);
2271         }
2272         else
2273         {
2274             zh->errCode = 3;
2275             return 0;
2276         }
2277     }
2278     else
2279     {
2280         zh->errCode = 3;
2281         return 0;
2282     }
2283     return r;
2284 }
2285
2286
2287 RSET rpn_search (ZebraHandle zh, NMEM nmem, NMEM rset_nmem,
2288                  Z_RPNQuery *rpn, int num_bases, char **basenames, 
2289                  const char *setname,
2290                  ZebraSet sset)
2291 {
2292     RSET rset;
2293     oident *attrset;
2294     oid_value attributeSet;
2295     Z_SortKeySpecList *sort_sequence;
2296     int sort_status, i;
2297
2298     zh->errCode = 0;
2299     zh->errString = NULL;
2300     zh->hits = 0;
2301
2302     sort_sequence = (Z_SortKeySpecList *)
2303         nmem_malloc (nmem, sizeof(*sort_sequence));
2304     sort_sequence->num_specs = 10;
2305     sort_sequence->specs = (Z_SortKeySpec **)
2306         nmem_malloc (nmem, sort_sequence->num_specs *
2307                      sizeof(*sort_sequence->specs));
2308     for (i = 0; i<sort_sequence->num_specs; i++)
2309         sort_sequence->specs[i] = 0;
2310     
2311     attrset = oid_getentbyoid (rpn->attributeSetId);
2312     attributeSet = attrset->value;
2313     rset = rpn_search_structure (zh, rpn->RPNStructure, attributeSet,
2314                                  nmem, rset_nmem,
2315                                  sort_sequence, num_bases, basenames);
2316     if (!rset)
2317         return 0;
2318
2319     if (zh->errCode)
2320         logf (LOG_DEBUG, "search error: %d", zh->errCode);
2321     
2322     for (i = 0; sort_sequence->specs[i]; i++)
2323         ;
2324     sort_sequence->num_specs = i;
2325     if (!i)
2326         resultSetRank (zh, sset, rset);
2327     else
2328     {
2329         logf (LOG_DEBUG, "resultSetSortSingle in rpn_search");
2330         resultSetSortSingle (zh, nmem, sset, rset,
2331                              sort_sequence, &sort_status);
2332         if (zh->errCode)
2333         {
2334             logf (LOG_DEBUG, "resultSetSortSingle status = %d", zh->errCode);
2335         }
2336     }
2337     return rset;
2338 }
2339
2340 struct scan_info_entry {
2341     char *term;
2342     ISAMC_P isam_p;
2343 };
2344
2345 struct scan_info {
2346     struct scan_info_entry *list;
2347     ODR odr;
2348     int before, after;
2349     char prefix[20];
2350 };
2351
2352 static int scan_handle (char *name, const char *info, int pos, void *client)
2353 {
2354     int len_prefix, idx;
2355     struct scan_info *scan_info = (struct scan_info *) client;
2356
2357     len_prefix = strlen(scan_info->prefix);
2358     if (memcmp (name, scan_info->prefix, len_prefix))
2359         return 1;
2360     if (pos > 0)        idx = scan_info->after - pos + scan_info->before;
2361     else
2362         idx = - pos - 1;
2363     scan_info->list[idx].term = (char *)
2364         odr_malloc (scan_info->odr, strlen(name + len_prefix)+1);
2365     strcpy (scan_info->list[idx].term, name + len_prefix);
2366     assert (*info == sizeof(ISAMC_P));
2367     memcpy (&scan_info->list[idx].isam_p, info+1, sizeof(ISAMC_P));
2368     return 0;
2369 }
2370
2371 static void scan_term_untrans (ZebraHandle zh, NMEM stream, int reg_type,
2372                                char **dst, const char *src)
2373 {
2374     char term_src[IT_MAX_WORD];
2375     char term_dst[IT_MAX_WORD];
2376     
2377     term_untrans (zh, reg_type, term_src, src);
2378
2379     if (zh->iconv_from_utf8 != 0)
2380     {
2381         int len;
2382         char *inbuf = term_src;
2383         size_t inleft = strlen(term_src);
2384         char *outbuf = term_dst;
2385         size_t outleft = sizeof(term_dst)-1;
2386         size_t ret;
2387         
2388         ret = yaz_iconv (zh->iconv_from_utf8, &inbuf, &inleft,
2389                          &outbuf, &outleft);
2390         if (ret == (size_t)(-1))
2391             len = 0;
2392         else
2393             len = outbuf - term_dst;
2394         *dst = nmem_malloc (stream, len + 1);
2395         if (len > 0)
2396             memcpy (*dst, term_dst, len);
2397         (*dst)[len] = '\0';
2398     }
2399     else
2400         *dst = nmem_strdup (stream, term_src);
2401 }
2402
2403 static void count_set (RSET r, int *count)
2404 {
2405     zint psysno = 0;
2406     int kno = 0;
2407     struct it_key key;
2408     RSFD rfd;
2409
2410     logf (LOG_DEBUG, "count_set");
2411
2412     *count = 0;
2413     rfd = rset_open (r, RSETF_READ);
2414     while (rset_read (rfd, &key))
2415     {
2416         if (key.mem[0] != psysno)
2417         {
2418             psysno = key.mem[0];
2419             (*count)++;
2420         }
2421         kno++;
2422     }
2423     rset_close (rfd);
2424     logf (LOG_DEBUG, "%d keys, %d records", kno, *count);
2425 }
2426
2427 void rpn_scan (ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
2428                oid_value attributeset,
2429                int num_bases, char **basenames,
2430                int *position, int *num_entries, ZebraScanEntry **list,
2431                int *is_partial, RSET limit_set, int return_zero)
2432 {
2433     int i;
2434     int pos = *position;
2435     int num = *num_entries;
2436     int before;
2437     int after;
2438     int base_no;
2439     char termz[IT_MAX_WORD+20];
2440     AttrType use;
2441     int use_value;
2442     const char *use_string = 0;
2443     struct scan_info *scan_info_array;
2444     ZebraScanEntry *glist;
2445     int ords[32], ord_no = 0;
2446     int ptr[32];
2447
2448     int bases_ok = 0;     /* no of databases with OK attribute */
2449     int errCode = 0;      /* err code (if any is not OK) */
2450     char *errString = 0;  /* addinfo */
2451
2452     unsigned reg_id;
2453     char *search_type = NULL;
2454     char rank_type[128];
2455     int complete_flag;
2456     int sort_flag;
2457     NMEM rset_nmem=NULL; 
2458
2459     *list = 0;
2460
2461     if (attributeset == VAL_NONE)
2462         attributeset = VAL_BIB1;
2463
2464     if (!limit_set)
2465     {
2466         AttrType termset;
2467         int termset_value_numeric;
2468         const char *termset_value_string;
2469         attr_init (&termset, zapt, 8);
2470         termset_value_numeric =
2471             attr_find_ex (&termset, NULL, &termset_value_string);
2472         if (termset_value_numeric != -1)
2473         {
2474             char resname[32];
2475             const char *termset_name = 0;
2476             
2477             if (termset_value_numeric != -2)
2478             {
2479                 
2480                 sprintf (resname, "%d", termset_value_numeric);
2481                 termset_name = resname;
2482             }
2483             else
2484                 termset_name = termset_value_string;
2485             
2486             limit_set = resultSetRef (zh, termset_name);
2487         }
2488     }
2489         
2490     yaz_log (LOG_DEBUG, "position = %d, num = %d set=%d",
2491              pos, num, attributeset);
2492         
2493     attr_init (&use, zapt, 1);
2494     use_value = attr_find_ex (&use, &attributeset, &use_string);
2495
2496     if (zebra_maps_attr (zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2497                          rank_type, &complete_flag, &sort_flag))
2498     {
2499         *num_entries = 0;
2500         zh->errCode = 113;
2501         return ;
2502     }
2503     yaz_log (LOG_DEBUG, "use_value = %d", use_value);
2504
2505     if (use_value == -1)
2506         use_value = 1016;
2507     for (base_no = 0; base_no < num_bases && ord_no < 32; base_no++)
2508     {
2509         int r;
2510         attent attp;
2511         data1_local_attribute *local_attr;
2512
2513         if ((r=att_getentbyatt (zh, &attp, attributeset, use_value,
2514                                 use_string)))
2515         {
2516             logf (LOG_DEBUG, "att_getentbyatt fail. set=%d use=%d",
2517                   attributeset, use_value);
2518             if (r == -1)
2519             {
2520                 char val_str[32];
2521                 sprintf (val_str, "%d", use_value);
2522                 errCode = 114;
2523                 errString = odr_strdup (stream, val_str);
2524             }   
2525             else
2526                 errCode = 121;
2527             continue;
2528         }
2529         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2530         {
2531             zh->errString = basenames[base_no];
2532             zh->errCode = 109; /* Database unavailable */
2533             *num_entries = 0;
2534             return;
2535         }
2536         bases_ok++;
2537         for (local_attr = attp.local_attributes; local_attr && ord_no < 32;
2538              local_attr = local_attr->next)
2539         {
2540             int ord;
2541
2542             ord = zebraExplain_lookupSU (zh->reg->zei, attp.attset_ordinal,
2543                                          local_attr->local);
2544             if (ord > 0)
2545                 ords[ord_no++] = ord;
2546         }
2547     }
2548     if (!bases_ok && errCode)
2549     {
2550         zh->errCode = errCode;
2551         zh->errString = errString;
2552         *num_entries = 0;
2553     }
2554     if (ord_no == 0)
2555     {
2556         char val_str[32];
2557         sprintf (val_str, "%d", use_value);
2558         zh->errCode = 114;
2559         zh->errString = odr_strdup (stream, val_str);
2560
2561         *num_entries = 0;
2562         return;
2563     }
2564     /* prepare dictionary scanning */
2565     before = pos-1;
2566     after = 1+num-pos;
2567     scan_info_array = (struct scan_info *)
2568         odr_malloc (stream, ord_no * sizeof(*scan_info_array));
2569     for (i = 0; i < ord_no; i++)
2570     {
2571         int j, prefix_len = 0;
2572         int before_tmp = before, after_tmp = after;
2573         struct scan_info *scan_info = scan_info_array + i;
2574         struct rpn_char_map_info rcmi;
2575
2576         rpn_char_map_prepare (zh->reg, reg_id, &rcmi);
2577
2578         scan_info->before = before;
2579         scan_info->after = after;
2580         scan_info->odr = stream;
2581
2582         scan_info->list = (struct scan_info_entry *)
2583             odr_malloc (stream, (before+after) * sizeof(*scan_info->list));
2584         for (j = 0; j<before+after; j++)
2585             scan_info->list[j].term = NULL;
2586
2587         prefix_len += key_SU_encode (ords[i], termz + prefix_len);
2588         termz[prefix_len++] = reg_id;
2589         termz[prefix_len] = 0;
2590         strcpy (scan_info->prefix, termz);
2591
2592         if (trans_scan_term (zh, zapt, termz+prefix_len, reg_id))
2593             return ;
2594                     
2595         dict_scan (zh->reg->dict, termz, &before_tmp, &after_tmp,
2596                    scan_info, scan_handle);
2597     }
2598     glist = (ZebraScanEntry *)
2599         odr_malloc (stream, (before+after)*sizeof(*glist));
2600
2601     rset_nmem=nmem_create();
2602
2603     /* consider terms after main term */
2604     for (i = 0; i < ord_no; i++)
2605         ptr[i] = before;
2606     
2607     *is_partial = 0;
2608     for (i = 0; i<after; i++)
2609     {
2610         int j, j0 = -1;
2611         const char *mterm = NULL;
2612         const char *tst;
2613         RSET rset;
2614         
2615         for (j = 0; j < ord_no; j++)
2616         {
2617             if (ptr[j] < before+after &&
2618                 (tst=scan_info_array[j].list[ptr[j]].term) &&
2619                 (!mterm || strcmp (tst, mterm) < 0))
2620             {
2621                 j0 = j;
2622                 mterm = tst;
2623             }
2624         }
2625         if (j0 == -1)
2626             break;
2627         scan_term_untrans (zh, stream->mem, reg_id,
2628                            &glist[i+before].term, mterm);
2629         rset = rset_trunc (zh, &scan_info_array[j0].list[ptr[j0]].isam_p, 1,
2630                            glist[i+before].term, strlen(glist[i+before].term),
2631                            NULL, 0, zapt->term->which, rset_nmem, 
2632                            key_it_ctrl,key_it_ctrl->scope);
2633         ptr[j0]++;
2634         for (j = j0+1; j<ord_no; j++)
2635         {
2636             if (ptr[j] < before+after &&
2637                 (tst=scan_info_array[j].list[ptr[j]].term) &&
2638                 !strcmp (tst, mterm))
2639             {
2640                 RSET rset2;
2641
2642                 rset2 =
2643                    rset_trunc (zh, &scan_info_array[j].list[ptr[j]].isam_p, 1,
2644                                glist[i+before].term,
2645                                strlen(glist[i+before].term), NULL, 0,
2646                                zapt->term->which,rset_nmem,
2647                                key_it_ctrl, key_it_ctrl->scope);
2648                 rset = rsbool_create_or(rset_nmem,key_it_ctrl,
2649                                key_it_ctrl->scope, rset, rset2);
2650                 /* FIXME - Use a proper multi-or */
2651
2652                 ptr[j]++;
2653             }
2654         }
2655         if (limit_set)
2656             rset = rsbool_create_and(rset_nmem,key_it_ctrl,key_it_ctrl->scope,
2657                             rset, rset_dup(limit_set));
2658         count_set (rset, &glist[i+before].occurrences);
2659         rset_delete (rset);
2660     }
2661     if (i < after)
2662     {
2663         *num_entries -= (after-i);
2664         *is_partial = 1;
2665     }
2666
2667     /* consider terms before main term */
2668     for (i = 0; i<ord_no; i++)
2669         ptr[i] = 0;
2670
2671     for (i = 0; i<before; i++)
2672     {
2673         int j, j0 = -1;
2674         const char *mterm = NULL;
2675         const char *tst;
2676         RSET rset;
2677         
2678         for (j = 0; j <ord_no; j++)
2679         {
2680             if (ptr[j] < before &&
2681                 (tst=scan_info_array[j].list[before-1-ptr[j]].term) &&
2682                 (!mterm || strcmp (tst, mterm) > 0))
2683             {
2684                 j0 = j;
2685                 mterm = tst;
2686             }
2687         }
2688         if (j0 == -1)
2689             break;
2690
2691         scan_term_untrans (zh, stream->mem, reg_id,
2692                            &glist[before-1-i].term, mterm);
2693
2694         rset = rset_trunc
2695                (zh, &scan_info_array[j0].list[before-1-ptr[j0]].isam_p, 1,
2696                 glist[before-1-i].term, strlen(glist[before-1-i].term),
2697                 NULL, 0, zapt->term->which,rset_nmem,
2698                 key_it_ctrl,key_it_ctrl->scope);
2699
2700         ptr[j0]++;
2701
2702         for (j = j0+1; j<ord_no; j++)
2703         {
2704             if (ptr[j] < before &&
2705                 (tst=scan_info_array[j].list[before-1-ptr[j]].term) &&
2706                 !strcmp (tst, mterm))
2707             {
2708                 RSET rset2;
2709
2710                 rset2 = rset_trunc (zh,
2711                          &scan_info_array[j].list[before-1-ptr[j]].isam_p, 1,
2712                                     glist[before-1-i].term,
2713                                     strlen(glist[before-1-i].term), NULL, 0,
2714                                     zapt->term->which, rset_nmem,
2715                                     key_it_ctrl, key_it_ctrl->scope);
2716                 rset = rsbool_create_and(rset_nmem,key_it_ctrl,
2717                             key_it_ctrl->scope, rset, rset2);
2718                 /* FIXME - multi-and ?? */
2719                 ptr[j]++;
2720             }
2721         }
2722         if (limit_set)
2723             rset = rsbool_create_and(rset_nmem,key_it_ctrl,key_it_ctrl->scope,
2724                             rset, rset_dup(limit_set));
2725         count_set (rset, &glist[before-1-i].occurrences);
2726         rset_delete (rset);
2727     }
2728     i = before-i;
2729     if (i)
2730     {
2731         *is_partial = 1;
2732         *position -= i;
2733         *num_entries -= i;
2734     }
2735
2736     nmem_destroy(rset_nmem);
2737     *list = glist + i;               /* list is set to first 'real' entry */
2738     
2739     logf (LOG_DEBUG, "position = %d, num_entries = %d",
2740           *position, *num_entries);
2741     if (zh->errCode)
2742         logf (LOG_DEBUG, "scan error: %d", zh->errCode);
2743 }
2744