9f9a0a8b2e82a25c5a271c7ee0648b5797f23120
[idzebra-moved-to-github.git] / index / zrpn.c
1 /* $Id: zrpn.c,v 1.154 2004-09-28 16:39:46 heikki Exp $
2    Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004
3    Index Data Aps
4
5 This file is part of the Zebra server.
6
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra.  If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA.
21 */
22
23
24 #include <stdio.h>
25 #include <assert.h>
26 #ifdef WIN32
27 #include <io.h>
28 #else
29 #include <unistd.h>
30 #endif
31 #include <ctype.h>
32
33 #include "index.h"
34 #include <zebra_xpath.h>
35
36 #include <charmap.h>
37 #include <rset.h>
38
39
/* Key-control descriptor for the it_key records used by this file.
   The initializer is positional: the member names of struct key_control
   are not visible here, so the notes below describe only the values. */
static const struct key_control it_ctrl = { 
    sizeof(struct it_key), /* size in bytes of one key */
    2, /* we have sysnos and seqnos in this key, nothing more */
    key_compare_it,    /* key comparison routine */
    key_logdump_txt,   /* FIXME  - clean up these functions */
    key_get_seq,
};


/* Exported handle on the descriptor above.  term_trunc() in this file
   passes it, together with its scope member, to rset_trunc(). */
const struct key_control *key_it_ctrl = &it_ctrl;
50
/* Context handed to rpn_char_map_handler() through dict_grep_cmap():
   the arguments it needs to forward a call to zebra_maps_input(). */
struct rpn_char_map_info {
    ZebraMaps zm;      /* map set, taken from reg->zebra_maps */
    int reg_type;      /* register type passed on to zebra_maps_input() */
};
55
/* Cursor used by attr_init() / attr_find_ex() to walk the attribute list
   of a Z_AttributesPlusTerm looking for attributes of one given type. */
typedef struct {
    int type;      /* attribute type to match against *element->attributeType */
    int major;     /* index of the next element in zapt->attributes->attributes */
    int minor;     /* index of the next entry inside a complex value's list */
    Z_AttributesPlusTerm *zapt;   /* the term whose attributes are scanned */
} AttrType;
62
63
64 static const char **rpn_char_map_handler (void *vp, const char **from, int len)
65 {
66     struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
67     const char **out = zebra_maps_input (p->zm, p->reg_type, from, len, 0);
68 #if 0
69     if (out && *out)
70     {
71         const char *outp = *out;
72         yaz_log (LOG_LOG, "---");
73         while (*outp)
74         {
75             yaz_log (LOG_LOG, "%02X", *outp);
76             outp++;
77         }
78     }
79 #endif
80     return out;
81 }
82
83 static void rpn_char_map_prepare (struct zebra_register *reg, int reg_type,
84                                   struct rpn_char_map_info *map_info)
85 {
86     map_info->zm = reg->zebra_maps;
87     map_info->reg_type = reg_type;
88     dict_grep_cmap (reg->dict, map_info, rpn_char_map_handler);
89 }
90
91 static int attr_find_ex (AttrType *src, oid_value *attributeSetP,
92                          const char **string_value)
93 {
94     int num_attributes;
95
96     num_attributes = src->zapt->attributes->num_attributes;
97     while (src->major < num_attributes)
98     {
99         Z_AttributeElement *element;
100
101         element = src->zapt->attributes->attributes[src->major];
102         if (src->type == *element->attributeType)
103         {
104             switch (element->which) 
105             {
106             case Z_AttributeValue_numeric:
107                 ++(src->major);
108                 if (element->attributeSet && attributeSetP)
109                 {
110                     oident *attrset;
111
112                     attrset = oid_getentbyoid (element->attributeSet);
113                     *attributeSetP = attrset->value;
114                 }
115                 return *element->value.numeric;
116                 break;
117             case Z_AttributeValue_complex:
118                 if (src->minor >= element->value.complex->num_list)
119                     break;
120                 if (element->attributeSet && attributeSetP)
121                 {
122                     oident *attrset;
123                     
124                     attrset = oid_getentbyoid (element->attributeSet);
125                     *attributeSetP = attrset->value;
126                 }
127                 if (element->value.complex->list[src->minor]->which ==  
128                     Z_StringOrNumeric_numeric)
129                 {
130                     ++(src->minor);
131                     return
132                         *element->value.complex->list[src->minor-1]->u.numeric;
133                 }
134                 else if (element->value.complex->list[src->minor]->which ==  
135                          Z_StringOrNumeric_string)
136                 {
137                     if (!string_value)
138                         break;
139                     ++(src->minor);
140                     *string_value = 
141                         element->value.complex->list[src->minor-1]->u.string;
142                     return -2;
143                 }
144                 else
145                     break;
146             default:
147                 assert (0);
148             }
149         }
150         ++(src->major);
151     }
152     return -1;
153 }
154
155 static int attr_find (AttrType *src, oid_value *attributeSetP)
156 {
157     return attr_find_ex (src, attributeSetP, 0);
158 }
159
160 static void attr_init (AttrType *src, Z_AttributesPlusTerm *zapt,
161                        int type)
162 {
163     src->zapt = zapt;
164     src->type = type;
165     src->major = 0;
166     src->minor = 0;
167 }
168
/* When defined, grep_info carries a term_no array that is kept at the
   same capacity as isam_p_buf. */
#define TERM_COUNT        
       
/* Accumulator passed to the dictionary grep callback (grep_handle /
   add_isam_p): collects the ISAM positions of every matching entry. */
struct grep_info {        
#ifdef TERM_COUNT        
    int *term_no;        /* grown alongside isam_p_buf in add_isam_p() */
#endif        
    ISAMC_P *isam_p_buf; /* collected ISAM positions (xmalloc'ed) */
    int isam_p_size;     /* capacity of isam_p_buf, in entries */
    int isam_p_indx;     /* number of entries in use */
    ZebraHandle zh;      /* handle used for term untranslation / lookups */
    int reg_type;        /* register type given to term_untrans() */
    ZebraSet termset;    /* if set, matched terms are added to this set */
};        
182
183 static void term_untrans  (ZebraHandle zh, int reg_type,
184                            char *dst, const char *src)
185 {
186     int len = 0;
187     while (*src)
188     {
189         const char *cp = zebra_maps_output (zh->reg->zebra_maps,
190                                             reg_type, &src);
191         if (!cp && len < IT_MAX_WORD-1)
192             dst[len++] = *src++;
193         else
194             while (*cp && len < IT_MAX_WORD-1)
195                 dst[len++] = *cp++;
196     }
197     dst[len] = '\0';
198 }
199
200 static void add_isam_p (const char *name, const char *info,
201                         struct grep_info *p)
202 {
203     if (p->isam_p_indx == p->isam_p_size)
204     {
205         ISAMC_P *new_isam_p_buf;
206 #ifdef TERM_COUNT        
207         int *new_term_no;        
208 #endif
209         p->isam_p_size = 2*p->isam_p_size + 100;
210         new_isam_p_buf = (ISAMC_P *) xmalloc (sizeof(*new_isam_p_buf) *
211                                              p->isam_p_size);
212         if (p->isam_p_buf)
213         {
214             memcpy (new_isam_p_buf, p->isam_p_buf,
215                     p->isam_p_indx * sizeof(*p->isam_p_buf));
216             xfree (p->isam_p_buf);
217         }
218         p->isam_p_buf = new_isam_p_buf;
219
220 #ifdef TERM_COUNT
221         new_term_no = (int *) xmalloc (sizeof(*new_term_no) *
222                                        p->isam_p_size);
223         if (p->term_no)
224         {
225             memcpy (new_term_no, p->isam_p_buf,
226                     p->isam_p_indx * sizeof(*p->term_no));
227             xfree (p->term_no);
228         }
229         p->term_no = new_term_no;
230 #endif
231     }
232     assert (*info == sizeof(*p->isam_p_buf));
233     memcpy (p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
234
235 #if 1
236     if (p->termset)
237     {
238         const char *db;
239         int set, use;
240         char term_tmp[IT_MAX_WORD];
241         int su_code = 0;
242         int len = key_SU_decode (&su_code, name);
243         
244         term_untrans  (p->zh, p->reg_type, term_tmp, name+len+1);
245         logf (LOG_LOG, "grep: %d %c %s", su_code, name[len], term_tmp);
246         zebraExplain_lookup_ord (p->zh->reg->zei,
247                                  su_code, &db, &set, &use);
248         logf (LOG_LOG, "grep:  set=%d use=%d db=%s", set, use, db);
249         
250         resultSetAddTerm (p->zh, p->termset, name[len], db,
251                           set, use, term_tmp);
252     }
253 #endif
254     (p->isam_p_indx)++;
255 }
256
/* grep_handle: callback adapter for dictionary grep.  p is the
 * struct grep_info accumulator; the hit is recorded with add_isam_p()
 * and 0 is returned. */
static int grep_handle (char *name, const char *info, void *p)
{
    struct grep_info *state = (struct grep_info *) p;

    add_isam_p (name, info, state);
    return 0;
}
262
263 static int term_pre (ZebraMaps zebra_maps, int reg_type, const char **src,
264                      const char *ct1, const char *ct2, int first)
265 {
266     const char *s1, *s0 = *src;
267     const char **map;
268
269     /* skip white space */
270     while (*s0)
271     {
272         if (ct1 && strchr (ct1, *s0))
273             break;
274         if (ct2 && strchr (ct2, *s0))
275             break;
276         s1 = s0;
277         map = zebra_maps_input (zebra_maps, reg_type, &s1, strlen(s1), first);
278         if (**map != *CHR_SPACE)
279             break;
280         s0 = s1;
281     }
282     *src = s0;
283     return *s0;
284 }
285
286 #define REGEX_CHARS " []()|.*+?!"
287
288 /* term_100: handle term, where trunc=none (no operators at all) */
289 static int term_100 (ZebraMaps zebra_maps, int reg_type,
290                      const char **src, char *dst, int space_split,
291                      char *dst_term)
292 {
293     const char *s0, *s1;
294     const char **map;
295     int i = 0;
296     int j = 0;
297
298     const char *space_start = 0;
299     const char *space_end = 0;
300
301     if (!term_pre (zebra_maps, reg_type, src, NULL, NULL, !space_split))
302         return 0;
303     s0 = *src;
304     while (*s0)
305     {
306         s1 = s0;
307         map = zebra_maps_input (zebra_maps, reg_type, &s0, strlen(s0), 0);
308         if (space_split)
309         {
310             if (**map == *CHR_SPACE)
311                 break;
312         }
313         else  /* complete subfield only. */
314         {
315             if (**map == *CHR_SPACE)
316             {   /* save space mapping for later  .. */
317                 space_start = s1;
318                 space_end = s0;
319                 continue;
320             }
321             else if (space_start)
322             {   /* reload last space */
323                 while (space_start < space_end)
324                 {
325                     if (strchr (REGEX_CHARS, *space_start))
326                         dst[i++] = '\\';
327                     dst_term[j++] = *space_start;
328                     dst[i++] = *space_start++;
329                 }
330                 /* and reset */
331                 space_start = space_end = 0;
332             }
333         }
334         /* add non-space char */
335         while (s1 < s0)
336         {
337             if (strchr(REGEX_CHARS, *s1))
338                 dst[i++] = '\\';
339             dst_term[j++] = *s1;
340             dst[i++] = *s1++;
341         }
342     }
343     dst[i] = '\0';
344     dst_term[j] = '\0';
345     *src = s0;
346     return i;
347 }
348
349 /* term_101: handle term, where trunc=Process # */
350 static int term_101 (ZebraMaps zebra_maps, int reg_type,
351                      const char **src, char *dst, int space_split,
352                      char *dst_term)
353 {
354     const char *s0, *s1;
355     const char **map;
356     int i = 0;
357     int j = 0;
358
359     if (!term_pre (zebra_maps, reg_type, src, "#", "#", !space_split))
360         return 0;
361     s0 = *src;
362     while (*s0)
363     {
364         if (*s0 == '#')
365         {
366             dst[i++] = '.';
367             dst[i++] = '*';
368             dst_term[j++] = *s0++;
369         }
370         else
371         {
372             s1 = s0;
373             map = zebra_maps_input (zebra_maps, reg_type, &s0, strlen(s0), 0);
374             if (space_split && **map == *CHR_SPACE)
375                 break;
376             while (s1 < s0)
377             {
378                 if (strchr(REGEX_CHARS, *s1))
379                     dst[i++] = '\\';
380                 dst_term[j++] = *s1;
381                 dst[i++] = *s1++;
382             }
383         }
384     }
385     dst[i] = '\0';
386     dst_term[j++] = '\0';
387     *src = s0;
388     return i;
389 }
390
391 /* term_103: handle term, where trunc=re-2 (regular expressions) */
392 static int term_103 (ZebraMaps zebra_maps, int reg_type, const char **src,
393                      char *dst, int *errors, int space_split,
394                      char *dst_term)
395 {
396     int i = 0;
397     int j = 0;
398     const char *s0, *s1;
399     const char **map;
400
401     if (!term_pre (zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split))
402         return 0;
403     s0 = *src;
404     if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
405         isdigit (s0[1]))
406     {
407         *errors = s0[1] - '0';
408         s0 += 3;
409         if (*errors > 3)
410             *errors = 3;
411     }
412     while (*s0)
413     {
414         if (strchr ("^\\()[].*+?|-", *s0))
415         {
416             dst_term[j++] = *s0;
417             dst[i++] = *s0++;
418         }
419         else
420         {
421             s1 = s0;
422             map = zebra_maps_input (zebra_maps, reg_type, &s0, strlen(s0), 0);
423             if (**map == *CHR_SPACE)
424                 break;
425             while (s1 < s0)
426             {
427                 if (strchr(REGEX_CHARS, *s1))
428                     dst[i++] = '\\';
429                 dst_term[j++] = *s1;
430                 dst[i++] = *s1++;
431             }
432         }
433     }
434     dst[i] = '\0';
435     dst_term[j] = '\0';
436     *src = s0;
437     return i;
438 }
439
440 /* term_103: handle term, where trunc=re-1 (regular expressions) */
441 static int term_102 (ZebraMaps zebra_maps, int reg_type, const char **src,
442                      char *dst, int space_split, char *dst_term)
443 {
444     return term_103 (zebra_maps, reg_type, src, dst, NULL, space_split,
445                      dst_term);
446 }
447
448
449 /* term_104: handle term, where trunc=Process # and ! */
450 static int term_104 (ZebraMaps zebra_maps, int reg_type,
451                      const char **src, char *dst, int space_split,
452                      char *dst_term)
453 {
454     const char *s0, *s1;
455     const char **map;
456     int i = 0;
457     int j = 0;
458
459     if (!term_pre (zebra_maps, reg_type, src, "?*#", "?*#", !space_split))
460         return 0;
461     s0 = *src;
462     while (*s0)
463     {
464         if (*s0 == '?')
465         {
466             dst_term[j++] = *s0++;
467             if (*s0 >= '0' && *s0 <= '9')
468             {
469                 int limit = 0;
470                 while (*s0 >= '0' && *s0 <= '9')
471                 {
472                     limit = limit * 10 + (*s0 - '0');
473                     dst_term[j++] = *s0++;
474                 }
475                 if (limit > 20)
476                     limit = 20;
477                 while (--limit >= 0)
478                 {
479                     dst[i++] = '.';
480                     dst[i++] = '?';
481                 }
482             }
483             else
484             {
485                 dst[i++] = '.';
486                 dst[i++] = '*';
487             }
488         }
489         else if (*s0 == '*')
490         {
491             dst[i++] = '.';
492             dst[i++] = '*';
493             dst_term[j++] = *s0++;
494         }
495         else if (*s0 == '#')
496         {
497             dst[i++] = '.';
498             dst_term[j++] = *s0++;
499         }
500         {
501             s1 = s0;
502             map = zebra_maps_input (zebra_maps, reg_type, &s0, strlen(s0), 0);
503             if (space_split && **map == *CHR_SPACE)
504                 break;
505             while (s1 < s0)
506             {
507                 if (strchr(REGEX_CHARS, *s1))
508                     dst[i++] = '\\';
509                 dst_term[j++] = *s1;
510                 dst[i++] = *s1++;
511             }
512         }
513     }
514     dst[i] = '\0';
515     dst_term[j++] = '\0';
516     *src = s0;
517     return i;
518 }
519
520 /* term_105/106: handle term, where trunc=Process * and ! and right trunc */
521 static int term_105 (ZebraMaps zebra_maps, int reg_type,
522                      const char **src, char *dst, int space_split,
523                      char *dst_term, int right_truncate)
524 {
525     const char *s0, *s1;
526     const char **map;
527     int i = 0;
528     int j = 0;
529
530     if (!term_pre (zebra_maps, reg_type, src, "*!", "*!", !space_split))
531         return 0;
532     s0 = *src;
533     while (*s0)
534     {
535         if (*s0 == '*')
536         {
537             dst[i++] = '.';
538             dst[i++] = '*';
539             dst_term[j++] = *s0++;
540         }
541         else if (*s0 == '!')
542         {
543             dst[i++] = '.';
544             dst_term[j++] = *s0++;
545         }
546         {
547             s1 = s0;
548             map = zebra_maps_input (zebra_maps, reg_type, &s0, strlen(s0), 0);
549             if (space_split && **map == *CHR_SPACE)
550                 break;
551             while (s1 < s0)
552             {
553                 if (strchr(REGEX_CHARS, *s1))
554                     dst[i++] = '\\';
555                 dst_term[j++] = *s1;
556                 dst[i++] = *s1++;
557             }
558         }
559     }
560     if (right_truncate)
561     {
562         dst[i++] = '.';
563         dst[i++] = '*';
564     }
565     dst[i] = '\0';
566     
567     dst_term[j++] = '\0';
568     *src = s0;
569     return i;
570 }
571
572
573 /* gen_regular_rel - generate regular expression from relation
574  *  val:     border value (inclusive)
575  *  islt:    1 if <=; 0 if >=.
576  */
577 static void gen_regular_rel (char *dst, int val, int islt)
578 {
579     int dst_p;
580     int w, d, i;
581     int pos = 0;
582     char numstr[20];
583
584     logf (LOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
585     if (val >= 0)
586     {
587         if (islt)
588             strcpy (dst, "(-[0-9]+|(");
589         else
590             strcpy (dst, "((");
591     } 
592     else
593     {
594         if (!islt)
595         {
596             strcpy (dst, "([0-9]+|-(");
597             dst_p = strlen (dst);
598             islt = 1;
599         }
600         else
601         {
602             strcpy (dst, "(-(");
603             islt = 0;
604         }
605         val = -val;
606     }
607     dst_p = strlen (dst);
608     sprintf (numstr, "%d", val);
609     for (w = strlen(numstr); --w >= 0; pos++)
610     {
611         d = numstr[w];
612         if (pos > 0)
613         {
614             if (islt)
615             {
616                 if (d == '0')
617                     continue;
618                 d--;
619             } 
620             else
621             {
622                 if (d == '9')
623                     continue;
624                 d++;
625             }
626         }
627         
628         strcpy (dst + dst_p, numstr);
629         dst_p = strlen(dst) - pos - 1;
630
631         if (islt)
632         {
633             if (d != '0')
634             {
635                 dst[dst_p++] = '[';
636                 dst[dst_p++] = '0';
637                 dst[dst_p++] = '-';
638                 dst[dst_p++] = d;
639                 dst[dst_p++] = ']';
640             }
641             else
642                 dst[dst_p++] = d;
643         }
644         else
645         {
646             if (d != '9')
647             { 
648                 dst[dst_p++] = '[';
649                 dst[dst_p++] = d;
650                 dst[dst_p++] = '-';
651                 dst[dst_p++] = '9';
652                 dst[dst_p++] = ']';
653             }
654             else
655                 dst[dst_p++] = d;
656         }
657         for (i = 0; i<pos; i++)
658         {
659             dst[dst_p++] = '[';
660             dst[dst_p++] = '0';
661             dst[dst_p++] = '-';
662             dst[dst_p++] = '9';
663             dst[dst_p++] = ']';
664         }
665         dst[dst_p++] = '|';
666     }
667     dst[dst_p] = '\0';
668     if (islt)
669     {
670         /* match everything less than 10^(pos-1) */
671         strcat (dst, "0*");
672         for (i=1; i<pos; i++)
673             strcat (dst, "[0-9]?");
674     }
675     else
676     {
677         /* match everything greater than 10^pos */
678         for (i = 0; i <= pos; i++)
679             strcat (dst, "[0-9]");
680         strcat (dst, "[0-9]*");
681     }
682     strcat (dst, "))");
683 }
684
/* string_rel_add_char: copy one logical character from src to *term_p.
 * A backslash and the character it escapes count as one unit and are
 * copied together.  Both *term_p and *indx are advanced past what was
 * copied. */
void string_rel_add_char (char **term_p, const char *src, int *indx)
{
    char *out = *term_p;
    int at = *indx;

    if (src[at] == '\\')
        *out++ = src[at++];
    *out++ = src[at++];

    *term_p = out;
    *indx = at;
}
691
692 /*
693  *   >  abc     ([b-].*|a[c-].*|ab[d-].*|abc.+)
694  *              ([^-a].*|a[^-b].*ab[^-c].*|abc.+)
695  *   >= abc     ([b-].*|a[c-].*|ab[c-].*)
696  *              ([^-a].*|a[^-b].*|ab[c-].*)
697  *   <  abc     ([-0].*|a[-a].*|ab[-b].*)
698  *              ([^a-].*|a[^b-].*|ab[^c-].*)
699  *   <= abc     ([-0].*|a[-a].*|ab[-b].*|abc)
700  *              ([^a-].*|a[^b-].*|ab[^c-].*|abc)
701  */
702 static int string_relation (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
703                             const char **term_sub, char *term_dict,
704                             oid_value attributeSet,
705                             int reg_type, int space_split, char *term_dst)
706 {
707     AttrType relation;
708     int relation_value;
709     int i;
710     char *term_tmp = term_dict + strlen(term_dict);
711     char term_component[2*IT_MAX_WORD+20];
712
713     attr_init (&relation, zapt, 2);
714     relation_value = attr_find (&relation, NULL);
715
716     logf (LOG_DEBUG, "string relation value=%d", relation_value);
717     switch (relation_value)
718     {
719     case 1:
720         if (!term_100 (zh->reg->zebra_maps, reg_type,
721                        term_sub, term_component,
722                        space_split, term_dst))
723             return 0;
724         logf (LOG_DEBUG, "Relation <");
725         
726         *term_tmp++ = '(';
727         for (i = 0; term_component[i]; )
728         {
729             int j = 0;
730
731             if (i)
732                 *term_tmp++ = '|';
733             while (j < i)
734                 string_rel_add_char (&term_tmp, term_component, &j);
735
736             *term_tmp++ = '[';
737
738             *term_tmp++ = '^';
739             string_rel_add_char (&term_tmp, term_component, &i);
740             *term_tmp++ = '-';
741
742             *term_tmp++ = ']';
743             *term_tmp++ = '.';
744             *term_tmp++ = '*';
745
746             if ((term_tmp - term_dict) > IT_MAX_WORD)
747                 break;
748         }
749         *term_tmp++ = ')';
750         *term_tmp = '\0';
751         break;
752     case 2:
753         if (!term_100 (zh->reg->zebra_maps, reg_type,
754                        term_sub, term_component,
755                        space_split, term_dst))
756             return 0;
757         logf (LOG_DEBUG, "Relation <=");
758
759         *term_tmp++ = '(';
760         for (i = 0; term_component[i]; )
761         {
762             int j = 0;
763
764             while (j < i)
765                 string_rel_add_char (&term_tmp, term_component, &j);
766             *term_tmp++ = '[';
767
768             *term_tmp++ = '^';
769             string_rel_add_char (&term_tmp, term_component, &i);
770             *term_tmp++ = '-';
771
772             *term_tmp++ = ']';
773             *term_tmp++ = '.';
774             *term_tmp++ = '*';
775
776             *term_tmp++ = '|';
777
778             if ((term_tmp - term_dict) > IT_MAX_WORD)
779                 break;
780         }
781         for (i = 0; term_component[i]; )
782             string_rel_add_char (&term_tmp, term_component, &i);
783         *term_tmp++ = ')';
784         *term_tmp = '\0';
785         break;
786     case 5:
787         if (!term_100 (zh->reg->zebra_maps, reg_type,
788                        term_sub, term_component, space_split, term_dst))
789             return 0;
790         logf (LOG_DEBUG, "Relation >");
791
792         *term_tmp++ = '(';
793         for (i = 0; term_component[i];)
794         {
795             int j = 0;
796
797             while (j < i)
798                 string_rel_add_char (&term_tmp, term_component, &j);
799             *term_tmp++ = '[';
800             
801             *term_tmp++ = '^';
802             *term_tmp++ = '-';
803             string_rel_add_char (&term_tmp, term_component, &i);
804
805             *term_tmp++ = ']';
806             *term_tmp++ = '.';
807             *term_tmp++ = '*';
808
809             *term_tmp++ = '|';
810
811             if ((term_tmp - term_dict) > IT_MAX_WORD)
812                 break;
813         }
814         for (i = 0; term_component[i];)
815             string_rel_add_char (&term_tmp, term_component, &i);
816         *term_tmp++ = '.';
817         *term_tmp++ = '+';
818         *term_tmp++ = ')';
819         *term_tmp = '\0';
820         break;
821     case 4:
822         if (!term_100 (zh->reg->zebra_maps, reg_type, term_sub,
823                        term_component, space_split, term_dst))
824             return 0;
825         logf (LOG_DEBUG, "Relation >=");
826
827         *term_tmp++ = '(';
828         for (i = 0; term_component[i];)
829         {
830             int j = 0;
831
832             if (i)
833                 *term_tmp++ = '|';
834             while (j < i)
835                 string_rel_add_char (&term_tmp, term_component, &j);
836             *term_tmp++ = '[';
837
838             if (term_component[i+1])
839             {
840                 *term_tmp++ = '^';
841                 *term_tmp++ = '-';
842                 string_rel_add_char (&term_tmp, term_component, &i);
843             }
844             else
845             {
846                 string_rel_add_char (&term_tmp, term_component, &i);
847                 *term_tmp++ = '-';
848             }
849             *term_tmp++ = ']';
850             *term_tmp++ = '.';
851             *term_tmp++ = '*';
852
853             if ((term_tmp - term_dict) > IT_MAX_WORD)
854                 break;
855         }
856         *term_tmp++ = ')';
857         *term_tmp = '\0';
858         break;
859     case 3:
860     default:
861         logf (LOG_DEBUG, "Relation =");
862         if (!term_100 (zh->reg->zebra_maps, reg_type, term_sub,
863                        term_component, space_split, term_dst))
864             return 0;
865         strcat (term_tmp, "(");
866         strcat (term_tmp, term_component);
867         strcat (term_tmp, ")");
868     }
869     return 1;
870 }
871
872 static int string_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
873                         const char **term_sub, 
874                         oid_value attributeSet, NMEM stream,
875                         struct grep_info *grep_info,
876                         int reg_type, int complete_flag,
877                         int num_bases, char **basenames,
878                         char *term_dst, int xpath_use);
879
880 static RSET term_trunc (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
881                         const char **term_sub, 
882                         oid_value attributeSet, NMEM stream,
883                         struct grep_info *grep_info,
884                         int reg_type, int complete_flag,
885                         int num_bases, char **basenames,
886                         char *term_dst,
887                         const char *rank_type, int xpath_use,
888                         NMEM rset_nmem)
889 {
890     int r;
891     grep_info->isam_p_indx = 0;
892     r = string_term (zh, zapt, term_sub, attributeSet, stream, grep_info,
893                      reg_type, complete_flag, num_bases, basenames,
894                      term_dst, xpath_use);
895     if (r < 1)
896         return 0;
897     logf (LOG_DEBUG, "term: %s", term_dst);
898     return rset_trunc (zh, grep_info->isam_p_buf,
899                        grep_info->isam_p_indx, term_dst,
900                        strlen(term_dst), rank_type, 1 /* preserve pos */,
901                        zapt->term->which, rset_nmem,
902                        key_it_ctrl,key_it_ctrl->scope);
903 }
904
905
906 static int string_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
907                         const char **term_sub, 
908                         oid_value attributeSet, NMEM stream,
909                         struct grep_info *grep_info,
910                         int reg_type, int complete_flag,
911                         int num_bases, char **basenames,
912                         char *term_dst, int xpath_use)
913 {
914     char term_dict[2*IT_MAX_WORD+4000];
915     int j, r, base_no;
916     AttrType truncation;
917     int truncation_value;
918     AttrType use;
919     int use_value;
920     const char *use_string = 0;
921     oid_value curAttributeSet = attributeSet;
922     const char *termp;
923     struct rpn_char_map_info rcmi;
924     int space_split = complete_flag ? 0 : 1;
925
926     int bases_ok = 0;     /* no of databases with OK attribute */
927     int errCode = 0;      /* err code (if any is not OK) */
928     char *errString = 0;  /* addinfo */
929
930     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
931     attr_init (&use, zapt, 1);
932     use_value = attr_find_ex (&use, &curAttributeSet, &use_string);
933     logf (LOG_DEBUG, "string_term, use value %d", use_value);
934     attr_init (&truncation, zapt, 5);
935     truncation_value = attr_find (&truncation, NULL);
936     logf (LOG_DEBUG, "truncation value %d", truncation_value);
937
938     if (use_value == -1)    /* no attribute - assumy "any" */
939         use_value = 1016;
940     for (base_no = 0; base_no < num_bases; base_no++)
941     {
942         attent attp;
943         data1_local_attribute id_xpath_attr;
944         data1_local_attribute *local_attr;
945         int max_pos, prefix_len = 0;
946
947         termp = *term_sub;
948
949         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
950         {
951             zh->errCode = 109; /* Database unavailable */
952             zh->errString = basenames[base_no];
953             return -1;
954         }
955         if (xpath_use > 0 && use_value == -2) 
956         {
957             use_value = xpath_use;
958             attp.local_attributes = &id_xpath_attr;
959             attp.attset_ordinal = VAL_IDXPATH;
960             id_xpath_attr.next = 0;
961             id_xpath_attr.local = use_value;
962         }
963         else if (curAttributeSet == VAL_IDXPATH)
964         {
965             attp.local_attributes = &id_xpath_attr;
966             attp.attset_ordinal = VAL_IDXPATH;
967             id_xpath_attr.next = 0;
968             id_xpath_attr.local = use_value;
969         }
970         else
971         {
972             if ((r=att_getentbyatt (zh, &attp, curAttributeSet, use_value,
973                                             use_string)))
974             {
975                 logf (LOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
976                       curAttributeSet, use_value, r);
977                 if (r == -1)
978                 {
979                     /* set was found, but value wasn't defined */
980                     errCode = 114;
981                     if (use_string)
982                         errString = nmem_strdup(stream, use_string);
983                     else
984                     {
985                         char val_str[32];
986                         sprintf (val_str, "%d", use_value);
987                         errString = nmem_strdup (stream, val_str);
988                     }
989                 }
990                 else
991                 {
992                     int oid[OID_SIZE];
993                     struct oident oident;
994                     
995                     oident.proto = PROTO_Z3950;
996                     oident.oclass = CLASS_ATTSET;
997                     oident.value = curAttributeSet;
998                     oid_ent_to_oid (&oident, oid);
999                     
1000                     errCode = 121;
1001                     errString = nmem_strdup (stream, oident.desc);
1002                 }
1003                 continue;
1004             }
1005         }
1006         for (local_attr = attp.local_attributes; local_attr;
1007              local_attr = local_attr->next)
1008         {
1009             int ord;
1010             char ord_buf[32];
1011             int i, ord_len;
1012             
1013             ord = zebraExplain_lookupSU (zh->reg->zei, attp.attset_ordinal,
1014                                          local_attr->local);
1015             if (ord < 0)
1016                 continue;
1017             if (prefix_len)
1018                 term_dict[prefix_len++] = '|';
1019             else
1020                 term_dict[prefix_len++] = '(';
1021             
1022             ord_len = key_SU_encode (ord, ord_buf);
1023             for (i = 0; i<ord_len; i++)
1024             {
1025                 term_dict[prefix_len++] = 1;
1026                 term_dict[prefix_len++] = ord_buf[i];
1027             }
1028         }
1029         if (!prefix_len)
1030         {
1031 #if 1
1032             bases_ok++;
1033 #else
1034             char val_str[32];
1035             sprintf (val_str, "%d", use_value);
1036             errCode = 114;
1037             errString = nmem_strdup (stream, val_str);
1038 #endif
1039             continue;
1040         }
1041         bases_ok++; /* this has OK attributes */
1042
1043         term_dict[prefix_len++] = ')';
1044         term_dict[prefix_len++] = 1;
1045         term_dict[prefix_len++] = reg_type;
1046         logf (LOG_DEBUG, "reg_type = %d", term_dict[prefix_len-1]);
1047         term_dict[prefix_len] = '\0';
1048         j = prefix_len;
1049         switch (truncation_value)
1050         {
1051         case -1:         /* not specified */
1052         case 100:        /* do not truncate */
1053             if (!string_relation (zh, zapt, &termp, term_dict,
1054                                   attributeSet,
1055                                   reg_type, space_split, term_dst))
1056                 return 0;
1057             logf (LOG_DEBUG, "dict_lookup_grep: %s", term_dict+prefix_len);
1058             r = dict_lookup_grep (zh->reg->dict, term_dict, 0,
1059                                   grep_info, &max_pos, 0, grep_handle);
1060             if (r)
1061                 logf (LOG_WARN, "dict_lookup_grep fail %d", r);
1062             break;
1063         case 1:          /* right truncation */
1064             term_dict[j++] = '(';
1065             if (!term_100 (zh->reg->zebra_maps, reg_type,
1066                            &termp, term_dict + j, space_split, term_dst))
1067                 return 0;
1068             strcat (term_dict, ".*)");
1069             dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info,
1070                               &max_pos, 0, grep_handle);
1071             break;
1072         case 2:          /* keft truncation */
1073             term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1074             if (!term_100 (zh->reg->zebra_maps, reg_type,
1075                            &termp, term_dict + j, space_split, term_dst))
1076                 return 0;
1077             strcat (term_dict, ")");
1078             dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info,
1079                               &max_pos, 0, grep_handle);
1080             break;
1081         case 3:          /* left&right truncation */
1082             term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1083             if (!term_100 (zh->reg->zebra_maps, reg_type,
1084                            &termp, term_dict + j, space_split, term_dst))
1085                 return 0;
1086             strcat (term_dict, ".*)");
1087             dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info,
1088                               &max_pos, 0, grep_handle);
1089             break;
1090             zh->errCode = 120;
1091             return -1;
1092         case 101:        /* process # in term */
1093             term_dict[j++] = '(';
1094             if (!term_101 (zh->reg->zebra_maps, reg_type,
1095                            &termp, term_dict + j, space_split, term_dst))
1096                 return 0;
1097             strcat (term_dict, ")");
1098             r = dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info,
1099                                   &max_pos, 0, grep_handle);
1100             if (r)
1101                 logf (LOG_WARN, "dict_lookup_grep err, trunc=#: %d", r);
1102             break;
1103         case 102:        /* Regexp-1 */
1104             term_dict[j++] = '(';
1105             if (!term_102 (zh->reg->zebra_maps, reg_type,
1106                            &termp, term_dict + j, space_split, term_dst))
1107                 return 0;
1108             strcat (term_dict, ")");
1109             logf (LOG_DEBUG, "Regexp-1 tolerance=%d", r);
1110             r = dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info,
1111                                   &max_pos, 0, grep_handle);
1112             if (r)
1113                 logf (LOG_WARN, "dict_lookup_grep err, trunc=regular: %d",
1114                       r);
1115             break;
1116         case 103:       /* Regexp-2 */
1117             r = 1;
1118             term_dict[j++] = '(';
1119             if (!term_103 (zh->reg->zebra_maps, reg_type,
1120                            &termp, term_dict + j, &r, space_split, term_dst))
1121                 return 0;
1122             strcat (term_dict, ")");
1123             logf (LOG_DEBUG, "Regexp-2 tolerance=%d", r);
1124             r = dict_lookup_grep (zh->reg->dict, term_dict, r, grep_info,
1125                                   &max_pos, 2, grep_handle);
1126             if (r)
1127                 logf (LOG_WARN, "dict_lookup_grep err, trunc=eregular: %d",
1128                       r);
1129             break;
1130         case 104:        /* process # and ! in term */
1131             term_dict[j++] = '(';
1132             if (!term_104 (zh->reg->zebra_maps, reg_type,
1133                            &termp, term_dict + j, space_split, term_dst))
1134                 return 0;
1135             strcat (term_dict, ")");
1136             r = dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info,
1137                                   &max_pos, 0, grep_handle);
1138             if (r)
1139                 logf (LOG_WARN, "dict_lookup_grep err, trunc=#/!: %d", r);
1140             break;
1141         case 105:        /* process * and ! in term */
1142             term_dict[j++] = '(';
1143             if (!term_105 (zh->reg->zebra_maps, reg_type,
1144                            &termp, term_dict + j, space_split, term_dst, 1))
1145                 return 0;
1146             strcat (term_dict, ")");
1147             r = dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info,
1148                                   &max_pos, 0, grep_handle);
1149             if (r)
1150                 logf (LOG_WARN, "dict_lookup_grep err, trunc=*/!: %d", r);
1151             break;
1152         case 106:        /* process * and ! in term */
1153             term_dict[j++] = '(';
1154             if (!term_105 (zh->reg->zebra_maps, reg_type,
1155                            &termp, term_dict + j, space_split, term_dst, 0))
1156                 return 0;
1157             strcat (term_dict, ")");
1158             r = dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info,
1159                                   &max_pos, 0, grep_handle);
1160             if (r)
1161                 logf (LOG_WARN, "dict_lookup_grep err, trunc=*/!: %d", r);
1162             break;
1163         }
1164     }
1165     if (!bases_ok)
1166     {
1167         zh->errCode = errCode;
1168         zh->errString = errString;
1169         return -1;
1170     }
1171     *term_sub = termp;
1172     logf (LOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1173     return 1;
1174 }
1175
1176
1177 /* convert APT search term to UTF8 */
1178 static int zapt_term_to_utf8 (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1179                               char *termz)
1180 {
1181     size_t sizez;
1182     Z_Term *term = zapt->term;
1183
1184     switch (term->which)
1185     {
1186     case Z_Term_general:
1187         if (zh->iconv_to_utf8 != 0)
1188         {
1189             char *inbuf = term->u.general->buf;
1190             size_t inleft = term->u.general->len;
1191             char *outbuf = termz;
1192             size_t outleft = IT_MAX_WORD-1;
1193             size_t ret;
1194
1195             ret = yaz_iconv(zh->iconv_to_utf8, &inbuf, &inleft,
1196                         &outbuf, &outleft);
1197             if (ret == (size_t)(-1))
1198             {
1199                 ret = yaz_iconv(zh->iconv_to_utf8, 0, 0, 0, 0);
1200                 zh->errCode = 125;
1201                 return -1;
1202             }
1203             *outbuf = 0;
1204         }
1205         else
1206         {
1207             sizez = term->u.general->len;
1208             if (sizez > IT_MAX_WORD-1)
1209                 sizez = IT_MAX_WORD-1;
1210             memcpy (termz, term->u.general->buf, sizez);
1211             termz[sizez] = '\0';
1212         }
1213         break;
1214     case Z_Term_characterString:
1215         sizez = strlen(term->u.characterString);
1216         if (sizez > IT_MAX_WORD-1)
1217             sizez = IT_MAX_WORD-1;
1218         memcpy (termz, term->u.characterString, sizez);
1219         termz[sizez] = '\0';
1220         break;
1221     default:
1222         zh->errCode = 124;
1223         return -1;
1224     }
1225     return 0;
1226 }
1227
1228 /* convert APT SCAN term to internal cmap */
1229 static int trans_scan_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1230                             char *termz, int reg_type)
1231 {
1232     char termz0[IT_MAX_WORD];
1233
1234     if (zapt_term_to_utf8(zh, zapt, termz0))
1235         return -1;    /* error */
1236     else
1237     {
1238         const char **map;
1239         const char *cp = (const char *) termz0;
1240         const char *cp_end = cp + strlen(cp);
1241         const char *src;
1242         int i = 0;
1243         const char *space_map = NULL;
1244         int len;
1245             
1246         while ((len = (cp_end - cp)) > 0)
1247         {
1248             map = zebra_maps_input (zh->reg->zebra_maps, reg_type, &cp, len, 0);
1249             if (**map == *CHR_SPACE)
1250                 space_map = *map;
1251             else
1252             {
1253                 if (i && space_map)
1254                     for (src = space_map; *src; src++)
1255                         termz[i++] = *src;
1256                 space_map = NULL;
1257                 for (src = *map; *src; src++)
1258                     termz[i++] = *src;
1259             }
1260         }
1261         termz[i] = '\0';
1262     }
1263     return 0;
1264 }
1265
1266 char *normalize_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1267                      const char *termz, NMEM stream, unsigned reg_id)
1268 {
1269     WRBUF wrbuf = 0;
1270     AttrType truncation;
1271     int truncation_value;
1272     char *ex_list = 0;
1273
1274     attr_init (&truncation, zapt, 5);
1275     truncation_value = attr_find (&truncation, NULL);
1276
1277     switch (truncation_value)
1278     {
1279     default:
1280         ex_list = "";
1281         break;
1282     case 101:
1283         ex_list = "#";
1284         break;
1285     case 102:
1286     case 103:
1287         ex_list = 0;
1288         break;
1289     case 104:
1290         ex_list = "!#";
1291         break;
1292     case 105:
1293         ex_list = "!*";
1294         break;
1295     }
1296     if (ex_list)
1297         wrbuf = zebra_replace(zh->reg->zebra_maps, reg_id, ex_list,
1298                               termz, strlen(termz));
1299     if (!wrbuf)
1300         return nmem_strdup(stream, termz);
1301     else
1302     {
1303         char *buf = (char*) nmem_malloc (stream, wrbuf_len(wrbuf)+1);
1304         memcpy (buf, wrbuf_buf(wrbuf), wrbuf_len(wrbuf));
1305         buf[wrbuf_len(wrbuf)] = '\0';
1306         return buf;
1307     }
1308 }
1309
1310 static void grep_info_delete (struct grep_info *grep_info)
1311 {
1312 #ifdef TERM_COUNT
1313     xfree(grep_info->term_no);
1314 #endif
1315     xfree (grep_info->isam_p_buf);
1316 }
1317
1318 static int grep_info_prepare (ZebraHandle zh,
1319                               Z_AttributesPlusTerm *zapt,
1320                               struct grep_info *grep_info,
1321                               int reg_type,
1322                               NMEM stream)
1323 {
1324     AttrType termset;
1325     int termset_value_numeric;
1326     const char *termset_value_string;
1327
1328 #ifdef TERM_COUNT
1329     grep_info->term_no = 0;
1330 #endif
1331     grep_info->isam_p_size = 0;
1332     grep_info->isam_p_buf = NULL;
1333     grep_info->zh = zh;
1334     grep_info->reg_type = reg_type;
1335     grep_info->termset = 0;
1336
1337     if (!zapt)
1338         return 0;
1339     attr_init (&termset, zapt, 8);
1340     termset_value_numeric =
1341         attr_find_ex (&termset, NULL, &termset_value_string);
1342     if (termset_value_numeric != -1)
1343     {
1344         char resname[32];
1345         const char *termset_name = 0;
1346         if (termset_value_numeric != -2)
1347         {
1348     
1349             sprintf (resname, "%d", termset_value_numeric);
1350             termset_name = resname;
1351         }
1352         else
1353             termset_name = termset_value_string;
1354         logf (LOG_LOG, "creating termset set %s", termset_name);
1355         grep_info->termset = resultSetAdd (zh, termset_name, 1);
1356         if (!grep_info->termset)
1357         {
1358             zh->errCode = 128;
1359             zh->errString = nmem_strdup (stream, termset_name);
1360             return -1;
1361         }
1362     }
1363     return 0;
1364 }
1365                                
1366
1367 static RSET rpn_search_APT_phrase (ZebraHandle zh,
1368                                    Z_AttributesPlusTerm *zapt,
1369                                    const char *termz_org,
1370                                    oid_value attributeSet,
1371                                    NMEM stream,
1372                                    int reg_type, int complete_flag,
1373                                    const char *rank_type, int xpath_use,
1374                                    int num_bases, char **basenames, 
1375                                    NMEM rset_nmem)
1376 {
1377     char term_dst[IT_MAX_WORD+1];
1378     RSET rset[60], result;
1379     int rset_no = 0;
1380     struct grep_info grep_info;
1381     char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type);
1382     const char *termp = termz;
1383
1384     *term_dst = 0;
1385     if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream))
1386         return 0;
1387     while (1)
1388     { 
1389         logf (LOG_DEBUG, "APT_phrase termp=%s", termp);
1390         rset[rset_no] = term_trunc (zh, zapt, &termp, attributeSet,
1391                                     stream, &grep_info,
1392                                     reg_type, complete_flag,
1393                                     num_bases, basenames,
1394                                     term_dst, rank_type,
1395                                     xpath_use,rset_nmem);
1396         if (!rset[rset_no])
1397             break;
1398         if (++rset_no >= (int) (sizeof(rset)/sizeof(*rset)))
1399             break;
1400     }
1401     grep_info_delete (&grep_info);
1402     if (rset_no == 0)
1403         return rsnull_create (rset_nmem,key_it_ctrl); 
1404     else if (rset_no == 1)
1405         return (rset[0]);
1406     else
1407         result = rsprox_create( rset_nmem, key_it_ctrl, key_it_ctrl->scope,
1408                        rset_no, rset,
1409                        1 /* ordered */, 0 /* exclusion */,
1410                        3 /* relation */, 1 /* distance */);
1411     return result;
1412 }
1413
1414 static RSET rpn_search_APT_or_list (ZebraHandle zh,
1415                                     Z_AttributesPlusTerm *zapt,
1416                                     const char *termz_org,
1417                                     oid_value attributeSet,
1418                                     NMEM stream,
1419                                     int reg_type, int complete_flag,
1420                                     const char *rank_type,
1421                                     int xpath_use,
1422                                     int num_bases, char **basenames,
1423                                     NMEM rset_nmem)
1424 {
1425     char term_dst[IT_MAX_WORD+1];
1426     RSET rset[60];
1427     int rset_no = 0;
1428     struct grep_info grep_info;
1429     char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type);
1430     const char *termp = termz;
1431
1432     if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream))
1433         return 0;
1434     while (1)
1435     { 
1436         logf (LOG_DEBUG, "APT_or_list termp=%s", termp);
1437         rset[rset_no] = term_trunc (zh, zapt, &termp, attributeSet,
1438                                     stream, &grep_info,
1439                                     reg_type, complete_flag,
1440                                     num_bases, basenames,
1441                                     term_dst, rank_type,
1442                                     xpath_use,rset_nmem);
1443         if (!rset[rset_no])
1444             break;
1445         if (++rset_no >= (int) (sizeof(rset)/sizeof(*rset)))
1446             break;
1447     }
1448     grep_info_delete (&grep_info);
1449     if (rset_no == 0)
1450         return rsnull_create (rset_nmem,key_it_ctrl);  
1451     return rsmultior_create(rset_nmem, key_it_ctrl,key_it_ctrl->scope,
1452                             rset_no, rset);
1453 }
1454
1455 static RSET rpn_search_APT_and_list (ZebraHandle zh,
1456                                      Z_AttributesPlusTerm *zapt,
1457                                      const char *termz_org,
1458                                      oid_value attributeSet,
1459                                      NMEM stream,
1460                                      int reg_type, int complete_flag,
1461                                      const char *rank_type, 
1462                                      int xpath_use,
1463                                      int num_bases, char **basenames,
1464                                      NMEM rset_nmem)
1465 {
1466     char term_dst[IT_MAX_WORD+1];
1467     RSET rset[60]; /* FIXME - bug 160 - should be dynamic somehow */
1468     int rset_no = 0;
1469     struct grep_info grep_info;
1470     char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type);
1471     const char *termp = termz;
1472
1473     if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream))
1474         return 0;
1475     while (1)
1476     { 
1477         logf (LOG_DEBUG, "APT_and_list termp=%s", termp);
1478         rset[rset_no] = term_trunc (zh, zapt, &termp, attributeSet,
1479                                     stream, &grep_info,
1480                                     reg_type, complete_flag,
1481                                     num_bases, basenames,
1482                                     term_dst, rank_type,
1483                                     xpath_use, rset_nmem);
1484         if (!rset[rset_no])
1485             break;
1486         assert (rset[rset_no]);
1487         if (++rset_no >= (int) (sizeof(rset)/sizeof(*rset)))
1488             break;
1489     }
1490     grep_info_delete (&grep_info);
1491     if (rset_no == 0)
1492         return rsnull_create (rset_nmem,key_it_ctrl); 
1493
1494     return rsmultiand_create( rset_nmem, key_it_ctrl, key_it_ctrl->scope,
1495                               rset_no, rset);
1496 }
1497
1498 static int numeric_relation (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1499                              const char **term_sub,
1500                              char *term_dict,
1501                              oid_value attributeSet,
1502                              struct grep_info *grep_info,
1503                              int *max_pos,
1504                              int reg_type,
1505                              char *term_dst)
1506 {
1507     AttrType relation;
1508     int relation_value;
1509     int term_value;
1510     int r;
1511     char *term_tmp = term_dict + strlen(term_dict);
1512
1513     attr_init (&relation, zapt, 2);
1514     relation_value = attr_find (&relation, NULL);
1515
1516     logf (LOG_DEBUG, "numeric relation value=%d", relation_value);
1517
1518     if (!term_100 (zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1519                    term_dst))
1520         return 0;
1521     term_value = atoi (term_tmp);
1522     switch (relation_value)
1523     {
1524     case 1:
1525         logf (LOG_DEBUG, "Relation <");
1526         gen_regular_rel (term_tmp, term_value-1, 1);
1527         break;
1528     case 2:
1529         logf (LOG_DEBUG, "Relation <=");
1530         gen_regular_rel (term_tmp, term_value, 1);
1531         break;
1532     case 4:
1533         logf (LOG_DEBUG, "Relation >=");
1534         gen_regular_rel (term_tmp, term_value, 0);
1535         break;
1536     case 5:
1537         logf (LOG_DEBUG, "Relation >");
1538         gen_regular_rel (term_tmp, term_value+1, 0);
1539         break;
1540     case 3:
1541     default:
1542         logf (LOG_DEBUG, "Relation =");
1543         sprintf (term_tmp, "(0*%d)", term_value);
1544     }
1545     logf (LOG_DEBUG, "dict_lookup_grep: %s", term_tmp);
1546     r = dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info, max_pos,
1547                           0, grep_handle);
1548     if (r)
1549         logf (LOG_WARN, "dict_lookup_grep fail, rel=gt: %d", r);
1550     logf (LOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1551     return 1;
1552 }
1553
1554 static int numeric_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1555                          const char **term_sub, 
1556                          oid_value attributeSet, struct grep_info *grep_info,
1557                          int reg_type, int complete_flag,
1558                          int num_bases, char **basenames,
1559                          char *term_dst, int xpath_use, NMEM stream)
1560 {
1561     char term_dict[2*IT_MAX_WORD+2];
1562     int r, base_no;
1563     AttrType use;
1564     int use_value;
1565     const char *use_string = 0;
1566     oid_value curAttributeSet = attributeSet;
1567     const char *termp;
1568     struct rpn_char_map_info rcmi;
1569
1570     int bases_ok = 0;     /* no of databases with OK attribute */
1571     int errCode = 0;      /* err code (if any is not OK) */
1572     char *errString = 0;  /* addinfo */
1573
1574     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1575     attr_init (&use, zapt, 1);
1576     use_value = attr_find_ex (&use, &curAttributeSet, &use_string);
1577
1578     if (use_value == -1)
1579         use_value = 1016;
1580
1581     for (base_no = 0; base_no < num_bases; base_no++)
1582     {
1583         attent attp;
1584         data1_local_attribute id_xpath_attr;
1585         data1_local_attribute *local_attr;
1586         int max_pos, prefix_len = 0;
1587
1588         termp = *term_sub;
1589         if (use_value == -2)  /* string attribute (assume IDXPATH/any) */
1590         {
1591             use_value = xpath_use;
1592             attp.local_attributes = &id_xpath_attr;
1593             attp.attset_ordinal = VAL_IDXPATH;
1594             id_xpath_attr.next = 0;
1595             id_xpath_attr.local = use_value;
1596         }
1597         else if (curAttributeSet == VAL_IDXPATH)
1598         {
1599             attp.local_attributes = &id_xpath_attr;
1600             attp.attset_ordinal = VAL_IDXPATH;
1601             id_xpath_attr.next = 0;
1602             id_xpath_attr.local = use_value;
1603         }
1604         else
1605         {
1606             if ((r=att_getentbyatt (zh, &attp, curAttributeSet, use_value,
1607                                             use_string)))
1608             {
1609                 logf (LOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
1610                       curAttributeSet, use_value, r);
1611                 if (r == -1)
1612                 {
1613                     char val_str[32];
1614                     sprintf (val_str, "%d", use_value);
1615                     errString = nmem_strdup (stream, val_str);
1616                     errCode = 114;
1617                 }
1618                 else
1619                     errCode = 121;
1620                 continue;
1621             }
1622         }
1623         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1624         {
1625             zh->errCode = 109; /* Database unavailable */
1626             zh->errString = basenames[base_no];
1627             return -1;
1628         }
1629         for (local_attr = attp.local_attributes; local_attr;
1630              local_attr = local_attr->next)
1631         {
1632             int ord;
1633             char ord_buf[32];
1634             int i, ord_len;
1635
1636             ord = zebraExplain_lookupSU (zh->reg->zei, attp.attset_ordinal,
1637                                           local_attr->local);
1638             if (ord < 0)
1639                 continue;
1640             if (prefix_len)
1641                 term_dict[prefix_len++] = '|';
1642             else
1643                 term_dict[prefix_len++] = '(';
1644
1645             ord_len = key_SU_encode (ord, ord_buf);
1646             for (i = 0; i<ord_len; i++)
1647             {
1648                 term_dict[prefix_len++] = 1;
1649                 term_dict[prefix_len++] = ord_buf[i];
1650             }
1651         }
1652         if (!prefix_len)
1653         {
1654             char val_str[32];
1655             sprintf (val_str, "%d", use_value);
1656             errCode = 114;
1657             errString = nmem_strdup (stream, val_str);
1658             continue;
1659         }
1660         bases_ok++;
1661         term_dict[prefix_len++] = ')';        
1662         term_dict[prefix_len++] = 1;
1663         term_dict[prefix_len++] = reg_type;
1664         logf (LOG_DEBUG, "reg_type = %d", term_dict[prefix_len-1]);
1665         term_dict[prefix_len] = '\0';
1666         if (!numeric_relation (zh, zapt, &termp, term_dict,
1667                                attributeSet, grep_info, &max_pos, reg_type,
1668                                term_dst))
1669             return 0;
1670     }
1671     if (!bases_ok)
1672     {
1673         zh->errCode = errCode;
1674         zh->errString = errString;
1675         return -1;
1676     }
1677     *term_sub = termp;
1678     logf (LOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1679     return 1;
1680 }
1681
1682 static RSET rpn_search_APT_numeric (ZebraHandle zh,
1683                                     Z_AttributesPlusTerm *zapt,
1684                                     const char *termz,
1685                                     oid_value attributeSet,
1686                                     NMEM stream,
1687                                     int reg_type, int complete_flag,
1688                                     const char *rank_type, int xpath_use,
1689                                     int num_bases, char **basenames,
1690                                     NMEM rset_nmem)
1691 {
1692     char term_dst[IT_MAX_WORD+1];
1693     const char *termp = termz;
1694     RSET rset[60], result;
1695     int i, r, rset_no = 0;
1696     struct grep_info grep_info;
1697
1698     if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream))
1699         return 0;
1700     while (1)
1701     { 
1702         logf (LOG_DEBUG, "APT_numeric termp=%s", termp);
1703         grep_info.isam_p_indx = 0;
1704         r = numeric_term (zh, zapt, &termp, attributeSet, &grep_info,
1705                           reg_type, complete_flag, num_bases, basenames,
1706                           term_dst, xpath_use,
1707                           stream);
1708         if (r < 1)
1709             break;
1710         logf (LOG_DEBUG, "term: %s", term_dst);
1711         rset[rset_no] = rset_trunc (zh, grep_info.isam_p_buf,
1712                                     grep_info.isam_p_indx, term_dst,
1713                                     strlen(term_dst), rank_type,
1714                                     0 /* preserve position */,
1715                                     zapt->term->which, rset_nmem, 
1716                                     key_it_ctrl,key_it_ctrl->scope);
1717         assert (rset[rset_no]);
1718         if (++rset_no >= (int) (sizeof(rset)/sizeof(*rset)))
1719             break;
1720     }
1721     grep_info_delete (&grep_info);
1722     if (rset_no == 0)
1723         return rsnull_create (rset_nmem,key_it_ctrl);
1724     result = rset[0];
1725     for (i = 1; i<rset_no; i++)
1726     {
1727         /* FIXME - Use a proper multi-and */
1728         result= rsbool_create_and(rset_nmem,key_it_ctrl,key_it_ctrl->scope,
1729                 result, rset[i] );
1730     }
1731     return result;
1732 }
1733
1734 static RSET rpn_search_APT_local (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1735                                   const char *termz,
1736                                   oid_value attributeSet,
1737                                   NMEM stream,
1738                                   const char *rank_type, NMEM rset_nmem)
1739 {
1740     RSET result;
1741     RSFD rsfd;
1742     struct it_key key;
1743     int sys;
1744     /*
1745     rset_temp_parms parms;
1746
1747     parms.cmp = key_compare_it;
1748     parms.key_size = sizeof (struct it_key);
1749     parms.temp_path = res_get (zh->res, "setTmpDir");
1750     result = rset_create (rset_kind_temp, &parms);
1751     */
1752     result = rstemp_create( rset_nmem,key_it_ctrl,key_it_ctrl->scope,
1753                      res_get (zh->res, "setTmpDir") );
1754     rsfd = rset_open (result, RSETF_WRITE);
1755
1756     sys = atoi(termz);
1757     if (sys <= 0)
1758         sys = 1;
1759     key.mem[0] = sys;
1760     key.mem[1] = 1;
1761     key.len = 2;
1762     rset_write (rsfd, &key);
1763     rset_close (rsfd);
1764     return result;
1765 }
1766
1767 static RSET rpn_sort_spec (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1768                            oid_value attributeSet, NMEM stream,
1769                            Z_SortKeySpecList *sort_sequence,
1770                            const char *rank_type)
1771 {
1772     int i;
1773     int sort_relation_value;
1774     AttrType sort_relation_type;
1775     int use_value;
1776     AttrType use_type;
1777     Z_SortKeySpec *sks;
1778     Z_SortKey *sk;
1779     Z_AttributeElement *ae;
1780     int oid[OID_SIZE];
1781     oident oe;
1782     char termz[20];
1783     
1784     attr_init (&sort_relation_type, zapt, 7);
1785     sort_relation_value = attr_find (&sort_relation_type, &attributeSet);
1786
1787     attr_init (&use_type, zapt, 1);
1788     use_value = attr_find (&use_type, &attributeSet);
1789
1790     if (!sort_sequence->specs)
1791     {
1792         sort_sequence->num_specs = 10;
1793         sort_sequence->specs = (Z_SortKeySpec **)
1794             nmem_malloc (stream, sort_sequence->num_specs *
1795                          sizeof(*sort_sequence->specs));
1796         for (i = 0; i<sort_sequence->num_specs; i++)
1797             sort_sequence->specs[i] = 0;
1798     }
1799     if (zapt->term->which != Z_Term_general)
1800         i = 0;
1801     else
1802         i = atoi_n ((char *) zapt->term->u.general->buf,
1803                     zapt->term->u.general->len);
1804     if (i >= sort_sequence->num_specs)
1805         i = 0;
1806     sprintf (termz, "%d", i);
1807
1808     oe.proto = PROTO_Z3950;
1809     oe.oclass = CLASS_ATTSET;
1810     oe.value = attributeSet;
1811     if (!oid_ent_to_oid (&oe, oid))
1812         return 0;
1813
1814     sks = (Z_SortKeySpec *) nmem_malloc (stream, sizeof(*sks));
1815     sks->sortElement = (Z_SortElement *)
1816         nmem_malloc (stream, sizeof(*sks->sortElement));
1817     sks->sortElement->which = Z_SortElement_generic;
1818     sk = sks->sortElement->u.generic = (Z_SortKey *)
1819         nmem_malloc (stream, sizeof(*sk));
1820     sk->which = Z_SortKey_sortAttributes;
1821     sk->u.sortAttributes = (Z_SortAttributes *)
1822         nmem_malloc (stream, sizeof(*sk->u.sortAttributes));
1823
1824     sk->u.sortAttributes->id = oid;
1825     sk->u.sortAttributes->list = (Z_AttributeList *)
1826         nmem_malloc (stream, sizeof(*sk->u.sortAttributes->list));
1827     sk->u.sortAttributes->list->num_attributes = 1;
1828     sk->u.sortAttributes->list->attributes = (Z_AttributeElement **)
1829         nmem_malloc (stream, sizeof(*sk->u.sortAttributes->list->attributes));
1830     ae = *sk->u.sortAttributes->list->attributes = (Z_AttributeElement *)
1831         nmem_malloc (stream, sizeof(**sk->u.sortAttributes->list->attributes));
1832     ae->attributeSet = 0;
1833     ae->attributeType = (int *)
1834         nmem_malloc (stream, sizeof(*ae->attributeType));
1835     *ae->attributeType = 1;
1836     ae->which = Z_AttributeValue_numeric;
1837     ae->value.numeric = (int *)
1838         nmem_malloc (stream, sizeof(*ae->value.numeric));
1839     *ae->value.numeric = use_value;
1840
1841     sks->sortRelation = (int *)
1842         nmem_malloc (stream, sizeof(*sks->sortRelation));
1843     if (sort_relation_value == 1)
1844         *sks->sortRelation = Z_SortKeySpec_ascending;
1845     else if (sort_relation_value == 2)
1846         *sks->sortRelation = Z_SortKeySpec_descending;
1847     else 
1848         *sks->sortRelation = Z_SortKeySpec_ascending;
1849
1850     sks->caseSensitivity = (int *)
1851         nmem_malloc (stream, sizeof(*sks->caseSensitivity));
1852     *sks->caseSensitivity = 0;
1853
1854     sks->which = Z_SortKeySpec_null;
1855     sks->u.null = odr_nullval ();
1856     sort_sequence->specs[i] = sks;
1857     return rsnull_create (NULL,key_it_ctrl);
1858         /* FIXME - nmem?? */
1859 }
1860
1861
1862 static int parse_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1863                        oid_value attributeSet,
1864                        struct xpath_location_step *xpath, int max, NMEM mem)
1865 {
1866     oid_value curAttributeSet = attributeSet;
1867     AttrType use;
1868     const char *use_string = 0;
1869     
1870     attr_init (&use, zapt, 1);
1871     attr_find_ex (&use, &curAttributeSet, &use_string);
1872
1873     if (!use_string || *use_string != '/')
1874         return -1;
1875
1876     return zebra_parse_xpath_str(use_string, xpath, max, mem);
1877 }
1878  
1879                
1880
1881 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
1882                         int reg_type, const char *term, int use,
1883                         oid_value curAttributeSet, NMEM rset_nmem)
1884 {
1885     RSET rset;
1886     struct grep_info grep_info;
1887     char term_dict[2048];
1888     char ord_buf[32];
1889     int prefix_len = 0;
1890     int ord = zebraExplain_lookupSU (zh->reg->zei, curAttributeSet, use);
1891     int ord_len, i, r, max_pos;
1892     int term_type = Z_Term_characterString;
1893     const char *flags = "void";
1894
1895     if (grep_info_prepare (zh, 0 /* zapt */, &grep_info, '0', stream))
1896         return rsnull_create (rset_nmem,key_it_ctrl);
1897
1898     if (ord < 0)
1899         return rsnull_create (rset_nmem,key_it_ctrl);
1900     if (prefix_len)
1901         term_dict[prefix_len++] = '|';
1902     else
1903         term_dict[prefix_len++] = '(';
1904     
1905     ord_len = key_SU_encode (ord, ord_buf);
1906     for (i = 0; i<ord_len; i++)
1907     {
1908         term_dict[prefix_len++] = 1;
1909         term_dict[prefix_len++] = ord_buf[i];
1910     }
1911     term_dict[prefix_len++] = ')';
1912     term_dict[prefix_len++] = 1;
1913     term_dict[prefix_len++] = reg_type;
1914     
1915     strcpy (term_dict+prefix_len, term);
1916     
1917     grep_info.isam_p_indx = 0;
1918     r = dict_lookup_grep (zh->reg->dict, term_dict, 0,
1919                           &grep_info, &max_pos, 0, grep_handle);
1920     yaz_log (LOG_LOG, "%s %d positions", term,
1921              grep_info.isam_p_indx);
1922     rset = rset_trunc (zh, grep_info.isam_p_buf,
1923                        grep_info.isam_p_indx, term, strlen(term),
1924                        flags, 1, term_type,rset_nmem,
1925                        key_it_ctrl, key_it_ctrl->scope);
1926     grep_info_delete (&grep_info);
1927     return rset;
1928 }
1929
1930 static RSET rpn_search_xpath (ZebraHandle zh,
1931                               oid_value attributeSet,
1932                               int num_bases, char **basenames,
1933                               NMEM stream, const char *rank_type, RSET rset,
1934                               int xpath_len, struct xpath_location_step *xpath,
1935                               NMEM rset_nmem)
1936 {
1937     oid_value curAttributeSet = attributeSet;
1938     int base_no;
1939     int i;
1940
1941     if (xpath_len < 0)
1942         return rset;
1943
1944     yaz_log (LOG_LOG, "len=%d", xpath_len);
1945     for (i = 0; i<xpath_len; i++)
1946     {
1947         yaz_log (LOG_LOG, "XPATH %d %s", i, xpath[i].part);
1948
1949     }
1950
1951     curAttributeSet = VAL_IDXPATH;
1952
1953     /*
1954       //a    ->    a/.*
1955       //a/b  ->    b/a/.*
1956       /a     ->    a/
1957       /a/b   ->    b/a/
1958
1959       /      ->    none
1960
1961    a[@attr=value]/b[@other=othervalue]
1962
1963  /e/@a val      range(e/,range(@a,freetext(w,1015,val),@a),e/)
1964  /a/b val       range(b/a/,freetext(w,1016,val),b/a/)
1965  /a/b/@c val    range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
1966  /a/b[@c=y] val range(b/a/,freetext(w,1016,val),b/a/,@c=y)
1967  /a[@c=y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c=y)
1968  /a[@c=x]/b[@c=y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c=y),a/,@c=x)
1969       
1970     */
1971
1972     dict_grep_cmap (zh->reg->dict, 0, 0);
1973
1974     for (base_no = 0; base_no < num_bases; base_no++)
1975     {
1976         int level = xpath_len;
1977         int first_path = 1;
1978         
1979         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1980         {
1981             zh->errCode = 109; /* Database unavailable */
1982             zh->errString = basenames[base_no];
1983             return rset;
1984         }
1985         while (--level >= 0)
1986         {
1987             char xpath_rev[128];
1988             int i, len;
1989             RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
1990
1991             *xpath_rev = 0;
1992             len = 0;
1993             for (i = level; i >= 1; --i)
1994             {
1995                 const char *cp = xpath[i].part;
1996                 if (*cp)
1997                 {
1998                     for (;*cp; cp++)
1999                         if (*cp == '*')
2000                         {
2001                             memcpy (xpath_rev + len, "[^/]*", 5);
2002                             len += 5;
2003                         }
2004                         else if (*cp == ' ')
2005                         {
2006
2007                             xpath_rev[len++] = 1;
2008                             xpath_rev[len++] = ' ';
2009                         }
2010
2011                         else
2012                             xpath_rev[len++] = *cp;
2013                     xpath_rev[len++] = '/';
2014                 }
2015                 else if (i == 1)  /* // case */
2016                 {
2017                     xpath_rev[len++] = '.';
2018                     xpath_rev[len++] = '*';
2019                 }
2020             }
2021             xpath_rev[len] = 0;
2022
2023             if (xpath[level].predicate &&
2024                 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2025                 xpath[level].predicate->u.relation.name[0])
2026             {
2027                 WRBUF wbuf = wrbuf_alloc();
2028                 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2029                 if (xpath[level].predicate->u.relation.value)
2030                 {
2031                     const char *cp = xpath[level].predicate->u.relation.value;
2032                     wrbuf_putc(wbuf, '=');
2033                     
2034                     while (*cp)
2035                     {
2036                         if (strchr(REGEX_CHARS, *cp))
2037                             wrbuf_putc(wbuf, '\\');
2038                         wrbuf_putc(wbuf, *cp);
2039                         cp++;
2040                     }
2041                 }
2042                 wrbuf_puts(wbuf, "");
2043                 rset_attr = xpath_trunc (
2044                     zh, stream, '0', wrbuf_buf(wbuf), 3, 
2045                     curAttributeSet,rset_nmem);
2046                 wrbuf_free(wbuf, 1);
2047             } 
2048             else 
2049             {
2050                 if (!first_path)
2051                     continue;
2052             }
2053             yaz_log (LOG_LOG, "xpath_rev (%d) = %s", level, xpath_rev);
2054             if (strlen(xpath_rev))
2055             {
2056                 rset_start_tag = xpath_trunc(zh, stream, '0', 
2057                         xpath_rev, 1, curAttributeSet, rset_nmem);
2058             
2059                 rset_end_tag = xpath_trunc(zh, stream, '0', 
2060                         xpath_rev, 2, curAttributeSet, rset_nmem);
2061
2062                 /*
2063                 parms.key_size = sizeof(struct it_key);
2064                 parms.cmp = key_compare_it;
2065                 parms.rset_l = rset_start_tag;
2066                 parms.rset_m = rset;
2067                 parms.rset_r = rset_end_tag;
2068                 parms.rset_attr = rset_attr;
2069                 parms.printer = key_print_it;
2070                 rset = rset_create (rset_kind_between, &parms);
2071                 */
2072                 rset=rsbetween_create( rset_nmem,key_it_ctrl,key_it_ctrl->scope,
2073                         rset_start_tag, rset, rset_end_tag, rset_attr);
2074             }
2075             first_path = 0;
2076         }
2077     }
2078
2079     return rset;
2080 }
2081
2082
2083
2084 static RSET rpn_search_APT (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2085                             oid_value attributeSet, NMEM stream,
2086                             Z_SortKeySpecList *sort_sequence,
2087                             int num_bases, char **basenames, 
2088                             NMEM rset_nmem)
2089 {
2090     unsigned reg_id;
2091     char *search_type = NULL;
2092     char rank_type[128];
2093     int complete_flag;
2094     int sort_flag;
2095     char termz[IT_MAX_WORD+1];
2096     RSET rset = 0;
2097     int xpath_len;
2098     int xpath_use = 0;
2099     struct xpath_location_step xpath[10];
2100
2101     zebra_maps_attr (zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2102                      rank_type, &complete_flag, &sort_flag);
2103     
2104     logf (LOG_DEBUG, "reg_id=%c", reg_id);
2105     logf (LOG_DEBUG, "complete_flag=%d", complete_flag);
2106     logf (LOG_DEBUG, "search_type=%s", search_type);
2107     logf (LOG_DEBUG, "rank_type=%s", rank_type);
2108
2109     if (zapt_term_to_utf8(zh, zapt, termz))
2110         return 0;
2111
2112     if (sort_flag)
2113         return rpn_sort_spec (zh, zapt, attributeSet, stream, sort_sequence,
2114                               rank_type);
2115     xpath_len = parse_xpath(zh, zapt, attributeSet, xpath, 10, stream);
2116     if (xpath_len >= 0)
2117     {
2118         xpath_use = 1016;
2119         if (xpath[xpath_len-1].part[0] == '@')
2120             xpath_use = 1015;
2121     }
2122
2123     if (!strcmp (search_type, "phrase"))
2124     {
2125         rset = rpn_search_APT_phrase (zh, zapt, termz, attributeSet, stream,
2126                                       reg_id, complete_flag, rank_type,
2127                                       xpath_use,
2128                                       num_bases, basenames, rset_nmem);
2129     }
2130     else if (!strcmp (search_type, "and-list"))
2131     {
2132         rset = rpn_search_APT_and_list (zh, zapt, termz, attributeSet, stream,
2133                                         reg_id, complete_flag, rank_type,
2134                                         xpath_use,
2135                                         num_bases, basenames, rset_nmem);
2136     }
2137     else if (!strcmp (search_type, "or-list"))
2138     {
2139         rset = rpn_search_APT_or_list (zh, zapt, termz, attributeSet, stream,
2140                                        reg_id, complete_flag, rank_type,
2141                                        xpath_use,
2142                                        num_bases, basenames, rset_nmem);
2143     }
2144     else if (!strcmp (search_type, "local"))
2145     {
2146         rset = rpn_search_APT_local (zh, zapt, termz, attributeSet, stream,
2147                                      rank_type, rset_nmem);
2148     }
2149     else if (!strcmp (search_type, "numeric"))
2150     {
2151         rset = rpn_search_APT_numeric (zh, zapt, termz, attributeSet, stream,
2152                                        reg_id, complete_flag, rank_type,
2153                                        xpath_use,
2154                                        num_bases, basenames, rset_nmem);
2155     }
2156     else if (!strcmp (search_type, "always"))
2157     {
2158         rset = 0;
2159     }
2160     else
2161         zh->errCode = 118;
2162     return rpn_search_xpath (zh, attributeSet, num_bases, basenames,
2163                              stream, rank_type, rset, 
2164                              xpath_len, xpath, rset_nmem);
2165 }
2166
2167 static RSET rpn_search_structure (ZebraHandle zh, Z_RPNStructure *zs,
2168                                   oid_value attributeSet, 
2169                                   NMEM stream, NMEM rset_nmem,
2170                                   Z_SortKeySpecList *sort_sequence,
2171                                   int num_bases, char **basenames)
2172 {
2173     RSET r = NULL;
2174     if (zs->which == Z_RPNStructure_complex)
2175     {
2176         Z_Operator *zop = zs->u.complex->roperator;
2177         RSET rset_l;
2178         RSET rset_r;
2179
2180         rset_l = rpn_search_structure (zh, zs->u.complex->s1,
2181                                        attributeSet, stream, rset_nmem,
2182                                        sort_sequence,
2183                                        num_bases, basenames);
2184         if (rset_l == NULL)
2185             return NULL;
2186         rset_r = rpn_search_structure (zh, zs->u.complex->s2,
2187                                        attributeSet, stream, rset_nmem,
2188                                        sort_sequence,
2189                                        num_bases, basenames);
2190         if (rset_r == NULL)
2191         {
2192             rset_delete (rset_l);
2193             return NULL;
2194         }
2195
2196         switch (zop->which)
2197         {
2198         case Z_Operator_and:
2199             r = rsbool_create_and(rset_nmem,key_it_ctrl, key_it_ctrl->scope,
2200                     rset_l,rset_r );
2201             break;
2202         case Z_Operator_or:
2203             r = rsbool_create_or(rset_nmem,key_it_ctrl, key_it_ctrl->scope,
2204                     rset_l,rset_r );
2205             break;
2206         case Z_Operator_and_not:
2207             r = rsbool_create_not(rset_nmem,key_it_ctrl, key_it_ctrl->scope,
2208                     rset_l,rset_r );
2209             break;
2210         case Z_Operator_prox:
2211             if (zop->u.prox->which != Z_ProximityOperator_known)
2212             {
2213                 zh->errCode = 132;
2214                 return NULL;
2215             }
2216             if (*zop->u.prox->u.known != Z_ProxUnit_word)
2217             {
2218                 char *val = (char *) nmem_malloc (stream, 16);
2219                 zh->errCode = 132;
2220                 zh->errString = val;
2221                 sprintf (val, "%d", *zop->u.prox->u.known);
2222                 return NULL;
2223             }
2224             else
2225             {
2226                 /* new / old prox */
2227                 RSET twosets[2];
2228                 
2229                 twosets[0] = rset_l;
2230                 twosets[1] = rset_r;
2231                 r=rsprox_create(rset_nmem,key_it_ctrl,key_it_ctrl->scope,
2232                          2, twosets, 
2233                          *zop->u.prox->ordered,
2234                          (!zop->u.prox->exclusion ? 
2235                               0 : *zop->u.prox->exclusion),
2236                          *zop->u.prox->relationType,
2237                          *zop->u.prox->distance );
2238             }
2239             break;
2240         default:
2241             zh->errCode = 110;
2242             return NULL;
2243         }
2244     }
2245     else if (zs->which == Z_RPNStructure_simple)
2246     {
2247         if (zs->u.simple->which == Z_Operand_APT)
2248         {
2249             logf (LOG_DEBUG, "rpn_search_APT");
2250             r = rpn_search_APT (zh, zs->u.simple->u.attributesPlusTerm,
2251                                 attributeSet, stream, sort_sequence,
2252                                 num_bases, basenames,rset_nmem);
2253         }
2254         else if (zs->u.simple->which == Z_Operand_resultSetId)
2255         {
2256             logf (LOG_DEBUG, "rpn_search_ref");
2257             r = resultSetRef (zh, zs->u.simple->u.resultSetId);
2258             if (!r)
2259             {
2260                 r = rsnull_create (rset_nmem,key_it_ctrl);
2261                 zh->errCode = 30;
2262                 zh->errString =
2263                     nmem_strdup (stream, zs->u.simple->u.resultSetId);
2264                 return 0;
2265             }
2266             else
2267                 rset_dup(r);
2268         }
2269         else
2270         {
2271             zh->errCode = 3;
2272             return 0;
2273         }
2274     }
2275     else
2276     {
2277         zh->errCode = 3;
2278         return 0;
2279     }
2280     return r;
2281 }
2282
2283
2284 RSET rpn_search (ZebraHandle zh, NMEM nmem, NMEM rset_nmem,
2285                  Z_RPNQuery *rpn, int num_bases, char **basenames, 
2286                  const char *setname,
2287                  ZebraSet sset)
2288 {
2289     RSET rset;
2290     oident *attrset;
2291     oid_value attributeSet;
2292     Z_SortKeySpecList *sort_sequence;
2293     int sort_status, i;
2294
2295     zh->errCode = 0;
2296     zh->errString = NULL;
2297     zh->hits = 0;
2298
2299     sort_sequence = (Z_SortKeySpecList *)
2300         nmem_malloc (nmem, sizeof(*sort_sequence));
2301     sort_sequence->num_specs = 10;
2302     sort_sequence->specs = (Z_SortKeySpec **)
2303         nmem_malloc (nmem, sort_sequence->num_specs *
2304                      sizeof(*sort_sequence->specs));
2305     for (i = 0; i<sort_sequence->num_specs; i++)
2306         sort_sequence->specs[i] = 0;
2307     
2308     attrset = oid_getentbyoid (rpn->attributeSetId);
2309     attributeSet = attrset->value;
2310     rset = rpn_search_structure (zh, rpn->RPNStructure, attributeSet,
2311                                  nmem, rset_nmem,
2312                                  sort_sequence, num_bases, basenames);
2313     if (!rset)
2314         return 0;
2315
2316     if (zh->errCode)
2317         logf (LOG_DEBUG, "search error: %d", zh->errCode);
2318     
2319     for (i = 0; sort_sequence->specs[i]; i++)
2320         ;
2321     sort_sequence->num_specs = i;
2322     if (!i)
2323         resultSetRank (zh, sset, rset);
2324     else
2325     {
2326         logf (LOG_DEBUG, "resultSetSortSingle in rpn_search");
2327         resultSetSortSingle (zh, nmem, sset, rset,
2328                              sort_sequence, &sort_status);
2329         if (zh->errCode)
2330         {
2331             logf (LOG_DEBUG, "resultSetSortSingle status = %d", zh->errCode);
2332         }
2333     }
2334     return rset;
2335 }
2336
/* One term picked up by scan_handle during a dictionary scan. */
2337 struct scan_info_entry {
/* copy of the dictionary entry name with the scan prefix removed;
   NULL until scan_handle fills the slot */
2338     char *term;
/* ISAM position copied from the dictionary entry's info block */
2339     ISAMC_P isam_p;
2340 };
2341
/* State handed to scan_handle as its client pointer. */
2342 struct scan_info {
/* array of before+after slots, filled in by scan_handle */
2343     struct scan_info_entry *list;
/* ODR stream from which the term copies are allocated */
2344     ODR odr;
/* number of slots reserved for terms before / after the scan position */
2345     int before, after;
/* dictionary entries whose name does not start with this NUL
   terminated prefix are rejected by scan_handle */
2346     char prefix[20];
2347 };
2348
2349 static int scan_handle (char *name, const char *info, int pos, void *client)
2350 {
2351     int len_prefix, idx;
2352     struct scan_info *scan_info = (struct scan_info *) client;
2353
2354     len_prefix = strlen(scan_info->prefix);
2355     if (memcmp (name, scan_info->prefix, len_prefix))
2356         return 1;
2357     if (pos > 0)        idx = scan_info->after - pos + scan_info->before;
2358     else
2359         idx = - pos - 1;
2360     scan_info->list[idx].term = (char *)
2361         odr_malloc (scan_info->odr, strlen(name + len_prefix)+1);
2362     strcpy (scan_info->list[idx].term, name + len_prefix);
2363     assert (*info == sizeof(ISAMC_P));
2364     memcpy (&scan_info->list[idx].isam_p, info+1, sizeof(ISAMC_P));
2365     return 0;
2366 }
2367
2368 static void scan_term_untrans (ZebraHandle zh, NMEM stream, int reg_type,
2369                                char **dst, const char *src)
2370 {
2371     char term_src[IT_MAX_WORD];
2372     char term_dst[IT_MAX_WORD];
2373     
2374     term_untrans (zh, reg_type, term_src, src);
2375
2376     if (zh->iconv_from_utf8 != 0)
2377     {
2378         int len;
2379         char *inbuf = term_src;
2380         size_t inleft = strlen(term_src);
2381         char *outbuf = term_dst;
2382         size_t outleft = sizeof(term_dst)-1;
2383         size_t ret;
2384         
2385         ret = yaz_iconv (zh->iconv_from_utf8, &inbuf, &inleft,
2386                          &outbuf, &outleft);
2387         if (ret == (size_t)(-1))
2388             len = 0;
2389         else
2390             len = outbuf - term_dst;
2391         *dst = nmem_malloc (stream, len + 1);
2392         if (len > 0)
2393             memcpy (*dst, term_dst, len);
2394         (*dst)[len] = '\0';
2395     }
2396     else
2397         *dst = nmem_strdup (stream, term_src);
2398 }
2399
2400 static void count_set (RSET r, int *count)
2401 {
2402     zint psysno = 0;
2403     int kno = 0;
2404     struct it_key key;
2405     RSFD rfd;
2406
2407     logf (LOG_DEBUG, "count_set");
2408
2409     *count = 0;
2410     rfd = rset_open (r, RSETF_READ);
2411     while (rset_read (rfd, &key))
2412     {
2413         if (key.mem[0] != psysno)
2414         {
2415             psysno = key.mem[0];
2416             (*count)++;
2417         }
2418         kno++;
2419     }
2420     rset_close (rfd);
2421     logf (LOG_DEBUG, "%d keys, %d records", kno, *count);
2422 }
2423
2424 void rpn_scan (ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
2425                oid_value attributeset,
2426                int num_bases, char **basenames,
2427                int *position, int *num_entries, ZebraScanEntry **list,
2428                int *is_partial, RSET limit_set, int return_zero)
2429 {
2430     int i;
2431     int pos = *position;
2432     int num = *num_entries;
2433     int before;
2434     int after;
2435     int base_no;
2436     char termz[IT_MAX_WORD+20];
2437     AttrType use;
2438     int use_value;
2439     const char *use_string = 0;
2440     struct scan_info *scan_info_array;
2441     ZebraScanEntry *glist;
2442     int ords[32], ord_no = 0;
2443     int ptr[32];
2444
2445     int bases_ok = 0;     /* no of databases with OK attribute */
2446     int errCode = 0;      /* err code (if any is not OK) */
2447     char *errString = 0;  /* addinfo */
2448
2449     unsigned reg_id;
2450     char *search_type = NULL;
2451     char rank_type[128];
2452     int complete_flag;
2453     int sort_flag;
2454     NMEM rset_nmem=NULL; 
2455
2456     *list = 0;
2457
2458     if (attributeset == VAL_NONE)
2459         attributeset = VAL_BIB1;
2460
2461     if (!limit_set)
2462     {
2463         AttrType termset;
2464         int termset_value_numeric;
2465         const char *termset_value_string;
2466         attr_init (&termset, zapt, 8);
2467         termset_value_numeric =
2468             attr_find_ex (&termset, NULL, &termset_value_string);
2469         if (termset_value_numeric != -1)
2470         {
2471             char resname[32];
2472             const char *termset_name = 0;
2473             
2474             if (termset_value_numeric != -2)
2475             {
2476                 
2477                 sprintf (resname, "%d", termset_value_numeric);
2478                 termset_name = resname;
2479             }
2480             else
2481                 termset_name = termset_value_string;
2482             
2483             limit_set = resultSetRef (zh, termset_name);
2484         }
2485     }
2486         
2487     yaz_log (LOG_DEBUG, "position = %d, num = %d set=%d",
2488              pos, num, attributeset);
2489         
2490     attr_init (&use, zapt, 1);
2491     use_value = attr_find_ex (&use, &attributeset, &use_string);
2492
2493     if (zebra_maps_attr (zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2494                          rank_type, &complete_flag, &sort_flag))
2495     {
2496         *num_entries = 0;
2497         zh->errCode = 113;
2498         return ;
2499     }
2500     yaz_log (LOG_DEBUG, "use_value = %d", use_value);
2501
2502     if (use_value == -1)
2503         use_value = 1016;
2504     for (base_no = 0; base_no < num_bases && ord_no < 32; base_no++)
2505     {
2506         int r;
2507         attent attp;
2508         data1_local_attribute *local_attr;
2509
2510         if ((r=att_getentbyatt (zh, &attp, attributeset, use_value,
2511                                 use_string)))
2512         {
2513             logf (LOG_DEBUG, "att_getentbyatt fail. set=%d use=%d",
2514                   attributeset, use_value);
2515             if (r == -1)
2516             {
2517                 char val_str[32];
2518                 sprintf (val_str, "%d", use_value);
2519                 errCode = 114;
2520                 errString = odr_strdup (stream, val_str);
2521             }   
2522             else
2523                 errCode = 121;
2524             continue;
2525         }
2526         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2527         {
2528             zh->errString = basenames[base_no];
2529             zh->errCode = 109; /* Database unavailable */
2530             *num_entries = 0;
2531             return;
2532         }
2533         bases_ok++;
2534         for (local_attr = attp.local_attributes; local_attr && ord_no < 32;
2535              local_attr = local_attr->next)
2536         {
2537             int ord;
2538
2539             ord = zebraExplain_lookupSU (zh->reg->zei, attp.attset_ordinal,
2540                                          local_attr->local);
2541             if (ord > 0)
2542                 ords[ord_no++] = ord;
2543         }
2544     }
2545     if (!bases_ok && errCode)
2546     {
2547         zh->errCode = errCode;
2548         zh->errString = errString;
2549         *num_entries = 0;
2550     }
2551     if (ord_no == 0)
2552     {
2553         char val_str[32];
2554         sprintf (val_str, "%d", use_value);
2555         zh->errCode = 114;
2556         zh->errString = odr_strdup (stream, val_str);
2557
2558         *num_entries = 0;
2559         return;
2560     }
2561     /* prepare dictionary scanning */
2562     before = pos-1;
2563     after = 1+num-pos;
2564     scan_info_array = (struct scan_info *)
2565         odr_malloc (stream, ord_no * sizeof(*scan_info_array));
2566     for (i = 0; i < ord_no; i++)
2567     {
2568         int j, prefix_len = 0;
2569         int before_tmp = before, after_tmp = after;
2570         struct scan_info *scan_info = scan_info_array + i;
2571         struct rpn_char_map_info rcmi;
2572
2573         rpn_char_map_prepare (zh->reg, reg_id, &rcmi);
2574
2575         scan_info->before = before;
2576         scan_info->after = after;
2577         scan_info->odr = stream;
2578
2579         scan_info->list = (struct scan_info_entry *)
2580             odr_malloc (stream, (before+after) * sizeof(*scan_info->list));
2581         for (j = 0; j<before+after; j++)
2582             scan_info->list[j].term = NULL;
2583
2584         prefix_len += key_SU_encode (ords[i], termz + prefix_len);
2585         termz[prefix_len++] = reg_id;
2586         termz[prefix_len] = 0;
2587         strcpy (scan_info->prefix, termz);
2588
2589         if (trans_scan_term (zh, zapt, termz+prefix_len, reg_id))
2590             return ;
2591                     
2592         dict_scan (zh->reg->dict, termz, &before_tmp, &after_tmp,
2593                    scan_info, scan_handle);
2594     }
2595     glist = (ZebraScanEntry *)
2596         odr_malloc (stream, (before+after)*sizeof(*glist));
2597
2598     rset_nmem=nmem_create();
2599
2600     /* consider terms after main term */
2601     for (i = 0; i < ord_no; i++)
2602         ptr[i] = before;
2603     
2604     *is_partial = 0;
2605     for (i = 0; i<after; i++)
2606     {
2607         int j, j0 = -1;
2608         const char *mterm = NULL;
2609         const char *tst;
2610         RSET rset;
2611         
2612         for (j = 0; j < ord_no; j++)
2613         {
2614             if (ptr[j] < before+after &&
2615                 (tst=scan_info_array[j].list[ptr[j]].term) &&
2616                 (!mterm || strcmp (tst, mterm) < 0))
2617             {
2618                 j0 = j;
2619                 mterm = tst;
2620             }
2621         }
2622         if (j0 == -1)
2623             break;
2624         scan_term_untrans (zh, stream->mem, reg_id,
2625                            &glist[i+before].term, mterm);
2626         rset = rset_trunc (zh, &scan_info_array[j0].list[ptr[j0]].isam_p, 1,
2627                            glist[i+before].term, strlen(glist[i+before].term),
2628                            NULL, 0, zapt->term->which, rset_nmem, 
2629                            key_it_ctrl,key_it_ctrl->scope);
2630         ptr[j0]++;
2631         for (j = j0+1; j<ord_no; j++)
2632         {
2633             if (ptr[j] < before+after &&
2634                 (tst=scan_info_array[j].list[ptr[j]].term) &&
2635                 !strcmp (tst, mterm))
2636             {
2637                 RSET rset2;
2638
2639                 rset2 =
2640                    rset_trunc (zh, &scan_info_array[j].list[ptr[j]].isam_p, 1,
2641                                glist[i+before].term,
2642                                strlen(glist[i+before].term), NULL, 0,
2643                                zapt->term->which,rset_nmem,
2644                                key_it_ctrl, key_it_ctrl->scope);
2645                 rset = rsbool_create_or(rset_nmem,key_it_ctrl,
2646                                key_it_ctrl->scope, rset, rset2);
2647                 /* FIXME - Use a proper multi-or */
2648
2649                 ptr[j]++;
2650             }
2651         }
2652         if (limit_set)
2653             rset = rsbool_create_and(rset_nmem,key_it_ctrl,key_it_ctrl->scope,
2654                             rset, rset_dup(limit_set));
2655         count_set (rset, &glist[i+before].occurrences);
2656         rset_delete (rset);
2657     }
2658     if (i < after)
2659     {
2660         *num_entries -= (after-i);
2661         *is_partial = 1;
2662     }
2663
2664     /* consider terms before main term */
2665     for (i = 0; i<ord_no; i++)
2666         ptr[i] = 0;
2667
2668     for (i = 0; i<before; i++)
2669     {
2670         int j, j0 = -1;
2671         const char *mterm = NULL;
2672         const char *tst;
2673         RSET rset;
2674         
2675         for (j = 0; j <ord_no; j++)
2676         {
2677             if (ptr[j] < before &&
2678                 (tst=scan_info_array[j].list[before-1-ptr[j]].term) &&
2679                 (!mterm || strcmp (tst, mterm) > 0))
2680             {
2681                 j0 = j;
2682                 mterm = tst;
2683             }
2684         }
2685         if (j0 == -1)
2686             break;
2687
2688         scan_term_untrans (zh, stream->mem, reg_id,
2689                            &glist[before-1-i].term, mterm);
2690
2691         rset = rset_trunc
2692                (zh, &scan_info_array[j0].list[before-1-ptr[j0]].isam_p, 1,
2693                 glist[before-1-i].term, strlen(glist[before-1-i].term),
2694                 NULL, 0, zapt->term->which,rset_nmem,
2695                 key_it_ctrl,key_it_ctrl->scope);
2696
2697         ptr[j0]++;
2698
2699         for (j = j0+1; j<ord_no; j++)
2700         {
2701             if (ptr[j] < before &&
2702                 (tst=scan_info_array[j].list[before-1-ptr[j]].term) &&
2703                 !strcmp (tst, mterm))
2704             {
2705                 RSET rset2;
2706
2707                 rset2 = rset_trunc (zh,
2708                          &scan_info_array[j].list[before-1-ptr[j]].isam_p, 1,
2709                                     glist[before-1-i].term,
2710                                     strlen(glist[before-1-i].term), NULL, 0,
2711                                     zapt->term->which, rset_nmem,
2712                                     key_it_ctrl, key_it_ctrl->scope);
2713                 rset = rsbool_create_and(rset_nmem,key_it_ctrl,
2714                             key_it_ctrl->scope, rset, rset2);
2715                 /* FIXME - multi-and ?? */
2716                 ptr[j]++;
2717             }
2718         }
2719         if (limit_set)
2720             rset = rsbool_create_and(rset_nmem,key_it_ctrl,key_it_ctrl->scope,
2721                             rset, rset_dup(limit_set));
2722         count_set (rset, &glist[before-1-i].occurrences);
2723         rset_delete (rset);
2724     }
2725     i = before-i;
2726     if (i)
2727     {
2728         *is_partial = 1;
2729         *position -= i;
2730         *num_entries -= i;
2731     }
2732
2733     nmem_destroy(rset_nmem);
2734     *list = glist + i;               /* list is set to first 'real' entry */
2735     
2736     logf (LOG_DEBUG, "position = %d, num_entries = %d",
2737           *position, *num_entries);
2738     if (zh->errCode)
2739         logf (LOG_DEBUG, "scan error: %d", zh->errCode);
2740 }
2741