Passing a TERMID to rsets when creating, and getting it back when reading.
[idzebra-moved-to-github.git] / index / zrpn.c
1 /* $Id: zrpn.c,v 1.157 2004-10-15 10:07:32 heikki Exp $
2    Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004
3    Index Data Aps
4
5 This file is part of the Zebra server.
6
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra.  If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA.
21 */
22
23
24 #include <stdio.h>
25 #include <assert.h>
26 #ifdef WIN32
27 #include <io.h>
28 #else
29 #include <unistd.h>
30 #endif
31 #include <ctype.h>
32
33 #include "index.h"
34 #include <zebra_xpath.h>
35
36 #include <charmap.h>
37 #include <rset.h>
38
39
/* Key-control descriptor for struct it_key.  It is published through
   key_it_ctrl, which term_trunc in this file hands to rset_trunc. */
static const struct key_control it_ctrl = { 
    sizeof(struct it_key),
    2, /* we have sysnos and seqnos in this key, nothing more */
    key_compare_it, 
    key_logdump_txt,   /* FIXME  - clean up these functions */
    key_get_seq,
};


/* Externally visible pointer to the descriptor above. */
const struct key_control *key_it_ctrl = &it_ctrl;
50
/* Context handed (as the opaque pointer) to rpn_char_map_handler: the
   two values it forwards to zebra_maps_input. */
struct rpn_char_map_info {
    ZebraMaps zm;       /* character maps, taken from the register */
    int reg_type;       /* register type to map input with */
};
55
/* Cursor used by attr_init / attr_find_ex to walk the attribute list
   of a term looking for one attribute type. */
typedef struct {
    int type;      /* attribute type being searched for */
    int major;     /* index of the next element in zapt->attributes */
    int minor;     /* index of the next item in a complex attribute's list */
    Z_AttributesPlusTerm *zapt;   /* term whose attributes are scanned */
} AttrType;
62
63
64 static const char **rpn_char_map_handler (void *vp, const char **from, int len)
65 {
66     struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
67     const char **out = zebra_maps_input (p->zm, p->reg_type, from, len, 0);
68 #if 0
69     if (out && *out)
70     {
71         const char *outp = *out;
72         yaz_log (LOG_LOG, "---");
73         while (*outp)
74         {
75             yaz_log (LOG_LOG, "%02X", *outp);
76             outp++;
77         }
78     }
79 #endif
80     return out;
81 }
82
83 static void rpn_char_map_prepare (struct zebra_register *reg, int reg_type,
84                                   struct rpn_char_map_info *map_info)
85 {
86     map_info->zm = reg->zebra_maps;
87     map_info->reg_type = reg_type;
88     dict_grep_cmap (reg->dict, map_info, rpn_char_map_handler);
89 }
90
91 static int attr_find_ex (AttrType *src, oid_value *attributeSetP,
92                          const char **string_value)
93 {
94     int num_attributes;
95
96     num_attributes = src->zapt->attributes->num_attributes;
97     while (src->major < num_attributes)
98     {
99         Z_AttributeElement *element;
100
101         element = src->zapt->attributes->attributes[src->major];
102         if (src->type == *element->attributeType)
103         {
104             switch (element->which) 
105             {
106             case Z_AttributeValue_numeric:
107                 ++(src->major);
108                 if (element->attributeSet && attributeSetP)
109                 {
110                     oident *attrset;
111
112                     attrset = oid_getentbyoid (element->attributeSet);
113                     *attributeSetP = attrset->value;
114                 }
115                 return *element->value.numeric;
116                 break;
117             case Z_AttributeValue_complex:
118                 if (src->minor >= element->value.complex->num_list)
119                     break;
120                 if (element->attributeSet && attributeSetP)
121                 {
122                     oident *attrset;
123                     
124                     attrset = oid_getentbyoid (element->attributeSet);
125                     *attributeSetP = attrset->value;
126                 }
127                 if (element->value.complex->list[src->minor]->which ==  
128                     Z_StringOrNumeric_numeric)
129                 {
130                     ++(src->minor);
131                     return
132                         *element->value.complex->list[src->minor-1]->u.numeric;
133                 }
134                 else if (element->value.complex->list[src->minor]->which ==  
135                          Z_StringOrNumeric_string)
136                 {
137                     if (!string_value)
138                         break;
139                     ++(src->minor);
140                     *string_value = 
141                         element->value.complex->list[src->minor-1]->u.string;
142                     return -2;
143                 }
144                 else
145                     break;
146             default:
147                 assert (0);
148             }
149         }
150         ++(src->major);
151     }
152     return -1;
153 }
154
155 static int attr_find (AttrType *src, oid_value *attributeSetP)
156 {
157     return attr_find_ex (src, attributeSetP, 0);
158 }
159
160 static void attr_init (AttrType *src, Z_AttributesPlusTerm *zapt,
161                        int type)
162 {
163     src->zapt = zapt;
164     src->type = type;
165     src->major = 0;
166     src->minor = 0;
167 }
168
#define TERM_COUNT        
       
/* Accumulator for dictionary hits; add_isam_p appends one entry per hit. */
struct grep_info {        
#ifdef TERM_COUNT        
    int *term_no;        /* array kept at the same capacity as isam_p_buf */
#endif        
    ISAMC_P *isam_p_buf; /* collected ISAM positions */
    int isam_p_size;     /* allocated number of entries */
    int isam_p_indx;     /* number of entries in use */
    ZebraHandle zh;
    int reg_type;        /* register type used when converting terms back */
    ZebraSet termset;    /* when non-null, each hit's term is added to it */
};        
182
183 static void term_untrans  (ZebraHandle zh, int reg_type,
184                            char *dst, const char *src)
185 {
186     int len = 0;
187     while (*src)
188     {
189         const char *cp = zebra_maps_output (zh->reg->zebra_maps,
190                                             reg_type, &src);
191         if (!cp && len < IT_MAX_WORD-1)
192             dst[len++] = *src++;
193         else
194             while (*cp && len < IT_MAX_WORD-1)
195                 dst[len++] = *cp++;
196     }
197     dst[len] = '\0';
198 }
199
200 static void add_isam_p (const char *name, const char *info,
201                         struct grep_info *p)
202 {
203     if (p->isam_p_indx == p->isam_p_size)
204     {
205         ISAMC_P *new_isam_p_buf;
206 #ifdef TERM_COUNT        
207         int *new_term_no;        
208 #endif
209         p->isam_p_size = 2*p->isam_p_size + 100;
210         new_isam_p_buf = (ISAMC_P *) xmalloc (sizeof(*new_isam_p_buf) *
211                                              p->isam_p_size);
212         if (p->isam_p_buf)
213         {
214             memcpy (new_isam_p_buf, p->isam_p_buf,
215                     p->isam_p_indx * sizeof(*p->isam_p_buf));
216             xfree (p->isam_p_buf);
217         }
218         p->isam_p_buf = new_isam_p_buf;
219
220 #ifdef TERM_COUNT
221         new_term_no = (int *) xmalloc (sizeof(*new_term_no) *
222                                        p->isam_p_size);
223         if (p->term_no)
224         {
225             memcpy (new_term_no, p->isam_p_buf,
226                     p->isam_p_indx * sizeof(*p->term_no));
227             xfree (p->term_no);
228         }
229         p->term_no = new_term_no;
230 #endif
231     }
232     assert (*info == sizeof(*p->isam_p_buf));
233     memcpy (p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
234
235 #if 1
236     if (p->termset)
237     {
238         const char *db;
239         int set, use;
240         char term_tmp[IT_MAX_WORD];
241         int su_code = 0;
242         int len = key_SU_decode (&su_code, name);
243         
244         term_untrans  (p->zh, p->reg_type, term_tmp, name+len+1);
245         logf (LOG_LOG, "grep: %d %c %s", su_code, name[len], term_tmp);
246         zebraExplain_lookup_ord (p->zh->reg->zei,
247                                  su_code, &db, &set, &use);
248         logf (LOG_LOG, "grep:  set=%d use=%d db=%s", set, use, db);
249         
250         resultSetAddTerm (p->zh, p->termset, name[len], db,
251                           set, use, term_tmp);
252     }
253 #endif
254     (p->isam_p_indx)++;
255 }
256
/* grep_handle: hit callback with an opaque context pointer; p is the
   struct grep_info to append the hit to.  Always returns 0. */
static int grep_handle (char *name, const char *info, void *p)
{
    struct grep_info *acc = (struct grep_info *) p;

    add_isam_p (name, info, acc);
    return 0;
}
262
263 static int term_pre (ZebraMaps zebra_maps, int reg_type, const char **src,
264                      const char *ct1, const char *ct2, int first)
265 {
266     const char *s1, *s0 = *src;
267     const char **map;
268
269     /* skip white space */
270     while (*s0)
271     {
272         if (ct1 && strchr (ct1, *s0))
273             break;
274         if (ct2 && strchr (ct2, *s0))
275             break;
276         s1 = s0;
277         map = zebra_maps_input (zebra_maps, reg_type, &s1, strlen(s1), first);
278         if (**map != *CHR_SPACE)
279             break;
280         s0 = s1;
281     }
282     *src = s0;
283     return *s0;
284 }
285
286 #define REGEX_CHARS " []()|.*+?!"
287
288 /* term_100: handle term, where trunc=none (no operators at all) */
289 static int term_100 (ZebraMaps zebra_maps, int reg_type,
290                      const char **src, char *dst, int space_split,
291                      char *dst_term)
292 {
293     const char *s0, *s1;
294     const char **map;
295     int i = 0;
296     int j = 0;
297
298     const char *space_start = 0;
299     const char *space_end = 0;
300
301     if (!term_pre (zebra_maps, reg_type, src, NULL, NULL, !space_split))
302         return 0;
303     s0 = *src;
304     while (*s0)
305     {
306         s1 = s0;
307         map = zebra_maps_input (zebra_maps, reg_type, &s0, strlen(s0), 0);
308         if (space_split)
309         {
310             if (**map == *CHR_SPACE)
311                 break;
312         }
313         else  /* complete subfield only. */
314         {
315             if (**map == *CHR_SPACE)
316             {   /* save space mapping for later  .. */
317                 space_start = s1;
318                 space_end = s0;
319                 continue;
320             }
321             else if (space_start)
322             {   /* reload last space */
323                 while (space_start < space_end)
324                 {
325                     if (strchr (REGEX_CHARS, *space_start))
326                         dst[i++] = '\\';
327                     dst_term[j++] = *space_start;
328                     dst[i++] = *space_start++;
329                 }
330                 /* and reset */
331                 space_start = space_end = 0;
332             }
333         }
334         /* add non-space char */
335         while (s1 < s0)
336         {
337             if (strchr(REGEX_CHARS, *s1))
338                 dst[i++] = '\\';
339             dst_term[j++] = *s1;
340             dst[i++] = *s1++;
341         }
342     }
343     dst[i] = '\0';
344     dst_term[j] = '\0';
345     *src = s0;
346     return i;
347 }
348
349 /* term_101: handle term, where trunc=Process # */
350 static int term_101 (ZebraMaps zebra_maps, int reg_type,
351                      const char **src, char *dst, int space_split,
352                      char *dst_term)
353 {
354     const char *s0, *s1;
355     const char **map;
356     int i = 0;
357     int j = 0;
358
359     if (!term_pre (zebra_maps, reg_type, src, "#", "#", !space_split))
360         return 0;
361     s0 = *src;
362     while (*s0)
363     {
364         if (*s0 == '#')
365         {
366             dst[i++] = '.';
367             dst[i++] = '*';
368             dst_term[j++] = *s0++;
369         }
370         else
371         {
372             s1 = s0;
373             map = zebra_maps_input (zebra_maps, reg_type, &s0, strlen(s0), 0);
374             if (space_split && **map == *CHR_SPACE)
375                 break;
376             while (s1 < s0)
377             {
378                 if (strchr(REGEX_CHARS, *s1))
379                     dst[i++] = '\\';
380                 dst_term[j++] = *s1;
381                 dst[i++] = *s1++;
382             }
383         }
384     }
385     dst[i] = '\0';
386     dst_term[j++] = '\0';
387     *src = s0;
388     return i;
389 }
390
391 /* term_103: handle term, where trunc=re-2 (regular expressions) */
392 static int term_103 (ZebraMaps zebra_maps, int reg_type, const char **src,
393                      char *dst, int *errors, int space_split,
394                      char *dst_term)
395 {
396     int i = 0;
397     int j = 0;
398     const char *s0, *s1;
399     const char **map;
400
401     if (!term_pre (zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split))
402         return 0;
403     s0 = *src;
404     if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
405         isdigit (s0[1]))
406     {
407         *errors = s0[1] - '0';
408         s0 += 3;
409         if (*errors > 3)
410             *errors = 3;
411     }
412     while (*s0)
413     {
414         if (strchr ("^\\()[].*+?|-", *s0))
415         {
416             dst_term[j++] = *s0;
417             dst[i++] = *s0++;
418         }
419         else
420         {
421             s1 = s0;
422             map = zebra_maps_input (zebra_maps, reg_type, &s0, strlen(s0), 0);
423             if (**map == *CHR_SPACE)
424                 break;
425             while (s1 < s0)
426             {
427                 if (strchr(REGEX_CHARS, *s1))
428                     dst[i++] = '\\';
429                 dst_term[j++] = *s1;
430                 dst[i++] = *s1++;
431             }
432         }
433     }
434     dst[i] = '\0';
435     dst_term[j] = '\0';
436     *src = s0;
437     return i;
438 }
439
440 /* term_103: handle term, where trunc=re-1 (regular expressions) */
441 static int term_102 (ZebraMaps zebra_maps, int reg_type, const char **src,
442                      char *dst, int space_split, char *dst_term)
443 {
444     return term_103 (zebra_maps, reg_type, src, dst, NULL, space_split,
445                      dst_term);
446 }
447
448
449 /* term_104: handle term, where trunc=Process # and ! */
450 static int term_104 (ZebraMaps zebra_maps, int reg_type,
451                      const char **src, char *dst, int space_split,
452                      char *dst_term)
453 {
454     const char *s0, *s1;
455     const char **map;
456     int i = 0;
457     int j = 0;
458
459     if (!term_pre (zebra_maps, reg_type, src, "?*#", "?*#", !space_split))
460         return 0;
461     s0 = *src;
462     while (*s0)
463     {
464         if (*s0 == '?')
465         {
466             dst_term[j++] = *s0++;
467             if (*s0 >= '0' && *s0 <= '9')
468             {
469                 int limit = 0;
470                 while (*s0 >= '0' && *s0 <= '9')
471                 {
472                     limit = limit * 10 + (*s0 - '0');
473                     dst_term[j++] = *s0++;
474                 }
475                 if (limit > 20)
476                     limit = 20;
477                 while (--limit >= 0)
478                 {
479                     dst[i++] = '.';
480                     dst[i++] = '?';
481                 }
482             }
483             else
484             {
485                 dst[i++] = '.';
486                 dst[i++] = '*';
487             }
488         }
489         else if (*s0 == '*')
490         {
491             dst[i++] = '.';
492             dst[i++] = '*';
493             dst_term[j++] = *s0++;
494         }
495         else if (*s0 == '#')
496         {
497             dst[i++] = '.';
498             dst_term[j++] = *s0++;
499         }
500         {
501             s1 = s0;
502             map = zebra_maps_input (zebra_maps, reg_type, &s0, strlen(s0), 0);
503             if (space_split && **map == *CHR_SPACE)
504                 break;
505             while (s1 < s0)
506             {
507                 if (strchr(REGEX_CHARS, *s1))
508                     dst[i++] = '\\';
509                 dst_term[j++] = *s1;
510                 dst[i++] = *s1++;
511             }
512         }
513     }
514     dst[i] = '\0';
515     dst_term[j++] = '\0';
516     *src = s0;
517     return i;
518 }
519
520 /* term_105/106: handle term, where trunc=Process * and ! and right trunc */
521 static int term_105 (ZebraMaps zebra_maps, int reg_type,
522                      const char **src, char *dst, int space_split,
523                      char *dst_term, int right_truncate)
524 {
525     const char *s0, *s1;
526     const char **map;
527     int i = 0;
528     int j = 0;
529
530     if (!term_pre (zebra_maps, reg_type, src, "*!", "*!", !space_split))
531         return 0;
532     s0 = *src;
533     while (*s0)
534     {
535         if (*s0 == '*')
536         {
537             dst[i++] = '.';
538             dst[i++] = '*';
539             dst_term[j++] = *s0++;
540         }
541         else if (*s0 == '!')
542         {
543             dst[i++] = '.';
544             dst_term[j++] = *s0++;
545         }
546         {
547             s1 = s0;
548             map = zebra_maps_input (zebra_maps, reg_type, &s0, strlen(s0), 0);
549             if (space_split && **map == *CHR_SPACE)
550                 break;
551             while (s1 < s0)
552             {
553                 if (strchr(REGEX_CHARS, *s1))
554                     dst[i++] = '\\';
555                 dst_term[j++] = *s1;
556                 dst[i++] = *s1++;
557             }
558         }
559     }
560     if (right_truncate)
561     {
562         dst[i++] = '.';
563         dst[i++] = '*';
564     }
565     dst[i] = '\0';
566     
567     dst_term[j++] = '\0';
568     *src = s0;
569     return i;
570 }
571
572
/* gen_regular_rel - generate regular expression from relation
 *  dst:     output buffer for the NUL-terminated expression; its
 *           capacity is not checked here
 *  val:     border value (inclusive)
 *  islt:    1 if <=; 0 if >=.
 *
 * A negative val is handled by the prefix written first; the digit
 * alternatives are then generated for -val with the direction reversed.
 */
static void gen_regular_rel (char *dst, int val, int islt)
{
    int dst_p;
    int w, d, i;
    int pos = 0;
    char numstr[20];

    logf (LOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
    if (val >= 0)
    {
        /* <= val: every negative number qualifies as well */
        if (islt)
            strcpy (dst, "(-[0-9]+|(");
        else
            strcpy (dst, "((");
    } 
    else
    {
        if (!islt)
        {
            /* >= negative val: every non-negative number qualifies, plus
               negatives whose magnitude is <= -val */
            strcpy (dst, "([0-9]+|-(");
            dst_p = strlen (dst);   /* recomputed after this if/else */
            islt = 1;
        }
        else
        {
            /* <= negative val: negatives whose magnitude is >= -val */
            strcpy (dst, "(-(");
            islt = 0;
        }
        val = -val;
    }
    dst_p = strlen (dst);
    sprintf (numstr, "%d", val);
    /* Walk the digits from least (pos 0) to most significant, emitting
       one '|'-terminated alternative per digit position. */
    for (w = strlen(numstr); --w >= 0; pos++)
    {
        d = numstr[w];
        if (pos > 0)
        {
            /* For all but the last digit the range must exclude the digit
               itself; no alternative is emitted if that leaves nothing. */
            if (islt)
            {
                if (d == '0')
                    continue;
                d--;
            } 
            else
            {
                if (d == '9')
                    continue;
                d++;
            }
        }
        
        /* write the full number, then back up to the digit at index w */
        strcpy (dst + dst_p, numstr);
        dst_p = strlen(dst) - pos - 1;

        if (islt)
        {
            if (d != '0')
            {
                dst[dst_p++] = '[';
                dst[dst_p++] = '0';
                dst[dst_p++] = '-';
                dst[dst_p++] = d;
                dst[dst_p++] = ']';
            }
            else
                dst[dst_p++] = d;
        }
        else
        {
            if (d != '9')
            { 
                dst[dst_p++] = '[';
                dst[dst_p++] = d;
                dst[dst_p++] = '-';
                dst[dst_p++] = '9';
                dst[dst_p++] = ']';
            }
            else
                dst[dst_p++] = d;
        }
        /* the pos less significant digits may be anything */
        for (i = 0; i<pos; i++)
        {
            dst[dst_p++] = '[';
            dst[dst_p++] = '0';
            dst[dst_p++] = '-';
            dst[dst_p++] = '9';
            dst[dst_p++] = ']';
        }
        dst[dst_p++] = '|';
    }
    dst[dst_p] = '\0';
    /* here pos equals the number of digits in val */
    if (islt)
    {
        /* match everything less than 10^(pos-1) */
        strcat (dst, "0*");
        for (i=1; i<pos; i++)
            strcat (dst, "[0-9]?");
    }
    else
    {
        /* match everything greater than 10^pos */
        for (i = 0; i <= pos; i++)
            strcat (dst, "[0-9]");
        strcat (dst, "[0-9]*");
    }
    strcat (dst, "))");
}
684
/*
 * string_rel_add_char: copy one (possibly backslash-escaped) character
 * from src at index *indx to *term_p.  A backslash is copied together
 * with the character that follows it.  Both *term_p and *indx are
 * advanced past what was copied.
 */
void string_rel_add_char (char **term_p, const char *src, int *indx)
{
    char *out = *term_p;
    int pos = *indx;

    if (src[pos] == '\\')
        *out++ = src[pos++];
    *out++ = src[pos++];

    *term_p = out;
    *indx = pos;
}
691
/*
 * string_relation: build the dictionary regular expression for one term
 * according to its relation attribute (type 2) and append it to term_dict.
 *
 *  term_sub:     in/out input position; advanced by term_100
 *  term_dict:    NUL-terminated expression so far; the new part is
 *                appended at its end
 *  reg_type, space_split:  passed on to term_100
 *  term_dst:     receives the term as written (filled by term_100)
 *
 * Returns 0 if term_100 found no term, 1 otherwise.
 *
 * Shape of the expression for the term "abc"; the second line of each
 * pair is the form the code below emits:
 *
 *   >  abc     ([b-].*|a[c-].*|ab[d-].*|abc.+)
 *              ([^-a].*|a[^-b].*|ab[^-c].*|abc.+)
 *   >= abc     ([b-].*|a[c-].*|ab[c-].*)
 *              ([^-a].*|a[^-b].*|ab[c-].*)
 *   <  abc     ([-0].*|a[-a].*|ab[-b].*)
 *              ([^a-].*|a[^b-].*|ab[^c-].*)
 *   <= abc     ([-0].*|a[-a].*|ab[-b].*|abc)
 *              ([^a-].*|a[^b-].*|ab[^c-].*|abc)
 */
static int string_relation (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
                            const char **term_sub, char *term_dict,
                            oid_value attributeSet,
                            int reg_type, int space_split, char *term_dst)
{
    AttrType relation;
    int relation_value;
    int i;
    char *term_tmp = term_dict + strlen(term_dict);
    /* escaped term as produced by term_100; a backslash plus the next
       character count as one unit (see string_rel_add_char) */
    char term_component[2*IT_MAX_WORD+20];

    attr_init (&relation, zapt, 2);
    relation_value = attr_find (&relation, NULL);

    logf (LOG_DEBUG, "string relation value=%d", relation_value);
    switch (relation_value)
    {
    case 1:
        if (!term_100 (zh->reg->zebra_maps, reg_type,
                       term_sub, term_component,
                       space_split, term_dst))
            return 0;
        logf (LOG_DEBUG, "Relation <");
        
        *term_tmp++ = '(';
        /* one alternative per character: prefix before it, then a
           negated class built from that character */
        for (i = 0; term_component[i]; )
        {
            int j = 0;

            if (i)
                *term_tmp++ = '|';
            while (j < i)
                string_rel_add_char (&term_tmp, term_component, &j);

            *term_tmp++ = '[';

            *term_tmp++ = '^';
            string_rel_add_char (&term_tmp, term_component, &i);
            *term_tmp++ = '-';

            *term_tmp++ = ']';
            *term_tmp++ = '.';
            *term_tmp++ = '*';

            /* stop adding alternatives once the expression is long */
            if ((term_tmp - term_dict) > IT_MAX_WORD)
                break;
        }
        *term_tmp++ = ')';
        *term_tmp = '\0';
        break;
    case 2:
        if (!term_100 (zh->reg->zebra_maps, reg_type,
                       term_sub, term_component,
                       space_split, term_dst))
            return 0;
        logf (LOG_DEBUG, "Relation <=");

        *term_tmp++ = '(';
        for (i = 0; term_component[i]; )
        {
            int j = 0;

            while (j < i)
                string_rel_add_char (&term_tmp, term_component, &j);
            *term_tmp++ = '[';

            *term_tmp++ = '^';
            string_rel_add_char (&term_tmp, term_component, &i);
            *term_tmp++ = '-';

            *term_tmp++ = ']';
            *term_tmp++ = '.';
            *term_tmp++ = '*';

            *term_tmp++ = '|';

            if ((term_tmp - term_dict) > IT_MAX_WORD)
                break;
        }
        /* final alternative: the term itself (the "equal" part) */
        for (i = 0; term_component[i]; )
            string_rel_add_char (&term_tmp, term_component, &i);
        *term_tmp++ = ')';
        *term_tmp = '\0';
        break;
    case 5:
        if (!term_100 (zh->reg->zebra_maps, reg_type,
                       term_sub, term_component, space_split, term_dst))
            return 0;
        logf (LOG_DEBUG, "Relation >");

        *term_tmp++ = '(';
        for (i = 0; term_component[i];)
        {
            int j = 0;

            while (j < i)
                string_rel_add_char (&term_tmp, term_component, &j);
            *term_tmp++ = '[';
            
            *term_tmp++ = '^';
            *term_tmp++ = '-';
            string_rel_add_char (&term_tmp, term_component, &i);

            *term_tmp++ = ']';
            *term_tmp++ = '.';
            *term_tmp++ = '*';

            *term_tmp++ = '|';

            if ((term_tmp - term_dict) > IT_MAX_WORD)
                break;
        }
        /* final alternative: the term followed by at least one character */
        for (i = 0; term_component[i];)
            string_rel_add_char (&term_tmp, term_component, &i);
        *term_tmp++ = '.';
        *term_tmp++ = '+';
        *term_tmp++ = ')';
        *term_tmp = '\0';
        break;
    case 4:
        if (!term_100 (zh->reg->zebra_maps, reg_type, term_sub,
                       term_component, space_split, term_dst))
            return 0;
        logf (LOG_DEBUG, "Relation >=");

        *term_tmp++ = '(';
        for (i = 0; term_component[i];)
        {
            int j = 0;

            if (i)
                *term_tmp++ = '|';
            while (j < i)
                string_rel_add_char (&term_tmp, term_component, &j);
            *term_tmp++ = '[';

            /* a following byte exists: negated class; otherwise (last
               byte) an open range starting at the character itself */
            if (term_component[i+1])
            {
                *term_tmp++ = '^';
                *term_tmp++ = '-';
                string_rel_add_char (&term_tmp, term_component, &i);
            }
            else
            {
                string_rel_add_char (&term_tmp, term_component, &i);
                *term_tmp++ = '-';
            }
            *term_tmp++ = ']';
            *term_tmp++ = '.';
            *term_tmp++ = '*';

            if ((term_tmp - term_dict) > IT_MAX_WORD)
                break;
        }
        *term_tmp++ = ')';
        *term_tmp = '\0';
        break;
    case 3:
    default:
        logf (LOG_DEBUG, "Relation =");
        if (!term_100 (zh->reg->zebra_maps, reg_type, term_sub,
                       term_component, space_split, term_dst))
            return 0;
        /* term_tmp still points at term_dict's terminating NUL here */
        strcat (term_tmp, "(");
        strcat (term_tmp, term_component);
        strcat (term_tmp, ")");
    }
    return 1;
}
871
872 static int string_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
873                         const char **term_sub, 
874                         oid_value attributeSet, NMEM stream,
875                         struct grep_info *grep_info,
876                         int reg_type, int complete_flag,
877                         int num_bases, char **basenames,
878                         char *term_dst, int xpath_use);
879
880 static RSET term_trunc (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
881                         const char **term_sub, 
882                         oid_value attributeSet, NMEM stream,
883                         struct grep_info *grep_info,
884                         int reg_type, int complete_flag,
885                         int num_bases, char **basenames,
886                         char *term_dst,
887                         const char *rank_type, int xpath_use,
888                         NMEM rset_nmem)
889 {
890     int r;
891     grep_info->isam_p_indx = 0;
892     r = string_term (zh, zapt, term_sub, attributeSet, stream, grep_info,
893                      reg_type, complete_flag, num_bases, basenames,
894                      term_dst, xpath_use);
895     if (r < 1)
896         return 0;
897     logf (LOG_DEBUG, "term: %s", term_dst);
898     return rset_trunc (zh, grep_info->isam_p_buf,
899                        grep_info->isam_p_indx, term_dst,
900                        strlen(term_dst), rank_type, 1 /* preserve pos */,
901                        zapt->term->which, rset_nmem,
902                        key_it_ctrl,key_it_ctrl->scope);
903 }
904
905
906 static int string_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
907                         const char **term_sub, 
908                         oid_value attributeSet, NMEM stream,
909                         struct grep_info *grep_info,
910                         int reg_type, int complete_flag,
911                         int num_bases, char **basenames,
912                         char *term_dst, int xpath_use)
913 {
914     char term_dict[2*IT_MAX_WORD+4000];
915     int j, r, base_no;
916     AttrType truncation;
917     int truncation_value;
918     AttrType use;
919     int use_value;
920     const char *use_string = 0;
921     oid_value curAttributeSet = attributeSet;
922     const char *termp;
923     struct rpn_char_map_info rcmi;
924     int space_split = complete_flag ? 0 : 1;
925
926     int bases_ok = 0;     /* no of databases with OK attribute */
927     int errCode = 0;      /* err code (if any is not OK) */
928     char *errString = 0;  /* addinfo */
929
930     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
931     attr_init (&use, zapt, 1);
932     use_value = attr_find_ex (&use, &curAttributeSet, &use_string);
933     logf (LOG_DEBUG, "string_term, use value %d", use_value);
934     attr_init (&truncation, zapt, 5);
935     truncation_value = attr_find (&truncation, NULL);
936     logf (LOG_DEBUG, "truncation value %d", truncation_value);
937
938     if (use_value == -1)    /* no attribute - assumy "any" */
939         use_value = 1016;
940     for (base_no = 0; base_no < num_bases; base_no++)
941     {
942         attent attp;
943         data1_local_attribute id_xpath_attr;
944         data1_local_attribute *local_attr;
945         int max_pos, prefix_len = 0;
946
947         termp = *term_sub;
948
949         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
950         {
951             zh->errCode = 109; /* Database unavailable */
952             zh->errString = basenames[base_no];
953             return -1;
954         }
955         if (xpath_use > 0 && use_value == -2) 
956         {
957             use_value = xpath_use;
958             attp.local_attributes = &id_xpath_attr;
959             attp.attset_ordinal = VAL_IDXPATH;
960             id_xpath_attr.next = 0;
961             id_xpath_attr.local = use_value;
962         }
963         else if (curAttributeSet == VAL_IDXPATH)
964         {
965             attp.local_attributes = &id_xpath_attr;
966             attp.attset_ordinal = VAL_IDXPATH;
967             id_xpath_attr.next = 0;
968             id_xpath_attr.local = use_value;
969         }
970         else
971         {
972             if ((r=att_getentbyatt (zh, &attp, curAttributeSet, use_value,
973                                             use_string)))
974             {
975                 logf (LOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
976                       curAttributeSet, use_value, r);
977                 if (r == -1)
978                 {
979                     /* set was found, but value wasn't defined */
980                     errCode = 114;
981                     if (use_string)
982                         errString = nmem_strdup(stream, use_string);
983                     else
984                     {
985                         char val_str[32];
986                         sprintf (val_str, "%d", use_value);
987                         errString = nmem_strdup (stream, val_str);
988                     }
989                 }
990                 else
991                 {
992                     int oid[OID_SIZE];
993                     struct oident oident;
994                     
995                     oident.proto = PROTO_Z3950;
996                     oident.oclass = CLASS_ATTSET;
997                     oident.value = curAttributeSet;
998                     oid_ent_to_oid (&oident, oid);
999                     
1000                     errCode = 121;
1001                     errString = nmem_strdup (stream, oident.desc);
1002                 }
1003                 continue;
1004             }
1005         }
1006         for (local_attr = attp.local_attributes; local_attr;
1007              local_attr = local_attr->next)
1008         {
1009             int ord;
1010             char ord_buf[32];
1011             int i, ord_len;
1012             
1013             ord = zebraExplain_lookupSU (zh->reg->zei, attp.attset_ordinal,
1014                                          local_attr->local);
1015             if (ord < 0)
1016                 continue;
1017             if (prefix_len)
1018                 term_dict[prefix_len++] = '|';
1019             else
1020                 term_dict[prefix_len++] = '(';
1021             
1022             ord_len = key_SU_encode (ord, ord_buf);
1023             for (i = 0; i<ord_len; i++)
1024             {
1025                 term_dict[prefix_len++] = 1;
1026                 term_dict[prefix_len++] = ord_buf[i];
1027             }
1028         }
1029         if (!prefix_len)
1030         {
1031 #if 1
1032             bases_ok++;
1033 #else
1034             char val_str[32];
1035             sprintf (val_str, "%d", use_value);
1036             errCode = 114;
1037             errString = nmem_strdup (stream, val_str);
1038 #endif
1039             continue;
1040         }
1041         bases_ok++; /* this has OK attributes */
1042
1043         term_dict[prefix_len++] = ')';
1044         term_dict[prefix_len++] = 1;
1045         term_dict[prefix_len++] = reg_type;
1046         logf (LOG_DEBUG, "reg_type = %d", term_dict[prefix_len-1]);
1047         term_dict[prefix_len] = '\0';
1048         j = prefix_len;
1049         switch (truncation_value)
1050         {
1051         case -1:         /* not specified */
1052         case 100:        /* do not truncate */
1053             if (!string_relation (zh, zapt, &termp, term_dict,
1054                                   attributeSet,
1055                                   reg_type, space_split, term_dst))
1056                 return 0;
1057             logf (LOG_DEBUG, "dict_lookup_grep: %s", term_dict+prefix_len);
1058             r = dict_lookup_grep (zh->reg->dict, term_dict, 0,
1059                                   grep_info, &max_pos, 0, grep_handle);
1060             if (r)
1061                 logf (LOG_WARN, "dict_lookup_grep fail %d", r);
1062             break;
1063         case 1:          /* right truncation */
1064             term_dict[j++] = '(';
1065             if (!term_100 (zh->reg->zebra_maps, reg_type,
1066                            &termp, term_dict + j, space_split, term_dst))
1067                 return 0;
1068             strcat (term_dict, ".*)");
1069             dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info,
1070                               &max_pos, 0, grep_handle);
1071             break;
1072         case 2:          /* keft truncation */
1073             term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1074             if (!term_100 (zh->reg->zebra_maps, reg_type,
1075                            &termp, term_dict + j, space_split, term_dst))
1076                 return 0;
1077             strcat (term_dict, ")");
1078             dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info,
1079                               &max_pos, 0, grep_handle);
1080             break;
1081         case 3:          /* left&right truncation */
1082             term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1083             if (!term_100 (zh->reg->zebra_maps, reg_type,
1084                            &termp, term_dict + j, space_split, term_dst))
1085                 return 0;
1086             strcat (term_dict, ".*)");
1087             dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info,
1088                               &max_pos, 0, grep_handle);
1089             break;
1090             zh->errCode = 120;
1091             return -1;
1092         case 101:        /* process # in term */
1093             term_dict[j++] = '(';
1094             if (!term_101 (zh->reg->zebra_maps, reg_type,
1095                            &termp, term_dict + j, space_split, term_dst))
1096                 return 0;
1097             strcat (term_dict, ")");
1098             r = dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info,
1099                                   &max_pos, 0, grep_handle);
1100             if (r)
1101                 logf (LOG_WARN, "dict_lookup_grep err, trunc=#: %d", r);
1102             break;
1103         case 102:        /* Regexp-1 */
1104             term_dict[j++] = '(';
1105             if (!term_102 (zh->reg->zebra_maps, reg_type,
1106                            &termp, term_dict + j, space_split, term_dst))
1107                 return 0;
1108             strcat (term_dict, ")");
1109             logf (LOG_DEBUG, "Regexp-1 tolerance=%d", r);
1110             r = dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info,
1111                                   &max_pos, 0, grep_handle);
1112             if (r)
1113                 logf (LOG_WARN, "dict_lookup_grep err, trunc=regular: %d",
1114                       r);
1115             break;
1116         case 103:       /* Regexp-2 */
1117             r = 1;
1118             term_dict[j++] = '(';
1119             if (!term_103 (zh->reg->zebra_maps, reg_type,
1120                            &termp, term_dict + j, &r, space_split, term_dst))
1121                 return 0;
1122             strcat (term_dict, ")");
1123             logf (LOG_DEBUG, "Regexp-2 tolerance=%d", r);
1124             r = dict_lookup_grep (zh->reg->dict, term_dict, r, grep_info,
1125                                   &max_pos, 2, grep_handle);
1126             if (r)
1127                 logf (LOG_WARN, "dict_lookup_grep err, trunc=eregular: %d",
1128                       r);
1129             break;
1130         case 104:        /* process # and ! in term */
1131             term_dict[j++] = '(';
1132             if (!term_104 (zh->reg->zebra_maps, reg_type,
1133                            &termp, term_dict + j, space_split, term_dst))
1134                 return 0;
1135             strcat (term_dict, ")");
1136             r = dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info,
1137                                   &max_pos, 0, grep_handle);
1138             if (r)
1139                 logf (LOG_WARN, "dict_lookup_grep err, trunc=#/!: %d", r);
1140             break;
1141         case 105:        /* process * and ! in term */
1142             term_dict[j++] = '(';
1143             if (!term_105 (zh->reg->zebra_maps, reg_type,
1144                            &termp, term_dict + j, space_split, term_dst, 1))
1145                 return 0;
1146             strcat (term_dict, ")");
1147             r = dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info,
1148                                   &max_pos, 0, grep_handle);
1149             if (r)
1150                 logf (LOG_WARN, "dict_lookup_grep err, trunc=*/!: %d", r);
1151             break;
1152         case 106:        /* process * and ! in term */
1153             term_dict[j++] = '(';
1154             if (!term_105 (zh->reg->zebra_maps, reg_type,
1155                            &termp, term_dict + j, space_split, term_dst, 0))
1156                 return 0;
1157             strcat (term_dict, ")");
1158             r = dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info,
1159                                   &max_pos, 0, grep_handle);
1160             if (r)
1161                 logf (LOG_WARN, "dict_lookup_grep err, trunc=*/!: %d", r);
1162             break;
1163         }
1164     }
1165     if (!bases_ok)
1166     {
1167         zh->errCode = errCode;
1168         zh->errString = errString;
1169         return -1;
1170     }
1171     *term_sub = termp;
1172     logf (LOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1173     return 1;
1174 }
1175
1176
1177 /* convert APT search term to UTF8 */
1178 static int zapt_term_to_utf8 (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1179                               char *termz)
1180 {
1181     size_t sizez;
1182     Z_Term *term = zapt->term;
1183
1184     switch (term->which)
1185     {
1186     case Z_Term_general:
1187         if (zh->iconv_to_utf8 != 0)
1188         {
1189             char *inbuf = term->u.general->buf;
1190             size_t inleft = term->u.general->len;
1191             char *outbuf = termz;
1192             size_t outleft = IT_MAX_WORD-1;
1193             size_t ret;
1194
1195             ret = yaz_iconv(zh->iconv_to_utf8, &inbuf, &inleft,
1196                         &outbuf, &outleft);
1197             if (ret == (size_t)(-1))
1198             {
1199                 ret = yaz_iconv(zh->iconv_to_utf8, 0, 0, 0, 0);
1200                 zh->errCode = 125;
1201                 return -1;
1202             }
1203             *outbuf = 0;
1204         }
1205         else
1206         {
1207             sizez = term->u.general->len;
1208             if (sizez > IT_MAX_WORD-1)
1209                 sizez = IT_MAX_WORD-1;
1210             memcpy (termz, term->u.general->buf, sizez);
1211             termz[sizez] = '\0';
1212         }
1213         break;
1214     case Z_Term_characterString:
1215         sizez = strlen(term->u.characterString);
1216         if (sizez > IT_MAX_WORD-1)
1217             sizez = IT_MAX_WORD-1;
1218         memcpy (termz, term->u.characterString, sizez);
1219         termz[sizez] = '\0';
1220         break;
1221     default:
1222         zh->errCode = 124;
1223         return -1;
1224     }
1225     return 0;
1226 }
1227
1228 /* convert APT SCAN term to internal cmap */
1229 static int trans_scan_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1230                             char *termz, int reg_type)
1231 {
1232     char termz0[IT_MAX_WORD];
1233
1234     if (zapt_term_to_utf8(zh, zapt, termz0))
1235         return -1;    /* error */
1236     else
1237     {
1238         const char **map;
1239         const char *cp = (const char *) termz0;
1240         const char *cp_end = cp + strlen(cp);
1241         const char *src;
1242         int i = 0;
1243         const char *space_map = NULL;
1244         int len;
1245             
1246         while ((len = (cp_end - cp)) > 0)
1247         {
1248             map = zebra_maps_input (zh->reg->zebra_maps, reg_type, &cp, len, 0);
1249             if (**map == *CHR_SPACE)
1250                 space_map = *map;
1251             else
1252             {
1253                 if (i && space_map)
1254                     for (src = space_map; *src; src++)
1255                         termz[i++] = *src;
1256                 space_map = NULL;
1257                 for (src = *map; *src; src++)
1258                     termz[i++] = *src;
1259             }
1260         }
1261         termz[i] = '\0';
1262     }
1263     return 0;
1264 }
1265
1266 char *normalize_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1267                      const char *termz, NMEM stream, unsigned reg_id)
1268 {
1269     WRBUF wrbuf = 0;
1270     AttrType truncation;
1271     int truncation_value;
1272     char *ex_list = 0;
1273
1274     attr_init (&truncation, zapt, 5);
1275     truncation_value = attr_find (&truncation, NULL);
1276
1277     switch (truncation_value)
1278     {
1279     default:
1280         ex_list = "";
1281         break;
1282     case 101:
1283         ex_list = "#";
1284         break;
1285     case 102:
1286     case 103:
1287         ex_list = 0;
1288         break;
1289     case 104:
1290         ex_list = "!#";
1291         break;
1292     case 105:
1293         ex_list = "!*";
1294         break;
1295     }
1296     if (ex_list)
1297         wrbuf = zebra_replace(zh->reg->zebra_maps, reg_id, ex_list,
1298                               termz, strlen(termz));
1299     if (!wrbuf)
1300         return nmem_strdup(stream, termz);
1301     else
1302     {
1303         char *buf = (char*) nmem_malloc (stream, wrbuf_len(wrbuf)+1);
1304         memcpy (buf, wrbuf_buf(wrbuf), wrbuf_len(wrbuf));
1305         buf[wrbuf_len(wrbuf)] = '\0';
1306         return buf;
1307     }
1308 }
1309
1310 static void grep_info_delete (struct grep_info *grep_info)
1311 {
1312 #ifdef TERM_COUNT
1313     xfree(grep_info->term_no);
1314 #endif
1315     xfree (grep_info->isam_p_buf);
1316 }
1317
1318 static int grep_info_prepare (ZebraHandle zh,
1319                               Z_AttributesPlusTerm *zapt,
1320                               struct grep_info *grep_info,
1321                               int reg_type,
1322                               NMEM stream)
1323 {
1324     AttrType termset;
1325     int termset_value_numeric;
1326     const char *termset_value_string;
1327
1328 #ifdef TERM_COUNT
1329     grep_info->term_no = 0;
1330 #endif
1331     grep_info->isam_p_size = 0;
1332     grep_info->isam_p_buf = NULL;
1333     grep_info->zh = zh;
1334     grep_info->reg_type = reg_type;
1335     grep_info->termset = 0;
1336
1337     if (!zapt)
1338         return 0;
1339     attr_init (&termset, zapt, 8);
1340     termset_value_numeric =
1341         attr_find_ex (&termset, NULL, &termset_value_string);
1342     if (termset_value_numeric != -1)
1343     {
1344         char resname[32];
1345         const char *termset_name = 0;
1346         if (termset_value_numeric != -2)
1347         {
1348     
1349             sprintf (resname, "%d", termset_value_numeric);
1350             termset_name = resname;
1351         }
1352         else
1353             termset_name = termset_value_string;
1354         logf (LOG_LOG, "creating termset set %s", termset_name);
1355         grep_info->termset = resultSetAdd (zh, termset_name, 1);
1356         if (!grep_info->termset)
1357         {
1358             zh->errCode = 128;
1359             zh->errString = nmem_strdup (stream, termset_name);
1360             return -1;
1361         }
1362     }
1363     return 0;
1364 }
1365                                
1366
1367 static RSET rpn_search_APT_phrase (ZebraHandle zh,
1368                                    Z_AttributesPlusTerm *zapt,
1369                                    const char *termz_org,
1370                                    oid_value attributeSet,
1371                                    NMEM stream,
1372                                    int reg_type, int complete_flag,
1373                                    const char *rank_type, int xpath_use,
1374                                    int num_bases, char **basenames, 
1375                                    NMEM rset_nmem)
1376 {
1377     char term_dst[IT_MAX_WORD+1];
1378     RSET rset[60], result;
1379     int rset_no = 0;
1380     struct grep_info grep_info;
1381     char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type);
1382     const char *termp = termz;
1383
1384     *term_dst = 0;
1385     if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream))
1386         return 0;
1387     while (1)
1388     { 
1389         logf (LOG_DEBUG, "APT_phrase termp=%s", termp);
1390         rset[rset_no] = term_trunc (zh, zapt, &termp, attributeSet,
1391                                     stream, &grep_info,
1392                                     reg_type, complete_flag,
1393                                     num_bases, basenames,
1394                                     term_dst, rank_type,
1395                                     xpath_use,rset_nmem);
1396         if (!rset[rset_no])
1397             break;
1398         if (++rset_no >= (int) (sizeof(rset)/sizeof(*rset)))
1399             break;
1400     }
1401     grep_info_delete (&grep_info);
1402     if (rset_no == 0)
1403         return rsnull_create (rset_nmem,key_it_ctrl); 
1404     else if (rset_no == 1)
1405         return (rset[0]);
1406     else
1407         result = rsprox_create( rset_nmem, key_it_ctrl, key_it_ctrl->scope,
1408                        rset_no, rset,
1409                        1 /* ordered */, 0 /* exclusion */,
1410                        3 /* relation */, 1 /* distance */);
1411     return result;
1412 }
1413
1414 static RSET rpn_search_APT_or_list (ZebraHandle zh,
1415                                     Z_AttributesPlusTerm *zapt,
1416                                     const char *termz_org,
1417                                     oid_value attributeSet,
1418                                     NMEM stream,
1419                                     int reg_type, int complete_flag,
1420                                     const char *rank_type,
1421                                     int xpath_use,
1422                                     int num_bases, char **basenames,
1423                                     NMEM rset_nmem)
1424 {
1425     char term_dst[IT_MAX_WORD+1];
1426     RSET rset[60];
1427     int rset_no = 0;
1428     struct grep_info grep_info;
1429     char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type);
1430     const char *termp = termz;
1431
1432     if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream))
1433         return 0;
1434     while (1)
1435     { 
1436         logf (LOG_DEBUG, "APT_or_list termp=%s", termp);
1437         rset[rset_no] = term_trunc (zh, zapt, &termp, attributeSet,
1438                                     stream, &grep_info,
1439                                     reg_type, complete_flag,
1440                                     num_bases, basenames,
1441                                     term_dst, rank_type,
1442                                     xpath_use,rset_nmem);
1443         if (!rset[rset_no])
1444             break;
1445         if (++rset_no >= (int) (sizeof(rset)/sizeof(*rset)))
1446             break;
1447     }
1448     grep_info_delete (&grep_info);
1449     if (rset_no == 0)
1450         return rsnull_create (rset_nmem,key_it_ctrl);  
1451     return rsmultior_create(rset_nmem, key_it_ctrl,key_it_ctrl->scope,
1452                             rset_no, rset);
1453 }
1454
1455 static RSET rpn_search_APT_and_list (ZebraHandle zh,
1456                                      Z_AttributesPlusTerm *zapt,
1457                                      const char *termz_org,
1458                                      oid_value attributeSet,
1459                                      NMEM stream,
1460                                      int reg_type, int complete_flag,
1461                                      const char *rank_type, 
1462                                      int xpath_use,
1463                                      int num_bases, char **basenames,
1464                                      NMEM rset_nmem)
1465 {
1466     char term_dst[IT_MAX_WORD+1];
1467     RSET rset[60]; /* FIXME - bug 160 - should be dynamic somehow */
1468     int rset_no = 0;
1469     struct grep_info grep_info;
1470     char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type);
1471     const char *termp = termz;
1472
1473     if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream))
1474         return 0;
1475     while (1)
1476     { 
1477         logf (LOG_DEBUG, "APT_and_list termp=%s", termp);
1478         rset[rset_no] = term_trunc (zh, zapt, &termp, attributeSet,
1479                                     stream, &grep_info,
1480                                     reg_type, complete_flag,
1481                                     num_bases, basenames,
1482                                     term_dst, rank_type,
1483                                     xpath_use, rset_nmem);
1484         if (!rset[rset_no])
1485             break;
1486         assert (rset[rset_no]);
1487         if (++rset_no >= (int) (sizeof(rset)/sizeof(*rset)))
1488             break;
1489     }
1490     grep_info_delete (&grep_info);
1491     if (rset_no == 0)
1492         return rsnull_create (rset_nmem,key_it_ctrl); 
1493
1494     return rsmultiand_create( rset_nmem, key_it_ctrl, key_it_ctrl->scope,
1495                               rset_no, rset);
1496 }
1497
1498 static int numeric_relation (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1499                              const char **term_sub,
1500                              char *term_dict,
1501                              oid_value attributeSet,
1502                              struct grep_info *grep_info,
1503                              int *max_pos,
1504                              int reg_type,
1505                              char *term_dst)
1506 {
1507     AttrType relation;
1508     int relation_value;
1509     int term_value;
1510     int r;
1511     char *term_tmp = term_dict + strlen(term_dict);
1512
1513     attr_init (&relation, zapt, 2);
1514     relation_value = attr_find (&relation, NULL);
1515
1516     logf (LOG_DEBUG, "numeric relation value=%d", relation_value);
1517
1518     if (!term_100 (zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1519                    term_dst))
1520         return 0;
1521     term_value = atoi (term_tmp);
1522     switch (relation_value)
1523     {
1524     case 1:
1525         logf (LOG_DEBUG, "Relation <");
1526         gen_regular_rel (term_tmp, term_value-1, 1);
1527         break;
1528     case 2:
1529         logf (LOG_DEBUG, "Relation <=");
1530         gen_regular_rel (term_tmp, term_value, 1);
1531         break;
1532     case 4:
1533         logf (LOG_DEBUG, "Relation >=");
1534         gen_regular_rel (term_tmp, term_value, 0);
1535         break;
1536     case 5:
1537         logf (LOG_DEBUG, "Relation >");
1538         gen_regular_rel (term_tmp, term_value+1, 0);
1539         break;
1540     case 3:
1541     default:
1542         logf (LOG_DEBUG, "Relation =");
1543         sprintf (term_tmp, "(0*%d)", term_value);
1544     }
1545     logf (LOG_DEBUG, "dict_lookup_grep: %s", term_tmp);
1546     r = dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info, max_pos,
1547                           0, grep_handle);
1548     if (r)
1549         logf (LOG_WARN, "dict_lookup_grep fail, rel=gt: %d", r);
1550     logf (LOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1551     return 1;
1552 }
1553
1554 static int numeric_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1555                          const char **term_sub, 
1556                          oid_value attributeSet, struct grep_info *grep_info,
1557                          int reg_type, int complete_flag,
1558                          int num_bases, char **basenames,
1559                          char *term_dst, int xpath_use, NMEM stream)
1560 {
1561     char term_dict[2*IT_MAX_WORD+2];
1562     int r, base_no;
1563     AttrType use;
1564     int use_value;
1565     const char *use_string = 0;
1566     oid_value curAttributeSet = attributeSet;
1567     const char *termp;
1568     struct rpn_char_map_info rcmi;
1569
1570     int bases_ok = 0;     /* no of databases with OK attribute */
1571     int errCode = 0;      /* err code (if any is not OK) */
1572     char *errString = 0;  /* addinfo */
1573
1574     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1575     attr_init (&use, zapt, 1);
1576     use_value = attr_find_ex (&use, &curAttributeSet, &use_string);
1577
1578     if (use_value == -1)
1579         use_value = 1016;
1580
1581     for (base_no = 0; base_no < num_bases; base_no++)
1582     {
1583         attent attp;
1584         data1_local_attribute id_xpath_attr;
1585         data1_local_attribute *local_attr;
1586         int max_pos, prefix_len = 0;
1587
1588         termp = *term_sub;
1589         if (use_value == -2)  /* string attribute (assume IDXPATH/any) */
1590         {
1591             use_value = xpath_use;
1592             attp.local_attributes = &id_xpath_attr;
1593             attp.attset_ordinal = VAL_IDXPATH;
1594             id_xpath_attr.next = 0;
1595             id_xpath_attr.local = use_value;
1596         }
1597         else if (curAttributeSet == VAL_IDXPATH)
1598         {
1599             attp.local_attributes = &id_xpath_attr;
1600             attp.attset_ordinal = VAL_IDXPATH;
1601             id_xpath_attr.next = 0;
1602             id_xpath_attr.local = use_value;
1603         }
1604         else
1605         {
1606             if ((r=att_getentbyatt (zh, &attp, curAttributeSet, use_value,
1607                                             use_string)))
1608             {
1609                 logf (LOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
1610                       curAttributeSet, use_value, r);
1611                 if (r == -1)
1612                 {
1613                     char val_str[32];
1614                     sprintf (val_str, "%d", use_value);
1615                     errString = nmem_strdup (stream, val_str);
1616                     errCode = 114;
1617                 }
1618                 else
1619                     errCode = 121;
1620                 continue;
1621             }
1622         }
1623         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1624         {
1625             zh->errCode = 109; /* Database unavailable */
1626             zh->errString = basenames[base_no];
1627             return -1;
1628         }
1629         for (local_attr = attp.local_attributes; local_attr;
1630              local_attr = local_attr->next)
1631         {
1632             int ord;
1633             char ord_buf[32];
1634             int i, ord_len;
1635
1636             ord = zebraExplain_lookupSU (zh->reg->zei, attp.attset_ordinal,
1637                                           local_attr->local);
1638             if (ord < 0)
1639                 continue;
1640             if (prefix_len)
1641                 term_dict[prefix_len++] = '|';
1642             else
1643                 term_dict[prefix_len++] = '(';
1644
1645             ord_len = key_SU_encode (ord, ord_buf);
1646             for (i = 0; i<ord_len; i++)
1647             {
1648                 term_dict[prefix_len++] = 1;
1649                 term_dict[prefix_len++] = ord_buf[i];
1650             }
1651         }
1652         if (!prefix_len)
1653         {
1654             char val_str[32];
1655             sprintf (val_str, "%d", use_value);
1656             errCode = 114;
1657             errString = nmem_strdup (stream, val_str);
1658             continue;
1659         }
1660         bases_ok++;
1661         term_dict[prefix_len++] = ')';        
1662         term_dict[prefix_len++] = 1;
1663         term_dict[prefix_len++] = reg_type;
1664         logf (LOG_DEBUG, "reg_type = %d", term_dict[prefix_len-1]);
1665         term_dict[prefix_len] = '\0';
1666         if (!numeric_relation (zh, zapt, &termp, term_dict,
1667                                attributeSet, grep_info, &max_pos, reg_type,
1668                                term_dst))
1669             return 0;
1670     }
1671     if (!bases_ok)
1672     {
1673         zh->errCode = errCode;
1674         zh->errString = errString;
1675         return -1;
1676     }
1677     *term_sub = termp;
1678     logf (LOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1679     return 1;
1680 }
1681
1682 static RSET rpn_search_APT_numeric (ZebraHandle zh,
1683                                     Z_AttributesPlusTerm *zapt,
1684                                     const char *termz,
1685                                     oid_value attributeSet,
1686                                     NMEM stream,
1687                                     int reg_type, int complete_flag,
1688                                     const char *rank_type, int xpath_use,
1689                                     int num_bases, char **basenames,
1690                                     NMEM rset_nmem)
1691 {
1692     char term_dst[IT_MAX_WORD+1];
1693     const char *termp = termz;
1694     RSET rset[60]; /* FIXME - hard-coded magic number */
1695     int  r, rset_no = 0;
1696     struct grep_info grep_info;
1697
1698     logf (LOG_DEBUG, "APT_numeric t='%s'",termz);
1699     if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream))
1700         return 0;
1701     while (1)
1702     { 
1703         logf (LOG_DEBUG, "APT_numeric termp=%s", termp);
1704         grep_info.isam_p_indx = 0;
1705         r = numeric_term (zh, zapt, &termp, attributeSet, &grep_info,
1706                           reg_type, complete_flag, num_bases, basenames,
1707                           term_dst, xpath_use,
1708                           stream);
1709         if (r < 1)
1710             break;
1711         logf (LOG_DEBUG, "term: %s", term_dst);
1712         rset[rset_no] = rset_trunc (zh, grep_info.isam_p_buf,
1713                                     grep_info.isam_p_indx, term_dst,
1714                                     strlen(term_dst), rank_type,
1715                                     0 /* preserve position */,
1716                                     zapt->term->which, rset_nmem, 
1717                                     key_it_ctrl,key_it_ctrl->scope);
1718         assert (rset[rset_no]);
1719         if (++rset_no >= (int) (sizeof(rset)/sizeof(*rset)))
1720             break;
1721     }
1722     grep_info_delete (&grep_info);
1723     if (rset_no == 0)
1724         return rsnull_create (rset_nmem,key_it_ctrl);
1725     if (rset_no == 1)
1726         return rset[0];
1727     return rsmultiand_create(rset_nmem,key_it_ctrl,key_it_ctrl->scope,
1728                rset_no, rset);
1729 }
1730
1731 static RSET rpn_search_APT_local (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1732                                   const char *termz,
1733                                   oid_value attributeSet,
1734                                   NMEM stream,
1735                                   const char *rank_type, NMEM rset_nmem)
1736 {
1737     RSET result;
1738     RSFD rsfd;
1739     struct it_key key;
1740     int sys;
1741     result = rstemp_create( rset_nmem,key_it_ctrl,key_it_ctrl->scope,
1742                      res_get (zh->res, "setTmpDir") );
1743     rsfd = rset_open (result, RSETF_WRITE);
1744
1745     sys = atoi(termz);
1746     if (sys <= 0)
1747         sys = 1;
1748     key.mem[0] = sys;
1749     key.mem[1] = 1;
1750     key.len = 2;
1751     rset_write (rsfd, &key);
1752     rset_close (rsfd);
1753     return result;
1754 }
1755
1756 static RSET rpn_sort_spec (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1757                            oid_value attributeSet, NMEM stream,
1758                            Z_SortKeySpecList *sort_sequence,
1759                            const char *rank_type)
1760 {
1761     int i;
1762     int sort_relation_value;
1763     AttrType sort_relation_type;
1764     int use_value;
1765     AttrType use_type;
1766     Z_SortKeySpec *sks;
1767     Z_SortKey *sk;
1768     Z_AttributeElement *ae;
1769     int oid[OID_SIZE];
1770     oident oe;
1771     char termz[20];
1772     
1773     attr_init (&sort_relation_type, zapt, 7);
1774     sort_relation_value = attr_find (&sort_relation_type, &attributeSet);
1775
1776     attr_init (&use_type, zapt, 1);
1777     use_value = attr_find (&use_type, &attributeSet);
1778
1779     if (!sort_sequence->specs)
1780     {
1781         sort_sequence->num_specs = 10;
1782         sort_sequence->specs = (Z_SortKeySpec **)
1783             nmem_malloc (stream, sort_sequence->num_specs *
1784                          sizeof(*sort_sequence->specs));
1785         for (i = 0; i<sort_sequence->num_specs; i++)
1786             sort_sequence->specs[i] = 0;
1787     }
1788     if (zapt->term->which != Z_Term_general)
1789         i = 0;
1790     else
1791         i = atoi_n ((char *) zapt->term->u.general->buf,
1792                     zapt->term->u.general->len);
1793     if (i >= sort_sequence->num_specs)
1794         i = 0;
1795     sprintf (termz, "%d", i);
1796
1797     oe.proto = PROTO_Z3950;
1798     oe.oclass = CLASS_ATTSET;
1799     oe.value = attributeSet;
1800     if (!oid_ent_to_oid (&oe, oid))
1801         return 0;
1802
1803     sks = (Z_SortKeySpec *) nmem_malloc (stream, sizeof(*sks));
1804     sks->sortElement = (Z_SortElement *)
1805         nmem_malloc (stream, sizeof(*sks->sortElement));
1806     sks->sortElement->which = Z_SortElement_generic;
1807     sk = sks->sortElement->u.generic = (Z_SortKey *)
1808         nmem_malloc (stream, sizeof(*sk));
1809     sk->which = Z_SortKey_sortAttributes;
1810     sk->u.sortAttributes = (Z_SortAttributes *)
1811         nmem_malloc (stream, sizeof(*sk->u.sortAttributes));
1812
1813     sk->u.sortAttributes->id = oid;
1814     sk->u.sortAttributes->list = (Z_AttributeList *)
1815         nmem_malloc (stream, sizeof(*sk->u.sortAttributes->list));
1816     sk->u.sortAttributes->list->num_attributes = 1;
1817     sk->u.sortAttributes->list->attributes = (Z_AttributeElement **)
1818         nmem_malloc (stream, sizeof(*sk->u.sortAttributes->list->attributes));
1819     ae = *sk->u.sortAttributes->list->attributes = (Z_AttributeElement *)
1820         nmem_malloc (stream, sizeof(**sk->u.sortAttributes->list->attributes));
1821     ae->attributeSet = 0;
1822     ae->attributeType = (int *)
1823         nmem_malloc (stream, sizeof(*ae->attributeType));
1824     *ae->attributeType = 1;
1825     ae->which = Z_AttributeValue_numeric;
1826     ae->value.numeric = (int *)
1827         nmem_malloc (stream, sizeof(*ae->value.numeric));
1828     *ae->value.numeric = use_value;
1829
1830     sks->sortRelation = (int *)
1831         nmem_malloc (stream, sizeof(*sks->sortRelation));
1832     if (sort_relation_value == 1)
1833         *sks->sortRelation = Z_SortKeySpec_ascending;
1834     else if (sort_relation_value == 2)
1835         *sks->sortRelation = Z_SortKeySpec_descending;
1836     else 
1837         *sks->sortRelation = Z_SortKeySpec_ascending;
1838
1839     sks->caseSensitivity = (int *)
1840         nmem_malloc (stream, sizeof(*sks->caseSensitivity));
1841     *sks->caseSensitivity = 0;
1842
1843     sks->which = Z_SortKeySpec_null;
1844     sks->u.null = odr_nullval ();
1845     sort_sequence->specs[i] = sks;
1846     return rsnull_create (NULL,key_it_ctrl);
1847         /* FIXME - nmem?? */
1848 }
1849
1850
1851 static int parse_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1852                        oid_value attributeSet,
1853                        struct xpath_location_step *xpath, int max, NMEM mem)
1854 {
1855     oid_value curAttributeSet = attributeSet;
1856     AttrType use;
1857     const char *use_string = 0;
1858     
1859     attr_init (&use, zapt, 1);
1860     attr_find_ex (&use, &curAttributeSet, &use_string);
1861
1862     if (!use_string || *use_string != '/')
1863         return -1;
1864
1865     return zebra_parse_xpath_str(use_string, xpath, max, mem);
1866 }
1867  
1868                
1869
1870 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
1871                         int reg_type, const char *term, int use,
1872                         oid_value curAttributeSet, NMEM rset_nmem)
1873 {
1874     RSET rset;
1875     struct grep_info grep_info;
1876     char term_dict[2048];
1877     char ord_buf[32];
1878     int prefix_len = 0;
1879     int ord = zebraExplain_lookupSU (zh->reg->zei, curAttributeSet, use);
1880     int ord_len, i, r, max_pos;
1881     int term_type = Z_Term_characterString;
1882     const char *flags = "void";
1883
1884     if (grep_info_prepare (zh, 0 /* zapt */, &grep_info, '0', stream))
1885         return rsnull_create (rset_nmem,key_it_ctrl);
1886
1887     if (ord < 0)
1888         return rsnull_create (rset_nmem,key_it_ctrl);
1889     if (prefix_len)
1890         term_dict[prefix_len++] = '|';
1891     else
1892         term_dict[prefix_len++] = '(';
1893     
1894     ord_len = key_SU_encode (ord, ord_buf);
1895     for (i = 0; i<ord_len; i++)
1896     {
1897         term_dict[prefix_len++] = 1;
1898         term_dict[prefix_len++] = ord_buf[i];
1899     }
1900     term_dict[prefix_len++] = ')';
1901     term_dict[prefix_len++] = 1;
1902     term_dict[prefix_len++] = reg_type;
1903     
1904     strcpy (term_dict+prefix_len, term);
1905     
1906     grep_info.isam_p_indx = 0;
1907     r = dict_lookup_grep (zh->reg->dict, term_dict, 0,
1908                           &grep_info, &max_pos, 0, grep_handle);
1909     yaz_log (LOG_LOG, "%s %d positions", term,
1910              grep_info.isam_p_indx);
1911     rset = rset_trunc (zh, grep_info.isam_p_buf,
1912                        grep_info.isam_p_indx, term, strlen(term),
1913                        flags, 1, term_type,rset_nmem,
1914                        key_it_ctrl, key_it_ctrl->scope);
1915     grep_info_delete (&grep_info);
1916     return rset;
1917 }
1918
1919 static RSET rpn_search_xpath (ZebraHandle zh,
1920                               oid_value attributeSet,
1921                               int num_bases, char **basenames,
1922                               NMEM stream, const char *rank_type, RSET rset,
1923                               int xpath_len, struct xpath_location_step *xpath,
1924                               NMEM rset_nmem)
1925 {
1926     oid_value curAttributeSet = attributeSet;
1927     int base_no;
1928     int i;
1929
1930     if (xpath_len < 0)
1931         return rset;
1932
1933     yaz_log (LOG_LOG, "len=%d", xpath_len);
1934     for (i = 0; i<xpath_len; i++)
1935     {
1936         yaz_log (LOG_LOG, "XPATH %d %s", i, xpath[i].part);
1937
1938     }
1939
1940     curAttributeSet = VAL_IDXPATH;
1941
1942     /*
1943       //a    ->    a/.*
1944       //a/b  ->    b/a/.*
1945       /a     ->    a/
1946       /a/b   ->    b/a/
1947
1948       /      ->    none
1949
1950    a[@attr=value]/b[@other=othervalue]
1951
1952  /e/@a val      range(e/,range(@a,freetext(w,1015,val),@a),e/)
1953  /a/b val       range(b/a/,freetext(w,1016,val),b/a/)
1954  /a/b/@c val    range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
1955  /a/b[@c=y] val range(b/a/,freetext(w,1016,val),b/a/,@c=y)
1956  /a[@c=y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c=y)
1957  /a[@c=x]/b[@c=y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c=y),a/,@c=x)
1958       
1959     */
1960
1961     dict_grep_cmap (zh->reg->dict, 0, 0);
1962
1963     for (base_no = 0; base_no < num_bases; base_no++)
1964     {
1965         int level = xpath_len;
1966         int first_path = 1;
1967         
1968         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1969         {
1970             zh->errCode = 109; /* Database unavailable */
1971             zh->errString = basenames[base_no];
1972             return rset;
1973         }
1974         while (--level >= 0)
1975         {
1976             char xpath_rev[128];
1977             int i, len;
1978             RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
1979
1980             *xpath_rev = 0;
1981             len = 0;
1982             for (i = level; i >= 1; --i)
1983             {
1984                 const char *cp = xpath[i].part;
1985                 if (*cp)
1986                 {
1987                     for (;*cp; cp++)
1988                         if (*cp == '*')
1989                         {
1990                             memcpy (xpath_rev + len, "[^/]*", 5);
1991                             len += 5;
1992                         }
1993                         else if (*cp == ' ')
1994                         {
1995
1996                             xpath_rev[len++] = 1;
1997                             xpath_rev[len++] = ' ';
1998                         }
1999
2000                         else
2001                             xpath_rev[len++] = *cp;
2002                     xpath_rev[len++] = '/';
2003                 }
2004                 else if (i == 1)  /* // case */
2005                 {
2006                     xpath_rev[len++] = '.';
2007                     xpath_rev[len++] = '*';
2008                 }
2009             }
2010             xpath_rev[len] = 0;
2011
2012             if (xpath[level].predicate &&
2013                 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2014                 xpath[level].predicate->u.relation.name[0])
2015             {
2016                 WRBUF wbuf = wrbuf_alloc();
2017                 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2018                 if (xpath[level].predicate->u.relation.value)
2019                 {
2020                     const char *cp = xpath[level].predicate->u.relation.value;
2021                     wrbuf_putc(wbuf, '=');
2022                     
2023                     while (*cp)
2024                     {
2025                         if (strchr(REGEX_CHARS, *cp))
2026                             wrbuf_putc(wbuf, '\\');
2027                         wrbuf_putc(wbuf, *cp);
2028                         cp++;
2029                     }
2030                 }
2031                 wrbuf_puts(wbuf, "");
2032                 rset_attr = xpath_trunc (
2033                     zh, stream, '0', wrbuf_buf(wbuf), 3, 
2034                     curAttributeSet,rset_nmem);
2035                 wrbuf_free(wbuf, 1);
2036             } 
2037             else 
2038             {
2039                 if (!first_path)
2040                     continue;
2041             }
2042             yaz_log (LOG_LOG, "xpath_rev (%d) = %s", level, xpath_rev);
2043             if (strlen(xpath_rev))
2044             {
2045                 rset_start_tag = xpath_trunc(zh, stream, '0', 
2046                         xpath_rev, 1, curAttributeSet, rset_nmem);
2047             
2048                 rset_end_tag = xpath_trunc(zh, stream, '0', 
2049                         xpath_rev, 2, curAttributeSet, rset_nmem);
2050
2051                 /*
2052                 parms.key_size = sizeof(struct it_key);
2053                 parms.cmp = key_compare_it;
2054                 parms.rset_l = rset_start_tag;
2055                 parms.rset_m = rset;
2056                 parms.rset_r = rset_end_tag;
2057                 parms.rset_attr = rset_attr;
2058                 parms.printer = key_print_it;
2059                 rset = rset_create (rset_kind_between, &parms);
2060                 */
2061                 rset=rsbetween_create( rset_nmem,key_it_ctrl,key_it_ctrl->scope,
2062                         rset_start_tag, rset, rset_end_tag, rset_attr);
2063             }
2064             first_path = 0;
2065         }
2066     }
2067
2068     return rset;
2069 }
2070
2071
2072
2073 static RSET rpn_search_APT (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2074                             oid_value attributeSet, NMEM stream,
2075                             Z_SortKeySpecList *sort_sequence,
2076                             int num_bases, char **basenames, 
2077                             NMEM rset_nmem)
2078 {
2079     unsigned reg_id;
2080     char *search_type = NULL;
2081     char rank_type[128];
2082     int complete_flag;
2083     int sort_flag;
2084     char termz[IT_MAX_WORD+1];
2085     RSET rset = 0;
2086     int xpath_len;
2087     int xpath_use = 0;
2088     struct xpath_location_step xpath[10];
2089
2090     zebra_maps_attr (zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2091                      rank_type, &complete_flag, &sort_flag);
2092     
2093     logf (LOG_DEBUG, "reg_id=%c", reg_id);
2094     logf (LOG_DEBUG, "complete_flag=%d", complete_flag);
2095     logf (LOG_DEBUG, "search_type=%s", search_type);
2096     logf (LOG_DEBUG, "rank_type=%s", rank_type);
2097
2098     if (zapt_term_to_utf8(zh, zapt, termz))
2099         return 0;
2100
2101     if (sort_flag)
2102         return rpn_sort_spec (zh, zapt, attributeSet, stream, sort_sequence,
2103                               rank_type);
2104     xpath_len = parse_xpath(zh, zapt, attributeSet, xpath, 10, stream);
2105     if (xpath_len >= 0)
2106     {
2107         xpath_use = 1016;
2108         if (xpath[xpath_len-1].part[0] == '@')
2109             xpath_use = 1015;
2110     }
2111
2112     if (!strcmp (search_type, "phrase"))
2113     {
2114         rset = rpn_search_APT_phrase (zh, zapt, termz, attributeSet, stream,
2115                                       reg_id, complete_flag, rank_type,
2116                                       xpath_use,
2117                                       num_bases, basenames, rset_nmem);
2118     }
2119     else if (!strcmp (search_type, "and-list"))
2120     {
2121         rset = rpn_search_APT_and_list (zh, zapt, termz, attributeSet, stream,
2122                                         reg_id, complete_flag, rank_type,
2123                                         xpath_use,
2124                                         num_bases, basenames, rset_nmem);
2125     }
2126     else if (!strcmp (search_type, "or-list"))
2127     {
2128         rset = rpn_search_APT_or_list (zh, zapt, termz, attributeSet, stream,
2129                                        reg_id, complete_flag, rank_type,
2130                                        xpath_use,
2131                                        num_bases, basenames, rset_nmem);
2132     }
2133     else if (!strcmp (search_type, "local"))
2134     {
2135         rset = rpn_search_APT_local (zh, zapt, termz, attributeSet, stream,
2136                                      rank_type, rset_nmem);
2137     }
2138     else if (!strcmp (search_type, "numeric"))
2139     {
2140         rset = rpn_search_APT_numeric (zh, zapt, termz, attributeSet, stream,
2141                                        reg_id, complete_flag, rank_type,
2142                                        xpath_use,
2143                                        num_bases, basenames, rset_nmem);
2144     }
2145     else if (!strcmp (search_type, "always"))
2146     {
2147         rset = 0;
2148     }
2149     else
2150         zh->errCode = 118;
2151     return rpn_search_xpath (zh, attributeSet, num_bases, basenames,
2152                              stream, rank_type, rset, 
2153                              xpath_len, xpath, rset_nmem);
2154 }
2155
2156 static RSET rpn_search_structure (ZebraHandle zh, Z_RPNStructure *zs,
2157                                   oid_value attributeSet, 
2158                                   NMEM stream, NMEM rset_nmem,
2159                                   Z_SortKeySpecList *sort_sequence,
2160                                   int num_bases, char **basenames)
2161 {
2162     RSET r = NULL;
2163     if (zs->which == Z_RPNStructure_complex)
2164     {
2165         Z_Operator *zop = zs->u.complex->roperator;
2166         RSET rsets[2]; /* l and r argument */
2167
2168         rsets[0]=rpn_search_structure (zh, zs->u.complex->s1,
2169                                        attributeSet, stream, rset_nmem,
2170                                        sort_sequence,
2171                                        num_bases, basenames);
2172         if (rsets[0] == NULL)
2173             return NULL;
2174         rsets[1]=rpn_search_structure (zh, zs->u.complex->s2,
2175                                        attributeSet, stream, rset_nmem,
2176                                        sort_sequence,
2177                                        num_bases, basenames);
2178         if (rsets[1] == NULL)
2179         {
2180             rset_delete (rsets[0]);
2181             return NULL;
2182         }
2183
2184         switch (zop->which)
2185         {
2186         case Z_Operator_and:
2187             r=rsmultiand_create(rset_nmem, key_it_ctrl, key_it_ctrl->scope,
2188                         2, rsets);
2189             break;
2190         case Z_Operator_or:
2191             r=rsmultior_create(rset_nmem, key_it_ctrl, key_it_ctrl->scope,
2192                         2, rsets);
2193             break;
2194         case Z_Operator_and_not:
2195             r = rsbool_create_not(rset_nmem,key_it_ctrl, key_it_ctrl->scope,
2196                     rsets[0],rsets[1]);
2197             break;
2198         case Z_Operator_prox:
2199             if (zop->u.prox->which != Z_ProximityOperator_known)
2200             {
2201                 zh->errCode = 132;
2202                 return NULL;
2203             }
2204             if (*zop->u.prox->u.known != Z_ProxUnit_word)
2205             {
2206                 char *val = (char *) nmem_malloc (stream, 16);
2207                 zh->errCode = 132;
2208                 zh->errString = val;
2209                 sprintf (val, "%d", *zop->u.prox->u.known);
2210                 return NULL;
2211             }
2212             else
2213             {
2214                 /* new / old prox */
2215                 r=rsprox_create(rset_nmem,key_it_ctrl,key_it_ctrl->scope,
2216                          2, rsets, 
2217                          *zop->u.prox->ordered,
2218                          (!zop->u.prox->exclusion ? 
2219                               0 : *zop->u.prox->exclusion),
2220                          *zop->u.prox->relationType,
2221                          *zop->u.prox->distance );
2222             }
2223             break;
2224         default:
2225             zh->errCode = 110;
2226             return NULL;
2227         }
2228     }
2229     else if (zs->which == Z_RPNStructure_simple)
2230     {
2231         if (zs->u.simple->which == Z_Operand_APT)
2232         {
2233             logf (LOG_DEBUG, "rpn_search_APT");
2234             r = rpn_search_APT (zh, zs->u.simple->u.attributesPlusTerm,
2235                                 attributeSet, stream, sort_sequence,
2236                                 num_bases, basenames,rset_nmem);
2237         }
2238         else if (zs->u.simple->which == Z_Operand_resultSetId)
2239         {
2240             logf (LOG_DEBUG, "rpn_search_ref");
2241             r = resultSetRef (zh, zs->u.simple->u.resultSetId);
2242             if (!r)
2243             {
2244                 r = rsnull_create (rset_nmem,key_it_ctrl);
2245                 zh->errCode = 30;
2246                 zh->errString =
2247                     nmem_strdup (stream, zs->u.simple->u.resultSetId);
2248                 return 0;
2249             }
2250             else
2251                 rset_dup(r);
2252         }
2253         else
2254         {
2255             zh->errCode = 3;
2256             return 0;
2257         }
2258     }
2259     else
2260     {
2261         zh->errCode = 3;
2262         return 0;
2263     }
2264     return r;
2265 }
2266
2267
2268 RSET rpn_search (ZebraHandle zh, NMEM nmem, NMEM rset_nmem,
2269                  Z_RPNQuery *rpn, int num_bases, char **basenames, 
2270                  const char *setname,
2271                  ZebraSet sset)
2272 {
2273     RSET rset;
2274     oident *attrset;
2275     oid_value attributeSet;
2276     Z_SortKeySpecList *sort_sequence;
2277     int sort_status, i;
2278
2279     zh->errCode = 0;
2280     zh->errString = NULL;
2281     zh->hits = 0;
2282
2283     sort_sequence = (Z_SortKeySpecList *)
2284         nmem_malloc (nmem, sizeof(*sort_sequence));
2285     sort_sequence->num_specs = 10; /* FIXME - Hard-coded number */
2286     sort_sequence->specs = (Z_SortKeySpec **)
2287         nmem_malloc (nmem, sort_sequence->num_specs *
2288                      sizeof(*sort_sequence->specs));
2289     for (i = 0; i<sort_sequence->num_specs; i++)
2290         sort_sequence->specs[i] = 0;
2291     
2292     attrset = oid_getentbyoid (rpn->attributeSetId);
2293     attributeSet = attrset->value;
2294     rset = rpn_search_structure (zh, rpn->RPNStructure, attributeSet,
2295                                  nmem, rset_nmem,
2296                                  sort_sequence, num_bases, basenames);
2297     if (!rset)
2298         return 0;
2299
2300     if (zh->errCode)
2301         logf (LOG_DEBUG, "search error: %d", zh->errCode);
2302     
2303     for (i = 0; sort_sequence->specs[i]; i++)
2304         ;
2305     sort_sequence->num_specs = i;
2306     if (!i)
2307         resultSetRank (zh, sset, rset);
2308     else
2309     {
2310         logf (LOG_DEBUG, "resultSetSortSingle in rpn_search");
2311         resultSetSortSingle (zh, nmem, sset, rset,
2312                              sort_sequence, &sort_status);
2313         if (zh->errCode)
2314         {
2315             logf (LOG_DEBUG, "resultSetSortSingle status = %d", zh->errCode);
2316         }
2317     }
2318     return rset;
2319 }
2320
2321 struct scan_info_entry {
2322     char *term;
2323     ISAMC_P isam_p;
2324 };
2325
2326 struct scan_info {
2327     struct scan_info_entry *list;
2328     ODR odr;
2329     int before, after;
2330     char prefix[20];
2331 };
2332
2333 static int scan_handle (char *name, const char *info, int pos, void *client)
2334 {
2335     int len_prefix, idx;
2336     struct scan_info *scan_info = (struct scan_info *) client;
2337
2338     len_prefix = strlen(scan_info->prefix);
2339     if (memcmp (name, scan_info->prefix, len_prefix))
2340         return 1;
2341     if (pos > 0)        idx = scan_info->after - pos + scan_info->before;
2342     else
2343         idx = - pos - 1;
2344     scan_info->list[idx].term = (char *)
2345         odr_malloc (scan_info->odr, strlen(name + len_prefix)+1);
2346     strcpy (scan_info->list[idx].term, name + len_prefix);
2347     assert (*info == sizeof(ISAMC_P));
2348     memcpy (&scan_info->list[idx].isam_p, info+1, sizeof(ISAMC_P));
2349     return 0;
2350 }
2351
2352 static void scan_term_untrans (ZebraHandle zh, NMEM stream, int reg_type,
2353                                char **dst, const char *src)
2354 {
2355     char term_src[IT_MAX_WORD];
2356     char term_dst[IT_MAX_WORD];
2357     
2358     term_untrans (zh, reg_type, term_src, src);
2359
2360     if (zh->iconv_from_utf8 != 0)
2361     {
2362         int len;
2363         char *inbuf = term_src;
2364         size_t inleft = strlen(term_src);
2365         char *outbuf = term_dst;
2366         size_t outleft = sizeof(term_dst)-1;
2367         size_t ret;
2368         
2369         ret = yaz_iconv (zh->iconv_from_utf8, &inbuf, &inleft,
2370                          &outbuf, &outleft);
2371         if (ret == (size_t)(-1))
2372             len = 0;
2373         else
2374             len = outbuf - term_dst;
2375         *dst = nmem_malloc (stream, len + 1);
2376         if (len > 0)
2377             memcpy (*dst, term_dst, len);
2378         (*dst)[len] = '\0';
2379     }
2380     else
2381         *dst = nmem_strdup (stream, term_src);
2382 }
2383
2384 static void count_set (RSET r, int *count)
2385 {
2386     zint psysno = 0;
2387     int kno = 0;
2388     struct it_key key;
2389     RSFD rfd;
2390
2391     logf (LOG_DEBUG, "count_set");
2392
2393     *count = 0;
2394     rfd = rset_open (r, RSETF_READ);
2395     while (rset_read (rfd, &key,0 /* never mind terms */))
2396     {
2397         if (key.mem[0] != psysno)
2398         {
2399             psysno = key.mem[0];
2400             (*count)++;
2401         }
2402         kno++;
2403     }
2404     rset_close (rfd);
2405     logf (LOG_DEBUG, "%d keys, %d records", kno, *count);
2406 }
2407
2408 void rpn_scan (ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
2409                oid_value attributeset,
2410                int num_bases, char **basenames,
2411                int *position, int *num_entries, ZebraScanEntry **list,
2412                int *is_partial, RSET limit_set, int return_zero)
2413 {
2414     int i;
2415     int pos = *position;
2416     int num = *num_entries;
2417     int before;
2418     int after;
2419     int base_no;
2420     char termz[IT_MAX_WORD+20];
2421     AttrType use;
2422     int use_value;
2423     const char *use_string = 0;
2424     struct scan_info *scan_info_array;
2425     ZebraScanEntry *glist;
2426     int ords[32], ord_no = 0;
2427     int ptr[32];
2428
2429     int bases_ok = 0;     /* no of databases with OK attribute */
2430     int errCode = 0;      /* err code (if any is not OK) */
2431     char *errString = 0;  /* addinfo */
2432
2433     unsigned reg_id;
2434     char *search_type = NULL;
2435     char rank_type[128];
2436     int complete_flag;
2437     int sort_flag;
2438     NMEM rset_nmem=NULL; 
2439
2440     *list = 0;
2441
2442     if (attributeset == VAL_NONE)
2443         attributeset = VAL_BIB1;
2444
2445     if (!limit_set)
2446     {
2447         AttrType termset;
2448         int termset_value_numeric;
2449         const char *termset_value_string;
2450         attr_init (&termset, zapt, 8);
2451         termset_value_numeric =
2452             attr_find_ex (&termset, NULL, &termset_value_string);
2453         if (termset_value_numeric != -1)
2454         {
2455             char resname[32];
2456             const char *termset_name = 0;
2457             
2458             if (termset_value_numeric != -2)
2459             {
2460                 
2461                 sprintf (resname, "%d", termset_value_numeric);
2462                 termset_name = resname;
2463             }
2464             else
2465                 termset_name = termset_value_string;
2466             
2467             limit_set = resultSetRef (zh, termset_name);
2468         }
2469     }
2470         
2471     yaz_log (LOG_DEBUG, "position = %d, num = %d set=%d",
2472              pos, num, attributeset);
2473         
2474     attr_init (&use, zapt, 1);
2475     use_value = attr_find_ex (&use, &attributeset, &use_string);
2476
2477     if (zebra_maps_attr (zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2478                          rank_type, &complete_flag, &sort_flag))
2479     {
2480         *num_entries = 0;
2481         zh->errCode = 113;
2482         return ;
2483     }
2484     yaz_log (LOG_DEBUG, "use_value = %d", use_value);
2485
2486     if (use_value == -1)
2487         use_value = 1016;
2488     for (base_no = 0; base_no < num_bases && ord_no < 32; base_no++)
2489     {
2490         int r;
2491         attent attp;
2492         data1_local_attribute *local_attr;
2493
2494         if ((r=att_getentbyatt (zh, &attp, attributeset, use_value,
2495                                 use_string)))
2496         {
2497             logf (LOG_DEBUG, "att_getentbyatt fail. set=%d use=%d",
2498                   attributeset, use_value);
2499             if (r == -1)
2500             {
2501                 char val_str[32];
2502                 sprintf (val_str, "%d", use_value);
2503                 errCode = 114;
2504                 errString = odr_strdup (stream, val_str);
2505             }   
2506             else
2507                 errCode = 121;
2508             continue;
2509         }
2510         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2511         {
2512             zh->errString = basenames[base_no];
2513             zh->errCode = 109; /* Database unavailable */
2514             *num_entries = 0;
2515             return;
2516         }
2517         bases_ok++;
2518         for (local_attr = attp.local_attributes; local_attr && ord_no < 32;
2519              local_attr = local_attr->next)
2520         {
2521             int ord;
2522
2523             ord = zebraExplain_lookupSU (zh->reg->zei, attp.attset_ordinal,
2524                                          local_attr->local);
2525             if (ord > 0)
2526                 ords[ord_no++] = ord;
2527         }
2528     }
2529     if (!bases_ok && errCode)
2530     {
2531         zh->errCode = errCode;
2532         zh->errString = errString;
2533         *num_entries = 0;
2534     }
2535     if (ord_no == 0)
2536     {
2537         char val_str[32];
2538         sprintf (val_str, "%d", use_value);
2539         zh->errCode = 114;
2540         zh->errString = odr_strdup (stream, val_str);
2541
2542         *num_entries = 0;
2543         return;
2544     }
2545     /* prepare dictionary scanning */
2546     before = pos-1;
2547     after = 1+num-pos;
2548     scan_info_array = (struct scan_info *)
2549         odr_malloc (stream, ord_no * sizeof(*scan_info_array));
2550     for (i = 0; i < ord_no; i++)
2551     {
2552         int j, prefix_len = 0;
2553         int before_tmp = before, after_tmp = after;
2554         struct scan_info *scan_info = scan_info_array + i;
2555         struct rpn_char_map_info rcmi;
2556
2557         rpn_char_map_prepare (zh->reg, reg_id, &rcmi);
2558
2559         scan_info->before = before;
2560         scan_info->after = after;
2561         scan_info->odr = stream;
2562
2563         scan_info->list = (struct scan_info_entry *)
2564             odr_malloc (stream, (before+after) * sizeof(*scan_info->list));
2565         for (j = 0; j<before+after; j++)
2566             scan_info->list[j].term = NULL;
2567
2568         prefix_len += key_SU_encode (ords[i], termz + prefix_len);
2569         termz[prefix_len++] = reg_id;
2570         termz[prefix_len] = 0;
2571         strcpy (scan_info->prefix, termz);
2572
2573         if (trans_scan_term (zh, zapt, termz+prefix_len, reg_id))
2574             return ;
2575                     
2576         dict_scan (zh->reg->dict, termz, &before_tmp, &after_tmp,
2577                    scan_info, scan_handle);
2578     }
2579     glist = (ZebraScanEntry *)
2580         odr_malloc (stream, (before+after)*sizeof(*glist));
2581
2582     rset_nmem=nmem_create();
2583
2584     /* consider terms after main term */
2585     for (i = 0; i < ord_no; i++)
2586         ptr[i] = before;
2587     
2588     *is_partial = 0;
2589     for (i = 0; i<after; i++)
2590     {
2591         int j, j0 = -1;
2592         const char *mterm = NULL;
2593         const char *tst;
2594         RSET rset;
2595         
2596         for (j = 0; j < ord_no; j++)
2597         {
2598             if (ptr[j] < before+after &&
2599                 (tst=scan_info_array[j].list[ptr[j]].term) &&
2600                 (!mterm || strcmp (tst, mterm) < 0))
2601             {
2602                 j0 = j;
2603                 mterm = tst;
2604             }
2605         }
2606         if (j0 == -1)
2607             break;
2608         scan_term_untrans (zh, stream->mem, reg_id,
2609                            &glist[i+before].term, mterm);
2610         rset = rset_trunc (zh, &scan_info_array[j0].list[ptr[j0]].isam_p, 1,
2611                            glist[i+before].term, strlen(glist[i+before].term),
2612                            NULL, 0, zapt->term->which, rset_nmem, 
2613                            key_it_ctrl,key_it_ctrl->scope);
2614         ptr[j0]++;
2615         for (j = j0+1; j<ord_no; j++)
2616         {
2617             if (ptr[j] < before+after &&
2618                 (tst=scan_info_array[j].list[ptr[j]].term) &&
2619                 !strcmp (tst, mterm))
2620             {
2621                 RSET rset2;
2622
2623                 rset2 =
2624                    rset_trunc (zh, &scan_info_array[j].list[ptr[j]].isam_p, 1,
2625                                glist[i+before].term,
2626                                strlen(glist[i+before].term), NULL, 0,
2627                                zapt->term->which,rset_nmem,
2628                                key_it_ctrl, key_it_ctrl->scope);
2629                 rset = rsbool_create_or(rset_nmem,key_it_ctrl,
2630                                key_it_ctrl->scope, rset, rset2);
2631                 /* FIXME - Use a proper multi-or */
2632
2633                 ptr[j]++;
2634             }
2635         }
2636         if (limit_set)
2637             rset = rsbool_create_and(rset_nmem,key_it_ctrl,key_it_ctrl->scope,
2638                             rset, rset_dup(limit_set));
2639         count_set (rset, &glist[i+before].occurrences);
2640         rset_delete (rset);
2641     }
2642     if (i < after)
2643     {
2644         *num_entries -= (after-i);
2645         *is_partial = 1;
2646     }
2647
2648     /* consider terms before main term */
2649     for (i = 0; i<ord_no; i++)
2650         ptr[i] = 0;
2651
2652     for (i = 0; i<before; i++)
2653     {
2654         int j, j0 = -1;
2655         const char *mterm = NULL;
2656         const char *tst;
2657         RSET rset;
2658         
2659         for (j = 0; j <ord_no; j++)
2660         {
2661             if (ptr[j] < before &&
2662                 (tst=scan_info_array[j].list[before-1-ptr[j]].term) &&
2663                 (!mterm || strcmp (tst, mterm) > 0))
2664             {
2665                 j0 = j;
2666                 mterm = tst;
2667             }
2668         }
2669         if (j0 == -1)
2670             break;
2671
2672         scan_term_untrans (zh, stream->mem, reg_id,
2673                            &glist[before-1-i].term, mterm);
2674
2675         rset = rset_trunc
2676                (zh, &scan_info_array[j0].list[before-1-ptr[j0]].isam_p, 1,
2677                 glist[before-1-i].term, strlen(glist[before-1-i].term),
2678                 NULL, 0, zapt->term->which,rset_nmem,
2679                 key_it_ctrl,key_it_ctrl->scope);
2680
2681         ptr[j0]++;
2682
2683         for (j = j0+1; j<ord_no; j++)
2684         {
2685             if (ptr[j] < before &&
2686                 (tst=scan_info_array[j].list[before-1-ptr[j]].term) &&
2687                 !strcmp (tst, mterm))
2688             {
2689                 RSET rset2;
2690
2691                 rset2 = rset_trunc (zh,
2692                          &scan_info_array[j].list[before-1-ptr[j]].isam_p, 1,
2693                                     glist[before-1-i].term,
2694                                     strlen(glist[before-1-i].term), NULL, 0,
2695                                     zapt->term->which, rset_nmem,
2696                                     key_it_ctrl, key_it_ctrl->scope);
2697                 rset = rsbool_create_and(rset_nmem,key_it_ctrl,
2698                             key_it_ctrl->scope, rset, rset2);
2699                 /* FIXME - multi-and ?? */
2700                 ptr[j]++;
2701             }
2702         }
2703         if (limit_set)
2704             rset = rsbool_create_and(rset_nmem,key_it_ctrl,key_it_ctrl->scope,
2705                             rset, rset_dup(limit_set));
2706         count_set (rset, &glist[before-1-i].occurrences);
2707         rset_delete (rset);
2708     }
2709     i = before-i;
2710     if (i)
2711     {
2712         *is_partial = 1;
2713         *position -= i;
2714         *num_entries -= i;
2715     }
2716
2717     nmem_destroy(rset_nmem);
2718     *list = glist + i;               /* list is set to first 'real' entry */
2719     
2720     logf (LOG_DEBUG, "position = %d, num_entries = %d",
2721           *position, *num_entries);
2722     if (zh->errCode)
2723         logf (LOG_DEBUG, "scan error: %d", zh->errCode);
2724 }
2725