Reduce log. LOG_LOG -> LOG_DEBUG
[idzebra-moved-to-github.git] / index / zrpn.c
1 /* $Id: zrpn.c,v 1.151 2004-09-13 09:02:16 adam Exp $
2    Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004
3    Index Data Aps
4
5 This file is part of the Zebra server.
6
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra.  If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA.
21 */
22
23
24 #include <stdio.h>
25 #include <assert.h>
26 #ifdef WIN32
27 #include <io.h>
28 #else
29 #include <unistd.h>
30 #endif
31 #include <ctype.h>
32
33 #include "index.h"
34 #include <zebra_xpath.h>
35
36 #include <charmap.h>
37 #include <rset.h>
38
39
/* Key control block for struct it_key entries.  The initializer is
   positional, so the order of the values below has to follow the field
   order of struct key_control. */
40 static const struct key_control it_ctrl = { 
41     sizeof(struct it_key),
42     2, /* we have sysnos and seqnos in this key, nothing more */
43     key_compare_it, 
44     key_logdump_txt,   /* FIXME  - clean up these functions */
45     key_get_seq,
46 };
47
48
/* Non-static pointer to the control block above. */
49 const struct key_control *key_it_ctrl = &it_ctrl;
50
/* Context passed (as void *) to rpn_char_map_handler: the character maps
   and the register type that zebra_maps_input is called with. */
51 struct rpn_char_map_info {
52     ZebraMaps zm;
53     int reg_type;
54 };
55
/* Cursor over the attribute list of a term.  Initialized by attr_init and
   advanced by attr_find_ex. */
56 typedef struct {
57     int type;    /* attribute type being searched for */
58     int major;   /* index of the next element in zapt->attributes */
59     int minor;   /* index into the list of a complex attribute value */
60     Z_AttributesPlusTerm *zapt;
61 } AttrType;
62
63
64 static const char **rpn_char_map_handler (void *vp, const char **from, int len)
65 {
66     struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
67     const char **out = zebra_maps_input (p->zm, p->reg_type, from, len);
68 #if 0
69     if (out && *out)
70     {
71         const char *outp = *out;
72         yaz_log (LOG_LOG, "---");
73         while (*outp)
74         {
75             yaz_log (LOG_LOG, "%02X", *outp);
76             outp++;
77         }
78     }
79 #endif
80     return out;
81 }
82
83 static void rpn_char_map_prepare (struct zebra_register *reg, int reg_type,
84                                   struct rpn_char_map_info *map_info)
85 {
86     map_info->zm = reg->zebra_maps;
87     map_info->reg_type = reg_type;
88     dict_grep_cmap (reg->dict, map_info, rpn_char_map_handler);
89 }
90
/* attr_find_ex: return the next value of attribute type src->type.
 *  src:            cursor (see attr_init); major/minor are advanced so that
 *                  a following call continues after the value returned here
 *  attributeSetP:  if non-NULL and the matching element carries an
 *                  attribute set OID, receives that set's value
 *  string_value:   if non-NULL, receives the string of a complex string
 *                  value; if NULL, such values are skipped
 *  returns:        the numeric value, -2 when a string was stored in
 *                  *string_value, or -1 when no further value exists
 */
91 static int attr_find_ex (AttrType *src, oid_value *attributeSetP,
92                          const char **string_value)
93 {
94     int num_attributes;
95
96     num_attributes = src->zapt->attributes->num_attributes;
97     while (src->major < num_attributes)
98     {
99         Z_AttributeElement *element;
100
101         element = src->zapt->attributes->attributes[src->major];
102         if (src->type == *element->attributeType)
103         {
104             switch (element->which) 
105             {
106             case Z_AttributeValue_numeric:
                /* single value: step past this element before returning */
107                 ++(src->major);
108                 if (element->attributeSet && attributeSetP)
109                 {
110                     oident *attrset;
111
                    /* NOTE(review): the result of oid_getentbyoid is
                       dereferenced without a NULL check */
112                     attrset = oid_getentbyoid (element->attributeSet);
113                     *attributeSetP = attrset->value;
114                 }
115                 return *element->value.numeric;
116                 break;
117             case Z_AttributeValue_complex:
                /* list exhausted: leave the switch and move on to the
                   next element (src->minor is not reset here) */
118                 if (src->minor >= element->value.complex->num_list)
119                     break;
120                 if (element->attributeSet && attributeSetP)
121                 {
122                     oident *attrset;
123                     
124                     attrset = oid_getentbyoid (element->attributeSet);
125                     *attributeSetP = attrset->value;
126                 }
127                 if (element->value.complex->list[src->minor]->which ==  
128                     Z_StringOrNumeric_numeric)
129                 {
                    /* stay on this element; only the list index moves */
130                     ++(src->minor);
131                     return
132                         *element->value.complex->list[src->minor-1]->u.numeric;
133                 }
134                 else if (element->value.complex->list[src->minor]->which ==  
135                          Z_StringOrNumeric_string)
136                 {
                    /* caller did not ask for strings: skip the element */
137                     if (!string_value)
138                         break;
139                     ++(src->minor);
140                     *string_value = 
141                         element->value.complex->list[src->minor-1]->u.string;
142                     return -2;
143                 }
144                 else
145                     break;
146             default:
147                 assert (0);
148             }
149         }
150         ++(src->major);
151     }
152     return -1;
153 }
154
155 static int attr_find (AttrType *src, oid_value *attributeSetP)
156 {
157     return attr_find_ex (src, attributeSetP, 0);
158 }
159
160 static void attr_init (AttrType *src, Z_AttributesPlusTerm *zapt,
161                        int type)
162 {
163     src->zapt = zapt;
164     src->type = type;
165     src->major = 0;
166     src->minor = 0;
167 }
168
/* When defined, struct grep_info carries a term_no array that is grown
   together with isam_p_buf (see add_isam_p). */
169 #define TERM_COUNT        
170        
/* Accumulator for dictionary hits; filled by add_isam_p. */
171 struct grep_info {        
172 #ifdef TERM_COUNT        
173     int *term_no;        /* allocated with isam_p_size entries */
174 #endif        
175     ISAMC_P *isam_p_buf; /* collected ISAM positions, grown on demand */
176     int isam_p_size;     /* allocated number of entries */
177     int isam_p_indx;     /* number of entries in use */
178     ZebraHandle zh;
179     int reg_type;
180     ZebraSet termset;    /* if set, each hit is also added to this set */
181 };        
182
183 static void term_untrans  (ZebraHandle zh, int reg_type,
184                            char *dst, const char *src)
185 {
186     int len = 0;
187     while (*src)
188     {
189         const char *cp = zebra_maps_output (zh->reg->zebra_maps,
190                                             reg_type, &src);
191         if (!cp && len < IT_MAX_WORD-1)
192             dst[len++] = *src++;
193         else
194             while (*cp && len < IT_MAX_WORD-1)
195                 dst[len++] = *cp++;
196     }
197     dst[len] = '\0';
198 }
199
200 static void add_isam_p (const char *name, const char *info,
201                         struct grep_info *p)
202 {
203     if (p->isam_p_indx == p->isam_p_size)
204     {
205         ISAMC_P *new_isam_p_buf;
206 #ifdef TERM_COUNT        
207         int *new_term_no;        
208 #endif
209         p->isam_p_size = 2*p->isam_p_size + 100;
210         new_isam_p_buf = (ISAMC_P *) xmalloc (sizeof(*new_isam_p_buf) *
211                                              p->isam_p_size);
212         if (p->isam_p_buf)
213         {
214             memcpy (new_isam_p_buf, p->isam_p_buf,
215                     p->isam_p_indx * sizeof(*p->isam_p_buf));
216             xfree (p->isam_p_buf);
217         }
218         p->isam_p_buf = new_isam_p_buf;
219
220 #ifdef TERM_COUNT
221         new_term_no = (int *) xmalloc (sizeof(*new_term_no) *
222                                        p->isam_p_size);
223         if (p->term_no)
224         {
225             memcpy (new_term_no, p->isam_p_buf,
226                     p->isam_p_indx * sizeof(*p->term_no));
227             xfree (p->term_no);
228         }
229         p->term_no = new_term_no;
230 #endif
231     }
232     assert (*info == sizeof(*p->isam_p_buf));
233     memcpy (p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
234
235 #if 1
236     if (p->termset)
237     {
238         const char *db;
239         int set, use;
240         char term_tmp[IT_MAX_WORD];
241         int su_code = 0;
242         int len = key_SU_decode (&su_code, name);
243         
244         term_untrans  (p->zh, p->reg_type, term_tmp, name+len+1);
245         logf (LOG_LOG, "grep: %d %c %s", su_code, name[len], term_tmp);
246         zebraExplain_lookup_ord (p->zh->reg->zei,
247                                  su_code, &db, &set, &use);
248         logf (LOG_LOG, "grep:  set=%d use=%d db=%s", set, use, db);
249         
250         resultSetAddTerm (p->zh, p->termset, name[len], db,
251                           set, use, term_tmp);
252     }
253 #endif
254     (p->isam_p_indx)++;
255 }
256
/* grep_handle: hit callback; p is the struct grep_info to which the hit
   is added.  Always returns 0. */
static int grep_handle (char *name, const char *info, void *p)
{
    struct grep_info *gi = (struct grep_info *) p;

    add_isam_p (name, info, gi);
    return 0;
}
262
263 static int term_pre (ZebraMaps zebra_maps, int reg_type, const char **src,
264                      const char *ct1, const char *ct2)
265 {
266     const char *s1, *s0 = *src;
267     const char **map;
268
269     /* skip white space */
270     while (*s0)
271     {
272         if (ct1 && strchr (ct1, *s0))
273             break;
274         if (ct2 && strchr (ct2, *s0))
275             break;
276         s1 = s0;
277         map = zebra_maps_input (zebra_maps, reg_type, &s1, strlen(s1));
278         if (**map != *CHR_SPACE)
279             break;
280         s0 = s1;
281     }
282     *src = s0;
283     return *s0;
284 }
285
/* Characters that are prefixed with a backslash when term text is copied
   into a dictionary regular expression. */
286 #define REGEX_CHARS " []()|.*+?!"
287
288 /* term_100: handle term, where trunc=none (no operators at all) */
/*  src:          in/out; leading white space is skipped by term_pre and
 *                *src is left after the consumed input
 *  dst:          receives the term with REGEX_CHARS escaped
 *  dst_term:     receives the same characters without escaping
 *  space_split:  non-zero: stop at the first input that maps to CHR_SPACE;
 *                zero: take the complete field (see below)
 *  returns:      number of characters stored in dst, 0 if nothing remains
 *  Neither output buffer is bounds checked here.
 */
289 static int term_100 (ZebraMaps zebra_maps, int reg_type,
290                      const char **src, char *dst, int space_split,
291                      char *dst_term)
292 {
293     const char *s0, *s1;
294     const char **map;
295     int i = 0;
296     int j = 0;
297
298     const char *space_start = 0;
299     const char *space_end = 0;
300
301     if (!term_pre (zebra_maps, reg_type, src, NULL, NULL))
302         return 0;
303     s0 = *src;
304     while (*s0)
305     {
        /* [s1, s0) is the input chunk consumed by this mapping step */
306         s1 = s0;
307         map = zebra_maps_input (zebra_maps, reg_type, &s0, strlen(s0));
308         if (space_split)
309         {
310             if (**map == *CHR_SPACE)
311                 break;
312         }
313         else  /* complete subfield only. */
314         {
            /* A space chunk is held back and only emitted once a non-space
               chunk follows, so trailing space is dropped.  Only the most
               recent space chunk is remembered. */
315             if (**map == *CHR_SPACE)
316             {   /* save space mapping for later  .. */
317                 space_start = s1;
318                 space_end = s0;
319                 continue;
320             }
321             else if (space_start)
322             {   /* reload last space */
323                 while (space_start < space_end)
324                 {
325                     if (strchr (REGEX_CHARS, *space_start))
326                         dst[i++] = '\\';
327                     dst_term[j++] = *space_start;
328                     dst[i++] = *space_start++;
329                 }
330                 /* and reset */
331                 space_start = space_end = 0;
332             }
333         }
334         /* add non-space char */
335         while (s1 < s0)
336         {
337             if (strchr(REGEX_CHARS, *s1))
338                 dst[i++] = '\\';
339             dst_term[j++] = *s1;
340             dst[i++] = *s1++;
341         }
342     }
343     dst[i] = '\0';
344     dst_term[j] = '\0';
345     *src = s0;
346     return i;
347 }
348
349 /* term_101: handle term, where trunc=Process # */
350 static int term_101 (ZebraMaps zebra_maps, int reg_type,
351                      const char **src, char *dst, int space_split,
352                      char *dst_term)
353 {
354     const char *s0, *s1;
355     const char **map;
356     int i = 0;
357     int j = 0;
358
359     if (!term_pre (zebra_maps, reg_type, src, "#", "#"))
360         return 0;
361     s0 = *src;
362     while (*s0)
363     {
364         if (*s0 == '#')
365         {
366             dst[i++] = '.';
367             dst[i++] = '*';
368             dst_term[j++] = *s0++;
369         }
370         else
371         {
372             s1 = s0;
373             map = zebra_maps_input (zebra_maps, reg_type, &s0, strlen(s0));
374             if (space_split && **map == *CHR_SPACE)
375                 break;
376             while (s1 < s0)
377             {
378                 if (strchr(REGEX_CHARS, *s1))
379                     dst[i++] = '\\';
380                 dst_term[j++] = *s1;
381                 dst[i++] = *s1++;
382             }
383         }
384     }
385     dst[i] = '\0';
386     dst_term[j++] = '\0';
387     *src = s0;
388     return i;
389 }
390
391 /* term_103: handle term, where trunc=re-2 (regular expressions) */
392 static int term_103 (ZebraMaps zebra_maps, int reg_type, const char **src,
393                      char *dst, int *errors, int space_split,
394                      char *dst_term)
395 {
396     int i = 0;
397     int j = 0;
398     const char *s0, *s1;
399     const char **map;
400
401     if (!term_pre (zebra_maps, reg_type, src, "^\\()[].*+?|", "("))
402         return 0;
403     s0 = *src;
404     if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
405         isdigit (s0[1]))
406     {
407         *errors = s0[1] - '0';
408         s0 += 3;
409         if (*errors > 3)
410             *errors = 3;
411     }
412     while (*s0)
413     {
414         if (strchr ("^\\()[].*+?|-", *s0))
415         {
416             dst_term[j++] = *s0;
417             dst[i++] = *s0++;
418         }
419         else
420         {
421             s1 = s0;
422             map = zebra_maps_input (zebra_maps, reg_type, &s0, strlen(s0));
423             if (**map == *CHR_SPACE)
424                 break;
425             while (s1 < s0)
426             {
427                 if (strchr(REGEX_CHARS, *s1))
428                     dst[i++] = '\\';
429                 dst_term[j++] = *s1;
430                 dst[i++] = *s1++;
431             }
432         }
433     }
434     dst[i] = '\0';
435     dst_term[j] = '\0';
436     *src = s0;
437     return i;
438 }
439
440 /* term_102: handle term, where trunc=re-1 (regular expressions) */
/* Same as term_103, but with errors == NULL, so no "+N+" prefix is
   interpreted. */
441 static int term_102 (ZebraMaps zebra_maps, int reg_type, const char **src,
442                      char *dst, int space_split, char *dst_term)
443 {
444     return term_103 (zebra_maps, reg_type, src, dst, NULL, space_split,
445                      dst_term);
446 }
447
448
449 /* term_104: handle term, where trunc=Process # and ! */
450 static int term_104 (ZebraMaps zebra_maps, int reg_type,
451                      const char **src, char *dst, int space_split,
452                      char *dst_term)
453 {
454     const char *s0, *s1;
455     const char **map;
456     int i = 0;
457     int j = 0;
458
459     if (!term_pre (zebra_maps, reg_type, src, "?*#", "?*#"))
460         return 0;
461     s0 = *src;
462     while (*s0)
463     {
464         if (*s0 == '?')
465         {
466             dst_term[j++] = *s0++;
467             if (*s0 >= '0' && *s0 <= '9')
468             {
469                 int limit = 0;
470                 while (*s0 >= '0' && *s0 <= '9')
471                 {
472                     limit = limit * 10 + (*s0 - '0');
473                     dst_term[j++] = *s0++;
474                 }
475                 if (limit > 20)
476                     limit = 20;
477                 while (--limit >= 0)
478                 {
479                     dst[i++] = '.';
480                     dst[i++] = '?';
481                 }
482             }
483             else
484             {
485                 dst[i++] = '.';
486                 dst[i++] = '*';
487             }
488         }
489         else if (*s0 == '*')
490         {
491             dst[i++] = '.';
492             dst[i++] = '*';
493             dst_term[j++] = *s0++;
494         }
495         else if (*s0 == '#')
496         {
497             dst[i++] = '.';
498             dst_term[j++] = *s0++;
499         }
500         {
501             s1 = s0;
502             map = zebra_maps_input (zebra_maps, reg_type, &s0, strlen(s0));
503             if (space_split && **map == *CHR_SPACE)
504                 break;
505             while (s1 < s0)
506             {
507                 if (strchr(REGEX_CHARS, *s1))
508                     dst[i++] = '\\';
509                 dst_term[j++] = *s1;
510                 dst[i++] = *s1++;
511             }
512         }
513     }
514     dst[i] = '\0';
515     dst_term[j++] = '\0';
516     *src = s0;
517     return i;
518 }
519
520 /* term_105/106: handle term, where trunc=Process * and ! and right trunc */
521 static int term_105 (ZebraMaps zebra_maps, int reg_type,
522                      const char **src, char *dst, int space_split,
523                      char *dst_term, int right_truncate)
524 {
525     const char *s0, *s1;
526     const char **map;
527     int i = 0;
528     int j = 0;
529
530     if (!term_pre (zebra_maps, reg_type, src, "*!", "*!"))
531         return 0;
532     s0 = *src;
533     while (*s0)
534     {
535         if (*s0 == '*')
536         {
537             dst[i++] = '.';
538             dst[i++] = '*';
539             dst_term[j++] = *s0++;
540         }
541         else if (*s0 == '!')
542         {
543             dst[i++] = '.';
544             dst_term[j++] = *s0++;
545         }
546         {
547             s1 = s0;
548             map = zebra_maps_input (zebra_maps, reg_type, &s0, strlen(s0));
549             if (space_split && **map == *CHR_SPACE)
550                 break;
551             while (s1 < s0)
552             {
553                 if (strchr(REGEX_CHARS, *s1))
554                     dst[i++] = '\\';
555                 dst_term[j++] = *s1;
556                 dst[i++] = *s1++;
557             }
558         }
559     }
560     if (right_truncate)
561     {
562         dst[i++] = '.';
563         dst[i++] = '*';
564     }
565     dst[i] = '\0';
566     
567     dst_term[j++] = '\0';
568     *src = s0;
569     return i;
570 }
571
572
573 /* gen_regular_rel - generate regular expression from relation
574  *  val:     border value (inclusive)
575  *  islt:    1 if <=; 0 if >=.
576  */
/*  dst:     receives the NUL-terminated expression.  It is written with
 *           strcpy/strcat and direct indexing without any bounds check, so
 *           the caller has to supply a sufficiently large buffer.
 *  The expression is a list of alternatives: one per digit position of
 *  val, plus a final alternative for numbers with fewer (islt) or more
 *  (!islt) digits.
 */
577 static void gen_regular_rel (char *dst, int val, int islt)
578 {
579     int dst_p;
580     int w, d, i;
581     int pos = 0;
582     char numstr[20];
583
584     logf (LOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
585     if (val >= 0)
586     {
        /* <= non-negative: every negative number matches as well */
587         if (islt)
588             strcpy (dst, "(-[0-9]+|(");
589         else
590             strcpy (dst, "((");
591     } 
592     else
593     {
        /* negative border: work on |val| behind a '-' prefix with the
           relation reversed */
594         if (!islt)
595         {
            /* >= negative: every non-negative number matches as well */
596             strcpy (dst, "([0-9]+|-(");
            /* redundant: dst_p is assigned again before its first use */
597             dst_p = strlen (dst);
598             islt = 1;
599         }
600         else
601         {
602             strcpy (dst, "(-(");
603             islt = 0;
604         }
605         val = -val;
606     }
607     dst_p = strlen (dst);
608     sprintf (numstr, "%d", val);
    /* walk the digits from least significant (pos 0) to most significant */
609     for (w = strlen(numstr); --w >= 0; pos++)
610     {
611         d = numstr[w];
612         if (pos > 0)
613         {
            /* above the last digit the range is strict: use d-1 (or d+1),
               and skip the position when no such digit exists */
614             if (islt)
615             {
616                 if (d == '0')
617                     continue;
618                 d--;
619             } 
620             else
621             {
622                 if (d == '9')
623                     continue;
624                 d++;
625             }
626         }
627         
        /* append the full number, then back up so that the digit at this
           position and everything after it gets overwritten */
628         strcpy (dst + dst_p, numstr);
629         dst_p = strlen(dst) - pos - 1;
630
631         if (islt)
632         {
633             if (d != '0')
634             {
635                 dst[dst_p++] = '[';
636                 dst[dst_p++] = '0';
637                 dst[dst_p++] = '-';
638                 dst[dst_p++] = d;
639                 dst[dst_p++] = ']';
640             }
641             else
642                 dst[dst_p++] = d;
643         }
644         else
645         {
646             if (d != '9')
647             { 
648                 dst[dst_p++] = '[';
649                 dst[dst_p++] = d;
650                 dst[dst_p++] = '-';
651                 dst[dst_p++] = '9';
652                 dst[dst_p++] = ']';
653             }
654             else
655                 dst[dst_p++] = d;
656         }
        /* the pos lower positions may hold any digit */
657         for (i = 0; i<pos; i++)
658         {
659             dst[dst_p++] = '[';
660             dst[dst_p++] = '0';
661             dst[dst_p++] = '-';
662             dst[dst_p++] = '9';
663             dst[dst_p++] = ']';
664         }
665         dst[dst_p++] = '|';
666     }
667     dst[dst_p] = '\0';
668     if (islt)
669     {
670         /* match everything less than 10^(pos-1) */
671         strcat (dst, "0*");
672         for (i=1; i<pos; i++)
673             strcat (dst, "[0-9]?");
674     }
675     else
676     {
677         /* match everything greater than 10^pos */
678         for (i = 0; i <= pos; i++)
679             strcat (dst, "[0-9]");
680         strcat (dst, "[0-9]*");
681     }
682     strcat (dst, "))");
683 }
684
/* string_rel_add_char: copy one (possibly backslash-escaped) character.
 *  term_p:  in/out write position; advanced past the copied bytes
 *  src:     source string
 *  indx:    in/out read index into src; advanced past the copied bytes
 * A backslash at src[*indx] is copied together with the byte after it.
 */
void string_rel_add_char (char **term_p, const char *src, int *indx)
{
    char *out = *term_p;
    int pos = *indx;

    if (src[pos] == '\\')
        *out++ = src[pos++];
    *out++ = src[pos++];

    *term_p = out;
    *indx = pos;
}
691
692 /*
693  *   >  abc     ([b-].*|a[c-].*|ab[d-].*|abc.+)
694  *              ([^-a].*|a[^-b].*|ab[^-c].*|abc.+)
695  *   >= abc     ([b-].*|a[c-].*|ab[c-].*)
696  *              ([^-a].*|a[^-b].*|ab[c-].*)
697  *   <  abc     ([-0].*|a[-a].*|ab[-b].*)
698  *              ([^a-].*|a[^b-].*|ab[^c-].*)
699  *   <= abc     ([-0].*|a[-a].*|ab[-b].*|abc)
700  *              ([^a-].*|a[^b-].*|ab[^c-].*|abc)
701  */
/* string_relation: append a regular expression for "relation term" to
 * term_dict.
 *  zapt:          term with attributes; the relation is attribute type 2
 *                 (1 "<", 2 "<=", 3 "=", 4 ">=", 5 ">"; anything else is
 *                 handled as "=")
 *  term_sub:      in/out input position; the term is taken by term_100
 *  term_dict:     NUL-terminated prefix; the expression is appended at its
 *                 current end
 *  attributeSet:  not used in this function
 *  term_dst:      receives the unescaped term from term_100
 *  returns:       0 if term_100 found no term, otherwise 1
 * In each loop one alternative is emitted per character of the term: the
 * prefix before that character, a bracket expression built from the
 * character, and ".*".  The IT_MAX_WORD tests stop generation only after
 * an alternative has already been written.
 */
702 static int string_relation (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
703                             const char **term_sub, char *term_dict,
704                             oid_value attributeSet,
705                             int reg_type, int space_split, char *term_dst)
706 {
707     AttrType relation;
708     int relation_value;
709     int i;
710     char *term_tmp = term_dict + strlen(term_dict);
711     char term_component[2*IT_MAX_WORD+20];
712
713     attr_init (&relation, zapt, 2);
714     relation_value = attr_find (&relation, NULL);
715
716     logf (LOG_DEBUG, "string relation value=%d", relation_value);
717     switch (relation_value)
718     {
719     case 1:
720         if (!term_100 (zh->reg->zebra_maps, reg_type,
721                        term_sub, term_component,
722                        space_split, term_dst))
723             return 0;
724         logf (LOG_DEBUG, "Relation <");
725         
726         *term_tmp++ = '(';
727         for (i = 0; term_component[i]; )
728         {
729             int j = 0;
730
731             if (i)
732                 *term_tmp++ = '|';
            /* prefix: the first i bytes of the term */
733             while (j < i)
734                 string_rel_add_char (&term_tmp, term_component, &j);
735
736             *term_tmp++ = '[';
737
            /* [^c-]: advances i past the character (and its escape) */
738             *term_tmp++ = '^';
739             string_rel_add_char (&term_tmp, term_component, &i);
740             *term_tmp++ = '-';
741
742             *term_tmp++ = ']';
743             *term_tmp++ = '.';
744             *term_tmp++ = '*';
745
746             if ((term_tmp - term_dict) > IT_MAX_WORD)
747                 break;
748         }
749         *term_tmp++ = ')';
750         *term_tmp = '\0';
751         break;
752     case 2:
753         if (!term_100 (zh->reg->zebra_maps, reg_type,
754                        term_sub, term_component,
755                        space_split, term_dst))
756             return 0;
757         logf (LOG_DEBUG, "Relation <=");
758
759         *term_tmp++ = '(';
760         for (i = 0; term_component[i]; )
761         {
762             int j = 0;
763
764             while (j < i)
765                 string_rel_add_char (&term_tmp, term_component, &j);
766             *term_tmp++ = '[';
767
768             *term_tmp++ = '^';
769             string_rel_add_char (&term_tmp, term_component, &i);
770             *term_tmp++ = '-';
771
772             *term_tmp++ = ']';
773             *term_tmp++ = '.';
774             *term_tmp++ = '*';
775
776             *term_tmp++ = '|';
777
778             if ((term_tmp - term_dict) > IT_MAX_WORD)
779                 break;
780         }
        /* last alternative: the term itself */
781         for (i = 0; term_component[i]; )
782             string_rel_add_char (&term_tmp, term_component, &i);
783         *term_tmp++ = ')';
784         *term_tmp = '\0';
785         break;
786     case 5:
787         if (!term_100 (zh->reg->zebra_maps, reg_type,
788                        term_sub, term_component, space_split, term_dst))
789             return 0;
790         logf (LOG_DEBUG, "Relation >");
791
792         *term_tmp++ = '(';
793         for (i = 0; term_component[i];)
794         {
795             int j = 0;
796
797             while (j < i)
798                 string_rel_add_char (&term_tmp, term_component, &j);
799             *term_tmp++ = '[';
800             
801             *term_tmp++ = '^';
802             *term_tmp++ = '-';
803             string_rel_add_char (&term_tmp, term_component, &i);
804
805             *term_tmp++ = ']';
806             *term_tmp++ = '.';
807             *term_tmp++ = '*';
808
809             *term_tmp++ = '|';
810
811             if ((term_tmp - term_dict) > IT_MAX_WORD)
812                 break;
813         }
        /* last alternative: the term followed by at least one character */
814         for (i = 0; term_component[i];)
815             string_rel_add_char (&term_tmp, term_component, &i);
816         *term_tmp++ = '.';
817         *term_tmp++ = '+';
818         *term_tmp++ = ')';
819         *term_tmp = '\0';
820         break;
821     case 4:
822         if (!term_100 (zh->reg->zebra_maps, reg_type, term_sub,
823                        term_component, space_split, term_dst))
824             return 0;
825         logf (LOG_DEBUG, "Relation >=");
826
827         *term_tmp++ = '(';
828         for (i = 0; term_component[i];)
829         {
830             int j = 0;
831
832             if (i)
833                 *term_tmp++ = '|';
834             while (j < i)
835                 string_rel_add_char (&term_tmp, term_component, &j);
836             *term_tmp++ = '[';
837
            /* a byte that is not the last one gets [^-c]; the last byte
               gets [c-], which includes the character itself */
838             if (term_component[i+1])
839             {
840                 *term_tmp++ = '^';
841                 *term_tmp++ = '-';
842                 string_rel_add_char (&term_tmp, term_component, &i);
843             }
844             else
845             {
846                 string_rel_add_char (&term_tmp, term_component, &i);
847                 *term_tmp++ = '-';
848             }
849             *term_tmp++ = ']';
850             *term_tmp++ = '.';
851             *term_tmp++ = '*';
852
853             if ((term_tmp - term_dict) > IT_MAX_WORD)
854                 break;
855         }
856         *term_tmp++ = ')';
857         *term_tmp = '\0';
858         break;
859     case 3:
860     default:
861         logf (LOG_DEBUG, "Relation =");
862         if (!term_100 (zh->reg->zebra_maps, reg_type, term_sub,
863                        term_component, space_split, term_dst))
864             return 0;
        /* term_tmp points at the terminating NUL of term_dict, so these
           calls append "(term)" to it */
865         strcat (term_tmp, "(");
866         strcat (term_tmp, term_component);
867         strcat (term_tmp, ")");
868     }
869     return 1;
870 }
871
872 static int string_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
873                         const char **term_sub, 
874                         oid_value attributeSet, NMEM stream,
875                         struct grep_info *grep_info,
876                         int reg_type, int complete_flag,
877                         int num_bases, char **basenames,
878                         char *term_dst, int xpath_use);
879
880 static RSET term_trunc (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
881                         const char **term_sub, 
882                         oid_value attributeSet, NMEM stream,
883                         struct grep_info *grep_info,
884                         int reg_type, int complete_flag,
885                         int num_bases, char **basenames,
886                         char *term_dst,
887                         const char *rank_type, int xpath_use,
888                         NMEM rset_nmem)
889 {
890     int r;
891     grep_info->isam_p_indx = 0;
892     r = string_term (zh, zapt, term_sub, attributeSet, stream, grep_info,
893                      reg_type, complete_flag, num_bases, basenames,
894                      term_dst, xpath_use);
895     if (r < 1)
896         return 0;
897     logf (LOG_DEBUG, "term: %s", term_dst);
898     return rset_trunc (zh, grep_info->isam_p_buf,
899                        grep_info->isam_p_indx, term_dst,
900                        strlen(term_dst), rank_type, 1 /* preserve pos */,
901                        zapt->term->which, rset_nmem,
902                        key_it_ctrl,key_it_ctrl->scope);
903 }
904
905
906 static int string_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
907                         const char **term_sub, 
908                         oid_value attributeSet, NMEM stream,
909                         struct grep_info *grep_info,
910                         int reg_type, int complete_flag,
911                         int num_bases, char **basenames,
912                         char *term_dst, int xpath_use)
913 {
914     char term_dict[2*IT_MAX_WORD+4000];
915     int j, r, base_no;
916     AttrType truncation;
917     int truncation_value;
918     AttrType use;
919     int use_value;
920     const char *use_string = 0;
921     oid_value curAttributeSet = attributeSet;
922     const char *termp;
923     struct rpn_char_map_info rcmi;
924     int space_split = complete_flag ? 0 : 1;
925
926     int bases_ok = 0;     /* no of databases with OK attribute */
927     int errCode = 0;      /* err code (if any is not OK) */
928     char *errString = 0;  /* addinfo */
929
930     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
931     attr_init (&use, zapt, 1);
932     use_value = attr_find_ex (&use, &curAttributeSet, &use_string);
933     logf (LOG_DEBUG, "string_term, use value %d", use_value);
934     attr_init (&truncation, zapt, 5);
935     truncation_value = attr_find (&truncation, NULL);
936     logf (LOG_DEBUG, "truncation value %d", truncation_value);
937
938     if (use_value == -1)    /* no attribute - assumy "any" */
939         use_value = 1016;
940     for (base_no = 0; base_no < num_bases; base_no++)
941     {
942         attent attp;
943         data1_local_attribute id_xpath_attr;
944         data1_local_attribute *local_attr;
945         int max_pos, prefix_len = 0;
946
947         termp = *term_sub;
948
949         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
950         {
951             zh->errCode = 109; /* Database unavailable */
952             zh->errString = basenames[base_no];
953             return -1;
954         }
955         if (xpath_use > 0 && use_value == -2) 
956         {
957             use_value = xpath_use;
958             attp.local_attributes = &id_xpath_attr;
959             attp.attset_ordinal = VAL_IDXPATH;
960             id_xpath_attr.next = 0;
961             id_xpath_attr.local = use_value;
962         }
963         else if (curAttributeSet == VAL_IDXPATH)
964         {
965             attp.local_attributes = &id_xpath_attr;
966             attp.attset_ordinal = VAL_IDXPATH;
967             id_xpath_attr.next = 0;
968             id_xpath_attr.local = use_value;
969         }
970         else
971         {
972             if ((r=att_getentbyatt (zh, &attp, curAttributeSet, use_value,
973                                             use_string)))
974             {
975                 logf (LOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
976                       curAttributeSet, use_value, r);
977                 if (r == -1)
978                 {
979                     /* set was found, but value wasn't defined */
980                     errCode = 114;
981                     if (use_string)
982                         errString = nmem_strdup(stream, use_string);
983                     else
984                     {
985                         char val_str[32];
986                         sprintf (val_str, "%d", use_value);
987                         errString = nmem_strdup (stream, val_str);
988                     }
989                 }
990                 else
991                 {
992                     int oid[OID_SIZE];
993                     struct oident oident;
994                     
995                     oident.proto = PROTO_Z3950;
996                     oident.oclass = CLASS_ATTSET;
997                     oident.value = curAttributeSet;
998                     oid_ent_to_oid (&oident, oid);
999                     
1000                     errCode = 121;
1001                     errString = nmem_strdup (stream, oident.desc);
1002                 }
1003                 continue;
1004             }
1005         }
1006         for (local_attr = attp.local_attributes; local_attr;
1007              local_attr = local_attr->next)
1008         {
1009             int ord;
1010             char ord_buf[32];
1011             int i, ord_len;
1012             
1013             ord = zebraExplain_lookupSU (zh->reg->zei, attp.attset_ordinal,
1014                                          local_attr->local);
1015             if (ord < 0)
1016                 continue;
1017             if (prefix_len)
1018                 term_dict[prefix_len++] = '|';
1019             else
1020                 term_dict[prefix_len++] = '(';
1021             
1022             ord_len = key_SU_encode (ord, ord_buf);
1023             for (i = 0; i<ord_len; i++)
1024             {
1025                 term_dict[prefix_len++] = 1;
1026                 term_dict[prefix_len++] = ord_buf[i];
1027             }
1028         }
1029         if (!prefix_len)
1030         {
1031 #if 1
1032             bases_ok++;
1033 #else
1034             char val_str[32];
1035             sprintf (val_str, "%d", use_value);
1036             errCode = 114;
1037             errString = nmem_strdup (stream, val_str);
1038 #endif
1039             continue;
1040         }
1041         bases_ok++; /* this has OK attributes */
1042
1043         term_dict[prefix_len++] = ')';
1044         term_dict[prefix_len++] = 1;
1045         term_dict[prefix_len++] = reg_type;
1046         logf (LOG_DEBUG, "reg_type = %d", term_dict[prefix_len-1]);
1047         term_dict[prefix_len] = '\0';
1048         j = prefix_len;
1049         switch (truncation_value)
1050         {
1051         case -1:         /* not specified */
1052         case 100:        /* do not truncate */
1053             if (!string_relation (zh, zapt, &termp, term_dict,
1054                                   attributeSet,
1055                                   reg_type, space_split, term_dst))
1056                 return 0;
1057             logf (LOG_DEBUG, "dict_lookup_grep: %s", term_dict+prefix_len);
1058             r = dict_lookup_grep (zh->reg->dict, term_dict, 0,
1059                                   grep_info, &max_pos, 0, grep_handle);
1060             if (r)
1061                 logf (LOG_WARN, "dict_lookup_grep fail %d", r);
1062             break;
1063         case 1:          /* right truncation */
1064             term_dict[j++] = '(';
1065             if (!term_100 (zh->reg->zebra_maps, reg_type,
1066                            &termp, term_dict + j, space_split, term_dst))
1067                 return 0;
1068             strcat (term_dict, ".*)");
1069             dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info,
1070                               &max_pos, 0, grep_handle);
1071             break;
1072         case 2:          /* keft truncation */
1073             term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1074             if (!term_100 (zh->reg->zebra_maps, reg_type,
1075                            &termp, term_dict + j, space_split, term_dst))
1076                 return 0;
1077             strcat (term_dict, ")");
1078             dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info,
1079                               &max_pos, 0, grep_handle);
1080             break;
1081         case 3:          /* left&right truncation */
1082             term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1083             if (!term_100 (zh->reg->zebra_maps, reg_type,
1084                            &termp, term_dict + j, space_split, term_dst))
1085                 return 0;
1086             strcat (term_dict, ".*)");
1087             dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info,
1088                               &max_pos, 0, grep_handle);
1089             break;
1090             zh->errCode = 120;
1091             return -1;
1092         case 101:        /* process # in term */
1093             term_dict[j++] = '(';
1094             if (!term_101 (zh->reg->zebra_maps, reg_type,
1095                            &termp, term_dict + j, space_split, term_dst))
1096                 return 0;
1097             strcat (term_dict, ")");
1098             r = dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info,
1099                                   &max_pos, 0, grep_handle);
1100             if (r)
1101                 logf (LOG_WARN, "dict_lookup_grep err, trunc=#: %d", r);
1102             break;
1103         case 102:        /* Regexp-1 */
1104             term_dict[j++] = '(';
1105             if (!term_102 (zh->reg->zebra_maps, reg_type,
1106                            &termp, term_dict + j, space_split, term_dst))
1107                 return 0;
1108             strcat (term_dict, ")");
1109             logf (LOG_DEBUG, "Regexp-1 tolerance=%d", r);
1110             r = dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info,
1111                                   &max_pos, 0, grep_handle);
1112             if (r)
1113                 logf (LOG_WARN, "dict_lookup_grep err, trunc=regular: %d",
1114                       r);
1115             break;
1116         case 103:       /* Regexp-2 */
1117             r = 1;
1118             term_dict[j++] = '(';
1119             if (!term_103 (zh->reg->zebra_maps, reg_type,
1120                            &termp, term_dict + j, &r, space_split, term_dst))
1121                 return 0;
1122             strcat (term_dict, ")");
1123             logf (LOG_DEBUG, "Regexp-2 tolerance=%d", r);
1124             r = dict_lookup_grep (zh->reg->dict, term_dict, r, grep_info,
1125                                   &max_pos, 2, grep_handle);
1126             if (r)
1127                 logf (LOG_WARN, "dict_lookup_grep err, trunc=eregular: %d",
1128                       r);
1129             break;
1130         case 104:        /* process # and ! in term */
1131             term_dict[j++] = '(';
1132             if (!term_104 (zh->reg->zebra_maps, reg_type,
1133                            &termp, term_dict + j, space_split, term_dst))
1134                 return 0;
1135             strcat (term_dict, ")");
1136             r = dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info,
1137                                   &max_pos, 0, grep_handle);
1138             if (r)
1139                 logf (LOG_WARN, "dict_lookup_grep err, trunc=#/!: %d", r);
1140             break;
1141         case 105:        /* process * and ! in term */
1142             term_dict[j++] = '(';
1143             if (!term_105 (zh->reg->zebra_maps, reg_type,
1144                            &termp, term_dict + j, space_split, term_dst, 1))
1145                 return 0;
1146             strcat (term_dict, ")");
1147             r = dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info,
1148                                   &max_pos, 0, grep_handle);
1149             if (r)
1150                 logf (LOG_WARN, "dict_lookup_grep err, trunc=*/!: %d", r);
1151             break;
1152         case 106:        /* process * and ! in term */
1153             term_dict[j++] = '(';
1154             if (!term_105 (zh->reg->zebra_maps, reg_type,
1155                            &termp, term_dict + j, space_split, term_dst, 0))
1156                 return 0;
1157             strcat (term_dict, ")");
1158             r = dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info,
1159                                   &max_pos, 0, grep_handle);
1160             if (r)
1161                 logf (LOG_WARN, "dict_lookup_grep err, trunc=*/!: %d", r);
1162             break;
1163         }
1164     }
1165     if (!bases_ok)
1166     {
1167         zh->errCode = errCode;
1168         zh->errString = errString;
1169         return -1;
1170     }
1171     *term_sub = termp;
1172     logf (LOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1173     return 1;
1174 }
1175
1176
1177 /* convert APT search term to UTF8 */
1178 static int zapt_term_to_utf8 (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1179                               char *termz)
1180 {
1181     size_t sizez;
1182     Z_Term *term = zapt->term;
1183
1184     switch (term->which)
1185     {
1186     case Z_Term_general:
1187         if (zh->iconv_to_utf8 != 0)
1188         {
1189             char *inbuf = term->u.general->buf;
1190             size_t inleft = term->u.general->len;
1191             char *outbuf = termz;
1192             size_t outleft = IT_MAX_WORD-1;
1193             size_t ret;
1194
1195             ret = yaz_iconv(zh->iconv_to_utf8, &inbuf, &inleft,
1196                         &outbuf, &outleft);
1197             if (ret == (size_t)(-1))
1198             {
1199                 ret = yaz_iconv(zh->iconv_to_utf8, 0, 0, 0, 0);
1200                 zh->errCode = 125;
1201                 return -1;
1202             }
1203             *outbuf = 0;
1204         }
1205         else
1206         {
1207             sizez = term->u.general->len;
1208             if (sizez > IT_MAX_WORD-1)
1209                 sizez = IT_MAX_WORD-1;
1210             memcpy (termz, term->u.general->buf, sizez);
1211             termz[sizez] = '\0';
1212         }
1213         break;
1214     case Z_Term_characterString:
1215         sizez = strlen(term->u.characterString);
1216         if (sizez > IT_MAX_WORD-1)
1217             sizez = IT_MAX_WORD-1;
1218         memcpy (termz, term->u.characterString, sizez);
1219         termz[sizez] = '\0';
1220         break;
1221     default:
1222         zh->errCode = 124;
1223         return -1;
1224     }
1225     return 0;
1226 }
1227
1228 /* convert APT SCAN term to internal cmap */
1229 static int trans_scan_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1230                             char *termz, int reg_type)
1231 {
1232     char termz0[IT_MAX_WORD];
1233
1234     if (zapt_term_to_utf8(zh, zapt, termz0))
1235         return -1;    /* error */
1236     else
1237     {
1238         const char **map;
1239         const char *cp = (const char *) termz0;
1240         const char *cp_end = cp + strlen(cp);
1241         const char *src;
1242         int i = 0;
1243         const char *space_map = NULL;
1244         int len;
1245             
1246         while ((len = (cp_end - cp)) > 0)
1247         {
1248             map = zebra_maps_input (zh->reg->zebra_maps, reg_type, &cp, len);
1249             if (**map == *CHR_SPACE)
1250                 space_map = *map;
1251             else
1252             {
1253                 if (i && space_map)
1254                     for (src = space_map; *src; src++)
1255                         termz[i++] = *src;
1256                 space_map = NULL;
1257                 for (src = *map; *src; src++)
1258                     termz[i++] = *src;
1259             }
1260         }
1261         termz[i] = '\0';
1262     }
1263     return 0;
1264 }
1265
1266 char *normalize_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1267                      const char *termz, NMEM stream, unsigned reg_id)
1268 {
1269     WRBUF wrbuf = 0;
1270     AttrType truncation;
1271     int truncation_value;
1272     char *ex_list = 0;
1273
1274     attr_init (&truncation, zapt, 5);
1275     truncation_value = attr_find (&truncation, NULL);
1276
1277     switch (truncation_value)
1278     {
1279     default:
1280         ex_list = "";
1281         break;
1282     case 101:
1283         ex_list = "#";
1284         break;
1285     case 102:
1286     case 103:
1287         ex_list = 0;
1288         break;
1289     case 104:
1290         ex_list = "!#";
1291         break;
1292     case 105:
1293         ex_list = "!*";
1294         break;
1295     }
1296     if (ex_list)
1297         wrbuf = zebra_replace(zh->reg->zebra_maps, reg_id, ex_list,
1298                               termz, strlen(termz));
1299     if (!wrbuf)
1300         return nmem_strdup(stream, termz);
1301     else
1302     {
1303         char *buf = (char*) nmem_malloc (stream, wrbuf_len(wrbuf)+1);
1304         memcpy (buf, wrbuf_buf(wrbuf), wrbuf_len(wrbuf));
1305         buf[wrbuf_len(wrbuf)] = '\0';
1306         return buf;
1307     }
1308 }
1309
1310 static void grep_info_delete (struct grep_info *grep_info)
1311 {
1312 #ifdef TERM_COUNT
1313     xfree(grep_info->term_no);
1314 #endif
1315     xfree (grep_info->isam_p_buf);
1316 }
1317
1318 static int grep_info_prepare (ZebraHandle zh,
1319                               Z_AttributesPlusTerm *zapt,
1320                               struct grep_info *grep_info,
1321                               int reg_type,
1322                               NMEM stream)
1323 {
1324     AttrType termset;
1325     int termset_value_numeric;
1326     const char *termset_value_string;
1327
1328 #ifdef TERM_COUNT
1329     grep_info->term_no = 0;
1330 #endif
1331     grep_info->isam_p_size = 0;
1332     grep_info->isam_p_buf = NULL;
1333     grep_info->zh = zh;
1334     grep_info->reg_type = reg_type;
1335     grep_info->termset = 0;
1336
1337     if (!zapt)
1338         return 0;
1339     attr_init (&termset, zapt, 8);
1340     termset_value_numeric =
1341         attr_find_ex (&termset, NULL, &termset_value_string);
1342     if (termset_value_numeric != -1)
1343     {
1344         char resname[32];
1345         const char *termset_name = 0;
1346         if (termset_value_numeric != -2)
1347         {
1348     
1349             sprintf (resname, "%d", termset_value_numeric);
1350             termset_name = resname;
1351         }
1352         else
1353             termset_name = termset_value_string;
1354         logf (LOG_LOG, "creating termset set %s", termset_name);
1355         grep_info->termset = resultSetAdd (zh, termset_name, 1);
1356         if (!grep_info->termset)
1357         {
1358             zh->errCode = 128;
1359             zh->errString = nmem_strdup (stream, termset_name);
1360             return -1;
1361         }
1362     }
1363     return 0;
1364 }
1365                                
1366
1367 static RSET rpn_search_APT_phrase (ZebraHandle zh,
1368                                    Z_AttributesPlusTerm *zapt,
1369                                    const char *termz_org,
1370                                    oid_value attributeSet,
1371                                    NMEM stream,
1372                                    int reg_type, int complete_flag,
1373                                    const char *rank_type, int xpath_use,
1374                                    int num_bases, char **basenames, 
1375                                    NMEM rset_nmem)
1376 {
1377     char term_dst[IT_MAX_WORD+1];
1378     RSET rset[60], result;
1379     int rset_no = 0;
1380     struct grep_info grep_info;
1381     char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type);
1382     const char *termp = termz;
1383
1384     *term_dst = 0;
1385     if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream))
1386         return 0;
1387     while (1)
1388     { 
1389         logf (LOG_DEBUG, "APT_phrase termp=%s", termp);
1390         rset[rset_no] = term_trunc (zh, zapt, &termp, attributeSet,
1391                                     stream, &grep_info,
1392                                     reg_type, complete_flag,
1393                                     num_bases, basenames,
1394                                     term_dst, rank_type,
1395                                     xpath_use,rset_nmem);
1396         if (!rset[rset_no])
1397             break;
1398         if (++rset_no >= (int) (sizeof(rset)/sizeof(*rset)))
1399             break;
1400     }
1401     grep_info_delete (&grep_info);
1402     if (rset_no == 0)
1403         return rsnull_create (rset_nmem,key_it_ctrl); 
1404     else if (rset_no == 1)
1405         return (rset[0]);
1406     else
1407         result = rsprox_create( rset_nmem, key_it_ctrl, key_it_ctrl->scope,
1408                        rset_no, rset,
1409                        1 /* ordered */, 0 /* exclusion */,
1410                        3 /* relation */, 1 /* distance */);
1411     return result;
1412 }
1413
1414 static RSET rpn_search_APT_or_list (ZebraHandle zh,
1415                                     Z_AttributesPlusTerm *zapt,
1416                                     const char *termz_org,
1417                                     oid_value attributeSet,
1418                                     NMEM stream,
1419                                     int reg_type, int complete_flag,
1420                                     const char *rank_type,
1421                                     int xpath_use,
1422                                     int num_bases, char **basenames,
1423                                     NMEM rset_nmem)
1424 {
1425     char term_dst[IT_MAX_WORD+1];
1426     RSET rset[60];
1427     int rset_no = 0;
1428     struct grep_info grep_info;
1429     char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type);
1430     const char *termp = termz;
1431
1432     if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream))
1433         return 0;
1434     while (1)
1435     { 
1436         logf (LOG_DEBUG, "APT_or_list termp=%s", termp);
1437         rset[rset_no] = term_trunc (zh, zapt, &termp, attributeSet,
1438                                     stream, &grep_info,
1439                                     reg_type, complete_flag,
1440                                     num_bases, basenames,
1441                                     term_dst, rank_type,
1442                                     xpath_use,rset_nmem);
1443         if (!rset[rset_no])
1444             break;
1445         if (++rset_no >= (int) (sizeof(rset)/sizeof(*rset)))
1446             break;
1447     }
1448     grep_info_delete (&grep_info);
1449     if (rset_no == 0)
1450         return rsnull_create (rset_nmem,key_it_ctrl);  
1451     return rsmultior_create(rset_nmem, key_it_ctrl,key_it_ctrl->scope,
1452                             rset_no, rset);
1453 }
1454
1455 static RSET rpn_search_APT_and_list (ZebraHandle zh,
1456                                      Z_AttributesPlusTerm *zapt,
1457                                      const char *termz_org,
1458                                      oid_value attributeSet,
1459                                      NMEM stream,
1460                                      int reg_type, int complete_flag,
1461                                      const char *rank_type, 
1462                                      int xpath_use,
1463                                      int num_bases, char **basenames,
1464                                      NMEM rset_nmem)
1465 {
1466     char term_dst[IT_MAX_WORD+1];
1467     RSET rset[60], result;
1468     int i, rset_no = 0;
1469     struct grep_info grep_info;
1470     char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type);
1471     const char *termp = termz;
1472
1473     if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream))
1474         return 0;
1475     while (1)
1476     { 
1477         logf (LOG_DEBUG, "APT_and_list termp=%s", termp);
1478         rset[rset_no] = term_trunc (zh, zapt, &termp, attributeSet,
1479                                     stream, &grep_info,
1480                                     reg_type, complete_flag,
1481                                     num_bases, basenames,
1482                                     term_dst, rank_type,
1483                                     xpath_use, rset_nmem);
1484         if (!rset[rset_no])
1485             break;
1486         assert (rset[rset_no]);
1487         if (++rset_no >= (int) (sizeof(rset)/sizeof(*rset)))
1488             break;
1489     }
1490     grep_info_delete (&grep_info);
1491     if (rset_no == 0)
1492         return rsnull_create (rset_nmem,key_it_ctrl); 
1493     result = rset[0];
1494     /* FIXME - Use a proper rsmultiand */
1495     for (i = 1; i<rset_no; i++)
1496         result= rsbool_create_and(rset_nmem,key_it_ctrl,key_it_ctrl->scope,
1497                 result, rset[i] );
1498     return result;
1499 }
1500
1501 static int numeric_relation (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1502                              const char **term_sub,
1503                              char *term_dict,
1504                              oid_value attributeSet,
1505                              struct grep_info *grep_info,
1506                              int *max_pos,
1507                              int reg_type,
1508                              char *term_dst)
1509 {
1510     AttrType relation;
1511     int relation_value;
1512     int term_value;
1513     int r;
1514     char *term_tmp = term_dict + strlen(term_dict);
1515
1516     attr_init (&relation, zapt, 2);
1517     relation_value = attr_find (&relation, NULL);
1518
1519     logf (LOG_DEBUG, "numeric relation value=%d", relation_value);
1520
1521     if (!term_100 (zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1522                    term_dst))
1523         return 0;
1524     term_value = atoi (term_tmp);
1525     switch (relation_value)
1526     {
1527     case 1:
1528         logf (LOG_DEBUG, "Relation <");
1529         gen_regular_rel (term_tmp, term_value-1, 1);
1530         break;
1531     case 2:
1532         logf (LOG_DEBUG, "Relation <=");
1533         gen_regular_rel (term_tmp, term_value, 1);
1534         break;
1535     case 4:
1536         logf (LOG_DEBUG, "Relation >=");
1537         gen_regular_rel (term_tmp, term_value, 0);
1538         break;
1539     case 5:
1540         logf (LOG_DEBUG, "Relation >");
1541         gen_regular_rel (term_tmp, term_value+1, 0);
1542         break;
1543     case 3:
1544     default:
1545         logf (LOG_DEBUG, "Relation =");
1546         sprintf (term_tmp, "(0*%d)", term_value);
1547     }
1548     logf (LOG_DEBUG, "dict_lookup_grep: %s", term_tmp);
1549     r = dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info, max_pos,
1550                           0, grep_handle);
1551     if (r)
1552         logf (LOG_WARN, "dict_lookup_grep fail, rel=gt: %d", r);
1553     logf (LOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1554     return 1;
1555 }
1556
1557 static int numeric_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1558                          const char **term_sub, 
1559                          oid_value attributeSet, struct grep_info *grep_info,
1560                          int reg_type, int complete_flag,
1561                          int num_bases, char **basenames,
1562                          char *term_dst, int xpath_use, NMEM stream)
1563 {
1564     char term_dict[2*IT_MAX_WORD+2];
1565     int r, base_no;
1566     AttrType use;
1567     int use_value;
1568     const char *use_string = 0;
1569     oid_value curAttributeSet = attributeSet;
1570     const char *termp;
1571     struct rpn_char_map_info rcmi;
1572
1573     int bases_ok = 0;     /* no of databases with OK attribute */
1574     int errCode = 0;      /* err code (if any is not OK) */
1575     char *errString = 0;  /* addinfo */
1576
1577     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1578     attr_init (&use, zapt, 1);
1579     use_value = attr_find_ex (&use, &curAttributeSet, &use_string);
1580
1581     if (use_value == -1)
1582         use_value = 1016;
1583
1584     for (base_no = 0; base_no < num_bases; base_no++)
1585     {
1586         attent attp;
1587         data1_local_attribute id_xpath_attr;
1588         data1_local_attribute *local_attr;
1589         int max_pos, prefix_len = 0;
1590
1591         termp = *term_sub;
1592         if (use_value == -2)  /* string attribute (assume IDXPATH/any) */
1593         {
1594             use_value = xpath_use;
1595             attp.local_attributes = &id_xpath_attr;
1596             attp.attset_ordinal = VAL_IDXPATH;
1597             id_xpath_attr.next = 0;
1598             id_xpath_attr.local = use_value;
1599         }
1600         else if (curAttributeSet == VAL_IDXPATH)
1601         {
1602             attp.local_attributes = &id_xpath_attr;
1603             attp.attset_ordinal = VAL_IDXPATH;
1604             id_xpath_attr.next = 0;
1605             id_xpath_attr.local = use_value;
1606         }
1607         else
1608         {
1609             if ((r=att_getentbyatt (zh, &attp, curAttributeSet, use_value,
1610                                             use_string)))
1611             {
1612                 logf (LOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
1613                       curAttributeSet, use_value, r);
1614                 if (r == -1)
1615                 {
1616                     char val_str[32];
1617                     sprintf (val_str, "%d", use_value);
1618                     errString = nmem_strdup (stream, val_str);
1619                     errCode = 114;
1620                 }
1621                 else
1622                     errCode = 121;
1623                 continue;
1624             }
1625         }
1626         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1627         {
1628             zh->errCode = 109; /* Database unavailable */
1629             zh->errString = basenames[base_no];
1630             return -1;
1631         }
1632         for (local_attr = attp.local_attributes; local_attr;
1633              local_attr = local_attr->next)
1634         {
1635             int ord;
1636             char ord_buf[32];
1637             int i, ord_len;
1638
1639             ord = zebraExplain_lookupSU (zh->reg->zei, attp.attset_ordinal,
1640                                           local_attr->local);
1641             if (ord < 0)
1642                 continue;
1643             if (prefix_len)
1644                 term_dict[prefix_len++] = '|';
1645             else
1646                 term_dict[prefix_len++] = '(';
1647
1648             ord_len = key_SU_encode (ord, ord_buf);
1649             for (i = 0; i<ord_len; i++)
1650             {
1651                 term_dict[prefix_len++] = 1;
1652                 term_dict[prefix_len++] = ord_buf[i];
1653             }
1654         }
1655         if (!prefix_len)
1656         {
1657             char val_str[32];
1658             sprintf (val_str, "%d", use_value);
1659             errCode = 114;
1660             errString = nmem_strdup (stream, val_str);
1661             continue;
1662         }
1663         bases_ok++;
1664         term_dict[prefix_len++] = ')';        
1665         term_dict[prefix_len++] = 1;
1666         term_dict[prefix_len++] = reg_type;
1667         logf (LOG_DEBUG, "reg_type = %d", term_dict[prefix_len-1]);
1668         term_dict[prefix_len] = '\0';
1669         if (!numeric_relation (zh, zapt, &termp, term_dict,
1670                                attributeSet, grep_info, &max_pos, reg_type,
1671                                term_dst))
1672             return 0;
1673     }
1674     if (!bases_ok)
1675     {
1676         zh->errCode = errCode;
1677         zh->errString = errString;
1678         return -1;
1679     }
1680     *term_sub = termp;
1681     logf (LOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1682     return 1;
1683 }
1684
1685 static RSET rpn_search_APT_numeric (ZebraHandle zh,
1686                                     Z_AttributesPlusTerm *zapt,
1687                                     const char *termz,
1688                                     oid_value attributeSet,
1689                                     NMEM stream,
1690                                     int reg_type, int complete_flag,
1691                                     const char *rank_type, int xpath_use,
1692                                     int num_bases, char **basenames,
1693                                     NMEM rset_nmem)
1694 {
1695     char term_dst[IT_MAX_WORD+1];
1696     const char *termp = termz;
1697     RSET rset[60], result;
1698     int i, r, rset_no = 0;
1699     struct grep_info grep_info;
1700
1701     if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream))
1702         return 0;
1703     while (1)
1704     { 
1705         logf (LOG_DEBUG, "APT_numeric termp=%s", termp);
1706         grep_info.isam_p_indx = 0;
1707         r = numeric_term (zh, zapt, &termp, attributeSet, &grep_info,
1708                           reg_type, complete_flag, num_bases, basenames,
1709                           term_dst, xpath_use,
1710                           stream);
1711         if (r < 1)
1712             break;
1713         logf (LOG_DEBUG, "term: %s", term_dst);
1714         rset[rset_no] = rset_trunc (zh, grep_info.isam_p_buf,
1715                                     grep_info.isam_p_indx, term_dst,
1716                                     strlen(term_dst), rank_type,
1717                                     0 /* preserve position */,
1718                                     zapt->term->which, rset_nmem, 
1719                                     key_it_ctrl,key_it_ctrl->scope);
1720         assert (rset[rset_no]);
1721         if (++rset_no >= (int) (sizeof(rset)/sizeof(*rset)))
1722             break;
1723     }
1724     grep_info_delete (&grep_info);
1725     if (rset_no == 0)
1726         return rsnull_create (rset_nmem,key_it_ctrl);
1727     result = rset[0];
1728     for (i = 1; i<rset_no; i++)
1729     {
1730         /* FIXME - Use a proper multi-and */
1731         result= rsbool_create_and(rset_nmem,key_it_ctrl,key_it_ctrl->scope,
1732                 result, rset[i] );
1733     }
1734     return result;
1735 }
1736
1737 static RSET rpn_search_APT_local (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1738                                   const char *termz,
1739                                   oid_value attributeSet,
1740                                   NMEM stream,
1741                                   const char *rank_type, NMEM rset_nmem)
1742 {
1743     RSET result;
1744     RSFD rsfd;
1745     struct it_key key;
1746     int sys;
1747     /*
1748     rset_temp_parms parms;
1749
1750     parms.cmp = key_compare_it;
1751     parms.key_size = sizeof (struct it_key);
1752     parms.temp_path = res_get (zh->res, "setTmpDir");
1753     result = rset_create (rset_kind_temp, &parms);
1754     */
1755     result = rstemp_create( rset_nmem,key_it_ctrl,key_it_ctrl->scope,
1756                      res_get (zh->res, "setTmpDir") );
1757     rsfd = rset_open (result, RSETF_WRITE);
1758
1759     sys = atoi(termz);
1760     if (sys <= 0)
1761         sys = 1;
1762 #if IT_KEY_NEW
1763     key.mem[0] = sys;
1764     key.mem[1] = 1;
1765     key.len = 2;
1766 #else
1767     key.sysno = sys;
1768     key.seqno = 1;
1769     if (key.sysno <= 0)
1770         key.sysno = 1;
1771 #endif
1772     rset_write (rsfd, &key);
1773     rset_close (rsfd);
1774     return result;
1775 }
1776
1777 static RSET rpn_sort_spec (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1778                            oid_value attributeSet, NMEM stream,
1779                            Z_SortKeySpecList *sort_sequence,
1780                            const char *rank_type)
1781 {
1782     int i;
1783     int sort_relation_value;
1784     AttrType sort_relation_type;
1785     int use_value;
1786     AttrType use_type;
1787     Z_SortKeySpec *sks;
1788     Z_SortKey *sk;
1789     Z_AttributeElement *ae;
1790     int oid[OID_SIZE];
1791     oident oe;
1792     char termz[20];
1793     
1794     attr_init (&sort_relation_type, zapt, 7);
1795     sort_relation_value = attr_find (&sort_relation_type, &attributeSet);
1796
1797     attr_init (&use_type, zapt, 1);
1798     use_value = attr_find (&use_type, &attributeSet);
1799
1800     if (!sort_sequence->specs)
1801     {
1802         sort_sequence->num_specs = 10;
1803         sort_sequence->specs = (Z_SortKeySpec **)
1804             nmem_malloc (stream, sort_sequence->num_specs *
1805                          sizeof(*sort_sequence->specs));
1806         for (i = 0; i<sort_sequence->num_specs; i++)
1807             sort_sequence->specs[i] = 0;
1808     }
1809     if (zapt->term->which != Z_Term_general)
1810         i = 0;
1811     else
1812         i = atoi_n ((char *) zapt->term->u.general->buf,
1813                     zapt->term->u.general->len);
1814     if (i >= sort_sequence->num_specs)
1815         i = 0;
1816     sprintf (termz, "%d", i);
1817
1818     oe.proto = PROTO_Z3950;
1819     oe.oclass = CLASS_ATTSET;
1820     oe.value = attributeSet;
1821     if (!oid_ent_to_oid (&oe, oid))
1822         return 0;
1823
1824     sks = (Z_SortKeySpec *) nmem_malloc (stream, sizeof(*sks));
1825     sks->sortElement = (Z_SortElement *)
1826         nmem_malloc (stream, sizeof(*sks->sortElement));
1827     sks->sortElement->which = Z_SortElement_generic;
1828     sk = sks->sortElement->u.generic = (Z_SortKey *)
1829         nmem_malloc (stream, sizeof(*sk));
1830     sk->which = Z_SortKey_sortAttributes;
1831     sk->u.sortAttributes = (Z_SortAttributes *)
1832         nmem_malloc (stream, sizeof(*sk->u.sortAttributes));
1833
1834     sk->u.sortAttributes->id = oid;
1835     sk->u.sortAttributes->list = (Z_AttributeList *)
1836         nmem_malloc (stream, sizeof(*sk->u.sortAttributes->list));
1837     sk->u.sortAttributes->list->num_attributes = 1;
1838     sk->u.sortAttributes->list->attributes = (Z_AttributeElement **)
1839         nmem_malloc (stream, sizeof(*sk->u.sortAttributes->list->attributes));
1840     ae = *sk->u.sortAttributes->list->attributes = (Z_AttributeElement *)
1841         nmem_malloc (stream, sizeof(**sk->u.sortAttributes->list->attributes));
1842     ae->attributeSet = 0;
1843     ae->attributeType = (int *)
1844         nmem_malloc (stream, sizeof(*ae->attributeType));
1845     *ae->attributeType = 1;
1846     ae->which = Z_AttributeValue_numeric;
1847     ae->value.numeric = (int *)
1848         nmem_malloc (stream, sizeof(*ae->value.numeric));
1849     *ae->value.numeric = use_value;
1850
1851     sks->sortRelation = (int *)
1852         nmem_malloc (stream, sizeof(*sks->sortRelation));
1853     if (sort_relation_value == 1)
1854         *sks->sortRelation = Z_SortKeySpec_ascending;
1855     else if (sort_relation_value == 2)
1856         *sks->sortRelation = Z_SortKeySpec_descending;
1857     else 
1858         *sks->sortRelation = Z_SortKeySpec_ascending;
1859
1860     sks->caseSensitivity = (int *)
1861         nmem_malloc (stream, sizeof(*sks->caseSensitivity));
1862     *sks->caseSensitivity = 0;
1863
1864     sks->which = Z_SortKeySpec_null;
1865     sks->u.null = odr_nullval ();
1866     sort_sequence->specs[i] = sks;
1867     return rsnull_create (NULL,key_it_ctrl);
1868         /* FIXME - nmem?? */
1869 }
1870
1871
1872 static int parse_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1873                        oid_value attributeSet,
1874                        struct xpath_location_step *xpath, int max, NMEM mem)
1875 {
1876     oid_value curAttributeSet = attributeSet;
1877     AttrType use;
1878     const char *use_string = 0;
1879     
1880     attr_init (&use, zapt, 1);
1881     attr_find_ex (&use, &curAttributeSet, &use_string);
1882
1883     if (!use_string || *use_string != '/')
1884         return -1;
1885
1886     return zebra_parse_xpath_str(use_string, xpath, max, mem);
1887 }
1888  
1889                
1890
1891 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
1892                         int reg_type, const char *term, int use,
1893                         oid_value curAttributeSet, NMEM rset_nmem)
1894 {
1895     RSET rset;
1896     struct grep_info grep_info;
1897     char term_dict[2048];
1898     char ord_buf[32];
1899     int prefix_len = 0;
1900     int ord = zebraExplain_lookupSU (zh->reg->zei, curAttributeSet, use);
1901     int ord_len, i, r, max_pos;
1902     int term_type = Z_Term_characterString;
1903     const char *flags = "void";
1904
1905     if (grep_info_prepare (zh, 0 /* zapt */, &grep_info, '0', stream))
1906         return rsnull_create (rset_nmem,key_it_ctrl);
1907
1908     if (ord < 0)
1909         return rsnull_create (rset_nmem,key_it_ctrl);
1910     if (prefix_len)
1911         term_dict[prefix_len++] = '|';
1912     else
1913         term_dict[prefix_len++] = '(';
1914     
1915     ord_len = key_SU_encode (ord, ord_buf);
1916     for (i = 0; i<ord_len; i++)
1917     {
1918         term_dict[prefix_len++] = 1;
1919         term_dict[prefix_len++] = ord_buf[i];
1920     }
1921     term_dict[prefix_len++] = ')';
1922     term_dict[prefix_len++] = 1;
1923     term_dict[prefix_len++] = reg_type;
1924     
1925     strcpy (term_dict+prefix_len, term);
1926     
1927     grep_info.isam_p_indx = 0;
1928     r = dict_lookup_grep (zh->reg->dict, term_dict, 0,
1929                           &grep_info, &max_pos, 0, grep_handle);
1930     yaz_log (LOG_LOG, "%s %d positions", term,
1931              grep_info.isam_p_indx);
1932     rset = rset_trunc (zh, grep_info.isam_p_buf,
1933                        grep_info.isam_p_indx, term, strlen(term),
1934                        flags, 1, term_type,rset_nmem,
1935                        key_it_ctrl, key_it_ctrl->scope);
1936     grep_info_delete (&grep_info);
1937     return rset;
1938 }
1939
1940 static RSET rpn_search_xpath (ZebraHandle zh,
1941                               oid_value attributeSet,
1942                               int num_bases, char **basenames,
1943                               NMEM stream, const char *rank_type, RSET rset,
1944                               int xpath_len, struct xpath_location_step *xpath,
1945                               NMEM rset_nmem)
1946 {
1947     oid_value curAttributeSet = attributeSet;
1948     int base_no;
1949     int i;
1950
1951     if (xpath_len < 0)
1952         return rset;
1953
1954     yaz_log (LOG_LOG, "len=%d", xpath_len);
1955     for (i = 0; i<xpath_len; i++)
1956     {
1957         yaz_log (LOG_LOG, "XPATH %d %s", i, xpath[i].part);
1958
1959     }
1960
1961     curAttributeSet = VAL_IDXPATH;
1962
1963     /*
1964       //a    ->    a/.*
1965       //a/b  ->    b/a/.*
1966       /a     ->    a/
1967       /a/b   ->    b/a/
1968
1969       /      ->    none
1970
1971    a[@attr=value]/b[@other=othervalue]
1972
1973  /e/@a val      range(e/,range(@a,freetext(w,1015,val),@a),e/)
1974  /a/b val       range(b/a/,freetext(w,1016,val),b/a/)
1975  /a/b/@c val    range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
1976  /a/b[@c=y] val range(b/a/,freetext(w,1016,val),b/a/,@c=y)
1977  /a[@c=y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c=y)
1978  /a[@c=x]/b[@c=y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c=y),a/,@c=x)
1979       
1980     */
1981
1982     dict_grep_cmap (zh->reg->dict, 0, 0);
1983
1984     for (base_no = 0; base_no < num_bases; base_no++)
1985     {
1986         int level = xpath_len;
1987         int first_path = 1;
1988         
1989         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1990         {
1991             zh->errCode = 109; /* Database unavailable */
1992             zh->errString = basenames[base_no];
1993             return rset;
1994         }
1995         while (--level >= 0)
1996         {
1997             char xpath_rev[128];
1998             int i, len;
1999             RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
2000
2001             *xpath_rev = 0;
2002             len = 0;
2003             for (i = level; i >= 1; --i)
2004             {
2005                 const char *cp = xpath[i].part;
2006                 if (*cp)
2007                 {
2008                     for (;*cp; cp++)
2009                         if (*cp == '*')
2010                         {
2011                             memcpy (xpath_rev + len, "[^/]*", 5);
2012                             len += 5;
2013                         }
2014                         else if (*cp == ' ')
2015                         {
2016
2017                             xpath_rev[len++] = 1;
2018                             xpath_rev[len++] = ' ';
2019                         }
2020
2021                         else
2022                             xpath_rev[len++] = *cp;
2023                     xpath_rev[len++] = '/';
2024                 }
2025                 else if (i == 1)  /* // case */
2026                 {
2027                     xpath_rev[len++] = '.';
2028                     xpath_rev[len++] = '*';
2029                 }
2030             }
2031             xpath_rev[len] = 0;
2032
2033             if (xpath[level].predicate &&
2034                 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2035                 xpath[level].predicate->u.relation.name[0])
2036             {
2037                 WRBUF wbuf = wrbuf_alloc();
2038                 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2039                 if (xpath[level].predicate->u.relation.value)
2040                 {
2041                     const char *cp = xpath[level].predicate->u.relation.value;
2042                     wrbuf_putc(wbuf, '=');
2043                     
2044                     while (*cp)
2045                     {
2046                         if (strchr(REGEX_CHARS, *cp))
2047                             wrbuf_putc(wbuf, '\\');
2048                         wrbuf_putc(wbuf, *cp);
2049                         cp++;
2050                     }
2051                 }
2052                 wrbuf_puts(wbuf, "");
2053                 rset_attr = xpath_trunc (
2054                     zh, stream, '0', wrbuf_buf(wbuf), 3, 
2055                     curAttributeSet,rset_nmem);
2056                 wrbuf_free(wbuf, 1);
2057             } 
2058             else 
2059             {
2060                 if (!first_path)
2061                     continue;
2062             }
2063             yaz_log (LOG_LOG, "xpath_rev (%d) = %s", level, xpath_rev);
2064             if (strlen(xpath_rev))
2065             {
2066                 rset_start_tag = xpath_trunc(zh, stream, '0', 
2067                         xpath_rev, 1, curAttributeSet, rset_nmem);
2068             
2069                 rset_end_tag = xpath_trunc(zh, stream, '0', 
2070                         xpath_rev, 2, curAttributeSet, rset_nmem);
2071
2072                 /*
2073                 parms.key_size = sizeof(struct it_key);
2074                 parms.cmp = key_compare_it;
2075                 parms.rset_l = rset_start_tag;
2076                 parms.rset_m = rset;
2077                 parms.rset_r = rset_end_tag;
2078                 parms.rset_attr = rset_attr;
2079                 parms.printer = key_print_it;
2080                 rset = rset_create (rset_kind_between, &parms);
2081                 */
2082                 rset=rsbetween_create( rset_nmem,key_it_ctrl,key_it_ctrl->scope,
2083                         rset_start_tag, rset, rset_end_tag, rset_attr);
2084             }
2085             first_path = 0;
2086         }
2087     }
2088
2089     return rset;
2090 }
2091
2092
2093
2094 static RSET rpn_search_APT (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2095                             oid_value attributeSet, NMEM stream,
2096                             Z_SortKeySpecList *sort_sequence,
2097                             int num_bases, char **basenames, 
2098                             NMEM rset_nmem)
2099 {
2100     unsigned reg_id;
2101     char *search_type = NULL;
2102     char rank_type[128];
2103     int complete_flag;
2104     int sort_flag;
2105     char termz[IT_MAX_WORD+1];
2106     RSET rset = 0;
2107     int xpath_len;
2108     int xpath_use = 0;
2109     struct xpath_location_step xpath[10];
2110
2111     zebra_maps_attr (zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2112                      rank_type, &complete_flag, &sort_flag);
2113     
2114     logf (LOG_DEBUG, "reg_id=%c", reg_id);
2115     logf (LOG_DEBUG, "complete_flag=%d", complete_flag);
2116     logf (LOG_DEBUG, "search_type=%s", search_type);
2117     logf (LOG_DEBUG, "rank_type=%s", rank_type);
2118
2119     if (zapt_term_to_utf8(zh, zapt, termz))
2120         return 0;
2121
2122     if (sort_flag)
2123         return rpn_sort_spec (zh, zapt, attributeSet, stream, sort_sequence,
2124                               rank_type);
2125     xpath_len = parse_xpath(zh, zapt, attributeSet, xpath, 10, stream);
2126     if (xpath_len >= 0)
2127     {
2128         xpath_use = 1016;
2129         if (xpath[xpath_len-1].part[0] == '@')
2130             xpath_use = 1015;
2131     }
2132
2133     if (!strcmp (search_type, "phrase"))
2134     {
2135         rset = rpn_search_APT_phrase (zh, zapt, termz, attributeSet, stream,
2136                                       reg_id, complete_flag, rank_type,
2137                                       xpath_use,
2138                                       num_bases, basenames, rset_nmem);
2139     }
2140     else if (!strcmp (search_type, "and-list"))
2141     {
2142         rset = rpn_search_APT_and_list (zh, zapt, termz, attributeSet, stream,
2143                                         reg_id, complete_flag, rank_type,
2144                                         xpath_use,
2145                                         num_bases, basenames, rset_nmem);
2146     }
2147     else if (!strcmp (search_type, "or-list"))
2148     {
2149         rset = rpn_search_APT_or_list (zh, zapt, termz, attributeSet, stream,
2150                                        reg_id, complete_flag, rank_type,
2151                                        xpath_use,
2152                                        num_bases, basenames, rset_nmem);
2153     }
2154     else if (!strcmp (search_type, "local"))
2155     {
2156         rset = rpn_search_APT_local (zh, zapt, termz, attributeSet, stream,
2157                                      rank_type, rset_nmem);
2158     }
2159     else if (!strcmp (search_type, "numeric"))
2160     {
2161         rset = rpn_search_APT_numeric (zh, zapt, termz, attributeSet, stream,
2162                                        reg_id, complete_flag, rank_type,
2163                                        xpath_use,
2164                                        num_bases, basenames, rset_nmem);
2165     }
2166     else if (!strcmp (search_type, "always"))
2167     {
2168         rset = 0;
2169     }
2170     else
2171         zh->errCode = 118;
2172     return rpn_search_xpath (zh, attributeSet, num_bases, basenames,
2173                              stream, rank_type, rset, 
2174                              xpath_len, xpath, rset_nmem);
2175 }
2176
2177 static RSET rpn_search_structure (ZebraHandle zh, Z_RPNStructure *zs,
2178                                   oid_value attributeSet, 
2179                                   NMEM stream, NMEM rset_nmem,
2180                                   Z_SortKeySpecList *sort_sequence,
2181                                   int num_bases, char **basenames)
2182 {
2183     RSET r = NULL;
2184     if (zs->which == Z_RPNStructure_complex)
2185     {
2186         Z_Operator *zop = zs->u.complex->roperator;
2187         RSET rset_l;
2188         RSET rset_r;
2189
2190         rset_l = rpn_search_structure (zh, zs->u.complex->s1,
2191                                        attributeSet, stream, rset_nmem,
2192                                        sort_sequence,
2193                                        num_bases, basenames);
2194         if (rset_l == NULL)
2195             return NULL;
2196         rset_r = rpn_search_structure (zh, zs->u.complex->s2,
2197                                        attributeSet, stream, rset_nmem,
2198                                        sort_sequence,
2199                                        num_bases, basenames);
2200         if (rset_r == NULL)
2201         {
2202             rset_delete (rset_l);
2203             return NULL;
2204         }
2205
2206         switch (zop->which)
2207         {
2208         case Z_Operator_and:
2209             r = rsbool_create_and(rset_nmem,key_it_ctrl, key_it_ctrl->scope,
2210                     rset_l,rset_r );
2211             break;
2212         case Z_Operator_or:
2213             r = rsbool_create_or(rset_nmem,key_it_ctrl, key_it_ctrl->scope,
2214                     rset_l,rset_r );
2215             break;
2216         case Z_Operator_and_not:
2217             r = rsbool_create_not(rset_nmem,key_it_ctrl, key_it_ctrl->scope,
2218                     rset_l,rset_r );
2219             break;
2220         case Z_Operator_prox:
2221             if (zop->u.prox->which != Z_ProximityOperator_known)
2222             {
2223                 zh->errCode = 132;
2224                 return NULL;
2225             }
2226             if (*zop->u.prox->u.known != Z_ProxUnit_word)
2227             {
2228                 char *val = (char *) nmem_malloc (stream, 16);
2229                 zh->errCode = 132;
2230                 zh->errString = val;
2231                 sprintf (val, "%d", *zop->u.prox->u.known);
2232                 return NULL;
2233             }
2234             else
2235             {
2236                 /* new / old prox */
2237                 RSET twosets[2];
2238                 
2239                 twosets[0] = rset_l;
2240                 twosets[1] = rset_r;
2241                 r=rsprox_create(rset_nmem,key_it_ctrl,key_it_ctrl->scope,
2242                          2, twosets, 
2243                          *zop->u.prox->ordered,
2244                          (!zop->u.prox->exclusion ? 
2245                               0 : *zop->u.prox->exclusion),
2246                          *zop->u.prox->relationType,
2247                          *zop->u.prox->distance );
2248             }
2249             break;
2250         default:
2251             zh->errCode = 110;
2252             return NULL;
2253         }
2254     }
2255     else if (zs->which == Z_RPNStructure_simple)
2256     {
2257         if (zs->u.simple->which == Z_Operand_APT)
2258         {
2259             logf (LOG_DEBUG, "rpn_search_APT");
2260             r = rpn_search_APT (zh, zs->u.simple->u.attributesPlusTerm,
2261                                 attributeSet, stream, sort_sequence,
2262                                 num_bases, basenames,rset_nmem);
2263         }
2264         else if (zs->u.simple->which == Z_Operand_resultSetId)
2265         {
2266             logf (LOG_DEBUG, "rpn_search_ref");
2267             r = resultSetRef (zh, zs->u.simple->u.resultSetId);
2268             if (!r)
2269             {
2270                 r = rsnull_create (rset_nmem,key_it_ctrl);
2271                 zh->errCode = 30;
2272                 zh->errString =
2273                     nmem_strdup (stream, zs->u.simple->u.resultSetId);
2274                 return 0;
2275             }
2276             else
2277                 rset_dup(r);
2278         }
2279         else
2280         {
2281             zh->errCode = 3;
2282             return 0;
2283         }
2284     }
2285     else
2286     {
2287         zh->errCode = 3;
2288         return 0;
2289     }
2290     return r;
2291 }
2292
2293
2294 RSET rpn_search (ZebraHandle zh, NMEM nmem, NMEM rset_nmem,
2295                  Z_RPNQuery *rpn, int num_bases, char **basenames, 
2296                  const char *setname,
2297                  ZebraSet sset)
2298 {
2299     RSET rset;
2300     oident *attrset;
2301     oid_value attributeSet;
2302     Z_SortKeySpecList *sort_sequence;
2303     int sort_status, i;
2304
2305     zh->errCode = 0;
2306     zh->errString = NULL;
2307     zh->hits = 0;
2308
2309     sort_sequence = (Z_SortKeySpecList *)
2310         nmem_malloc (nmem, sizeof(*sort_sequence));
2311     sort_sequence->num_specs = 10;
2312     sort_sequence->specs = (Z_SortKeySpec **)
2313         nmem_malloc (nmem, sort_sequence->num_specs *
2314                      sizeof(*sort_sequence->specs));
2315     for (i = 0; i<sort_sequence->num_specs; i++)
2316         sort_sequence->specs[i] = 0;
2317     
2318     attrset = oid_getentbyoid (rpn->attributeSetId);
2319     attributeSet = attrset->value;
2320     rset = rpn_search_structure (zh, rpn->RPNStructure, attributeSet,
2321                                  nmem, rset_nmem,
2322                                  sort_sequence, num_bases, basenames);
2323     if (!rset)
2324         return 0;
2325
2326     if (zh->errCode)
2327         logf (LOG_DEBUG, "search error: %d", zh->errCode);
2328     
2329     for (i = 0; sort_sequence->specs[i]; i++)
2330         ;
2331     sort_sequence->num_specs = i;
2332     if (!i)
2333         resultSetRank (zh, sset, rset);
2334     else
2335     {
2336         logf (LOG_DEBUG, "resultSetSortSingle in rpn_search");
2337         resultSetSortSingle (zh, nmem, sset, rset,
2338                              sort_sequence, &sort_status);
2339         if (zh->errCode)
2340         {
2341             logf (LOG_DEBUG, "resultSetSortSingle status = %d", zh->errCode);
2342         }
2343     }
2344     return rset;
2345 }
2346
2347 struct scan_info_entry {
2348     char *term;
2349     ISAMC_P isam_p;
2350 };
2351
2352 struct scan_info {
2353     struct scan_info_entry *list;
2354     ODR odr;
2355     int before, after;
2356     char prefix[20];
2357 };
2358
2359 static int scan_handle (char *name, const char *info, int pos, void *client)
2360 {
2361     int len_prefix, idx;
2362     struct scan_info *scan_info = (struct scan_info *) client;
2363
2364     len_prefix = strlen(scan_info->prefix);
2365     if (memcmp (name, scan_info->prefix, len_prefix))
2366         return 1;
2367     if (pos > 0)        idx = scan_info->after - pos + scan_info->before;
2368     else
2369         idx = - pos - 1;
2370     scan_info->list[idx].term = (char *)
2371         odr_malloc (scan_info->odr, strlen(name + len_prefix)+1);
2372     strcpy (scan_info->list[idx].term, name + len_prefix);
2373     assert (*info == sizeof(ISAMC_P));
2374     memcpy (&scan_info->list[idx].isam_p, info+1, sizeof(ISAMC_P));
2375     return 0;
2376 }
2377
2378 static void scan_term_untrans (ZebraHandle zh, NMEM stream, int reg_type,
2379                                char **dst, const char *src)
2380 {
2381     char term_src[IT_MAX_WORD];
2382     char term_dst[IT_MAX_WORD];
2383     
2384     term_untrans (zh, reg_type, term_src, src);
2385
2386     if (zh->iconv_from_utf8 != 0)
2387     {
2388         int len;
2389         char *inbuf = term_src;
2390         size_t inleft = strlen(term_src);
2391         char *outbuf = term_dst;
2392         size_t outleft = sizeof(term_dst)-1;
2393         size_t ret;
2394         
2395         ret = yaz_iconv (zh->iconv_from_utf8, &inbuf, &inleft,
2396                          &outbuf, &outleft);
2397         if (ret == (size_t)(-1))
2398             len = 0;
2399         else
2400             len = outbuf - term_dst;
2401         *dst = nmem_malloc (stream, len + 1);
2402         if (len > 0)
2403             memcpy (*dst, term_dst, len);
2404         (*dst)[len] = '\0';
2405     }
2406     else
2407         *dst = nmem_strdup (stream, term_src);
2408 }
2409
2410 static void count_set (RSET r, int *count)
2411 {
2412     zint psysno = 0;
2413     int kno = 0;
2414     struct it_key key;
2415     RSFD rfd;
2416
2417     logf (LOG_DEBUG, "count_set");
2418
2419     *count = 0;
2420     rfd = rset_open (r, RSETF_READ);
2421     while (rset_read (rfd, &key))
2422     {
2423 #if IT_KEY_NEW
2424         if (key.mem[0] != psysno)
2425         {
2426             psysno = key.mem[0];
2427             (*count)++;
2428         }
2429 #else
2430         if (key.sysno != psysno)
2431         {
2432             psysno = key.sysno;
2433             (*count)++;
2434         }
2435 #endif
2436         kno++;
2437     }
2438     rset_close (rfd);
2439     logf (LOG_DEBUG, "%d keys, %d records", kno, *count);
2440 }
2441
2442 void rpn_scan (ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
2443                oid_value attributeset,
2444                int num_bases, char **basenames,
2445                int *position, int *num_entries, ZebraScanEntry **list,
2446                int *is_partial, RSET limit_set, int return_zero)
2447 {
2448     int i;
2449     int pos = *position;
2450     int num = *num_entries;
2451     int before;
2452     int after;
2453     int base_no;
2454     char termz[IT_MAX_WORD+20];
2455     AttrType use;
2456     int use_value;
2457     const char *use_string = 0;
2458     struct scan_info *scan_info_array;
2459     ZebraScanEntry *glist;
2460     int ords[32], ord_no = 0;
2461     int ptr[32];
2462
2463     int bases_ok = 0;     /* no of databases with OK attribute */
2464     int errCode = 0;      /* err code (if any is not OK) */
2465     char *errString = 0;  /* addinfo */
2466
2467     unsigned reg_id;
2468     char *search_type = NULL;
2469     char rank_type[128];
2470     int complete_flag;
2471     int sort_flag;
2472     NMEM rset_nmem=NULL; 
2473
2474     *list = 0;
2475
2476     if (attributeset == VAL_NONE)
2477         attributeset = VAL_BIB1;
2478
2479     if (!limit_set)
2480     {
2481         AttrType termset;
2482         int termset_value_numeric;
2483         const char *termset_value_string;
2484         attr_init (&termset, zapt, 8);
2485         termset_value_numeric =
2486             attr_find_ex (&termset, NULL, &termset_value_string);
2487         if (termset_value_numeric != -1)
2488         {
2489             char resname[32];
2490             const char *termset_name = 0;
2491             
2492             if (termset_value_numeric != -2)
2493             {
2494                 
2495                 sprintf (resname, "%d", termset_value_numeric);
2496                 termset_name = resname;
2497             }
2498             else
2499                 termset_name = termset_value_string;
2500             
2501             limit_set = resultSetRef (zh, termset_name);
2502         }
2503     }
2504         
2505     yaz_log (LOG_DEBUG, "position = %d, num = %d set=%d",
2506              pos, num, attributeset);
2507         
2508     attr_init (&use, zapt, 1);
2509     use_value = attr_find_ex (&use, &attributeset, &use_string);
2510
2511     if (zebra_maps_attr (zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2512                          rank_type, &complete_flag, &sort_flag))
2513     {
2514         *num_entries = 0;
2515         zh->errCode = 113;
2516         return ;
2517     }
2518     yaz_log (LOG_DEBUG, "use_value = %d", use_value);
2519
2520     if (use_value == -1)
2521         use_value = 1016;
2522     for (base_no = 0; base_no < num_bases && ord_no < 32; base_no++)
2523     {
2524         int r;
2525         attent attp;
2526         data1_local_attribute *local_attr;
2527
2528         if ((r=att_getentbyatt (zh, &attp, attributeset, use_value,
2529                                 use_string)))
2530         {
2531             logf (LOG_DEBUG, "att_getentbyatt fail. set=%d use=%d",
2532                   attributeset, use_value);
2533             if (r == -1)
2534             {
2535                 char val_str[32];
2536                 sprintf (val_str, "%d", use_value);
2537                 errCode = 114;
2538                 errString = odr_strdup (stream, val_str);
2539             }   
2540             else
2541                 errCode = 121;
2542             continue;
2543         }
2544         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2545         {
2546             zh->errString = basenames[base_no];
2547             zh->errCode = 109; /* Database unavailable */
2548             *num_entries = 0;
2549             return;
2550         }
2551         bases_ok++;
2552         for (local_attr = attp.local_attributes; local_attr && ord_no < 32;
2553              local_attr = local_attr->next)
2554         {
2555             int ord;
2556
2557             ord = zebraExplain_lookupSU (zh->reg->zei, attp.attset_ordinal,
2558                                          local_attr->local);
2559             if (ord > 0)
2560                 ords[ord_no++] = ord;
2561         }
2562     }
2563     if (!bases_ok && errCode)
2564     {
2565         zh->errCode = errCode;
2566         zh->errString = errString;
2567         *num_entries = 0;
2568     }
2569     if (ord_no == 0)
2570     {
2571         char val_str[32];
2572         sprintf (val_str, "%d", use_value);
2573         zh->errCode = 114;
2574         zh->errString = odr_strdup (stream, val_str);
2575
2576         *num_entries = 0;
2577         return;
2578     }
2579     /* prepare dictionary scanning */
2580     before = pos-1;
2581     after = 1+num-pos;
2582     scan_info_array = (struct scan_info *)
2583         odr_malloc (stream, ord_no * sizeof(*scan_info_array));
2584     for (i = 0; i < ord_no; i++)
2585     {
2586         int j, prefix_len = 0;
2587         int before_tmp = before, after_tmp = after;
2588         struct scan_info *scan_info = scan_info_array + i;
2589         struct rpn_char_map_info rcmi;
2590
2591         rpn_char_map_prepare (zh->reg, reg_id, &rcmi);
2592
2593         scan_info->before = before;
2594         scan_info->after = after;
2595         scan_info->odr = stream;
2596
2597         scan_info->list = (struct scan_info_entry *)
2598             odr_malloc (stream, (before+after) * sizeof(*scan_info->list));
2599         for (j = 0; j<before+after; j++)
2600             scan_info->list[j].term = NULL;
2601
2602         prefix_len += key_SU_encode (ords[i], termz + prefix_len);
2603         termz[prefix_len++] = reg_id;
2604         termz[prefix_len] = 0;
2605         strcpy (scan_info->prefix, termz);
2606
2607         if (trans_scan_term (zh, zapt, termz+prefix_len, reg_id))
2608             return ;
2609                     
2610         dict_scan (zh->reg->dict, termz, &before_tmp, &after_tmp,
2611                    scan_info, scan_handle);
2612     }
2613     glist = (ZebraScanEntry *)
2614         odr_malloc (stream, (before+after)*sizeof(*glist));
2615
2616     rset_nmem=nmem_create();
2617
2618     /* consider terms after main term */
2619     for (i = 0; i < ord_no; i++)
2620         ptr[i] = before;
2621     
2622     *is_partial = 0;
2623     for (i = 0; i<after; i++)
2624     {
2625         int j, j0 = -1;
2626         const char *mterm = NULL;
2627         const char *tst;
2628         RSET rset;
2629         
2630         for (j = 0; j < ord_no; j++)
2631         {
2632             if (ptr[j] < before+after &&
2633                 (tst=scan_info_array[j].list[ptr[j]].term) &&
2634                 (!mterm || strcmp (tst, mterm) < 0))
2635             {
2636                 j0 = j;
2637                 mterm = tst;
2638             }
2639         }
2640         if (j0 == -1)
2641             break;
2642         scan_term_untrans (zh, stream->mem, reg_id,
2643                            &glist[i+before].term, mterm);
2644         rset = rset_trunc (zh, &scan_info_array[j0].list[ptr[j0]].isam_p, 1,
2645                            glist[i+before].term, strlen(glist[i+before].term),
2646                            NULL, 0, zapt->term->which, rset_nmem, 
2647                            key_it_ctrl,key_it_ctrl->scope);
2648         ptr[j0]++;
2649         for (j = j0+1; j<ord_no; j++)
2650         {
2651             if (ptr[j] < before+after &&
2652                 (tst=scan_info_array[j].list[ptr[j]].term) &&
2653                 !strcmp (tst, mterm))
2654             {
2655                 RSET rset2;
2656
2657                 rset2 =
2658                    rset_trunc (zh, &scan_info_array[j].list[ptr[j]].isam_p, 1,
2659                                glist[i+before].term,
2660                                strlen(glist[i+before].term), NULL, 0,
2661                                zapt->term->which,rset_nmem,
2662                                key_it_ctrl, key_it_ctrl->scope);
2663                 rset = rsbool_create_or(rset_nmem,key_it_ctrl,
2664                                key_it_ctrl->scope, rset, rset2);
2665                 /* FIXME - Use a proper multi-or */
2666
2667                 ptr[j]++;
2668             }
2669         }
2670         if (limit_set)
2671             rset = rsbool_create_and(rset_nmem,key_it_ctrl,key_it_ctrl->scope,
2672                             rset, rset_dup(limit_set));
2673         count_set (rset, &glist[i+before].occurrences);
2674         rset_delete (rset);
2675     }
2676     if (i < after)
2677     {
2678         *num_entries -= (after-i);
2679         *is_partial = 1;
2680     }
2681
2682     /* consider terms before main term */
2683     for (i = 0; i<ord_no; i++)
2684         ptr[i] = 0;
2685
2686     for (i = 0; i<before; i++)
2687     {
2688         int j, j0 = -1;
2689         const char *mterm = NULL;
2690         const char *tst;
2691         RSET rset;
2692         
2693         for (j = 0; j <ord_no; j++)
2694         {
2695             if (ptr[j] < before &&
2696                 (tst=scan_info_array[j].list[before-1-ptr[j]].term) &&
2697                 (!mterm || strcmp (tst, mterm) > 0))
2698             {
2699                 j0 = j;
2700                 mterm = tst;
2701             }
2702         }
2703         if (j0 == -1)
2704             break;
2705
2706         scan_term_untrans (zh, stream->mem, reg_id,
2707                            &glist[before-1-i].term, mterm);
2708
2709         rset = rset_trunc
2710                (zh, &scan_info_array[j0].list[before-1-ptr[j0]].isam_p, 1,
2711                 glist[before-1-i].term, strlen(glist[before-1-i].term),
2712                 NULL, 0, zapt->term->which,rset_nmem,
2713                 key_it_ctrl,key_it_ctrl->scope);
2714
2715         ptr[j0]++;
2716
2717         for (j = j0+1; j<ord_no; j++)
2718         {
2719             if (ptr[j] < before &&
2720                 (tst=scan_info_array[j].list[before-1-ptr[j]].term) &&
2721                 !strcmp (tst, mterm))
2722             {
2723                 RSET rset2;
2724
2725                 rset2 = rset_trunc (zh,
2726                          &scan_info_array[j].list[before-1-ptr[j]].isam_p, 1,
2727                                     glist[before-1-i].term,
2728                                     strlen(glist[before-1-i].term), NULL, 0,
2729                                     zapt->term->which, rset_nmem,
2730                                     key_it_ctrl, key_it_ctrl->scope);
2731                 rset = rsbool_create_and(rset_nmem,key_it_ctrl,
2732                             key_it_ctrl->scope, rset, rset2);
2733                 /* FIXME - multi-and ?? */
2734                 ptr[j]++;
2735             }
2736         }
2737         if (limit_set)
2738             rset = rsbool_create_and(rset_nmem,key_it_ctrl,key_it_ctrl->scope,
2739                             rset, rset_dup(limit_set));
2740         count_set (rset, &glist[before-1-i].occurrences);
2741         rset_delete (rset);
2742     }
2743     i = before-i;
2744     if (i)
2745     {
2746         *is_partial = 1;
2747         *position -= i;
2748         *num_entries -= i;
2749     }
2750
2751     nmem_destroy(rset_nmem);
2752     *list = glist + i;               /* list is set to first 'real' entry */
2753     
2754     logf (LOG_DEBUG, "position = %d, num_entries = %d",
2755           *position, *num_entries);
2756     if (zh->errCode)
2757         logf (LOG_DEBUG, "scan error: %d", zh->errCode);
2758 }
2759