Cleaned a bit more logging
[idzebra-moved-to-github.git] / index / zrpn.c
1 /* $Id: zrpn.c,v 1.164 2004-12-10 12:37:07 heikki Exp $
2    Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004
3    Index Data Aps
4
5 This file is part of the Zebra server.
6
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra.  If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA.
21 */
22
23
24 #include <stdio.h>
25 #include <assert.h>
26 #ifdef WIN32
27 #include <io.h>
28 #else
29 #include <unistd.h>
30 #endif
31 #include <ctype.h>
32
33 #include "index.h"
34 #include <zebra_xpath.h>
35
36 #include <charmap.h>
37 #include <rset.h>
38
39
40 static const struct key_control it_ctrl =
41
42     sizeof(struct it_key),
43     2, /* we have sysnos and seqnos in this key, nothing more */
44     key_compare_it, 
45     key_logdump_txt,   /* FIXME  - clean up these functions */
46     key_get_seq,
47 };
48
49
50 const struct key_control *key_it_ctrl = &it_ctrl;
51
52 struct rpn_char_map_info
53 {
54     ZebraMaps zm;
55     int reg_type;
56 };
57
58 typedef struct
59 {
60     int type;
61     int major;
62     int minor;
63     Z_AttributesPlusTerm *zapt;
64 } AttrType;
65
66
/* Log level of the "rpn" log module.  It is looked up on first use in
 * add_isam_p; log_level_set records that the lookup has been done. */
static int log_level_set = 0;
static int log_level_rpn = 0;
69
70 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
71 {
72     struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
73     const char **out = zebra_maps_input(p->zm, p->reg_type, from, len, 0);
74 #if 0
75     if (out && *out)
76     {
77         const char *outp = *out;
78         yaz_log(YLOG_LOG, "---");
79         while (*outp)
80         {
81             yaz_log(YLOG_LOG, "%02X", *outp);
82             outp++;
83         }
84     }
85 #endif
86     return out;
87 }
88
89 static void rpn_char_map_prepare(struct zebra_register *reg, int reg_type,
90                                   struct rpn_char_map_info *map_info)
91 {
92     map_info->zm = reg->zebra_maps;
93     map_info->reg_type = reg_type;
94     dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
95 }
96
97 static int attr_find_ex(AttrType *src, oid_value *attributeSetP,
98                          const char **string_value)
99 {
100     int num_attributes;
101
102     num_attributes = src->zapt->attributes->num_attributes;
103     while (src->major < num_attributes)
104     {
105         Z_AttributeElement *element;
106
107         element = src->zapt->attributes->attributes[src->major];
108         if (src->type == *element->attributeType)
109         {
110             switch (element->which) 
111             {
112             case Z_AttributeValue_numeric:
113                 ++(src->major);
114                 if (element->attributeSet && attributeSetP)
115                 {
116                     oident *attrset;
117
118                     attrset = oid_getentbyoid(element->attributeSet);
119                     *attributeSetP = attrset->value;
120                 }
121                 return *element->value.numeric;
122                 break;
123             case Z_AttributeValue_complex:
124                 if (src->minor >= element->value.complex->num_list)
125                     break;
126                 if (element->attributeSet && attributeSetP)
127                 {
128                     oident *attrset;
129                     
130                     attrset = oid_getentbyoid(element->attributeSet);
131                     *attributeSetP = attrset->value;
132                 }
133                 if (element->value.complex->list[src->minor]->which ==  
134                     Z_StringOrNumeric_numeric)
135                 {
136                     ++(src->minor);
137                     return
138                         *element->value.complex->list[src->minor-1]->u.numeric;
139                 }
140                 else if (element->value.complex->list[src->minor]->which ==  
141                          Z_StringOrNumeric_string)
142                 {
143                     if (!string_value)
144                         break;
145                     ++(src->minor);
146                     *string_value = 
147                         element->value.complex->list[src->minor-1]->u.string;
148                     return -2;
149                 }
150                 else
151                     break;
152             default:
153                 assert(0);
154             }
155         }
156         ++(src->major);
157     }
158     return -1;
159 }
160
161 static int attr_find(AttrType *src, oid_value *attributeSetP)
162 {
163     return attr_find_ex(src, attributeSetP, 0);
164 }
165
166 static void attr_init(AttrType *src, Z_AttributesPlusTerm *zapt,
167                        int type)
168 {
169     src->zapt = zapt;
170     src->type = type;
171     src->major = 0;
172     src->minor = 0;
173 }
174
175 #define TERM_COUNT        
176        
177 struct grep_info {        
178 #ifdef TERM_COUNT        
179     int *term_no;        
180 #endif        
181     ISAMC_P *isam_p_buf;
182     int isam_p_size;        
183     int isam_p_indx;
184     ZebraHandle zh;
185     int reg_type;
186     ZebraSet termset;
187 };        
188
189 static void term_untrans(ZebraHandle zh, int reg_type,
190                            char *dst, const char *src)
191 {
192     int len = 0;
193     while (*src)
194     {
195         const char *cp = zebra_maps_output(zh->reg->zebra_maps,
196                                             reg_type, &src);
197         if (!cp && len < IT_MAX_WORD-1)
198             dst[len++] = *src++;
199         else
200             while (*cp && len < IT_MAX_WORD-1)
201                 dst[len++] = *cp++;
202     }
203     dst[len] = '\0';
204 }
205
206 static void add_isam_p(const char *name, const char *info,
207                         struct grep_info *p)
208 {
209     if (!log_level_set)
210     {
211         log_level_rpn = yaz_log_module_level("rpn");
212         log_level_set=1;
213     }
214     if (p->isam_p_indx == p->isam_p_size)
215     {
216         ISAMC_P *new_isam_p_buf;
217 #ifdef TERM_COUNT        
218         int *new_term_no;        
219 #endif
220         p->isam_p_size = 2*p->isam_p_size + 100;
221         new_isam_p_buf = (ISAMC_P *) xmalloc(sizeof(*new_isam_p_buf) *
222                                              p->isam_p_size);
223         if (p->isam_p_buf)
224         {
225             memcpy(new_isam_p_buf, p->isam_p_buf,
226                     p->isam_p_indx * sizeof(*p->isam_p_buf));
227             xfree(p->isam_p_buf);
228         }
229         p->isam_p_buf = new_isam_p_buf;
230
231 #ifdef TERM_COUNT
232         new_term_no = (int *) xmalloc(sizeof(*new_term_no) *
233                                        p->isam_p_size);
234         if (p->term_no)
235         {
236             memcpy(new_term_no, p->isam_p_buf,
237                     p->isam_p_indx * sizeof(*p->term_no));
238             xfree(p->term_no);
239         }
240         p->term_no = new_term_no;
241 #endif
242     }
243     assert(*info == sizeof(*p->isam_p_buf));
244     memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
245
246 #if 1
247     if (p->termset)
248     {
249         const char *db;
250         int set, use;
251         char term_tmp[IT_MAX_WORD];
252         int su_code = 0;
253         int len = key_SU_decode (&su_code, name);
254         
255         term_untrans  (p->zh, p->reg_type, term_tmp, name+len+1);
256         yaz_log(log_level_rpn, "grep: %d %c %s", su_code, name[len], term_tmp);
257         zebraExplain_lookup_ord (p->zh->reg->zei,
258                                  su_code, &db, &set, &use);
259         yaz_log(log_level_rpn, "grep:  set=%d use=%d db=%s", set, use, db);
260         
261         resultSetAddTerm(p->zh, p->termset, name[len], db,
262                          set, use, term_tmp);
263     }
264 #endif
265     (p->isam_p_indx)++;
266 }
267
/* grep_handle: callback adapter; p is the struct grep_info that receives
 * the hit via add_isam_p.  Always returns 0. */
static int grep_handle(char *name, const char *info, void *p)
{
    struct grep_info *gi = p;

    add_isam_p(name, info, gi);
    return 0;
}
273
274 static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src,
275                      const char *ct1, const char *ct2, int first)
276 {
277     const char *s1, *s0 = *src;
278     const char **map;
279
280     /* skip white space */
281     while (*s0)
282     {
283         if (ct1 && strchr(ct1, *s0))
284             break;
285         if (ct2 && strchr(ct2, *s0))
286             break;
287         s1 = s0;
288         map = zebra_maps_input(zebra_maps, reg_type, &s1, strlen(s1), first);
289         if (**map != *CHR_SPACE)
290             break;
291         s0 = s1;
292     }
293     *src = s0;
294     return *s0;
295 }
296
297 #define REGEX_CHARS " []()|.*+?!"
298
299 /* term_100: handle term, where trunc=none(no operators at all) */
300 static int term_100(ZebraMaps zebra_maps, int reg_type,
301                      const char **src, char *dst, int space_split,
302                      char *dst_term)
303 {
304     const char *s0, *s1;
305     const char **map;
306     int i = 0;
307     int j = 0;
308
309     const char *space_start = 0;
310     const char *space_end = 0;
311
312     if (!term_pre(zebra_maps, reg_type, src, NULL, NULL, !space_split))
313         return 0;
314     s0 = *src;
315     while (*s0)
316     {
317         s1 = s0;
318         map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0);
319         if (space_split)
320         {
321             if (**map == *CHR_SPACE)
322                 break;
323         }
324         else  /* complete subfield only. */
325         {
326             if (**map == *CHR_SPACE)
327             {   /* save space mapping for later  .. */
328                 space_start = s1;
329                 space_end = s0;
330                 continue;
331             }
332             else if (space_start)
333             {   /* reload last space */
334                 while (space_start < space_end)
335                 {
336                     if (strchr(REGEX_CHARS, *space_start))
337                         dst[i++] = '\\';
338                     dst_term[j++] = *space_start;
339                     dst[i++] = *space_start++;
340                 }
341                 /* and reset */
342                 space_start = space_end = 0;
343             }
344         }
345         /* add non-space char */
346         while (s1 < s0)
347         {
348             if (strchr(REGEX_CHARS, *s1))
349                 dst[i++] = '\\';
350             dst_term[j++] = *s1;
351             dst[i++] = *s1++;
352         }
353     }
354     dst[i] = '\0';
355     dst_term[j] = '\0';
356     *src = s0;
357     return i;
358 }
359
360 /* term_101: handle term, where trunc=Process # */
361 static int term_101(ZebraMaps zebra_maps, int reg_type,
362                      const char **src, char *dst, int space_split,
363                      char *dst_term)
364 {
365     const char *s0, *s1;
366     const char **map;
367     int i = 0;
368     int j = 0;
369
370     if (!term_pre(zebra_maps, reg_type, src, "#", "#", !space_split))
371         return 0;
372     s0 = *src;
373     while (*s0)
374     {
375         if (*s0 == '#')
376         {
377             dst[i++] = '.';
378             dst[i++] = '*';
379             dst_term[j++] = *s0++;
380         }
381         else
382         {
383             s1 = s0;
384             map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0);
385             if (space_split && **map == *CHR_SPACE)
386                 break;
387             while (s1 < s0)
388             {
389                 if (strchr(REGEX_CHARS, *s1))
390                     dst[i++] = '\\';
391                 dst_term[j++] = *s1;
392                 dst[i++] = *s1++;
393             }
394         }
395     }
396     dst[i] = '\0';
397     dst_term[j++] = '\0';
398     *src = s0;
399     return i;
400 }
401
402 /* term_103: handle term, where trunc=re-2 (regular expressions) */
403 static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src,
404                      char *dst, int *errors, int space_split,
405                      char *dst_term)
406 {
407     int i = 0;
408     int j = 0;
409     const char *s0, *s1;
410     const char **map;
411
412     if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split))
413         return 0;
414     s0 = *src;
415     if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
416         isdigit(s0[1]))
417     {
418         *errors = s0[1] - '0';
419         s0 += 3;
420         if (*errors > 3)
421             *errors = 3;
422     }
423     while (*s0)
424     {
425         if (strchr("^\\()[].*+?|-", *s0))
426         {
427             dst_term[j++] = *s0;
428             dst[i++] = *s0++;
429         }
430         else
431         {
432             s1 = s0;
433             map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0);
434             if (**map == *CHR_SPACE)
435                 break;
436             while (s1 < s0)
437             {
438                 if (strchr(REGEX_CHARS, *s1))
439                     dst[i++] = '\\';
440                 dst_term[j++] = *s1;
441                 dst[i++] = *s1++;
442             }
443         }
444     }
445     dst[i] = '\0';
446     dst_term[j] = '\0';
447     *src = s0;
448     return i;
449 }
450
451 /* term_103: handle term, where trunc=re-1 (regular expressions) */
452 static int term_102 (ZebraMaps zebra_maps, int reg_type, const char **src,
453                      char *dst, int space_split, char *dst_term)
454 {
455     return term_103(zebra_maps, reg_type, src, dst, NULL, space_split,
456                      dst_term);
457 }
458
459
460 /* term_104: handle term, where trunc=Process # and ! */
461 static int term_104(ZebraMaps zebra_maps, int reg_type,
462                      const char **src, char *dst, int space_split,
463                      char *dst_term)
464 {
465     const char *s0, *s1;
466     const char **map;
467     int i = 0;
468     int j = 0;
469
470     if (!term_pre(zebra_maps, reg_type, src, "?*#", "?*#", !space_split))
471         return 0;
472     s0 = *src;
473     while (*s0)
474     {
475         if (*s0 == '?')
476         {
477             dst_term[j++] = *s0++;
478             if (*s0 >= '0' && *s0 <= '9')
479             {
480                 int limit = 0;
481                 while (*s0 >= '0' && *s0 <= '9')
482                 {
483                     limit = limit * 10 + (*s0 - '0');
484                     dst_term[j++] = *s0++;
485                 }
486                 if (limit > 20)
487                     limit = 20;
488                 while (--limit >= 0)
489                 {
490                     dst[i++] = '.';
491                     dst[i++] = '?';
492                 }
493             }
494             else
495             {
496                 dst[i++] = '.';
497                 dst[i++] = '*';
498             }
499         }
500         else if (*s0 == '*')
501         {
502             dst[i++] = '.';
503             dst[i++] = '*';
504             dst_term[j++] = *s0++;
505         }
506         else if (*s0 == '#')
507         {
508             dst[i++] = '.';
509             dst_term[j++] = *s0++;
510         }
511         {
512             s1 = s0;
513             map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0);
514             if (space_split && **map == *CHR_SPACE)
515                 break;
516             while (s1 < s0)
517             {
518                 if (strchr(REGEX_CHARS, *s1))
519                     dst[i++] = '\\';
520                 dst_term[j++] = *s1;
521                 dst[i++] = *s1++;
522             }
523         }
524     }
525     dst[i] = '\0';
526     dst_term[j++] = '\0';
527     *src = s0;
528     return i;
529 }
530
531 /* term_105/106: handle term, where trunc=Process * and ! and right trunc */
532 static int term_105 (ZebraMaps zebra_maps, int reg_type,
533                      const char **src, char *dst, int space_split,
534                      char *dst_term, int right_truncate)
535 {
536     const char *s0, *s1;
537     const char **map;
538     int i = 0;
539     int j = 0;
540
541     if (!term_pre(zebra_maps, reg_type, src, "*!", "*!", !space_split))
542         return 0;
543     s0 = *src;
544     while (*s0)
545     {
546         if (*s0 == '*')
547         {
548             dst[i++] = '.';
549             dst[i++] = '*';
550             dst_term[j++] = *s0++;
551         }
552         else if (*s0 == '!')
553         {
554             dst[i++] = '.';
555             dst_term[j++] = *s0++;
556         }
557         {
558             s1 = s0;
559             map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0);
560             if (space_split && **map == *CHR_SPACE)
561                 break;
562             while (s1 < s0)
563             {
564                 if (strchr(REGEX_CHARS, *s1))
565                     dst[i++] = '\\';
566                 dst_term[j++] = *s1;
567                 dst[i++] = *s1++;
568             }
569         }
570     }
571     if (right_truncate)
572     {
573         dst[i++] = '.';
574         dst[i++] = '*';
575     }
576     dst[i] = '\0';
577     
578     dst_term[j++] = '\0';
579     *src = s0;
580     return i;
581 }
582
583
584 /* gen_regular_rel - generate regular expression from relation
585  *  val:     border value (inclusive)
586  *  islt:    1 if <=; 0 if >=.
587  */
588 static void gen_regular_rel(char *dst, int val, int islt)
589 {
590     int dst_p;
591     int w, d, i;
592     int pos = 0;
593     char numstr[20];
594
595     yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
596     if (val >= 0)
597     {
598         if (islt)
599             strcpy(dst, "(-[0-9]+|(");
600         else
601             strcpy(dst, "((");
602     } 
603     else
604     {
605         if (!islt)
606         {
607             strcpy(dst, "([0-9]+|-(");
608             dst_p = strlen(dst);
609             islt = 1;
610         }
611         else
612         {
613             strcpy(dst, "(-(");
614             islt = 0;
615         }
616         val = -val;
617     }
618     dst_p = strlen(dst);
619     sprintf(numstr, "%d", val);
620     for (w = strlen(numstr); --w >= 0; pos++)
621     {
622         d = numstr[w];
623         if (pos > 0)
624         {
625             if (islt)
626             {
627                 if (d == '0')
628                     continue;
629                 d--;
630             } 
631             else
632             {
633                 if (d == '9')
634                     continue;
635                 d++;
636             }
637         }
638         
639         strcpy(dst + dst_p, numstr);
640         dst_p = strlen(dst) - pos - 1;
641
642         if (islt)
643         {
644             if (d != '0')
645             {
646                 dst[dst_p++] = '[';
647                 dst[dst_p++] = '0';
648                 dst[dst_p++] = '-';
649                 dst[dst_p++] = d;
650                 dst[dst_p++] = ']';
651             }
652             else
653                 dst[dst_p++] = d;
654         }
655         else
656         {
657             if (d != '9')
658             { 
659                 dst[dst_p++] = '[';
660                 dst[dst_p++] = d;
661                 dst[dst_p++] = '-';
662                 dst[dst_p++] = '9';
663                 dst[dst_p++] = ']';
664             }
665             else
666                 dst[dst_p++] = d;
667         }
668         for (i = 0; i<pos; i++)
669         {
670             dst[dst_p++] = '[';
671             dst[dst_p++] = '0';
672             dst[dst_p++] = '-';
673             dst[dst_p++] = '9';
674             dst[dst_p++] = ']';
675         }
676         dst[dst_p++] = '|';
677     }
678     dst[dst_p] = '\0';
679     if (islt)
680     {
681         /* match everything less than 10^(pos-1) */
682         strcat(dst, "0*");
683         for (i=1; i<pos; i++)
684             strcat(dst, "[0-9]?");
685     }
686     else
687     {
688         /* match everything greater than 10^pos */
689         for (i = 0; i <= pos; i++)
690             strcat(dst, "[0-9]");
691         strcat(dst, "[0-9]*");
692     }
693     strcat(dst, "))");
694 }
695
/* string_rel_add_char: copy one (possibly backslash-escaped) character
 * from src[*indx] to **term_p.  A backslash is copied together with the
 * character that follows it.  Both *term_p and *indx are advanced past
 * what was copied. */
void string_rel_add_char(char **term_p, const char *src, int *indx)
{
    char *out = *term_p;
    int at = *indx;

    if (src[at] == '\\')
        *out++ = src[at++];
    *out++ = src[at++];

    *term_p = out;
    *indx = at;
}
702
703 /*
704  *   >  abc     ([b-].*|a[c-].*|ab[d-].*|abc.+)
705  *              ([^-a].*|a[^-b].*ab[^-c].*|abc.+)
706  *   >= abc     ([b-].*|a[c-].*|ab[c-].*)
707  *              ([^-a].*|a[^-b].*|ab[c-].*)
708  *   <  abc     ([-0].*|a[-a].*|ab[-b].*)
709  *              ([^a-].*|a[^b-].*|ab[^c-].*)
710  *   <= abc     ([-0].*|a[-a].*|ab[-b].*|abc)
711  *              ([^a-].*|a[^b-].*|ab[^c-].*|abc)
712  */
713 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
714                             const char **term_sub, char *term_dict,
715                             oid_value attributeSet,
716                             int reg_type, int space_split, char *term_dst)
717 {
718     AttrType relation;
719     int relation_value;
720     int i;
721     char *term_tmp = term_dict + strlen(term_dict);
722     char term_component[2*IT_MAX_WORD+20];
723
724     attr_init(&relation, zapt, 2);
725     relation_value = attr_find(&relation, NULL);
726
727     yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
728     switch (relation_value)
729     {
730     case 1:
731         if (!term_100 (zh->reg->zebra_maps, reg_type,
732                        term_sub, term_component,
733                        space_split, term_dst))
734             return 0;
735         yaz_log(log_level_rpn, "Relation <");
736         
737         *term_tmp++ = '(';
738         for (i = 0; term_component[i]; )
739         {
740             int j = 0;
741
742             if (i)
743                 *term_tmp++ = '|';
744             while (j < i)
745                 string_rel_add_char (&term_tmp, term_component, &j);
746
747             *term_tmp++ = '[';
748
749             *term_tmp++ = '^';
750             string_rel_add_char (&term_tmp, term_component, &i);
751             *term_tmp++ = '-';
752
753             *term_tmp++ = ']';
754             *term_tmp++ = '.';
755             *term_tmp++ = '*';
756
757             if ((term_tmp - term_dict) > IT_MAX_WORD)
758                 break;
759         }
760         *term_tmp++ = ')';
761         *term_tmp = '\0';
762         break;
763     case 2:
764         if (!term_100 (zh->reg->zebra_maps, reg_type,
765                        term_sub, term_component,
766                        space_split, term_dst))
767             return 0;
768         yaz_log(log_level_rpn, "Relation <=");
769
770         *term_tmp++ = '(';
771         for (i = 0; term_component[i]; )
772         {
773             int j = 0;
774
775             while (j < i)
776                 string_rel_add_char (&term_tmp, term_component, &j);
777             *term_tmp++ = '[';
778
779             *term_tmp++ = '^';
780             string_rel_add_char (&term_tmp, term_component, &i);
781             *term_tmp++ = '-';
782
783             *term_tmp++ = ']';
784             *term_tmp++ = '.';
785             *term_tmp++ = '*';
786
787             *term_tmp++ = '|';
788
789             if ((term_tmp - term_dict) > IT_MAX_WORD)
790                 break;
791         }
792         for (i = 0; term_component[i]; )
793             string_rel_add_char (&term_tmp, term_component, &i);
794         *term_tmp++ = ')';
795         *term_tmp = '\0';
796         break;
797     case 5:
798         if (!term_100 (zh->reg->zebra_maps, reg_type,
799                        term_sub, term_component, space_split, term_dst))
800             return 0;
801         yaz_log(log_level_rpn, "Relation >");
802
803         *term_tmp++ = '(';
804         for (i = 0; term_component[i];)
805         {
806             int j = 0;
807
808             while (j < i)
809                 string_rel_add_char (&term_tmp, term_component, &j);
810             *term_tmp++ = '[';
811             
812             *term_tmp++ = '^';
813             *term_tmp++ = '-';
814             string_rel_add_char (&term_tmp, term_component, &i);
815
816             *term_tmp++ = ']';
817             *term_tmp++ = '.';
818             *term_tmp++ = '*';
819
820             *term_tmp++ = '|';
821
822             if ((term_tmp - term_dict) > IT_MAX_WORD)
823                 break;
824         }
825         for (i = 0; term_component[i];)
826             string_rel_add_char (&term_tmp, term_component, &i);
827         *term_tmp++ = '.';
828         *term_tmp++ = '+';
829         *term_tmp++ = ')';
830         *term_tmp = '\0';
831         break;
832     case 4:
833         if (!term_100 (zh->reg->zebra_maps, reg_type, term_sub,
834                        term_component, space_split, term_dst))
835             return 0;
836         yaz_log(log_level_rpn, "Relation >=");
837
838         *term_tmp++ = '(';
839         for (i = 0; term_component[i];)
840         {
841             int j = 0;
842
843             if (i)
844                 *term_tmp++ = '|';
845             while (j < i)
846                 string_rel_add_char (&term_tmp, term_component, &j);
847             *term_tmp++ = '[';
848
849             if (term_component[i+1])
850             {
851                 *term_tmp++ = '^';
852                 *term_tmp++ = '-';
853                 string_rel_add_char (&term_tmp, term_component, &i);
854             }
855             else
856             {
857                 string_rel_add_char (&term_tmp, term_component, &i);
858                 *term_tmp++ = '-';
859             }
860             *term_tmp++ = ']';
861             *term_tmp++ = '.';
862             *term_tmp++ = '*';
863
864             if ((term_tmp - term_dict) > IT_MAX_WORD)
865                 break;
866         }
867         *term_tmp++ = ')';
868         *term_tmp = '\0';
869         break;
870     case 3:
871     default:
872         yaz_log(log_level_rpn, "Relation =");
873         if (!term_100 (zh->reg->zebra_maps, reg_type, term_sub,
874                        term_component, space_split, term_dst))
875             return 0;
876         strcat(term_tmp, "(");
877         strcat(term_tmp, term_component);
878         strcat(term_tmp, ")");
879     }
880     return 1;
881 }
882
883 static int string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
884                         const char **term_sub, 
885                         oid_value attributeSet, NMEM stream,
886                         struct grep_info *grep_info,
887                         int reg_type, int complete_flag,
888                         int num_bases, char **basenames,
889                         char *term_dst, int xpath_use);
890
891 static RSET term_trunc(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
892                         const char **term_sub, 
893                         oid_value attributeSet, NMEM stream,
894                         struct grep_info *grep_info,
895                         int reg_type, int complete_flag,
896                         int num_bases, char **basenames,
897                         char *term_dst,
898                         const char *rank_type, int xpath_use,
899                         NMEM rset_nmem)
900 {
901     int r;
902     grep_info->isam_p_indx = 0;
903     r = string_term(zh, zapt, term_sub, attributeSet, stream, grep_info,
904                      reg_type, complete_flag, num_bases, basenames,
905                      term_dst, xpath_use);
906     if (r < 1)
907         return 0;
908     yaz_log(log_level_rpn, "term: %s", term_dst);
909     return rset_trunc(zh, grep_info->isam_p_buf,
910                        grep_info->isam_p_indx, term_dst,
911                        strlen(term_dst), rank_type, 1 /* preserve pos */,
912                        zapt->term->which, rset_nmem,
913                        key_it_ctrl,key_it_ctrl->scope);
914 }
915 static char *nmem_strdup_i(NMEM nmem, int v)
916 {
917     char val_str[64];
918     sprintf (val_str, "%d", v);
919     return nmem_strdup(nmem, val_str);
920 }
921
922 static int string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
923                        const char **term_sub, 
924                        oid_value attributeSet, NMEM stream,
925                        struct grep_info *grep_info,
926                        int reg_type, int complete_flag,
927                        int num_bases, char **basenames,
928                        char *term_dst, int xpath_use)
929 {
930     char term_dict[2*IT_MAX_WORD+4000];
931     int j, r, base_no;
932     AttrType truncation;
933     int truncation_value;
934     AttrType use;
935     int use_value;
936     const char *use_string = 0;
937     oid_value curAttributeSet = attributeSet;
938     const char *termp;
939     struct rpn_char_map_info rcmi;
940     int space_split = complete_flag ? 0 : 1;
941
942     int bases_ok = 0;     /* no of databases with OK attribute */
943     int errCode = 0;      /* err code (if any is not OK) */
944     char *errString = 0;  /* addinfo */
945
946     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
947     attr_init (&use, zapt, 1);
948     use_value = attr_find_ex (&use, &curAttributeSet, &use_string);
949     yaz_log(log_level_rpn, "string_term, use value %d", use_value);
950     attr_init (&truncation, zapt, 5);
951     truncation_value = attr_find (&truncation, NULL);
952     yaz_log(log_level_rpn, "truncation value %d", truncation_value);
953
954     if (use_value == -1)    /* no attribute - assumy "any" */
955         use_value = 1016;
956     for (base_no = 0; base_no < num_bases; base_no++)
957     {
958         int attr_ok = 0;
959         int regex_range = 0;
960         int init_pos = 0;
961         attent attp;
962         data1_local_attribute id_xpath_attr;
963         data1_local_attribute *local_attr;
964         int max_pos, prefix_len = 0;
965
966         termp = *term_sub;
967
968         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
969         {
970             zh->errCode = 109; /* Database unavailable */
971             zh->errString = basenames[base_no];
972             return -1;
973         }
974         if (xpath_use > 0 && use_value == -2) 
975         {
976             use_value = xpath_use;
977             attp.local_attributes = &id_xpath_attr;
978             attp.attset_ordinal = VAL_IDXPATH;
979             id_xpath_attr.next = 0;
980             id_xpath_attr.local = use_value;
981         }
982         else if (curAttributeSet == VAL_IDXPATH)
983         {
984             attp.local_attributes = &id_xpath_attr;
985             attp.attset_ordinal = VAL_IDXPATH;
986             id_xpath_attr.next = 0;
987             id_xpath_attr.local = use_value;
988         }
989         else
990         {
991             if ((r=att_getentbyatt (zh, &attp, curAttributeSet, use_value,
992                                             use_string)))
993             {
994                 yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
995                       curAttributeSet, use_value, r);
996                 if (r == -1)
997                 {
998                     /* set was found, but value wasn't defined */
999                     errCode = 114;
1000                     if (use_string)
1001                         errString = nmem_strdup(stream, use_string);
1002                     else
1003                         errString = nmem_strdup_i (stream, use_value);
1004                 }
1005                 else
1006                 {
1007                     int oid[OID_SIZE];
1008                     struct oident oident;
1009                     
1010                     oident.proto = PROTO_Z3950;
1011                     oident.oclass = CLASS_ATTSET;
1012                     oident.value = curAttributeSet;
1013                     oid_ent_to_oid (&oident, oid);
1014                     
1015                     errCode = 121;
1016                     errString = nmem_strdup (stream, oident.desc);
1017                 }
1018                 continue;
1019             }
1020         }
1021         for (local_attr = attp.local_attributes; local_attr;
1022              local_attr = local_attr->next)
1023         {
1024             int ord;
1025             char ord_buf[32];
1026             int i, ord_len;
1027             
1028             ord = zebraExplain_lookupSU (zh->reg->zei, attp.attset_ordinal,
1029                                          local_attr->local);
1030             if (ord < 0)
1031                 continue;
1032             if (prefix_len)
1033                 term_dict[prefix_len++] = '|';
1034             else
1035                 term_dict[prefix_len++] = '(';
1036             
1037             ord_len = key_SU_encode (ord, ord_buf);
1038             for (i = 0; i<ord_len; i++)
1039             {
1040                 term_dict[prefix_len++] = 1;
1041                 term_dict[prefix_len++] = ord_buf[i];
1042             }
1043         }
1044         if (!prefix_len)
1045         {
1046 #if 1
1047             bases_ok++;
1048 #else
1049             errCode = 114;
1050             errString = nmem_strdup_i(stream, use_value);
1051             continue;
1052 #endif
1053         }
1054         else
1055         {
1056             bases_ok++; /* this has OK attributes */
1057             attr_ok = 1;
1058         }
1059
1060         term_dict[prefix_len++] = ')';
1061         term_dict[prefix_len++] = 1;
1062         term_dict[prefix_len++] = reg_type;
1063         yaz_log(log_level_rpn, "reg_type = %d", term_dict[prefix_len-1]);
1064         term_dict[prefix_len] = '\0';
1065         j = prefix_len;
1066         switch (truncation_value)
1067         {
1068         case -1:         /* not specified */
1069         case 100:        /* do not truncate */
1070             if (!string_relation (zh, zapt, &termp, term_dict,
1071                                   attributeSet,
1072                                   reg_type, space_split, term_dst))
1073                 return 0;
1074             break;
1075         case 1:          /* right truncation */
1076             term_dict[j++] = '(';
1077             if (!term_100(zh->reg->zebra_maps, reg_type,
1078                           &termp, term_dict + j, space_split, term_dst))
1079                 return 0;
1080             strcat(term_dict, ".*)");
1081             break;
1082         case 2:          /* keft truncation */
1083             term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1084             if (!term_100(zh->reg->zebra_maps, reg_type,
1085                           &termp, term_dict + j, space_split, term_dst))
1086                 return 0;
1087             strcat(term_dict, ")");
1088             break;
1089         case 3:          /* left&right truncation */
1090             term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1091             if (!term_100(zh->reg->zebra_maps, reg_type,
1092                           &termp, term_dict + j, space_split, term_dst))
1093                 return 0;
1094             strcat(term_dict, ".*)");
1095             break;
1096         case 101:        /* process # in term */
1097             term_dict[j++] = '(';
1098             if (!term_101(zh->reg->zebra_maps, reg_type,
1099                           &termp, term_dict + j, space_split, term_dst))
1100                 return 0;
1101             strcat(term_dict, ")");
1102             break;
1103         case 102:        /* Regexp-1 */
1104             term_dict[j++] = '(';
1105             if (!term_102(zh->reg->zebra_maps, reg_type,
1106                           &termp, term_dict + j, space_split, term_dst))
1107                 return 0;
1108             strcat(term_dict, ")");
1109             break;
1110         case 103:       /* Regexp-2 */
1111             r = 1;
1112             term_dict[j++] = '(';
1113             init_pos = 2;
1114             if (!term_103 (zh->reg->zebra_maps, reg_type,
1115                            &termp, term_dict + j, &regex_range,
1116                            space_split, term_dst))
1117                 return 0;
1118             strcat(term_dict, ")");
1119         case 104:        /* process # and ! in term */
1120             term_dict[j++] = '(';
1121             if (!term_104 (zh->reg->zebra_maps, reg_type,
1122                            &termp, term_dict + j, space_split, term_dst))
1123                 return 0;
1124             strcat(term_dict, ")");
1125             break;
1126         case 105:        /* process * and ! in term */
1127             term_dict[j++] = '(';
1128             if (!term_105 (zh->reg->zebra_maps, reg_type,
1129                            &termp, term_dict + j, space_split, term_dst, 1))
1130                 return 0;
1131             strcat(term_dict, ")");
1132             break;
1133         case 106:        /* process * and ! in term */
1134             term_dict[j++] = '(';
1135             if (!term_105 (zh->reg->zebra_maps, reg_type,
1136                            &termp, term_dict + j, space_split, term_dst, 0))
1137                 return 0;
1138             strcat(term_dict, ")");
1139             break;
1140         default:
1141             zh->errCode = 120;
1142             zh->errString = nmem_strdup_i(stream, truncation_value);
1143             return -1;
1144         }
1145         if (attr_ok)
1146         {
1147             yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_dict+prefix_len);
1148             r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
1149                                  grep_info, &max_pos, init_pos,
1150                                  grep_handle);
1151             if (r)
1152                 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1153         }
1154     }
1155     if (!bases_ok)
1156     {
1157         zh->errCode = errCode;
1158         zh->errString = errString;
1159         return -1;
1160     }
1161     *term_sub = termp;
1162     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1163     return 1;
1164 }
1165
1166
1167 /* convert APT search term to UTF8 */
1168 static int zapt_term_to_utf8 (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1169                               char *termz)
1170 {
1171     size_t sizez;
1172     Z_Term *term = zapt->term;
1173
1174     switch (term->which)
1175     {
1176     case Z_Term_general:
1177         if (zh->iconv_to_utf8 != 0)
1178         {
1179             char *inbuf = term->u.general->buf;
1180             size_t inleft = term->u.general->len;
1181             char *outbuf = termz;
1182             size_t outleft = IT_MAX_WORD-1;
1183             size_t ret;
1184
1185             ret = yaz_iconv(zh->iconv_to_utf8, &inbuf, &inleft,
1186                         &outbuf, &outleft);
1187             if (ret == (size_t)(-1))
1188             {
1189                 ret = yaz_iconv(zh->iconv_to_utf8, 0, 0, 0, 0);
1190                 zh->errCode = 125;
1191                 return -1;
1192             }
1193             *outbuf = 0;
1194         }
1195         else
1196         {
1197             sizez = term->u.general->len;
1198             if (sizez > IT_MAX_WORD-1)
1199                 sizez = IT_MAX_WORD-1;
1200             memcpy (termz, term->u.general->buf, sizez);
1201             termz[sizez] = '\0';
1202         }
1203         break;
1204     case Z_Term_characterString:
1205         sizez = strlen(term->u.characterString);
1206         if (sizez > IT_MAX_WORD-1)
1207             sizez = IT_MAX_WORD-1;
1208         memcpy (termz, term->u.characterString, sizez);
1209         termz[sizez] = '\0';
1210         break;
1211     default:
1212         zh->errCode = 124;
1213         return -1;
1214     }
1215     return 0;
1216 }
1217
1218 /* convert APT SCAN term to internal cmap */
1219 static int trans_scan_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1220                             char *termz, int reg_type)
1221 {
1222     char termz0[IT_MAX_WORD];
1223
1224     if (zapt_term_to_utf8(zh, zapt, termz0))
1225         return -1;    /* error */
1226     else
1227     {
1228         const char **map;
1229         const char *cp = (const char *) termz0;
1230         const char *cp_end = cp + strlen(cp);
1231         const char *src;
1232         int i = 0;
1233         const char *space_map = NULL;
1234         int len;
1235             
1236         while ((len = (cp_end - cp)) > 0)
1237         {
1238             map = zebra_maps_input (zh->reg->zebra_maps, reg_type, &cp, len, 0);
1239             if (**map == *CHR_SPACE)
1240                 space_map = *map;
1241             else
1242             {
1243                 if (i && space_map)
1244                     for (src = space_map; *src; src++)
1245                         termz[i++] = *src;
1246                 space_map = NULL;
1247                 for (src = *map; *src; src++)
1248                     termz[i++] = *src;
1249             }
1250         }
1251         termz[i] = '\0';
1252     }
1253     return 0;
1254 }
1255
1256 char *normalize_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1257                      const char *termz, NMEM stream, unsigned reg_id)
1258 {
1259     WRBUF wrbuf = 0;
1260     AttrType truncation;
1261     int truncation_value;
1262     char *ex_list = 0;
1263
1264     attr_init (&truncation, zapt, 5);
1265     truncation_value = attr_find (&truncation, NULL);
1266
1267     switch (truncation_value)
1268     {
1269     default:
1270         ex_list = "";
1271         break;
1272     case 101:
1273         ex_list = "#";
1274         break;
1275     case 102:
1276     case 103:
1277         ex_list = 0;
1278         break;
1279     case 104:
1280         ex_list = "!#";
1281         break;
1282     case 105:
1283         ex_list = "!*";
1284         break;
1285     }
1286     if (ex_list)
1287         wrbuf = zebra_replace(zh->reg->zebra_maps, reg_id, ex_list,
1288                               termz, strlen(termz));
1289     if (!wrbuf)
1290         return nmem_strdup(stream, termz);
1291     else
1292     {
1293         char *buf = (char*) nmem_malloc(stream, wrbuf_len(wrbuf)+1);
1294         memcpy (buf, wrbuf_buf(wrbuf), wrbuf_len(wrbuf));
1295         buf[wrbuf_len(wrbuf)] = '\0';
1296         return buf;
1297     }
1298 }
1299
1300 static void grep_info_delete (struct grep_info *grep_info)
1301 {
1302 #ifdef TERM_COUNT
1303     xfree(grep_info->term_no);
1304 #endif
1305     xfree (grep_info->isam_p_buf);
1306 }
1307
1308 static int grep_info_prepare (ZebraHandle zh,
1309                               Z_AttributesPlusTerm *zapt,
1310                               struct grep_info *grep_info,
1311                               int reg_type,
1312                               NMEM stream)
1313 {
1314     AttrType termset;
1315     int termset_value_numeric;
1316     const char *termset_value_string;
1317
1318 #ifdef TERM_COUNT
1319     grep_info->term_no = 0;
1320 #endif
1321     grep_info->isam_p_size = 0;
1322     grep_info->isam_p_buf = NULL;
1323     grep_info->zh = zh;
1324     grep_info->reg_type = reg_type;
1325     grep_info->termset = 0;
1326
1327     if (!zapt)
1328         return 0;
1329     attr_init (&termset, zapt, 8);
1330     termset_value_numeric =
1331         attr_find_ex (&termset, NULL, &termset_value_string);
1332     if (termset_value_numeric != -1)
1333     {
1334         char resname[32];
1335         const char *termset_name = 0;
1336         if (termset_value_numeric != -2)
1337         {
1338     
1339             sprintf (resname, "%d", termset_value_numeric);
1340             termset_name = resname;
1341         }
1342         else
1343             termset_name = termset_value_string;
1344         yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1345         grep_info->termset = resultSetAdd (zh, termset_name, 1);
1346         if (!grep_info->termset)
1347         {
1348             zh->errCode = 128;
1349             zh->errString = nmem_strdup (stream, termset_name);
1350             return -1;
1351         }
1352     }
1353     return 0;
1354 }
1355                                
1356
1357 static RSET rpn_search_APT_phrase (ZebraHandle zh,
1358                                    Z_AttributesPlusTerm *zapt,
1359                                    const char *termz_org,
1360                                    oid_value attributeSet,
1361                                    NMEM stream,
1362                                    int reg_type, int complete_flag,
1363                                    const char *rank_type, int xpath_use,
1364                                    int num_bases, char **basenames, 
1365                                    NMEM rset_nmem)
1366 {
1367     char term_dst[IT_MAX_WORD+1];
1368     RSET rset[60], result;
1369     int rset_no = 0;
1370     struct grep_info grep_info;
1371     char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type);
1372     const char *termp = termz;
1373
1374     *term_dst = 0;
1375     if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream))
1376         return 0;
1377     while (1)
1378     { 
1379         yaz_log(log_level_rpn, "APT_phrase termp=%s", termp);
1380         rset[rset_no] = term_trunc(zh, zapt, &termp, attributeSet,
1381                                     stream, &grep_info,
1382                                     reg_type, complete_flag,
1383                                     num_bases, basenames,
1384                                     term_dst, rank_type,
1385                                     xpath_use,rset_nmem);
1386         if (!rset[rset_no])
1387             break;
1388         if (++rset_no >= (int) (sizeof(rset)/sizeof(*rset)))
1389             break;
1390     }
1391     grep_info_delete (&grep_info);
1392     if (rset_no == 0)
1393         return rsnull_create (rset_nmem,key_it_ctrl); 
1394     else if (rset_no == 1)
1395         return (rset[0]);
1396     else
1397         result = rsprox_create( rset_nmem, key_it_ctrl, key_it_ctrl->scope,
1398                        rset_no, rset,
1399                        1 /* ordered */, 0 /* exclusion */,
1400                        3 /* relation */, 1 /* distance */);
1401     return result;
1402 }
1403
1404 static RSET rpn_search_APT_or_list (ZebraHandle zh,
1405                                     Z_AttributesPlusTerm *zapt,
1406                                     const char *termz_org,
1407                                     oid_value attributeSet,
1408                                     NMEM stream,
1409                                     int reg_type, int complete_flag,
1410                                     const char *rank_type,
1411                                     int xpath_use,
1412                                     int num_bases, char **basenames,
1413                                     NMEM rset_nmem)
1414 {
1415     char term_dst[IT_MAX_WORD+1];
1416     RSET rset[60];
1417     int rset_no = 0;
1418     struct grep_info grep_info;
1419     char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type);
1420     const char *termp = termz;
1421
1422     if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream))
1423         return 0;
1424     while (1)
1425     { 
1426         yaz_log(log_level_rpn, "APT_or_list termp=%s", termp);
1427         rset[rset_no] = term_trunc(zh, zapt, &termp, attributeSet,
1428                                     stream, &grep_info,
1429                                     reg_type, complete_flag,
1430                                     num_bases, basenames,
1431                                     term_dst, rank_type,
1432                                     xpath_use,rset_nmem);
1433         if (!rset[rset_no])
1434             break;
1435         if (++rset_no >= (int) (sizeof(rset)/sizeof(*rset)))
1436             break;
1437     }
1438     grep_info_delete (&grep_info);
1439     if (rset_no == 0)
1440         return rsnull_create (rset_nmem,key_it_ctrl);  
1441     return rsmultior_create(rset_nmem, key_it_ctrl,key_it_ctrl->scope,
1442                             rset_no, rset);
1443 }
1444
1445 static RSET rpn_search_APT_and_list (ZebraHandle zh,
1446                                      Z_AttributesPlusTerm *zapt,
1447                                      const char *termz_org,
1448                                      oid_value attributeSet,
1449                                      NMEM stream,
1450                                      int reg_type, int complete_flag,
1451                                      const char *rank_type, 
1452                                      int xpath_use,
1453                                      int num_bases, char **basenames,
1454                                      NMEM rset_nmem)
1455 {
1456     char term_dst[IT_MAX_WORD+1];
1457     RSET rset[60]; /* FIXME - bug 160 - should be dynamic somehow */
1458     int rset_no = 0;
1459     struct grep_info grep_info;
1460     char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type);
1461     const char *termp = termz;
1462
1463     if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream))
1464         return 0;
1465     while (1)
1466     { 
1467         yaz_log(log_level_rpn, "APT_and_list termp=%s", termp);
1468         rset[rset_no] = term_trunc(zh, zapt, &termp, attributeSet,
1469                                     stream, &grep_info,
1470                                     reg_type, complete_flag,
1471                                     num_bases, basenames,
1472                                     term_dst, rank_type,
1473                                     xpath_use, rset_nmem);
1474         if (!rset[rset_no])
1475             break;
1476         assert (rset[rset_no]);
1477         if (++rset_no >= (int) (sizeof(rset)/sizeof(*rset)))
1478             break;
1479     }
1480     grep_info_delete (&grep_info);
1481     if (rset_no == 0)
1482         return rsnull_create (rset_nmem,key_it_ctrl); 
1483
1484     return rsmultiand_create( rset_nmem, key_it_ctrl, key_it_ctrl->scope,
1485                               rset_no, rset);
1486 }
1487
1488 static int numeric_relation (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1489                              const char **term_sub,
1490                              char *term_dict,
1491                              oid_value attributeSet,
1492                              struct grep_info *grep_info,
1493                              int *max_pos,
1494                              int reg_type,
1495                              char *term_dst)
1496 {
1497     AttrType relation;
1498     int relation_value;
1499     int term_value;
1500     int r;
1501     char *term_tmp = term_dict + strlen(term_dict);
1502
1503     attr_init (&relation, zapt, 2);
1504     relation_value = attr_find (&relation, NULL);
1505
1506     yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1507
1508     if (!term_100 (zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1509                    term_dst))
1510         return 0;
1511     term_value = atoi (term_tmp);
1512     switch (relation_value)
1513     {
1514     case 1:
1515         yaz_log(log_level_rpn, "Relation <");
1516         gen_regular_rel (term_tmp, term_value-1, 1);
1517         break;
1518     case 2:
1519         yaz_log(log_level_rpn, "Relation <=");
1520         gen_regular_rel (term_tmp, term_value, 1);
1521         break;
1522     case 4:
1523         yaz_log(log_level_rpn, "Relation >=");
1524         gen_regular_rel (term_tmp, term_value, 0);
1525         break;
1526     case 5:
1527         yaz_log(log_level_rpn, "Relation >");
1528         gen_regular_rel (term_tmp, term_value+1, 0);
1529         break;
1530     case 3:
1531     default:
1532         yaz_log(log_level_rpn, "Relation =");
1533         sprintf (term_tmp, "(0*%d)", term_value);
1534     }
1535     yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_tmp);
1536     r = dict_lookup_grep(zh->reg->dict, term_dict, 0, grep_info, max_pos,
1537                           0, grep_handle);
1538     if (r)
1539         yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel=gt: %d", r);
1540     yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1541     return 1;
1542 }
1543
1544 static int numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1545                          const char **term_sub, 
1546                          oid_value attributeSet, struct grep_info *grep_info,
1547                          int reg_type, int complete_flag,
1548                          int num_bases, char **basenames,
1549                          char *term_dst, int xpath_use, NMEM stream)
1550 {
1551     char term_dict[2*IT_MAX_WORD+2];
1552     int r, base_no;
1553     AttrType use;
1554     int use_value;
1555     const char *use_string = 0;
1556     oid_value curAttributeSet = attributeSet;
1557     const char *termp;
1558     struct rpn_char_map_info rcmi;
1559
1560     int bases_ok = 0;     /* no of databases with OK attribute */
1561     int errCode = 0;      /* err code (if any is not OK) */
1562     char *errString = 0;  /* addinfo */
1563
1564     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1565     attr_init (&use, zapt, 1);
1566     use_value = attr_find_ex (&use, &curAttributeSet, &use_string);
1567
1568     if (use_value == -1)
1569         use_value = 1016;
1570
1571     for (base_no = 0; base_no < num_bases; base_no++)
1572     {
1573         attent attp;
1574         data1_local_attribute id_xpath_attr;
1575         data1_local_attribute *local_attr;
1576         int max_pos, prefix_len = 0;
1577
1578         termp = *term_sub;
1579         if (use_value == -2)  /* string attribute (assume IDXPATH/any) */
1580         {
1581             use_value = xpath_use;
1582             attp.local_attributes = &id_xpath_attr;
1583             attp.attset_ordinal = VAL_IDXPATH;
1584             id_xpath_attr.next = 0;
1585             id_xpath_attr.local = use_value;
1586         }
1587         else if (curAttributeSet == VAL_IDXPATH)
1588         {
1589             attp.local_attributes = &id_xpath_attr;
1590             attp.attset_ordinal = VAL_IDXPATH;
1591             id_xpath_attr.next = 0;
1592             id_xpath_attr.local = use_value;
1593         }
1594         else
1595         {
1596             if ((r=att_getentbyatt (zh, &attp, curAttributeSet, use_value,
1597                                             use_string)))
1598             {
1599                 yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
1600                       curAttributeSet, use_value, r);
1601                 if (r == -1)
1602                 {
1603                     errString = nmem_strdup_i(stream, use_value);
1604                     errCode = 114;
1605                 }
1606                 else
1607                     errCode = 121;
1608                 continue;
1609             }
1610         }
1611         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1612         {
1613             zh->errCode = 109; /* Database unavailable */
1614             zh->errString = basenames[base_no];
1615             return -1;
1616         }
1617         for (local_attr = attp.local_attributes; local_attr;
1618              local_attr = local_attr->next)
1619         {
1620             int ord;
1621             char ord_buf[32];
1622             int i, ord_len;
1623
1624             ord = zebraExplain_lookupSU (zh->reg->zei, attp.attset_ordinal,
1625                                           local_attr->local);
1626             if (ord < 0)
1627                 continue;
1628             if (prefix_len)
1629                 term_dict[prefix_len++] = '|';
1630             else
1631                 term_dict[prefix_len++] = '(';
1632
1633             ord_len = key_SU_encode (ord, ord_buf);
1634             for (i = 0; i<ord_len; i++)
1635             {
1636                 term_dict[prefix_len++] = 1;
1637                 term_dict[prefix_len++] = ord_buf[i];
1638             }
1639         }
1640         if (!prefix_len)
1641         {
1642             errCode = 114;
1643             errString = nmem_strdup_i(stream, use_value);
1644             continue;
1645         }
1646         bases_ok++;
1647         term_dict[prefix_len++] = ')';        
1648         term_dict[prefix_len++] = 1;
1649         term_dict[prefix_len++] = reg_type;
1650         yaz_log(YLOG_DEBUG, "reg_type = %d", term_dict[prefix_len-1]);
1651         term_dict[prefix_len] = '\0';
1652         if (!numeric_relation (zh, zapt, &termp, term_dict,
1653                                attributeSet, grep_info, &max_pos, reg_type,
1654                                term_dst))
1655             return 0;
1656     }
1657     if (!bases_ok)
1658     {
1659         zh->errCode = errCode;
1660         zh->errString = errString;
1661         return -1;
1662     }
1663     *term_sub = termp;
1664     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1665     return 1;
1666 }
1667
1668 static RSET rpn_search_APT_numeric (ZebraHandle zh,
1669                                     Z_AttributesPlusTerm *zapt,
1670                                     const char *termz,
1671                                     oid_value attributeSet,
1672                                     NMEM stream,
1673                                     int reg_type, int complete_flag,
1674                                     const char *rank_type, int xpath_use,
1675                                     int num_bases, char **basenames,
1676                                     NMEM rset_nmem)
1677 {
1678     char term_dst[IT_MAX_WORD+1];
1679     const char *termp = termz;
1680     RSET rset[60]; /* FIXME - hard-coded magic number */
1681     int  r, rset_no = 0;
1682     struct grep_info grep_info;
1683
1684     yaz_log(log_level_rpn, "APT_numeric t='%s'",termz);
1685     if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream))
1686         return 0;
1687     while (1)
1688     { 
1689         yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1690         grep_info.isam_p_indx = 0;
1691         r = numeric_term(zh, zapt, &termp, attributeSet, &grep_info,
1692                           reg_type, complete_flag, num_bases, basenames,
1693                           term_dst, xpath_use,
1694                           stream);
1695         if (r < 1)
1696             break;
1697         yaz_log(YLOG_DEBUG, "term: %s", term_dst);
1698         rset[rset_no] = rset_trunc(zh, grep_info.isam_p_buf,
1699                                     grep_info.isam_p_indx, term_dst,
1700                                     strlen(term_dst), rank_type,
1701                                     0 /* preserve position */,
1702                                     zapt->term->which, rset_nmem, 
1703                                     key_it_ctrl,key_it_ctrl->scope);
1704         assert (rset[rset_no]);
1705         if (++rset_no >= (int) (sizeof(rset)/sizeof(*rset)))
1706             break;
1707     }
1708     grep_info_delete (&grep_info);
1709     if (rset_no == 0)
1710         return rsnull_create (rset_nmem,key_it_ctrl);
1711     if (rset_no == 1)
1712         return rset[0];
1713     return rsmultiand_create(rset_nmem,key_it_ctrl,key_it_ctrl->scope,
1714                rset_no, rset);
1715 }
1716
1717 static RSET rpn_search_APT_local (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1718                                   const char *termz,
1719                                   oid_value attributeSet,
1720                                   NMEM stream,
1721                                   const char *rank_type, NMEM rset_nmem)
1722 {
1723     RSET result;
1724     RSFD rsfd;
1725     struct it_key key;
1726     int sys;
1727     result = rstemp_create( rset_nmem,key_it_ctrl,key_it_ctrl->scope,
1728                      res_get (zh->res, "setTmpDir"),0 );
1729     rsfd = rset_open (result, RSETF_WRITE);
1730
1731     sys = atoi(termz);
1732     if (sys <= 0)
1733         sys = 1;
1734     key.mem[0] = sys;
1735     key.mem[1] = 1;
1736     key.len = 2;
1737     rset_write (rsfd, &key);
1738     rset_close (rsfd);
1739     return result;
1740 }
1741
1742 static RSET rpn_sort_spec (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1743                            oid_value attributeSet, NMEM stream,
1744                            Z_SortKeySpecList *sort_sequence,
1745                            const char *rank_type)
1746 {
1747     int i;
1748     int sort_relation_value;
1749     AttrType sort_relation_type;
1750     int use_value;
1751     AttrType use_type;
1752     Z_SortKeySpec *sks;
1753     Z_SortKey *sk;
1754     Z_AttributeElement *ae;
1755     int oid[OID_SIZE];
1756     oident oe;
1757     char termz[20];
1758     
1759     attr_init (&sort_relation_type, zapt, 7);
1760     sort_relation_value = attr_find (&sort_relation_type, &attributeSet);
1761
1762     attr_init (&use_type, zapt, 1);
1763     use_value = attr_find (&use_type, &attributeSet);
1764
1765     if (!sort_sequence->specs)
1766     {
1767         sort_sequence->num_specs = 10;
1768         sort_sequence->specs = (Z_SortKeySpec **)
1769             nmem_malloc(stream, sort_sequence->num_specs *
1770                          sizeof(*sort_sequence->specs));
1771         for (i = 0; i<sort_sequence->num_specs; i++)
1772             sort_sequence->specs[i] = 0;
1773     }
1774     if (zapt->term->which != Z_Term_general)
1775         i = 0;
1776     else
1777         i = atoi_n ((char *) zapt->term->u.general->buf,
1778                     zapt->term->u.general->len);
1779     if (i >= sort_sequence->num_specs)
1780         i = 0;
1781     sprintf (termz, "%d", i);
1782
1783     oe.proto = PROTO_Z3950;
1784     oe.oclass = CLASS_ATTSET;
1785     oe.value = attributeSet;
1786     if (!oid_ent_to_oid (&oe, oid))
1787         return 0;
1788
1789     sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1790     sks->sortElement = (Z_SortElement *)
1791         nmem_malloc(stream, sizeof(*sks->sortElement));
1792     sks->sortElement->which = Z_SortElement_generic;
1793     sk = sks->sortElement->u.generic = (Z_SortKey *)
1794         nmem_malloc(stream, sizeof(*sk));
1795     sk->which = Z_SortKey_sortAttributes;
1796     sk->u.sortAttributes = (Z_SortAttributes *)
1797         nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
1798
1799     sk->u.sortAttributes->id = oid;
1800     sk->u.sortAttributes->list = (Z_AttributeList *)
1801         nmem_malloc(stream, sizeof(*sk->u.sortAttributes->list));
1802     sk->u.sortAttributes->list->num_attributes = 1;
1803     sk->u.sortAttributes->list->attributes = (Z_AttributeElement **)
1804         nmem_malloc(stream, sizeof(*sk->u.sortAttributes->list->attributes));
1805     ae = *sk->u.sortAttributes->list->attributes = (Z_AttributeElement *)
1806         nmem_malloc(stream, sizeof(**sk->u.sortAttributes->list->attributes));
1807     ae->attributeSet = 0;
1808     ae->attributeType = (int *)
1809         nmem_malloc(stream, sizeof(*ae->attributeType));
1810     *ae->attributeType = 1;
1811     ae->which = Z_AttributeValue_numeric;
1812     ae->value.numeric = (int *)
1813         nmem_malloc(stream, sizeof(*ae->value.numeric));
1814     *ae->value.numeric = use_value;
1815
1816     sks->sortRelation = (int *)
1817         nmem_malloc(stream, sizeof(*sks->sortRelation));
1818     if (sort_relation_value == 1)
1819         *sks->sortRelation = Z_SortKeySpec_ascending;
1820     else if (sort_relation_value == 2)
1821         *sks->sortRelation = Z_SortKeySpec_descending;
1822     else 
1823         *sks->sortRelation = Z_SortKeySpec_ascending;
1824
1825     sks->caseSensitivity = (int *)
1826         nmem_malloc(stream, sizeof(*sks->caseSensitivity));
1827     *sks->caseSensitivity = 0;
1828
1829     sks->which = Z_SortKeySpec_null;
1830     sks->u.null = odr_nullval ();
1831     sort_sequence->specs[i] = sks;
1832     return rsnull_create (NULL,key_it_ctrl);
1833         /* FIXME - nmem?? */
1834 }
1835
1836
1837 static int parse_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1838                        oid_value attributeSet,
1839                        struct xpath_location_step *xpath, int max, NMEM mem)
1840 {
1841     oid_value curAttributeSet = attributeSet;
1842     AttrType use;
1843     const char *use_string = 0;
1844     
1845     attr_init (&use, zapt, 1);
1846     attr_find_ex (&use, &curAttributeSet, &use_string);
1847
1848     if (!use_string || *use_string != '/')
1849         return -1;
1850
1851     return zebra_parse_xpath_str(use_string, xpath, max, mem);
1852 }
1853  
1854                
1855
1856 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
1857                         int reg_type, const char *term, int use,
1858                         oid_value curAttributeSet, NMEM rset_nmem)
1859 {
1860     RSET rset;
1861     struct grep_info grep_info;
1862     char term_dict[2048];
1863     char ord_buf[32];
1864     int prefix_len = 0;
1865     int ord = zebraExplain_lookupSU (zh->reg->zei, curAttributeSet, use);
1866     int ord_len, i, r, max_pos;
1867     int term_type = Z_Term_characterString;
1868     const char *flags = "void";
1869
1870     if (grep_info_prepare (zh, 0 /* zapt */, &grep_info, '0', stream))
1871         return rsnull_create (rset_nmem,key_it_ctrl);
1872
1873     if (ord < 0)
1874         return rsnull_create (rset_nmem,key_it_ctrl);
1875     if (prefix_len)
1876         term_dict[prefix_len++] = '|';
1877     else
1878         term_dict[prefix_len++] = '(';
1879     
1880     ord_len = key_SU_encode (ord, ord_buf);
1881     for (i = 0; i<ord_len; i++)
1882     {
1883         term_dict[prefix_len++] = 1;
1884         term_dict[prefix_len++] = ord_buf[i];
1885     }
1886     term_dict[prefix_len++] = ')';
1887     term_dict[prefix_len++] = 1;
1888     term_dict[prefix_len++] = reg_type;
1889     
1890     strcpy(term_dict+prefix_len, term);
1891     
1892     grep_info.isam_p_indx = 0;
1893     r = dict_lookup_grep(zh->reg->dict, term_dict, 0,
1894                           &grep_info, &max_pos, 0, grep_handle);
1895     yaz_log (YLOG_LOG, "%s %d positions", term,
1896              grep_info.isam_p_indx);
1897     rset = rset_trunc(zh, grep_info.isam_p_buf,
1898                        grep_info.isam_p_indx, term, strlen(term),
1899                        flags, 1, term_type,rset_nmem,
1900                        key_it_ctrl, key_it_ctrl->scope);
1901     grep_info_delete (&grep_info);
1902     return rset;
1903 }
1904
1905 static RSET rpn_search_xpath (ZebraHandle zh,
1906                               oid_value attributeSet,
1907                               int num_bases, char **basenames,
1908                               NMEM stream, const char *rank_type, RSET rset,
1909                               int xpath_len, struct xpath_location_step *xpath,
1910                               NMEM rset_nmem)
1911 {
1912     oid_value curAttributeSet = attributeSet;
1913     int base_no;
1914     int i;
1915
1916     if (xpath_len < 0)
1917         return rset;
1918
1919     yaz_log (YLOG_DEBUG, "xpath len=%d", xpath_len);
1920     for (i = 0; i<xpath_len; i++)
1921     {
1922         yaz_log (log_level_rpn, "XPATH %d %s", i, xpath[i].part);
1923
1924     }
1925
1926     curAttributeSet = VAL_IDXPATH;
1927
1928     /*
1929       //a    ->    a/.*
1930       //a/b  ->    b/a/.*
1931       /a     ->    a/
1932       /a/b   ->    b/a/
1933
1934       /      ->    none
1935
1936    a[@attr=value]/b[@other=othervalue]
1937
1938  /e/@a val      range(e/,range(@a,freetext(w,1015,val),@a),e/)
1939  /a/b val       range(b/a/,freetext(w,1016,val),b/a/)
1940  /a/b/@c val    range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
1941  /a/b[@c=y] val range(b/a/,freetext(w,1016,val),b/a/,@c=y)
1942  /a[@c=y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c=y)
1943  /a[@c=x]/b[@c=y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c=y),a/,@c=x)
1944       
1945     */
1946
1947     dict_grep_cmap (zh->reg->dict, 0, 0);
1948
1949     for (base_no = 0; base_no < num_bases; base_no++)
1950     {
1951         int level = xpath_len;
1952         int first_path = 1;
1953         
1954         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1955         {
1956             zh->errCode = 109; /* Database unavailable */
1957             zh->errString = basenames[base_no];
1958             return rset;
1959         }
1960         while (--level >= 0)
1961         {
1962             char xpath_rev[128];
1963             int i, len;
1964             RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
1965
1966             *xpath_rev = 0;
1967             len = 0;
1968             for (i = level; i >= 1; --i)
1969             {
1970                 const char *cp = xpath[i].part;
1971                 if (*cp)
1972                 {
1973                     for (;*cp; cp++)
1974                         if (*cp == '*')
1975                         {
1976                             memcpy (xpath_rev + len, "[^/]*", 5);
1977                             len += 5;
1978                         }
1979                         else if (*cp == ' ')
1980                         {
1981
1982                             xpath_rev[len++] = 1;
1983                             xpath_rev[len++] = ' ';
1984                         }
1985
1986                         else
1987                             xpath_rev[len++] = *cp;
1988                     xpath_rev[len++] = '/';
1989                 }
1990                 else if (i == 1)  /* // case */
1991                 {
1992                     xpath_rev[len++] = '.';
1993                     xpath_rev[len++] = '*';
1994                 }
1995             }
1996             xpath_rev[len] = 0;
1997
1998             if (xpath[level].predicate &&
1999                 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2000                 xpath[level].predicate->u.relation.name[0])
2001             {
2002                 WRBUF wbuf = wrbuf_alloc();
2003                 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2004                 if (xpath[level].predicate->u.relation.value)
2005                 {
2006                     const char *cp = xpath[level].predicate->u.relation.value;
2007                     wrbuf_putc(wbuf, '=');
2008                     
2009                     while (*cp)
2010                     {
2011                         if (strchr(REGEX_CHARS, *cp))
2012                             wrbuf_putc(wbuf, '\\');
2013                         wrbuf_putc(wbuf, *cp);
2014                         cp++;
2015                     }
2016                 }
2017                 wrbuf_puts(wbuf, "");
2018                 rset_attr = xpath_trunc(
2019                     zh, stream, '0', wrbuf_buf(wbuf), 3, 
2020                     curAttributeSet,rset_nmem);
2021                 wrbuf_free(wbuf, 1);
2022             } 
2023             else 
2024             {
2025                 if (!first_path)
2026                     continue;
2027             }
2028             yaz_log (log_level_rpn, "xpath_rev (%d) = %s", level, xpath_rev);
2029             if (strlen(xpath_rev))
2030             {
2031                 rset_start_tag = xpath_trunc(zh, stream, '0', 
2032                         xpath_rev, 1, curAttributeSet, rset_nmem);
2033             
2034                 rset_end_tag = xpath_trunc(zh, stream, '0', 
2035                         xpath_rev, 2, curAttributeSet, rset_nmem);
2036
2037                 /*
2038                 parms.key_size = sizeof(struct it_key);
2039                 parms.cmp = key_compare_it;
2040                 parms.rset_l = rset_start_tag;
2041                 parms.rset_m = rset;
2042                 parms.rset_r = rset_end_tag;
2043                 parms.rset_attr = rset_attr;
2044                 parms.printer = key_print_it;
2045                 rset = rset_create (rset_kind_between, &parms);
2046                 */
2047                 rset=rsbetween_create( rset_nmem,key_it_ctrl,key_it_ctrl->scope,
2048                         rset_start_tag, rset, rset_end_tag, rset_attr);
2049             }
2050             first_path = 0;
2051         }
2052     }
2053
2054     return rset;
2055 }
2056
2057
2058
2059 static RSET rpn_search_APT (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2060                             oid_value attributeSet, NMEM stream,
2061                             Z_SortKeySpecList *sort_sequence,
2062                             int num_bases, char **basenames, 
2063                             NMEM rset_nmem)
2064 {
2065     unsigned reg_id;
2066     char *search_type = NULL;
2067     char rank_type[128];
2068     int complete_flag;
2069     int sort_flag;
2070     char termz[IT_MAX_WORD+1];
2071     RSET rset = 0;
2072     int xpath_len;
2073     int xpath_use = 0;
2074     struct xpath_location_step xpath[10];
2075
2076     if (!log_level_set)
2077     {
2078         log_level_rpn = yaz_log_module_level("rpn");
2079         log_level_set=1;
2080     }
2081     zebra_maps_attr (zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2082                      rank_type, &complete_flag, &sort_flag);
2083     
2084     yaz_log(YLOG_DEBUG, "reg_id=%c", reg_id);
2085     yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2086     yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2087     yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2088
2089     if (zapt_term_to_utf8(zh, zapt, termz))
2090         return 0;
2091
2092     if (sort_flag)
2093         return rpn_sort_spec (zh, zapt, attributeSet, stream, sort_sequence,
2094                               rank_type);
2095     xpath_len = parse_xpath(zh, zapt, attributeSet, xpath, 10, stream);
2096     if (xpath_len >= 0)
2097     {
2098         xpath_use = 1016;
2099         if (xpath[xpath_len-1].part[0] == '@')
2100             xpath_use = 1015;
2101     }
2102
2103     if (!strcmp (search_type, "phrase"))
2104     {
2105         rset = rpn_search_APT_phrase (zh, zapt, termz, attributeSet, stream,
2106                                       reg_id, complete_flag, rank_type,
2107                                       xpath_use,
2108                                       num_bases, basenames, rset_nmem);
2109     }
2110     else if (!strcmp (search_type, "and-list"))
2111     {
2112         rset = rpn_search_APT_and_list (zh, zapt, termz, attributeSet, stream,
2113                                         reg_id, complete_flag, rank_type,
2114                                         xpath_use,
2115                                         num_bases, basenames, rset_nmem);
2116     }
2117     else if (!strcmp (search_type, "or-list"))
2118     {
2119         rset = rpn_search_APT_or_list (zh, zapt, termz, attributeSet, stream,
2120                                        reg_id, complete_flag, rank_type,
2121                                        xpath_use,
2122                                        num_bases, basenames, rset_nmem);
2123     }
2124     else if (!strcmp (search_type, "local"))
2125     {
2126         rset = rpn_search_APT_local (zh, zapt, termz, attributeSet, stream,
2127                                      rank_type, rset_nmem);
2128     }
2129     else if (!strcmp (search_type, "numeric"))
2130     {
2131         rset = rpn_search_APT_numeric (zh, zapt, termz, attributeSet, stream,
2132                                        reg_id, complete_flag, rank_type,
2133                                        xpath_use,
2134                                        num_bases, basenames, rset_nmem);
2135     }
2136     else if (!strcmp (search_type, "always"))
2137     {
2138         rset = 0;
2139     }
2140     else
2141         zh->errCode = 118;
2142     return rpn_search_xpath (zh, attributeSet, num_bases, basenames,
2143                              stream, rank_type, rset, 
2144                              xpath_len, xpath, rset_nmem);
2145 }
2146
2147 static RSET rpn_search_structure (ZebraHandle zh, Z_RPNStructure *zs,
2148                                   oid_value attributeSet, 
2149                                   NMEM stream, NMEM rset_nmem,
2150                                   Z_SortKeySpecList *sort_sequence,
2151                                   int num_bases, char **basenames)
2152 {
2153     RSET r = NULL;
2154     if (zs->which == Z_RPNStructure_complex)
2155     {
2156         Z_Operator *zop = zs->u.complex->roperator;
2157         RSET rsets[2]; /* l and r argument */
2158
2159         rsets[0]=rpn_search_structure (zh, zs->u.complex->s1,
2160                                        attributeSet, stream, rset_nmem,
2161                                        sort_sequence,
2162                                        num_bases, basenames);
2163         if (rsets[0] == NULL)
2164             return NULL;
2165         rsets[1]=rpn_search_structure (zh, zs->u.complex->s2,
2166                                        attributeSet, stream, rset_nmem,
2167                                        sort_sequence,
2168                                        num_bases, basenames);
2169         if (rsets[1] == NULL)
2170         {
2171             rset_delete (rsets[0]);
2172             return NULL;
2173         }
2174
2175         switch (zop->which)
2176         {
2177         case Z_Operator_and:
2178             r=rsmultiand_create(rset_nmem, key_it_ctrl, key_it_ctrl->scope,
2179                         2, rsets);
2180             break;
2181         case Z_Operator_or:
2182             r=rsmultior_create(rset_nmem, key_it_ctrl, key_it_ctrl->scope,
2183                         2, rsets);
2184             break;
2185         case Z_Operator_and_not:
2186             r = rsbool_create_not(rset_nmem,key_it_ctrl, key_it_ctrl->scope,
2187                     rsets[0],rsets[1]);
2188             break;
2189         case Z_Operator_prox:
2190             if (zop->u.prox->which != Z_ProximityOperator_known)
2191             {
2192                 zh->errCode = 132;
2193                 return NULL;
2194             }
2195             if (*zop->u.prox->u.known != Z_ProxUnit_word)
2196             {
2197                 char *val = (char *) nmem_malloc(stream, 16);
2198                 zh->errCode = 132;
2199                 zh->errString = val;
2200                 sprintf (val, "%d", *zop->u.prox->u.known);
2201                 return NULL;
2202             }
2203             else
2204             {
2205                 /* new / old prox */
2206                 r=rsprox_create(rset_nmem,key_it_ctrl,key_it_ctrl->scope,
2207                          2, rsets, 
2208                          *zop->u.prox->ordered,
2209                          (!zop->u.prox->exclusion ? 
2210                               0 : *zop->u.prox->exclusion),
2211                          *zop->u.prox->relationType,
2212                          *zop->u.prox->distance );
2213             }
2214             break;
2215         default:
2216             zh->errCode = 110;
2217             return NULL;
2218         }
2219     }
2220     else if (zs->which == Z_RPNStructure_simple)
2221     {
2222         if (zs->u.simple->which == Z_Operand_APT)
2223         {
2224             yaz_log(YLOG_DEBUG, "rpn_search_APT");
2225             r = rpn_search_APT (zh, zs->u.simple->u.attributesPlusTerm,
2226                                 attributeSet, stream, sort_sequence,
2227                                 num_bases, basenames,rset_nmem);
2228         }
2229         else if (zs->u.simple->which == Z_Operand_resultSetId)
2230         {
2231             yaz_log(YLOG_DEBUG, "rpn_search_ref");
2232             r = resultSetRef (zh, zs->u.simple->u.resultSetId);
2233             if (!r)
2234             {
2235                 r = rsnull_create (rset_nmem,key_it_ctrl);
2236                 zh->errCode = 30;
2237                 zh->errString =
2238                     nmem_strdup (stream, zs->u.simple->u.resultSetId);
2239                 return 0;
2240             }
2241             else
2242                 rset_dup(r);
2243         }
2244         else
2245         {
2246             zh->errCode = 3;
2247             return 0;
2248         }
2249     }
2250     else
2251     {
2252         zh->errCode = 3;
2253         return 0;
2254     }
2255     return r;
2256 }
2257
2258
2259 RSET rpn_search(ZebraHandle zh, NMEM nmem, NMEM rset_nmem,
2260                 Z_RPNQuery *rpn, int num_bases, char **basenames, 
2261                 const char *setname,
2262                 ZebraSet sset)
2263 {
2264     RSET rset;
2265     oident *attrset;
2266     oid_value attributeSet;
2267     Z_SortKeySpecList *sort_sequence;
2268     int sort_status, i;
2269
2270     zh->errCode = 0;
2271     zh->errString = NULL;
2272     zh->hits = 0;
2273
2274     sort_sequence = (Z_SortKeySpecList *)
2275         nmem_malloc(nmem, sizeof(*sort_sequence));
2276     sort_sequence->num_specs = 10; /* FIXME - Hard-coded number */
2277     sort_sequence->specs = (Z_SortKeySpec **)
2278         nmem_malloc(nmem, sort_sequence->num_specs *
2279                      sizeof(*sort_sequence->specs));
2280     for (i = 0; i<sort_sequence->num_specs; i++)
2281         sort_sequence->specs[i] = 0;
2282     
2283     attrset = oid_getentbyoid (rpn->attributeSetId);
2284     attributeSet = attrset->value;
2285     rset = rpn_search_structure (zh, rpn->RPNStructure, attributeSet,
2286                                  nmem, rset_nmem,
2287                                  sort_sequence, num_bases, basenames);
2288     if (!rset)
2289         return 0;
2290
2291     if (zh->errCode)
2292         yaz_log(YLOG_DEBUG, "search error: %d", zh->errCode);
2293     
2294     for (i = 0; sort_sequence->specs[i]; i++)
2295         ;
2296     sort_sequence->num_specs = i;
2297     if (!i)
2298         resultSetRank (zh, sset, rset, rset_nmem);
2299     else
2300     {
2301         yaz_log(YLOG_DEBUG, "resultSetSortSingle in rpn_search");
2302         resultSetSortSingle (zh, nmem, sset, rset,
2303                              sort_sequence, &sort_status);
2304         if (zh->errCode)
2305         {
2306             yaz_log(YLOG_DEBUG, "resultSetSortSingle status = %d", zh->errCode);
2307         }
2308     }
2309     return rset;
2310 }
2311
2312 struct scan_info_entry {
2313     char *term;
2314     ISAMC_P isam_p;
2315 };
2316
2317 struct scan_info {
2318     struct scan_info_entry *list;
2319     ODR odr;
2320     int before, after;
2321     char prefix[20];
2322 };
2323
2324 static int scan_handle (char *name, const char *info, int pos, void *client)
2325 {
2326     int len_prefix, idx;
2327     struct scan_info *scan_info = (struct scan_info *) client;
2328
2329     len_prefix = strlen(scan_info->prefix);
2330     if (memcmp (name, scan_info->prefix, len_prefix))
2331         return 1;
2332     if (pos > 0)        idx = scan_info->after - pos + scan_info->before;
2333     else
2334         idx = - pos - 1;
2335     scan_info->list[idx].term = (char *)
2336         odr_malloc(scan_info->odr, strlen(name + len_prefix)+1);
2337     strcpy(scan_info->list[idx].term, name + len_prefix);
2338     assert (*info == sizeof(ISAMC_P));
2339     memcpy (&scan_info->list[idx].isam_p, info+1, sizeof(ISAMC_P));
2340     return 0;
2341 }
2342
2343 static void scan_term_untrans (ZebraHandle zh, NMEM stream, int reg_type,
2344                                char **dst, const char *src)
2345 {
2346     char term_src[IT_MAX_WORD];
2347     char term_dst[IT_MAX_WORD];
2348     
2349     term_untrans (zh, reg_type, term_src, src);
2350
2351     if (zh->iconv_from_utf8 != 0)
2352     {
2353         int len;
2354         char *inbuf = term_src;
2355         size_t inleft = strlen(term_src);
2356         char *outbuf = term_dst;
2357         size_t outleft = sizeof(term_dst)-1;
2358         size_t ret;
2359         
2360         ret = yaz_iconv (zh->iconv_from_utf8, &inbuf, &inleft,
2361                          &outbuf, &outleft);
2362         if (ret == (size_t)(-1))
2363             len = 0;
2364         else
2365             len = outbuf - term_dst;
2366         *dst = nmem_malloc(stream, len + 1);
2367         if (len > 0)
2368             memcpy (*dst, term_dst, len);
2369         (*dst)[len] = '\0';
2370     }
2371     else
2372         *dst = nmem_strdup(stream, term_src);
2373 }
2374
2375 static void count_set (RSET r, int *count)
2376 {
2377     zint psysno = 0;
2378     int kno = 0;
2379     struct it_key key;
2380     RSFD rfd;
2381
2382     yaz_log(YLOG_DEBUG, "count_set");
2383
2384     *count = 0;
2385     rfd = rset_open (r, RSETF_READ);
2386     while (rset_read (rfd, &key,0 /* never mind terms */))
2387     {
2388         if (key.mem[0] != psysno)
2389         {
2390             psysno = key.mem[0];
2391             (*count)++;
2392         }
2393         kno++;
2394     }
2395     rset_close (rfd);
2396     yaz_log(YLOG_DEBUG, "%d keys, %d records", kno, *count);
2397 }
2398
2399 void rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
2400                oid_value attributeset,
2401                int num_bases, char **basenames,
2402                int *position, int *num_entries, ZebraScanEntry **list,
2403                int *is_partial, RSET limit_set, int return_zero)
2404 {
2405     int i;
2406     int pos = *position;
2407     int num = *num_entries;
2408     int before;
2409     int after;
2410     int base_no;
2411     char termz[IT_MAX_WORD+20];
2412     AttrType use;
2413     int use_value;
2414     const char *use_string = 0;
2415     struct scan_info *scan_info_array;
2416     ZebraScanEntry *glist;
2417     int ords[32], ord_no = 0;
2418     int ptr[32];
2419
2420     int bases_ok = 0;     /* no of databases with OK attribute */
2421     int errCode = 0;      /* err code (if any is not OK) */
2422     char *errString = 0;  /* addinfo */
2423
2424     unsigned reg_id;
2425     char *search_type = NULL;
2426     char rank_type[128];
2427     int complete_flag;
2428     int sort_flag;
2429     NMEM rset_nmem=NULL; 
2430
2431     *list = 0;
2432
2433     if (attributeset == VAL_NONE)
2434         attributeset = VAL_BIB1;
2435
2436     if (!limit_set)
2437     {
2438         AttrType termset;
2439         int termset_value_numeric;
2440         const char *termset_value_string;
2441         attr_init (&termset, zapt, 8);
2442         termset_value_numeric =
2443             attr_find_ex (&termset, NULL, &termset_value_string);
2444         if (termset_value_numeric != -1)
2445         {
2446             char resname[32];
2447             const char *termset_name = 0;
2448             
2449             if (termset_value_numeric != -2)
2450             {
2451                 
2452                 sprintf (resname, "%d", termset_value_numeric);
2453                 termset_name = resname;
2454             }
2455             else
2456                 termset_name = termset_value_string;
2457             
2458             limit_set = resultSetRef (zh, termset_name);
2459         }
2460     }
2461         
2462     yaz_log (YLOG_DEBUG, "position = %d, num = %d set=%d",
2463              pos, num, attributeset);
2464         
2465     attr_init (&use, zapt, 1);
2466     use_value = attr_find_ex (&use, &attributeset, &use_string);
2467
2468     if (zebra_maps_attr (zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2469                          rank_type, &complete_flag, &sort_flag))
2470     {
2471         *num_entries = 0;
2472         zh->errCode = 113;
2473         return ;
2474     }
2475     yaz_log (YLOG_DEBUG, "use_value = %d", use_value);
2476
2477     if (use_value == -1)
2478         use_value = 1016;
2479     for (base_no = 0; base_no < num_bases && ord_no < 32; base_no++)
2480     {
2481         int r;
2482         attent attp;
2483         data1_local_attribute *local_attr;
2484
2485         if ((r=att_getentbyatt (zh, &attp, attributeset, use_value,
2486                                 use_string)))
2487         {
2488             yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d",
2489                   attributeset, use_value);
2490             if (r == -1)
2491             {
2492                 char val_str[32];
2493                 sprintf (val_str, "%d", use_value);
2494                 errCode = 114;
2495                 errString = odr_strdup (stream, val_str);
2496             }   
2497             else
2498                 errCode = 121;
2499             continue;
2500         }
2501         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2502         {
2503             zh->errString = basenames[base_no];
2504             zh->errCode = 109; /* Database unavailable */
2505             *num_entries = 0;
2506             return;
2507         }
2508         bases_ok++;
2509         for (local_attr = attp.local_attributes; local_attr && ord_no < 32;
2510              local_attr = local_attr->next)
2511         {
2512             int ord;
2513
2514             ord = zebraExplain_lookupSU (zh->reg->zei, attp.attset_ordinal,
2515                                          local_attr->local);
2516             if (ord > 0)
2517                 ords[ord_no++] = ord;
2518         }
2519     }
2520     if (!bases_ok && errCode)
2521     {
2522         zh->errCode = errCode;
2523         zh->errString = errString;
2524         *num_entries = 0;
2525     }
2526     if (ord_no == 0)
2527     {
2528         *num_entries = 0;
2529         return;
2530     }
2531     /* prepare dictionary scanning */
2532     before = pos-1;
2533     after = 1+num-pos;
2534     scan_info_array = (struct scan_info *)
2535         odr_malloc(stream, ord_no * sizeof(*scan_info_array));
2536     for (i = 0; i < ord_no; i++)
2537     {
2538         int j, prefix_len = 0;
2539         int before_tmp = before, after_tmp = after;
2540         struct scan_info *scan_info = scan_info_array + i;
2541         struct rpn_char_map_info rcmi;
2542
2543         rpn_char_map_prepare (zh->reg, reg_id, &rcmi);
2544
2545         scan_info->before = before;
2546         scan_info->after = after;
2547         scan_info->odr = stream;
2548
2549         scan_info->list = (struct scan_info_entry *)
2550             odr_malloc(stream, (before+after) * sizeof(*scan_info->list));
2551         for (j = 0; j<before+after; j++)
2552             scan_info->list[j].term = NULL;
2553
2554         prefix_len += key_SU_encode (ords[i], termz + prefix_len);
2555         termz[prefix_len++] = reg_id;
2556         termz[prefix_len] = 0;
2557         strcpy(scan_info->prefix, termz);
2558
2559         if (trans_scan_term(zh, zapt, termz+prefix_len, reg_id))
2560             return ;
2561                     
2562         dict_scan(zh->reg->dict, termz, &before_tmp, &after_tmp,
2563                   scan_info, scan_handle);
2564     }
2565     glist = (ZebraScanEntry *)
2566         odr_malloc(stream, (before+after)*sizeof(*glist));
2567
2568     rset_nmem = nmem_create();
2569
2570     /* consider terms after main term */
2571     for (i = 0; i < ord_no; i++)
2572         ptr[i] = before;
2573     
2574     *is_partial = 0;
2575     for (i = 0; i<after; i++)
2576     {
2577         int j, j0 = -1;
2578         const char *mterm = NULL;
2579         const char *tst;
2580         RSET rset;
2581         
2582         for (j = 0; j < ord_no; j++)
2583         {
2584             if (ptr[j] < before+after &&
2585                 (tst=scan_info_array[j].list[ptr[j]].term) &&
2586                 (!mterm || strcmp (tst, mterm) < 0))
2587             {
2588                 j0 = j;
2589                 mterm = tst;
2590             }
2591         }
2592         if (j0 == -1)
2593             break;
2594         scan_term_untrans (zh, stream->mem, reg_id,
2595                            &glist[i+before].term, mterm);
2596         rset = rset_trunc(zh, &scan_info_array[j0].list[ptr[j0]].isam_p, 1,
2597                            glist[i+before].term, strlen(glist[i+before].term),
2598                            NULL, 0, zapt->term->which, rset_nmem, 
2599                            key_it_ctrl,key_it_ctrl->scope);
2600         ptr[j0]++;
2601         for (j = j0+1; j<ord_no; j++)
2602         {
2603             if (ptr[j] < before+after &&
2604                 (tst=scan_info_array[j].list[ptr[j]].term) &&
2605                 !strcmp (tst, mterm))
2606             {
2607                 RSET rset2;
2608
2609                 rset2 =
2610                    rset_trunc(zh, &scan_info_array[j].list[ptr[j]].isam_p, 1,
2611                                glist[i+before].term,
2612                                strlen(glist[i+before].term), NULL, 0,
2613                                zapt->term->which,rset_nmem,
2614                                key_it_ctrl, key_it_ctrl->scope);
2615                 rset = rsbool_create_or(rset_nmem,key_it_ctrl,
2616                                key_it_ctrl->scope, rset, rset2);
2617                 /* FIXME - Use a proper multi-or */
2618
2619                 ptr[j]++;
2620             }
2621         }
2622         if (limit_set)
2623             rset = rsbool_create_and(rset_nmem,key_it_ctrl,key_it_ctrl->scope,
2624                             rset, rset_dup(limit_set));
2625         count_set (rset, &glist[i+before].occurrences);
2626         rset_delete (rset);
2627     }
2628     if (i < after)
2629     {
2630         *num_entries -= (after-i);
2631         *is_partial = 1;
2632     }
2633
2634     /* consider terms before main term */
2635     for (i = 0; i<ord_no; i++)
2636         ptr[i] = 0;
2637
2638     for (i = 0; i<before; i++)
2639     {
2640         int j, j0 = -1;
2641         const char *mterm = NULL;
2642         const char *tst;
2643         RSET rset;
2644         
2645         for (j = 0; j <ord_no; j++)
2646         {
2647             if (ptr[j] < before &&
2648                 (tst=scan_info_array[j].list[before-1-ptr[j]].term) &&
2649                 (!mterm || strcmp (tst, mterm) > 0))
2650             {
2651                 j0 = j;
2652                 mterm = tst;
2653             }
2654         }
2655         if (j0 == -1)
2656             break;
2657
2658         scan_term_untrans (zh, stream->mem, reg_id,
2659                            &glist[before-1-i].term, mterm);
2660
2661         rset = rset_trunc
2662                (zh, &scan_info_array[j0].list[before-1-ptr[j0]].isam_p, 1,
2663                 glist[before-1-i].term, strlen(glist[before-1-i].term),
2664                 NULL, 0, zapt->term->which,rset_nmem,
2665                 key_it_ctrl,key_it_ctrl->scope);
2666
2667         ptr[j0]++;
2668
2669         for (j = j0+1; j<ord_no; j++)
2670         {
2671             if (ptr[j] < before &&
2672                 (tst=scan_info_array[j].list[before-1-ptr[j]].term) &&
2673                 !strcmp (tst, mterm))
2674             {
2675                 RSET rset2;
2676
2677                 rset2 = rset_trunc(
2678                     zh,
2679                     &scan_info_array[j].list[before-1-ptr[j]].isam_p, 1,
2680                     glist[before-1-i].term,
2681                     strlen(glist[before-1-i].term), NULL, 0,
2682                     zapt->term->which, rset_nmem,
2683                     key_it_ctrl, key_it_ctrl->scope);
2684                 rset = rsbool_create_and(rset_nmem,key_it_ctrl,
2685                                          key_it_ctrl->scope, rset, rset2);
2686                 /* FIXME - multi-and ?? */
2687                 ptr[j]++;
2688             }
2689         }
2690         if (limit_set)
2691             rset = rsbool_create_and(rset_nmem,key_it_ctrl,key_it_ctrl->scope,
2692                             rset, rset_dup(limit_set));
2693         count_set (rset, &glist[before-1-i].occurrences);
2694         rset_delete (rset);
2695     }
2696     i = before-i;
2697     if (i)
2698     {
2699         *is_partial = 1;
2700         *position -= i;
2701         *num_entries -= i;
2702     }
2703
2704     nmem_destroy(rset_nmem);
2705     *list = glist + i;               /* list is set to first 'real' entry */
2706     
2707     yaz_log(YLOG_DEBUG, "position = %d, num_entries = %d",
2708           *position, *num_entries);
2709     if (zh->errCode)
2710         yaz_log(YLOG_DEBUG, "scan error: %d", zh->errCode);
2711 }
2712