bc273e797a28664adfd4b86bb2690387f66d7059
[idzebra-moved-to-github.git] / index / zrpn.c
1 /* $Id: zrpn.c,v 1.160 2004-11-15 22:57:25 adam Exp $
2    Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004
3    Index Data Aps
4
5 This file is part of the Zebra server.
6
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra.  If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA.
21 */
22
23
24 #include <stdio.h>
25 #include <assert.h>
26 #ifdef WIN32
27 #include <io.h>
28 #else
29 #include <unistd.h>
30 #endif
31 #include <ctype.h>
32
33 #include "index.h"
34 #include <zebra_xpath.h>
35
36 #include <charmap.h>
37 #include <rset.h>
38
39
40 static const struct key_control it_ctrl =
41
42     sizeof(struct it_key),
43     2, /* we have sysnos and seqnos in this key, nothing more */
44     key_compare_it, 
45     key_logdump_txt,   /* FIXME  - clean up these functions */
46     key_get_seq,
47 };
48
49
50 const struct key_control *key_it_ctrl = &it_ctrl;
51
52 struct rpn_char_map_info
53 {
54     ZebraMaps zm;
55     int reg_type;
56 };
57
58 typedef struct
59 {
60     int type;
61     int major;
62     int minor;
63     Z_AttributesPlusTerm *zapt;
64 } AttrType;
65
66
67 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
68 {
69     struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
70     const char **out = zebra_maps_input(p->zm, p->reg_type, from, len, 0);
71 #if 0
72     if (out && *out)
73     {
74         const char *outp = *out;
75         yaz_log(LOG_LOG, "---");
76         while (*outp)
77         {
78             yaz_log(LOG_LOG, "%02X", *outp);
79             outp++;
80         }
81     }
82 #endif
83     return out;
84 }
85
86 static void rpn_char_map_prepare(struct zebra_register *reg, int reg_type,
87                                   struct rpn_char_map_info *map_info)
88 {
89     map_info->zm = reg->zebra_maps;
90     map_info->reg_type = reg_type;
91     dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
92 }
93
94 static int attr_find_ex(AttrType *src, oid_value *attributeSetP,
95                          const char **string_value)
96 {
97     int num_attributes;
98
99     num_attributes = src->zapt->attributes->num_attributes;
100     while (src->major < num_attributes)
101     {
102         Z_AttributeElement *element;
103
104         element = src->zapt->attributes->attributes[src->major];
105         if (src->type == *element->attributeType)
106         {
107             switch (element->which) 
108             {
109             case Z_AttributeValue_numeric:
110                 ++(src->major);
111                 if (element->attributeSet && attributeSetP)
112                 {
113                     oident *attrset;
114
115                     attrset = oid_getentbyoid(element->attributeSet);
116                     *attributeSetP = attrset->value;
117                 }
118                 return *element->value.numeric;
119                 break;
120             case Z_AttributeValue_complex:
121                 if (src->minor >= element->value.complex->num_list)
122                     break;
123                 if (element->attributeSet && attributeSetP)
124                 {
125                     oident *attrset;
126                     
127                     attrset = oid_getentbyoid(element->attributeSet);
128                     *attributeSetP = attrset->value;
129                 }
130                 if (element->value.complex->list[src->minor]->which ==  
131                     Z_StringOrNumeric_numeric)
132                 {
133                     ++(src->minor);
134                     return
135                         *element->value.complex->list[src->minor-1]->u.numeric;
136                 }
137                 else if (element->value.complex->list[src->minor]->which ==  
138                          Z_StringOrNumeric_string)
139                 {
140                     if (!string_value)
141                         break;
142                     ++(src->minor);
143                     *string_value = 
144                         element->value.complex->list[src->minor-1]->u.string;
145                     return -2;
146                 }
147                 else
148                     break;
149             default:
150                 assert(0);
151             }
152         }
153         ++(src->major);
154     }
155     return -1;
156 }
157
158 static int attr_find(AttrType *src, oid_value *attributeSetP)
159 {
160     return attr_find_ex(src, attributeSetP, 0);
161 }
162
163 static void attr_init(AttrType *src, Z_AttributesPlusTerm *zapt,
164                        int type)
165 {
166     src->zapt = zapt;
167     src->type = type;
168     src->major = 0;
169     src->minor = 0;
170 }
171
172 #define TERM_COUNT        
173        
174 struct grep_info {        
175 #ifdef TERM_COUNT        
176     int *term_no;        
177 #endif        
178     ISAMC_P *isam_p_buf;
179     int isam_p_size;        
180     int isam_p_indx;
181     ZebraHandle zh;
182     int reg_type;
183     ZebraSet termset;
184 };        
185
186 static void term_untrans(ZebraHandle zh, int reg_type,
187                            char *dst, const char *src)
188 {
189     int len = 0;
190     while (*src)
191     {
192         const char *cp = zebra_maps_output(zh->reg->zebra_maps,
193                                             reg_type, &src);
194         if (!cp && len < IT_MAX_WORD-1)
195             dst[len++] = *src++;
196         else
197             while (*cp && len < IT_MAX_WORD-1)
198                 dst[len++] = *cp++;
199     }
200     dst[len] = '\0';
201 }
202
203 static void add_isam_p(const char *name, const char *info,
204                         struct grep_info *p)
205 {
206     if (p->isam_p_indx == p->isam_p_size)
207     {
208         ISAMC_P *new_isam_p_buf;
209 #ifdef TERM_COUNT        
210         int *new_term_no;        
211 #endif
212         p->isam_p_size = 2*p->isam_p_size + 100;
213         new_isam_p_buf = (ISAMC_P *) xmalloc(sizeof(*new_isam_p_buf) *
214                                              p->isam_p_size);
215         if (p->isam_p_buf)
216         {
217             memcpy(new_isam_p_buf, p->isam_p_buf,
218                     p->isam_p_indx * sizeof(*p->isam_p_buf));
219             xfree(p->isam_p_buf);
220         }
221         p->isam_p_buf = new_isam_p_buf;
222
223 #ifdef TERM_COUNT
224         new_term_no = (int *) xmalloc(sizeof(*new_term_no) *
225                                        p->isam_p_size);
226         if (p->term_no)
227         {
228             memcpy(new_term_no, p->isam_p_buf,
229                     p->isam_p_indx * sizeof(*p->term_no));
230             xfree(p->term_no);
231         }
232         p->term_no = new_term_no;
233 #endif
234     }
235     assert(*info == sizeof(*p->isam_p_buf));
236     memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
237
238 #if 1
239     if (p->termset)
240     {
241         const char *db;
242         int set, use;
243         char term_tmp[IT_MAX_WORD];
244         int su_code = 0;
245         int len = key_SU_decode (&su_code, name);
246         
247         term_untrans  (p->zh, p->reg_type, term_tmp, name+len+1);
248         yaz_log(LOG_LOG, "grep: %d %c %s", su_code, name[len], term_tmp);
249         zebraExplain_lookup_ord (p->zh->reg->zei,
250                                  su_code, &db, &set, &use);
251         yaz_log(LOG_LOG, "grep:  set=%d use=%d db=%s", set, use, db);
252         
253         resultSetAddTerm(p->zh, p->termset, name[len], db,
254                          set, use, term_tmp);
255     }
256 #endif
257     (p->isam_p_indx)++;
258 }
259
/*
 * Callback-shaped wrapper around add_isam_p(): p is the struct grep_info
 * accumulator.  Always returns 0.
 */
static int grep_handle(char *name, const char *info, void *p)
{
    struct grep_info *acc = (struct grep_info *) p;

    add_isam_p(name, info, acc);
    return 0;
}
265
266 static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src,
267                      const char *ct1, const char *ct2, int first)
268 {
269     const char *s1, *s0 = *src;
270     const char **map;
271
272     /* skip white space */
273     while (*s0)
274     {
275         if (ct1 && strchr(ct1, *s0))
276             break;
277         if (ct2 && strchr(ct2, *s0))
278             break;
279         s1 = s0;
280         map = zebra_maps_input(zebra_maps, reg_type, &s1, strlen(s1), first);
281         if (**map != *CHR_SPACE)
282             break;
283         s0 = s1;
284     }
285     *src = s0;
286     return *s0;
287 }
288
289 #define REGEX_CHARS " []()|.*+?!"
290
291 /* term_100: handle term, where trunc=none(no operators at all) */
292 static int term_100(ZebraMaps zebra_maps, int reg_type,
293                      const char **src, char *dst, int space_split,
294                      char *dst_term)
295 {
296     const char *s0, *s1;
297     const char **map;
298     int i = 0;
299     int j = 0;
300
301     const char *space_start = 0;
302     const char *space_end = 0;
303
304     if (!term_pre(zebra_maps, reg_type, src, NULL, NULL, !space_split))
305         return 0;
306     s0 = *src;
307     while (*s0)
308     {
309         s1 = s0;
310         map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0);
311         if (space_split)
312         {
313             if (**map == *CHR_SPACE)
314                 break;
315         }
316         else  /* complete subfield only. */
317         {
318             if (**map == *CHR_SPACE)
319             {   /* save space mapping for later  .. */
320                 space_start = s1;
321                 space_end = s0;
322                 continue;
323             }
324             else if (space_start)
325             {   /* reload last space */
326                 while (space_start < space_end)
327                 {
328                     if (strchr(REGEX_CHARS, *space_start))
329                         dst[i++] = '\\';
330                     dst_term[j++] = *space_start;
331                     dst[i++] = *space_start++;
332                 }
333                 /* and reset */
334                 space_start = space_end = 0;
335             }
336         }
337         /* add non-space char */
338         while (s1 < s0)
339         {
340             if (strchr(REGEX_CHARS, *s1))
341                 dst[i++] = '\\';
342             dst_term[j++] = *s1;
343             dst[i++] = *s1++;
344         }
345     }
346     dst[i] = '\0';
347     dst_term[j] = '\0';
348     *src = s0;
349     return i;
350 }
351
352 /* term_101: handle term, where trunc=Process # */
353 static int term_101(ZebraMaps zebra_maps, int reg_type,
354                      const char **src, char *dst, int space_split,
355                      char *dst_term)
356 {
357     const char *s0, *s1;
358     const char **map;
359     int i = 0;
360     int j = 0;
361
362     if (!term_pre(zebra_maps, reg_type, src, "#", "#", !space_split))
363         return 0;
364     s0 = *src;
365     while (*s0)
366     {
367         if (*s0 == '#')
368         {
369             dst[i++] = '.';
370             dst[i++] = '*';
371             dst_term[j++] = *s0++;
372         }
373         else
374         {
375             s1 = s0;
376             map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0);
377             if (space_split && **map == *CHR_SPACE)
378                 break;
379             while (s1 < s0)
380             {
381                 if (strchr(REGEX_CHARS, *s1))
382                     dst[i++] = '\\';
383                 dst_term[j++] = *s1;
384                 dst[i++] = *s1++;
385             }
386         }
387     }
388     dst[i] = '\0';
389     dst_term[j++] = '\0';
390     *src = s0;
391     return i;
392 }
393
394 /* term_103: handle term, where trunc=re-2 (regular expressions) */
395 static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src,
396                      char *dst, int *errors, int space_split,
397                      char *dst_term)
398 {
399     int i = 0;
400     int j = 0;
401     const char *s0, *s1;
402     const char **map;
403
404     if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split))
405         return 0;
406     s0 = *src;
407     if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
408         isdigit(s0[1]))
409     {
410         *errors = s0[1] - '0';
411         s0 += 3;
412         if (*errors > 3)
413             *errors = 3;
414     }
415     while (*s0)
416     {
417         if (strchr("^\\()[].*+?|-", *s0))
418         {
419             dst_term[j++] = *s0;
420             dst[i++] = *s0++;
421         }
422         else
423         {
424             s1 = s0;
425             map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0);
426             if (**map == *CHR_SPACE)
427                 break;
428             while (s1 < s0)
429             {
430                 if (strchr(REGEX_CHARS, *s1))
431                     dst[i++] = '\\';
432                 dst_term[j++] = *s1;
433                 dst[i++] = *s1++;
434             }
435         }
436     }
437     dst[i] = '\0';
438     dst_term[j] = '\0';
439     *src = s0;
440     return i;
441 }
442
443 /* term_103: handle term, where trunc=re-1 (regular expressions) */
444 static int term_102 (ZebraMaps zebra_maps, int reg_type, const char **src,
445                      char *dst, int space_split, char *dst_term)
446 {
447     return term_103(zebra_maps, reg_type, src, dst, NULL, space_split,
448                      dst_term);
449 }
450
451
452 /* term_104: handle term, where trunc=Process # and ! */
453 static int term_104(ZebraMaps zebra_maps, int reg_type,
454                      const char **src, char *dst, int space_split,
455                      char *dst_term)
456 {
457     const char *s0, *s1;
458     const char **map;
459     int i = 0;
460     int j = 0;
461
462     if (!term_pre(zebra_maps, reg_type, src, "?*#", "?*#", !space_split))
463         return 0;
464     s0 = *src;
465     while (*s0)
466     {
467         if (*s0 == '?')
468         {
469             dst_term[j++] = *s0++;
470             if (*s0 >= '0' && *s0 <= '9')
471             {
472                 int limit = 0;
473                 while (*s0 >= '0' && *s0 <= '9')
474                 {
475                     limit = limit * 10 + (*s0 - '0');
476                     dst_term[j++] = *s0++;
477                 }
478                 if (limit > 20)
479                     limit = 20;
480                 while (--limit >= 0)
481                 {
482                     dst[i++] = '.';
483                     dst[i++] = '?';
484                 }
485             }
486             else
487             {
488                 dst[i++] = '.';
489                 dst[i++] = '*';
490             }
491         }
492         else if (*s0 == '*')
493         {
494             dst[i++] = '.';
495             dst[i++] = '*';
496             dst_term[j++] = *s0++;
497         }
498         else if (*s0 == '#')
499         {
500             dst[i++] = '.';
501             dst_term[j++] = *s0++;
502         }
503         {
504             s1 = s0;
505             map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0);
506             if (space_split && **map == *CHR_SPACE)
507                 break;
508             while (s1 < s0)
509             {
510                 if (strchr(REGEX_CHARS, *s1))
511                     dst[i++] = '\\';
512                 dst_term[j++] = *s1;
513                 dst[i++] = *s1++;
514             }
515         }
516     }
517     dst[i] = '\0';
518     dst_term[j++] = '\0';
519     *src = s0;
520     return i;
521 }
522
523 /* term_105/106: handle term, where trunc=Process * and ! and right trunc */
524 static int term_105 (ZebraMaps zebra_maps, int reg_type,
525                      const char **src, char *dst, int space_split,
526                      char *dst_term, int right_truncate)
527 {
528     const char *s0, *s1;
529     const char **map;
530     int i = 0;
531     int j = 0;
532
533     if (!term_pre(zebra_maps, reg_type, src, "*!", "*!", !space_split))
534         return 0;
535     s0 = *src;
536     while (*s0)
537     {
538         if (*s0 == '*')
539         {
540             dst[i++] = '.';
541             dst[i++] = '*';
542             dst_term[j++] = *s0++;
543         }
544         else if (*s0 == '!')
545         {
546             dst[i++] = '.';
547             dst_term[j++] = *s0++;
548         }
549         {
550             s1 = s0;
551             map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0);
552             if (space_split && **map == *CHR_SPACE)
553                 break;
554             while (s1 < s0)
555             {
556                 if (strchr(REGEX_CHARS, *s1))
557                     dst[i++] = '\\';
558                 dst_term[j++] = *s1;
559                 dst[i++] = *s1++;
560             }
561         }
562     }
563     if (right_truncate)
564     {
565         dst[i++] = '.';
566         dst[i++] = '*';
567     }
568     dst[i] = '\0';
569     
570     dst_term[j++] = '\0';
571     *src = s0;
572     return i;
573 }
574
575
576 /* gen_regular_rel - generate regular expression from relation
577  *  val:     border value (inclusive)
578  *  islt:    1 if <=; 0 if >=.
579  */
580 static void gen_regular_rel(char *dst, int val, int islt)
581 {
582     int dst_p;
583     int w, d, i;
584     int pos = 0;
585     char numstr[20];
586
587     yaz_log(LOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
588     if (val >= 0)
589     {
590         if (islt)
591             strcpy(dst, "(-[0-9]+|(");
592         else
593             strcpy(dst, "((");
594     } 
595     else
596     {
597         if (!islt)
598         {
599             strcpy(dst, "([0-9]+|-(");
600             dst_p = strlen(dst);
601             islt = 1;
602         }
603         else
604         {
605             strcpy(dst, "(-(");
606             islt = 0;
607         }
608         val = -val;
609     }
610     dst_p = strlen(dst);
611     sprintf(numstr, "%d", val);
612     for (w = strlen(numstr); --w >= 0; pos++)
613     {
614         d = numstr[w];
615         if (pos > 0)
616         {
617             if (islt)
618             {
619                 if (d == '0')
620                     continue;
621                 d--;
622             } 
623             else
624             {
625                 if (d == '9')
626                     continue;
627                 d++;
628             }
629         }
630         
631         strcpy(dst + dst_p, numstr);
632         dst_p = strlen(dst) - pos - 1;
633
634         if (islt)
635         {
636             if (d != '0')
637             {
638                 dst[dst_p++] = '[';
639                 dst[dst_p++] = '0';
640                 dst[dst_p++] = '-';
641                 dst[dst_p++] = d;
642                 dst[dst_p++] = ']';
643             }
644             else
645                 dst[dst_p++] = d;
646         }
647         else
648         {
649             if (d != '9')
650             { 
651                 dst[dst_p++] = '[';
652                 dst[dst_p++] = d;
653                 dst[dst_p++] = '-';
654                 dst[dst_p++] = '9';
655                 dst[dst_p++] = ']';
656             }
657             else
658                 dst[dst_p++] = d;
659         }
660         for (i = 0; i<pos; i++)
661         {
662             dst[dst_p++] = '[';
663             dst[dst_p++] = '0';
664             dst[dst_p++] = '-';
665             dst[dst_p++] = '9';
666             dst[dst_p++] = ']';
667         }
668         dst[dst_p++] = '|';
669     }
670     dst[dst_p] = '\0';
671     if (islt)
672     {
673         /* match everything less than 10^(pos-1) */
674         strcat(dst, "0*");
675         for (i=1; i<pos; i++)
676             strcat(dst, "[0-9]?");
677     }
678     else
679     {
680         /* match everything greater than 10^pos */
681         for (i = 0; i <= pos; i++)
682             strcat(dst, "[0-9]");
683         strcat(dst, "[0-9]*");
684     }
685     strcat(dst, "))");
686 }
687
/*
 * Copy one logical character from src[*indx] to **term_p, advancing
 * both.  A backslash and the byte following it are copied together as
 * one unit, so escape pairs are never split.
 */
void string_rel_add_char(char **term_p, const char *src, int *indx)
{
    char *out = *term_p;
    int at = *indx;

    if (src[at] == '\\')
        *out++ = src[at++];
    *out++ = src[at++];

    *term_p = out;
    *indx = at;
}
694
695 /*
696  *   >  abc     ([b-].*|a[c-].*|ab[d-].*|abc.+)
697  *              ([^-a].*|a[^-b].*ab[^-c].*|abc.+)
698  *   >= abc     ([b-].*|a[c-].*|ab[c-].*)
699  *              ([^-a].*|a[^-b].*|ab[c-].*)
700  *   <  abc     ([-0].*|a[-a].*|ab[-b].*)
701  *              ([^a-].*|a[^b-].*|ab[^c-].*)
702  *   <= abc     ([-0].*|a[-a].*|ab[-b].*|abc)
703  *              ([^a-].*|a[^b-].*|ab[^c-].*|abc)
704  */
705 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
706                             const char **term_sub, char *term_dict,
707                             oid_value attributeSet,
708                             int reg_type, int space_split, char *term_dst)
709 {
710     AttrType relation;
711     int relation_value;
712     int i;
713     char *term_tmp = term_dict + strlen(term_dict);
714     char term_component[2*IT_MAX_WORD+20];
715
716     attr_init(&relation, zapt, 2);
717     relation_value = attr_find(&relation, NULL);
718
719     yaz_log(LOG_DEBUG, "string relation value=%d", relation_value);
720     switch (relation_value)
721     {
722     case 1:
723         if (!term_100 (zh->reg->zebra_maps, reg_type,
724                        term_sub, term_component,
725                        space_split, term_dst))
726             return 0;
727         yaz_log(LOG_DEBUG, "Relation <");
728         
729         *term_tmp++ = '(';
730         for (i = 0; term_component[i]; )
731         {
732             int j = 0;
733
734             if (i)
735                 *term_tmp++ = '|';
736             while (j < i)
737                 string_rel_add_char (&term_tmp, term_component, &j);
738
739             *term_tmp++ = '[';
740
741             *term_tmp++ = '^';
742             string_rel_add_char (&term_tmp, term_component, &i);
743             *term_tmp++ = '-';
744
745             *term_tmp++ = ']';
746             *term_tmp++ = '.';
747             *term_tmp++ = '*';
748
749             if ((term_tmp - term_dict) > IT_MAX_WORD)
750                 break;
751         }
752         *term_tmp++ = ')';
753         *term_tmp = '\0';
754         break;
755     case 2:
756         if (!term_100 (zh->reg->zebra_maps, reg_type,
757                        term_sub, term_component,
758                        space_split, term_dst))
759             return 0;
760         yaz_log(LOG_DEBUG, "Relation <=");
761
762         *term_tmp++ = '(';
763         for (i = 0; term_component[i]; )
764         {
765             int j = 0;
766
767             while (j < i)
768                 string_rel_add_char (&term_tmp, term_component, &j);
769             *term_tmp++ = '[';
770
771             *term_tmp++ = '^';
772             string_rel_add_char (&term_tmp, term_component, &i);
773             *term_tmp++ = '-';
774
775             *term_tmp++ = ']';
776             *term_tmp++ = '.';
777             *term_tmp++ = '*';
778
779             *term_tmp++ = '|';
780
781             if ((term_tmp - term_dict) > IT_MAX_WORD)
782                 break;
783         }
784         for (i = 0; term_component[i]; )
785             string_rel_add_char (&term_tmp, term_component, &i);
786         *term_tmp++ = ')';
787         *term_tmp = '\0';
788         break;
789     case 5:
790         if (!term_100 (zh->reg->zebra_maps, reg_type,
791                        term_sub, term_component, space_split, term_dst))
792             return 0;
793         yaz_log(LOG_DEBUG, "Relation >");
794
795         *term_tmp++ = '(';
796         for (i = 0; term_component[i];)
797         {
798             int j = 0;
799
800             while (j < i)
801                 string_rel_add_char (&term_tmp, term_component, &j);
802             *term_tmp++ = '[';
803             
804             *term_tmp++ = '^';
805             *term_tmp++ = '-';
806             string_rel_add_char (&term_tmp, term_component, &i);
807
808             *term_tmp++ = ']';
809             *term_tmp++ = '.';
810             *term_tmp++ = '*';
811
812             *term_tmp++ = '|';
813
814             if ((term_tmp - term_dict) > IT_MAX_WORD)
815                 break;
816         }
817         for (i = 0; term_component[i];)
818             string_rel_add_char (&term_tmp, term_component, &i);
819         *term_tmp++ = '.';
820         *term_tmp++ = '+';
821         *term_tmp++ = ')';
822         *term_tmp = '\0';
823         break;
824     case 4:
825         if (!term_100 (zh->reg->zebra_maps, reg_type, term_sub,
826                        term_component, space_split, term_dst))
827             return 0;
828         yaz_log(LOG_DEBUG, "Relation >=");
829
830         *term_tmp++ = '(';
831         for (i = 0; term_component[i];)
832         {
833             int j = 0;
834
835             if (i)
836                 *term_tmp++ = '|';
837             while (j < i)
838                 string_rel_add_char (&term_tmp, term_component, &j);
839             *term_tmp++ = '[';
840
841             if (term_component[i+1])
842             {
843                 *term_tmp++ = '^';
844                 *term_tmp++ = '-';
845                 string_rel_add_char (&term_tmp, term_component, &i);
846             }
847             else
848             {
849                 string_rel_add_char (&term_tmp, term_component, &i);
850                 *term_tmp++ = '-';
851             }
852             *term_tmp++ = ']';
853             *term_tmp++ = '.';
854             *term_tmp++ = '*';
855
856             if ((term_tmp - term_dict) > IT_MAX_WORD)
857                 break;
858         }
859         *term_tmp++ = ')';
860         *term_tmp = '\0';
861         break;
862     case 3:
863     default:
864         yaz_log(LOG_DEBUG, "Relation =");
865         if (!term_100 (zh->reg->zebra_maps, reg_type, term_sub,
866                        term_component, space_split, term_dst))
867             return 0;
868         strcat(term_tmp, "(");
869         strcat(term_tmp, term_component);
870         strcat(term_tmp, ")");
871     }
872     return 1;
873 }
874
875 static int string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
876                         const char **term_sub, 
877                         oid_value attributeSet, NMEM stream,
878                         struct grep_info *grep_info,
879                         int reg_type, int complete_flag,
880                         int num_bases, char **basenames,
881                         char *term_dst, int xpath_use);
882
883 static RSET term_trunc(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
884                         const char **term_sub, 
885                         oid_value attributeSet, NMEM stream,
886                         struct grep_info *grep_info,
887                         int reg_type, int complete_flag,
888                         int num_bases, char **basenames,
889                         char *term_dst,
890                         const char *rank_type, int xpath_use,
891                         NMEM rset_nmem)
892 {
893     int r;
894     grep_info->isam_p_indx = 0;
895     r = string_term(zh, zapt, term_sub, attributeSet, stream, grep_info,
896                      reg_type, complete_flag, num_bases, basenames,
897                      term_dst, xpath_use);
898     if (r < 1)
899         return 0;
900     yaz_log(LOG_DEBUG, "term: %s", term_dst);
901     return rset_trunc(zh, grep_info->isam_p_buf,
902                        grep_info->isam_p_indx, term_dst,
903                        strlen(term_dst), rank_type, 1 /* preserve pos */,
904                        zapt->term->which, rset_nmem,
905                        key_it_ctrl,key_it_ctrl->scope);
906 }
907 static char *nmem_strdup_i(NMEM nmem, int v)
908 {
909     char val_str[64];
910     sprintf (val_str, "%d", v);
911     return nmem_strdup(nmem, val_str);
912 }
913
914 static int string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
915                        const char **term_sub, 
916                        oid_value attributeSet, NMEM stream,
917                        struct grep_info *grep_info,
918                        int reg_type, int complete_flag,
919                        int num_bases, char **basenames,
920                        char *term_dst, int xpath_use)
921 {
922     char term_dict[2*IT_MAX_WORD+4000];
923     int j, r, base_no;
924     AttrType truncation;
925     int truncation_value;
926     AttrType use;
927     int use_value;
928     const char *use_string = 0;
929     oid_value curAttributeSet = attributeSet;
930     const char *termp;
931     struct rpn_char_map_info rcmi;
932     int space_split = complete_flag ? 0 : 1;
933
934     int bases_ok = 0;     /* no of databases with OK attribute */
935     int errCode = 0;      /* err code (if any is not OK) */
936     char *errString = 0;  /* addinfo */
937
938     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
939     attr_init (&use, zapt, 1);
940     use_value = attr_find_ex (&use, &curAttributeSet, &use_string);
941     yaz_log(LOG_DEBUG, "string_term, use value %d", use_value);
942     attr_init (&truncation, zapt, 5);
943     truncation_value = attr_find (&truncation, NULL);
944     yaz_log(LOG_DEBUG, "truncation value %d", truncation_value);
945
946     if (use_value == -1)    /* no attribute - assumy "any" */
947         use_value = 1016;
948     for (base_no = 0; base_no < num_bases; base_no++)
949     {
950         int attr_ok = 0;
951         int regex_range = 0;
952         int init_pos = 0;
953         attent attp;
954         data1_local_attribute id_xpath_attr;
955         data1_local_attribute *local_attr;
956         int max_pos, prefix_len = 0;
957
958         termp = *term_sub;
959
960         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
961         {
962             zh->errCode = 109; /* Database unavailable */
963             zh->errString = basenames[base_no];
964             return -1;
965         }
966         if (xpath_use > 0 && use_value == -2) 
967         {
968             use_value = xpath_use;
969             attp.local_attributes = &id_xpath_attr;
970             attp.attset_ordinal = VAL_IDXPATH;
971             id_xpath_attr.next = 0;
972             id_xpath_attr.local = use_value;
973         }
974         else if (curAttributeSet == VAL_IDXPATH)
975         {
976             attp.local_attributes = &id_xpath_attr;
977             attp.attset_ordinal = VAL_IDXPATH;
978             id_xpath_attr.next = 0;
979             id_xpath_attr.local = use_value;
980         }
981         else
982         {
983             if ((r=att_getentbyatt (zh, &attp, curAttributeSet, use_value,
984                                             use_string)))
985             {
986                 yaz_log(LOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
987                       curAttributeSet, use_value, r);
988                 if (r == -1)
989                 {
990                     /* set was found, but value wasn't defined */
991                     errCode = 114;
992                     if (use_string)
993                         errString = nmem_strdup(stream, use_string);
994                     else
995                         errString = nmem_strdup_i (stream, use_value);
996                 }
997                 else
998                 {
999                     int oid[OID_SIZE];
1000                     struct oident oident;
1001                     
1002                     oident.proto = PROTO_Z3950;
1003                     oident.oclass = CLASS_ATTSET;
1004                     oident.value = curAttributeSet;
1005                     oid_ent_to_oid (&oident, oid);
1006                     
1007                     errCode = 121;
1008                     errString = nmem_strdup (stream, oident.desc);
1009                 }
1010                 continue;
1011             }
1012         }
1013         for (local_attr = attp.local_attributes; local_attr;
1014              local_attr = local_attr->next)
1015         {
1016             int ord;
1017             char ord_buf[32];
1018             int i, ord_len;
1019             
1020             ord = zebraExplain_lookupSU (zh->reg->zei, attp.attset_ordinal,
1021                                          local_attr->local);
1022             if (ord < 0)
1023                 continue;
1024             if (prefix_len)
1025                 term_dict[prefix_len++] = '|';
1026             else
1027                 term_dict[prefix_len++] = '(';
1028             
1029             ord_len = key_SU_encode (ord, ord_buf);
1030             for (i = 0; i<ord_len; i++)
1031             {
1032                 term_dict[prefix_len++] = 1;
1033                 term_dict[prefix_len++] = ord_buf[i];
1034             }
1035         }
1036         if (!prefix_len)
1037         {
1038 #if 1
1039             bases_ok++;
1040 #else
1041             errCode = 114;
1042             errString = nmem_strdup_i(stream, use_value);
1043             continue;
1044 #endif
1045         }
1046         else
1047         {
1048             bases_ok++; /* this has OK attributes */
1049             attr_ok = 1;
1050         }
1051
1052         term_dict[prefix_len++] = ')';
1053         term_dict[prefix_len++] = 1;
1054         term_dict[prefix_len++] = reg_type;
1055         yaz_log(LOG_DEBUG, "reg_type = %d", term_dict[prefix_len-1]);
1056         term_dict[prefix_len] = '\0';
1057         j = prefix_len;
1058         switch (truncation_value)
1059         {
1060         case -1:         /* not specified */
1061         case 100:        /* do not truncate */
1062             if (!string_relation (zh, zapt, &termp, term_dict,
1063                                   attributeSet,
1064                                   reg_type, space_split, term_dst))
1065                 return 0;
1066             break;
1067         case 1:          /* right truncation */
1068             term_dict[j++] = '(';
1069             if (!term_100(zh->reg->zebra_maps, reg_type,
1070                           &termp, term_dict + j, space_split, term_dst))
1071                 return 0;
1072             strcat(term_dict, ".*)");
1073             break;
1074         case 2:          /* keft truncation */
1075             term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1076             if (!term_100(zh->reg->zebra_maps, reg_type,
1077                           &termp, term_dict + j, space_split, term_dst))
1078                 return 0;
1079             strcat(term_dict, ")");
1080             break;
1081         case 3:          /* left&right truncation */
1082             term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1083             if (!term_100(zh->reg->zebra_maps, reg_type,
1084                           &termp, term_dict + j, space_split, term_dst))
1085                 return 0;
1086             strcat(term_dict, ".*)");
1087             break;
1088         case 101:        /* process # in term */
1089             term_dict[j++] = '(';
1090             if (!term_101(zh->reg->zebra_maps, reg_type,
1091                           &termp, term_dict + j, space_split, term_dst))
1092                 return 0;
1093             strcat(term_dict, ")");
1094             break;
1095         case 102:        /* Regexp-1 */
1096             term_dict[j++] = '(';
1097             if (!term_102(zh->reg->zebra_maps, reg_type,
1098                           &termp, term_dict + j, space_split, term_dst))
1099                 return 0;
1100             strcat(term_dict, ")");
1101             break;
1102         case 103:       /* Regexp-2 */
1103             r = 1;
1104             term_dict[j++] = '(';
1105             init_pos = 2;
1106             if (!term_103 (zh->reg->zebra_maps, reg_type,
1107                            &termp, term_dict + j, &regex_range,
1108                            space_split, term_dst))
1109                 return 0;
1110             strcat(term_dict, ")");
1111         case 104:        /* process # and ! in term */
1112             term_dict[j++] = '(';
1113             if (!term_104 (zh->reg->zebra_maps, reg_type,
1114                            &termp, term_dict + j, space_split, term_dst))
1115                 return 0;
1116             strcat(term_dict, ")");
1117             break;
1118         case 105:        /* process * and ! in term */
1119             term_dict[j++] = '(';
1120             if (!term_105 (zh->reg->zebra_maps, reg_type,
1121                            &termp, term_dict + j, space_split, term_dst, 1))
1122                 return 0;
1123             strcat(term_dict, ")");
1124             break;
1125         case 106:        /* process * and ! in term */
1126             term_dict[j++] = '(';
1127             if (!term_105 (zh->reg->zebra_maps, reg_type,
1128                            &termp, term_dict + j, space_split, term_dst, 0))
1129                 return 0;
1130             strcat(term_dict, ")");
1131             break;
1132         default:
1133             zh->errCode = 120;
1134             zh->errString = nmem_strdup_i(stream, truncation_value);
1135             return -1;
1136         }
1137         if (attr_ok)
1138         {
1139             yaz_log(LOG_DEBUG, "dict_lookup_grep: %s", term_dict+prefix_len);
1140             r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
1141                                  grep_info, &max_pos, init_pos,
1142                                  grep_handle);
1143             if (r)
1144                 yaz_log(LOG_WARN, "dict_lookup_grep fail %d", r);
1145         }
1146     }
1147     if (!bases_ok)
1148     {
1149         zh->errCode = errCode;
1150         zh->errString = errString;
1151         return -1;
1152     }
1153     *term_sub = termp;
1154     yaz_log(LOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1155     return 1;
1156 }
1157
1158
1159 /* convert APT search term to UTF8 */
1160 static int zapt_term_to_utf8 (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1161                               char *termz)
1162 {
1163     size_t sizez;
1164     Z_Term *term = zapt->term;
1165
1166     switch (term->which)
1167     {
1168     case Z_Term_general:
1169         if (zh->iconv_to_utf8 != 0)
1170         {
1171             char *inbuf = term->u.general->buf;
1172             size_t inleft = term->u.general->len;
1173             char *outbuf = termz;
1174             size_t outleft = IT_MAX_WORD-1;
1175             size_t ret;
1176
1177             ret = yaz_iconv(zh->iconv_to_utf8, &inbuf, &inleft,
1178                         &outbuf, &outleft);
1179             if (ret == (size_t)(-1))
1180             {
1181                 ret = yaz_iconv(zh->iconv_to_utf8, 0, 0, 0, 0);
1182                 zh->errCode = 125;
1183                 return -1;
1184             }
1185             *outbuf = 0;
1186         }
1187         else
1188         {
1189             sizez = term->u.general->len;
1190             if (sizez > IT_MAX_WORD-1)
1191                 sizez = IT_MAX_WORD-1;
1192             memcpy (termz, term->u.general->buf, sizez);
1193             termz[sizez] = '\0';
1194         }
1195         break;
1196     case Z_Term_characterString:
1197         sizez = strlen(term->u.characterString);
1198         if (sizez > IT_MAX_WORD-1)
1199             sizez = IT_MAX_WORD-1;
1200         memcpy (termz, term->u.characterString, sizez);
1201         termz[sizez] = '\0';
1202         break;
1203     default:
1204         zh->errCode = 124;
1205         return -1;
1206     }
1207     return 0;
1208 }
1209
1210 /* convert APT SCAN term to internal cmap */
1211 static int trans_scan_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1212                             char *termz, int reg_type)
1213 {
1214     char termz0[IT_MAX_WORD];
1215
1216     if (zapt_term_to_utf8(zh, zapt, termz0))
1217         return -1;    /* error */
1218     else
1219     {
1220         const char **map;
1221         const char *cp = (const char *) termz0;
1222         const char *cp_end = cp + strlen(cp);
1223         const char *src;
1224         int i = 0;
1225         const char *space_map = NULL;
1226         int len;
1227             
1228         while ((len = (cp_end - cp)) > 0)
1229         {
1230             map = zebra_maps_input (zh->reg->zebra_maps, reg_type, &cp, len, 0);
1231             if (**map == *CHR_SPACE)
1232                 space_map = *map;
1233             else
1234             {
1235                 if (i && space_map)
1236                     for (src = space_map; *src; src++)
1237                         termz[i++] = *src;
1238                 space_map = NULL;
1239                 for (src = *map; *src; src++)
1240                     termz[i++] = *src;
1241             }
1242         }
1243         termz[i] = '\0';
1244     }
1245     return 0;
1246 }
1247
1248 char *normalize_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1249                      const char *termz, NMEM stream, unsigned reg_id)
1250 {
1251     WRBUF wrbuf = 0;
1252     AttrType truncation;
1253     int truncation_value;
1254     char *ex_list = 0;
1255
1256     attr_init (&truncation, zapt, 5);
1257     truncation_value = attr_find (&truncation, NULL);
1258
1259     switch (truncation_value)
1260     {
1261     default:
1262         ex_list = "";
1263         break;
1264     case 101:
1265         ex_list = "#";
1266         break;
1267     case 102:
1268     case 103:
1269         ex_list = 0;
1270         break;
1271     case 104:
1272         ex_list = "!#";
1273         break;
1274     case 105:
1275         ex_list = "!*";
1276         break;
1277     }
1278     if (ex_list)
1279         wrbuf = zebra_replace(zh->reg->zebra_maps, reg_id, ex_list,
1280                               termz, strlen(termz));
1281     if (!wrbuf)
1282         return nmem_strdup(stream, termz);
1283     else
1284     {
1285         char *buf = (char*) nmem_malloc(stream, wrbuf_len(wrbuf)+1);
1286         memcpy (buf, wrbuf_buf(wrbuf), wrbuf_len(wrbuf));
1287         buf[wrbuf_len(wrbuf)] = '\0';
1288         return buf;
1289     }
1290 }
1291
1292 static void grep_info_delete (struct grep_info *grep_info)
1293 {
1294 #ifdef TERM_COUNT
1295     xfree(grep_info->term_no);
1296 #endif
1297     xfree (grep_info->isam_p_buf);
1298 }
1299
1300 static int grep_info_prepare (ZebraHandle zh,
1301                               Z_AttributesPlusTerm *zapt,
1302                               struct grep_info *grep_info,
1303                               int reg_type,
1304                               NMEM stream)
1305 {
1306     AttrType termset;
1307     int termset_value_numeric;
1308     const char *termset_value_string;
1309
1310 #ifdef TERM_COUNT
1311     grep_info->term_no = 0;
1312 #endif
1313     grep_info->isam_p_size = 0;
1314     grep_info->isam_p_buf = NULL;
1315     grep_info->zh = zh;
1316     grep_info->reg_type = reg_type;
1317     grep_info->termset = 0;
1318
1319     if (!zapt)
1320         return 0;
1321     attr_init (&termset, zapt, 8);
1322     termset_value_numeric =
1323         attr_find_ex (&termset, NULL, &termset_value_string);
1324     if (termset_value_numeric != -1)
1325     {
1326         char resname[32];
1327         const char *termset_name = 0;
1328         if (termset_value_numeric != -2)
1329         {
1330     
1331             sprintf (resname, "%d", termset_value_numeric);
1332             termset_name = resname;
1333         }
1334         else
1335             termset_name = termset_value_string;
1336         yaz_log(LOG_LOG, "creating termset set %s", termset_name);
1337         grep_info->termset = resultSetAdd (zh, termset_name, 1);
1338         if (!grep_info->termset)
1339         {
1340             zh->errCode = 128;
1341             zh->errString = nmem_strdup (stream, termset_name);
1342             return -1;
1343         }
1344     }
1345     return 0;
1346 }
1347                                
1348
1349 static RSET rpn_search_APT_phrase (ZebraHandle zh,
1350                                    Z_AttributesPlusTerm *zapt,
1351                                    const char *termz_org,
1352                                    oid_value attributeSet,
1353                                    NMEM stream,
1354                                    int reg_type, int complete_flag,
1355                                    const char *rank_type, int xpath_use,
1356                                    int num_bases, char **basenames, 
1357                                    NMEM rset_nmem)
1358 {
1359     char term_dst[IT_MAX_WORD+1];
1360     RSET rset[60], result;
1361     int rset_no = 0;
1362     struct grep_info grep_info;
1363     char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type);
1364     const char *termp = termz;
1365
1366     *term_dst = 0;
1367     if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream))
1368         return 0;
1369     while (1)
1370     { 
1371         yaz_log(LOG_DEBUG, "APT_phrase termp=%s", termp);
1372         rset[rset_no] = term_trunc(zh, zapt, &termp, attributeSet,
1373                                     stream, &grep_info,
1374                                     reg_type, complete_flag,
1375                                     num_bases, basenames,
1376                                     term_dst, rank_type,
1377                                     xpath_use,rset_nmem);
1378         if (!rset[rset_no])
1379             break;
1380         if (++rset_no >= (int) (sizeof(rset)/sizeof(*rset)))
1381             break;
1382     }
1383     grep_info_delete (&grep_info);
1384     if (rset_no == 0)
1385         return rsnull_create (rset_nmem,key_it_ctrl); 
1386     else if (rset_no == 1)
1387         return (rset[0]);
1388     else
1389         result = rsprox_create( rset_nmem, key_it_ctrl, key_it_ctrl->scope,
1390                        rset_no, rset,
1391                        1 /* ordered */, 0 /* exclusion */,
1392                        3 /* relation */, 1 /* distance */);
1393     return result;
1394 }
1395
1396 static RSET rpn_search_APT_or_list (ZebraHandle zh,
1397                                     Z_AttributesPlusTerm *zapt,
1398                                     const char *termz_org,
1399                                     oid_value attributeSet,
1400                                     NMEM stream,
1401                                     int reg_type, int complete_flag,
1402                                     const char *rank_type,
1403                                     int xpath_use,
1404                                     int num_bases, char **basenames,
1405                                     NMEM rset_nmem)
1406 {
1407     char term_dst[IT_MAX_WORD+1];
1408     RSET rset[60];
1409     int rset_no = 0;
1410     struct grep_info grep_info;
1411     char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type);
1412     const char *termp = termz;
1413
1414     if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream))
1415         return 0;
1416     while (1)
1417     { 
1418         yaz_log(LOG_DEBUG, "APT_or_list termp=%s", termp);
1419         rset[rset_no] = term_trunc(zh, zapt, &termp, attributeSet,
1420                                     stream, &grep_info,
1421                                     reg_type, complete_flag,
1422                                     num_bases, basenames,
1423                                     term_dst, rank_type,
1424                                     xpath_use,rset_nmem);
1425         if (!rset[rset_no])
1426             break;
1427         if (++rset_no >= (int) (sizeof(rset)/sizeof(*rset)))
1428             break;
1429     }
1430     grep_info_delete (&grep_info);
1431     if (rset_no == 0)
1432         return rsnull_create (rset_nmem,key_it_ctrl);  
1433     return rsmultior_create(rset_nmem, key_it_ctrl,key_it_ctrl->scope,
1434                             rset_no, rset);
1435 }
1436
1437 static RSET rpn_search_APT_and_list (ZebraHandle zh,
1438                                      Z_AttributesPlusTerm *zapt,
1439                                      const char *termz_org,
1440                                      oid_value attributeSet,
1441                                      NMEM stream,
1442                                      int reg_type, int complete_flag,
1443                                      const char *rank_type, 
1444                                      int xpath_use,
1445                                      int num_bases, char **basenames,
1446                                      NMEM rset_nmem)
1447 {
1448     char term_dst[IT_MAX_WORD+1];
1449     RSET rset[60]; /* FIXME - bug 160 - should be dynamic somehow */
1450     int rset_no = 0;
1451     struct grep_info grep_info;
1452     char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type);
1453     const char *termp = termz;
1454
1455     if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream))
1456         return 0;
1457     while (1)
1458     { 
1459         yaz_log(LOG_DEBUG, "APT_and_list termp=%s", termp);
1460         rset[rset_no] = term_trunc(zh, zapt, &termp, attributeSet,
1461                                     stream, &grep_info,
1462                                     reg_type, complete_flag,
1463                                     num_bases, basenames,
1464                                     term_dst, rank_type,
1465                                     xpath_use, rset_nmem);
1466         if (!rset[rset_no])
1467             break;
1468         assert (rset[rset_no]);
1469         if (++rset_no >= (int) (sizeof(rset)/sizeof(*rset)))
1470             break;
1471     }
1472     grep_info_delete (&grep_info);
1473     if (rset_no == 0)
1474         return rsnull_create (rset_nmem,key_it_ctrl); 
1475
1476     return rsmultiand_create( rset_nmem, key_it_ctrl, key_it_ctrl->scope,
1477                               rset_no, rset);
1478 }
1479
1480 static int numeric_relation (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1481                              const char **term_sub,
1482                              char *term_dict,
1483                              oid_value attributeSet,
1484                              struct grep_info *grep_info,
1485                              int *max_pos,
1486                              int reg_type,
1487                              char *term_dst)
1488 {
1489     AttrType relation;
1490     int relation_value;
1491     int term_value;
1492     int r;
1493     char *term_tmp = term_dict + strlen(term_dict);
1494
1495     attr_init (&relation, zapt, 2);
1496     relation_value = attr_find (&relation, NULL);
1497
1498     yaz_log(LOG_DEBUG, "numeric relation value=%d", relation_value);
1499
1500     if (!term_100 (zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1501                    term_dst))
1502         return 0;
1503     term_value = atoi (term_tmp);
1504     switch (relation_value)
1505     {
1506     case 1:
1507         yaz_log(LOG_DEBUG, "Relation <");
1508         gen_regular_rel (term_tmp, term_value-1, 1);
1509         break;
1510     case 2:
1511         yaz_log(LOG_DEBUG, "Relation <=");
1512         gen_regular_rel (term_tmp, term_value, 1);
1513         break;
1514     case 4:
1515         yaz_log(LOG_DEBUG, "Relation >=");
1516         gen_regular_rel (term_tmp, term_value, 0);
1517         break;
1518     case 5:
1519         yaz_log(LOG_DEBUG, "Relation >");
1520         gen_regular_rel (term_tmp, term_value+1, 0);
1521         break;
1522     case 3:
1523     default:
1524         yaz_log(LOG_DEBUG, "Relation =");
1525         sprintf (term_tmp, "(0*%d)", term_value);
1526     }
1527     yaz_log(LOG_DEBUG, "dict_lookup_grep: %s", term_tmp);
1528     r = dict_lookup_grep(zh->reg->dict, term_dict, 0, grep_info, max_pos,
1529                           0, grep_handle);
1530     if (r)
1531         yaz_log(LOG_WARN, "dict_lookup_grep fail, rel=gt: %d", r);
1532     yaz_log(LOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1533     return 1;
1534 }
1535
1536 static int numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1537                          const char **term_sub, 
1538                          oid_value attributeSet, struct grep_info *grep_info,
1539                          int reg_type, int complete_flag,
1540                          int num_bases, char **basenames,
1541                          char *term_dst, int xpath_use, NMEM stream)
1542 {
1543     char term_dict[2*IT_MAX_WORD+2];
1544     int r, base_no;
1545     AttrType use;
1546     int use_value;
1547     const char *use_string = 0;
1548     oid_value curAttributeSet = attributeSet;
1549     const char *termp;
1550     struct rpn_char_map_info rcmi;
1551
1552     int bases_ok = 0;     /* no of databases with OK attribute */
1553     int errCode = 0;      /* err code (if any is not OK) */
1554     char *errString = 0;  /* addinfo */
1555
1556     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1557     attr_init (&use, zapt, 1);
1558     use_value = attr_find_ex (&use, &curAttributeSet, &use_string);
1559
1560     if (use_value == -1)
1561         use_value = 1016;
1562
1563     for (base_no = 0; base_no < num_bases; base_no++)
1564     {
1565         attent attp;
1566         data1_local_attribute id_xpath_attr;
1567         data1_local_attribute *local_attr;
1568         int max_pos, prefix_len = 0;
1569
1570         termp = *term_sub;
1571         if (use_value == -2)  /* string attribute (assume IDXPATH/any) */
1572         {
1573             use_value = xpath_use;
1574             attp.local_attributes = &id_xpath_attr;
1575             attp.attset_ordinal = VAL_IDXPATH;
1576             id_xpath_attr.next = 0;
1577             id_xpath_attr.local = use_value;
1578         }
1579         else if (curAttributeSet == VAL_IDXPATH)
1580         {
1581             attp.local_attributes = &id_xpath_attr;
1582             attp.attset_ordinal = VAL_IDXPATH;
1583             id_xpath_attr.next = 0;
1584             id_xpath_attr.local = use_value;
1585         }
1586         else
1587         {
1588             if ((r=att_getentbyatt (zh, &attp, curAttributeSet, use_value,
1589                                             use_string)))
1590             {
1591                 yaz_log(LOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
1592                       curAttributeSet, use_value, r);
1593                 if (r == -1)
1594                 {
1595                     errString = nmem_strdup_i(stream, use_value);
1596                     errCode = 114;
1597                 }
1598                 else
1599                     errCode = 121;
1600                 continue;
1601             }
1602         }
1603         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1604         {
1605             zh->errCode = 109; /* Database unavailable */
1606             zh->errString = basenames[base_no];
1607             return -1;
1608         }
1609         for (local_attr = attp.local_attributes; local_attr;
1610              local_attr = local_attr->next)
1611         {
1612             int ord;
1613             char ord_buf[32];
1614             int i, ord_len;
1615
1616             ord = zebraExplain_lookupSU (zh->reg->zei, attp.attset_ordinal,
1617                                           local_attr->local);
1618             if (ord < 0)
1619                 continue;
1620             if (prefix_len)
1621                 term_dict[prefix_len++] = '|';
1622             else
1623                 term_dict[prefix_len++] = '(';
1624
1625             ord_len = key_SU_encode (ord, ord_buf);
1626             for (i = 0; i<ord_len; i++)
1627             {
1628                 term_dict[prefix_len++] = 1;
1629                 term_dict[prefix_len++] = ord_buf[i];
1630             }
1631         }
1632         if (!prefix_len)
1633         {
1634             errCode = 114;
1635             errString = nmem_strdup_i(stream, use_value);
1636             continue;
1637         }
1638         bases_ok++;
1639         term_dict[prefix_len++] = ')';        
1640         term_dict[prefix_len++] = 1;
1641         term_dict[prefix_len++] = reg_type;
1642         yaz_log(LOG_DEBUG, "reg_type = %d", term_dict[prefix_len-1]);
1643         term_dict[prefix_len] = '\0';
1644         if (!numeric_relation (zh, zapt, &termp, term_dict,
1645                                attributeSet, grep_info, &max_pos, reg_type,
1646                                term_dst))
1647             return 0;
1648     }
1649     if (!bases_ok)
1650     {
1651         zh->errCode = errCode;
1652         zh->errString = errString;
1653         return -1;
1654     }
1655     *term_sub = termp;
1656     yaz_log(LOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1657     return 1;
1658 }
1659
1660 static RSET rpn_search_APT_numeric (ZebraHandle zh,
1661                                     Z_AttributesPlusTerm *zapt,
1662                                     const char *termz,
1663                                     oid_value attributeSet,
1664                                     NMEM stream,
1665                                     int reg_type, int complete_flag,
1666                                     const char *rank_type, int xpath_use,
1667                                     int num_bases, char **basenames,
1668                                     NMEM rset_nmem)
1669 {
1670     char term_dst[IT_MAX_WORD+1];
1671     const char *termp = termz;
1672     RSET rset[60]; /* FIXME - hard-coded magic number */
1673     int  r, rset_no = 0;
1674     struct grep_info grep_info;
1675
1676     yaz_log(LOG_DEBUG, "APT_numeric t='%s'",termz);
1677     if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream))
1678         return 0;
1679     while (1)
1680     { 
1681         yaz_log(LOG_DEBUG, "APT_numeric termp=%s", termp);
1682         grep_info.isam_p_indx = 0;
1683         r = numeric_term(zh, zapt, &termp, attributeSet, &grep_info,
1684                           reg_type, complete_flag, num_bases, basenames,
1685                           term_dst, xpath_use,
1686                           stream);
1687         if (r < 1)
1688             break;
1689         yaz_log(LOG_DEBUG, "term: %s", term_dst);
1690         rset[rset_no] = rset_trunc(zh, grep_info.isam_p_buf,
1691                                     grep_info.isam_p_indx, term_dst,
1692                                     strlen(term_dst), rank_type,
1693                                     0 /* preserve position */,
1694                                     zapt->term->which, rset_nmem, 
1695                                     key_it_ctrl,key_it_ctrl->scope);
1696         assert (rset[rset_no]);
1697         if (++rset_no >= (int) (sizeof(rset)/sizeof(*rset)))
1698             break;
1699     }
1700     grep_info_delete (&grep_info);
1701     if (rset_no == 0)
1702         return rsnull_create (rset_nmem,key_it_ctrl);
1703     if (rset_no == 1)
1704         return rset[0];
1705     return rsmultiand_create(rset_nmem,key_it_ctrl,key_it_ctrl->scope,
1706                rset_no, rset);
1707 }
1708
1709 static RSET rpn_search_APT_local (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1710                                   const char *termz,
1711                                   oid_value attributeSet,
1712                                   NMEM stream,
1713                                   const char *rank_type, NMEM rset_nmem)
1714 {
1715     RSET result;
1716     RSFD rsfd;
1717     struct it_key key;
1718     int sys;
1719     result = rstemp_create( rset_nmem,key_it_ctrl,key_it_ctrl->scope,
1720                      res_get (zh->res, "setTmpDir"),0 );
1721     rsfd = rset_open (result, RSETF_WRITE);
1722
1723     sys = atoi(termz);
1724     if (sys <= 0)
1725         sys = 1;
1726     key.mem[0] = sys;
1727     key.mem[1] = 1;
1728     key.len = 2;
1729     rset_write (rsfd, &key);
1730     rset_close (rsfd);
1731     return result;
1732 }
1733
1734 static RSET rpn_sort_spec (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1735                            oid_value attributeSet, NMEM stream,
1736                            Z_SortKeySpecList *sort_sequence,
1737                            const char *rank_type)
1738 {
1739     int i;
1740     int sort_relation_value;
1741     AttrType sort_relation_type;
1742     int use_value;
1743     AttrType use_type;
1744     Z_SortKeySpec *sks;
1745     Z_SortKey *sk;
1746     Z_AttributeElement *ae;
1747     int oid[OID_SIZE];
1748     oident oe;
1749     char termz[20];
1750     
1751     attr_init (&sort_relation_type, zapt, 7);
1752     sort_relation_value = attr_find (&sort_relation_type, &attributeSet);
1753
1754     attr_init (&use_type, zapt, 1);
1755     use_value = attr_find (&use_type, &attributeSet);
1756
1757     if (!sort_sequence->specs)
1758     {
1759         sort_sequence->num_specs = 10;
1760         sort_sequence->specs = (Z_SortKeySpec **)
1761             nmem_malloc(stream, sort_sequence->num_specs *
1762                          sizeof(*sort_sequence->specs));
1763         for (i = 0; i<sort_sequence->num_specs; i++)
1764             sort_sequence->specs[i] = 0;
1765     }
1766     if (zapt->term->which != Z_Term_general)
1767         i = 0;
1768     else
1769         i = atoi_n ((char *) zapt->term->u.general->buf,
1770                     zapt->term->u.general->len);
1771     if (i >= sort_sequence->num_specs)
1772         i = 0;
1773     sprintf (termz, "%d", i);
1774
1775     oe.proto = PROTO_Z3950;
1776     oe.oclass = CLASS_ATTSET;
1777     oe.value = attributeSet;
1778     if (!oid_ent_to_oid (&oe, oid))
1779         return 0;
1780
1781     sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1782     sks->sortElement = (Z_SortElement *)
1783         nmem_malloc(stream, sizeof(*sks->sortElement));
1784     sks->sortElement->which = Z_SortElement_generic;
1785     sk = sks->sortElement->u.generic = (Z_SortKey *)
1786         nmem_malloc(stream, sizeof(*sk));
1787     sk->which = Z_SortKey_sortAttributes;
1788     sk->u.sortAttributes = (Z_SortAttributes *)
1789         nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
1790
1791     sk->u.sortAttributes->id = oid;
1792     sk->u.sortAttributes->list = (Z_AttributeList *)
1793         nmem_malloc(stream, sizeof(*sk->u.sortAttributes->list));
1794     sk->u.sortAttributes->list->num_attributes = 1;
1795     sk->u.sortAttributes->list->attributes = (Z_AttributeElement **)
1796         nmem_malloc(stream, sizeof(*sk->u.sortAttributes->list->attributes));
1797     ae = *sk->u.sortAttributes->list->attributes = (Z_AttributeElement *)
1798         nmem_malloc(stream, sizeof(**sk->u.sortAttributes->list->attributes));
1799     ae->attributeSet = 0;
1800     ae->attributeType = (int *)
1801         nmem_malloc(stream, sizeof(*ae->attributeType));
1802     *ae->attributeType = 1;
1803     ae->which = Z_AttributeValue_numeric;
1804     ae->value.numeric = (int *)
1805         nmem_malloc(stream, sizeof(*ae->value.numeric));
1806     *ae->value.numeric = use_value;
1807
1808     sks->sortRelation = (int *)
1809         nmem_malloc(stream, sizeof(*sks->sortRelation));
1810     if (sort_relation_value == 1)
1811         *sks->sortRelation = Z_SortKeySpec_ascending;
1812     else if (sort_relation_value == 2)
1813         *sks->sortRelation = Z_SortKeySpec_descending;
1814     else 
1815         *sks->sortRelation = Z_SortKeySpec_ascending;
1816
1817     sks->caseSensitivity = (int *)
1818         nmem_malloc(stream, sizeof(*sks->caseSensitivity));
1819     *sks->caseSensitivity = 0;
1820
1821     sks->which = Z_SortKeySpec_null;
1822     sks->u.null = odr_nullval ();
1823     sort_sequence->specs[i] = sks;
1824     return rsnull_create (NULL,key_it_ctrl);
1825         /* FIXME - nmem?? */
1826 }
1827
1828
1829 static int parse_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1830                        oid_value attributeSet,
1831                        struct xpath_location_step *xpath, int max, NMEM mem)
1832 {
1833     oid_value curAttributeSet = attributeSet;
1834     AttrType use;
1835     const char *use_string = 0;
1836     
1837     attr_init (&use, zapt, 1);
1838     attr_find_ex (&use, &curAttributeSet, &use_string);
1839
1840     if (!use_string || *use_string != '/')
1841         return -1;
1842
1843     return zebra_parse_xpath_str(use_string, xpath, max, mem);
1844 }
1845  
1846                
1847
1848 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
1849                         int reg_type, const char *term, int use,
1850                         oid_value curAttributeSet, NMEM rset_nmem)
1851 {
1852     RSET rset;
1853     struct grep_info grep_info;
1854     char term_dict[2048];
1855     char ord_buf[32];
1856     int prefix_len = 0;
1857     int ord = zebraExplain_lookupSU (zh->reg->zei, curAttributeSet, use);
1858     int ord_len, i, r, max_pos;
1859     int term_type = Z_Term_characterString;
1860     const char *flags = "void";
1861
1862     if (grep_info_prepare (zh, 0 /* zapt */, &grep_info, '0', stream))
1863         return rsnull_create (rset_nmem,key_it_ctrl);
1864
1865     if (ord < 0)
1866         return rsnull_create (rset_nmem,key_it_ctrl);
1867     if (prefix_len)
1868         term_dict[prefix_len++] = '|';
1869     else
1870         term_dict[prefix_len++] = '(';
1871     
1872     ord_len = key_SU_encode (ord, ord_buf);
1873     for (i = 0; i<ord_len; i++)
1874     {
1875         term_dict[prefix_len++] = 1;
1876         term_dict[prefix_len++] = ord_buf[i];
1877     }
1878     term_dict[prefix_len++] = ')';
1879     term_dict[prefix_len++] = 1;
1880     term_dict[prefix_len++] = reg_type;
1881     
1882     strcpy(term_dict+prefix_len, term);
1883     
1884     grep_info.isam_p_indx = 0;
1885     r = dict_lookup_grep(zh->reg->dict, term_dict, 0,
1886                           &grep_info, &max_pos, 0, grep_handle);
1887     yaz_log (LOG_LOG, "%s %d positions", term,
1888              grep_info.isam_p_indx);
1889     rset = rset_trunc(zh, grep_info.isam_p_buf,
1890                        grep_info.isam_p_indx, term, strlen(term),
1891                        flags, 1, term_type,rset_nmem,
1892                        key_it_ctrl, key_it_ctrl->scope);
1893     grep_info_delete (&grep_info);
1894     return rset;
1895 }
1896
1897 static RSET rpn_search_xpath (ZebraHandle zh,
1898                               oid_value attributeSet,
1899                               int num_bases, char **basenames,
1900                               NMEM stream, const char *rank_type, RSET rset,
1901                               int xpath_len, struct xpath_location_step *xpath,
1902                               NMEM rset_nmem)
1903 {
1904     oid_value curAttributeSet = attributeSet;
1905     int base_no;
1906     int i;
1907
1908     if (xpath_len < 0)
1909         return rset;
1910
1911     yaz_log (LOG_LOG, "len=%d", xpath_len);
1912     for (i = 0; i<xpath_len; i++)
1913     {
1914         yaz_log (LOG_LOG, "XPATH %d %s", i, xpath[i].part);
1915
1916     }
1917
1918     curAttributeSet = VAL_IDXPATH;
1919
1920     /*
1921       //a    ->    a/.*
1922       //a/b  ->    b/a/.*
1923       /a     ->    a/
1924       /a/b   ->    b/a/
1925
1926       /      ->    none
1927
1928    a[@attr=value]/b[@other=othervalue]
1929
1930  /e/@a val      range(e/,range(@a,freetext(w,1015,val),@a),e/)
1931  /a/b val       range(b/a/,freetext(w,1016,val),b/a/)
1932  /a/b/@c val    range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
1933  /a/b[@c=y] val range(b/a/,freetext(w,1016,val),b/a/,@c=y)
1934  /a[@c=y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c=y)
1935  /a[@c=x]/b[@c=y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c=y),a/,@c=x)
1936       
1937     */
1938
1939     dict_grep_cmap (zh->reg->dict, 0, 0);
1940
1941     for (base_no = 0; base_no < num_bases; base_no++)
1942     {
1943         int level = xpath_len;
1944         int first_path = 1;
1945         
1946         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1947         {
1948             zh->errCode = 109; /* Database unavailable */
1949             zh->errString = basenames[base_no];
1950             return rset;
1951         }
1952         while (--level >= 0)
1953         {
1954             char xpath_rev[128];
1955             int i, len;
1956             RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
1957
1958             *xpath_rev = 0;
1959             len = 0;
1960             for (i = level; i >= 1; --i)
1961             {
1962                 const char *cp = xpath[i].part;
1963                 if (*cp)
1964                 {
1965                     for (;*cp; cp++)
1966                         if (*cp == '*')
1967                         {
1968                             memcpy (xpath_rev + len, "[^/]*", 5);
1969                             len += 5;
1970                         }
1971                         else if (*cp == ' ')
1972                         {
1973
1974                             xpath_rev[len++] = 1;
1975                             xpath_rev[len++] = ' ';
1976                         }
1977
1978                         else
1979                             xpath_rev[len++] = *cp;
1980                     xpath_rev[len++] = '/';
1981                 }
1982                 else if (i == 1)  /* // case */
1983                 {
1984                     xpath_rev[len++] = '.';
1985                     xpath_rev[len++] = '*';
1986                 }
1987             }
1988             xpath_rev[len] = 0;
1989
1990             if (xpath[level].predicate &&
1991                 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
1992                 xpath[level].predicate->u.relation.name[0])
1993             {
1994                 WRBUF wbuf = wrbuf_alloc();
1995                 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
1996                 if (xpath[level].predicate->u.relation.value)
1997                 {
1998                     const char *cp = xpath[level].predicate->u.relation.value;
1999                     wrbuf_putc(wbuf, '=');
2000                     
2001                     while (*cp)
2002                     {
2003                         if (strchr(REGEX_CHARS, *cp))
2004                             wrbuf_putc(wbuf, '\\');
2005                         wrbuf_putc(wbuf, *cp);
2006                         cp++;
2007                     }
2008                 }
2009                 wrbuf_puts(wbuf, "");
2010                 rset_attr = xpath_trunc(
2011                     zh, stream, '0', wrbuf_buf(wbuf), 3, 
2012                     curAttributeSet,rset_nmem);
2013                 wrbuf_free(wbuf, 1);
2014             } 
2015             else 
2016             {
2017                 if (!first_path)
2018                     continue;
2019             }
2020             yaz_log (LOG_LOG, "xpath_rev (%d) = %s", level, xpath_rev);
2021             if (strlen(xpath_rev))
2022             {
2023                 rset_start_tag = xpath_trunc(zh, stream, '0', 
2024                         xpath_rev, 1, curAttributeSet, rset_nmem);
2025             
2026                 rset_end_tag = xpath_trunc(zh, stream, '0', 
2027                         xpath_rev, 2, curAttributeSet, rset_nmem);
2028
2029                 /*
2030                 parms.key_size = sizeof(struct it_key);
2031                 parms.cmp = key_compare_it;
2032                 parms.rset_l = rset_start_tag;
2033                 parms.rset_m = rset;
2034                 parms.rset_r = rset_end_tag;
2035                 parms.rset_attr = rset_attr;
2036                 parms.printer = key_print_it;
2037                 rset = rset_create (rset_kind_between, &parms);
2038                 */
2039                 rset=rsbetween_create( rset_nmem,key_it_ctrl,key_it_ctrl->scope,
2040                         rset_start_tag, rset, rset_end_tag, rset_attr);
2041             }
2042             first_path = 0;
2043         }
2044     }
2045
2046     return rset;
2047 }
2048
2049
2050
2051 static RSET rpn_search_APT (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2052                             oid_value attributeSet, NMEM stream,
2053                             Z_SortKeySpecList *sort_sequence,
2054                             int num_bases, char **basenames, 
2055                             NMEM rset_nmem)
2056 {
2057     unsigned reg_id;
2058     char *search_type = NULL;
2059     char rank_type[128];
2060     int complete_flag;
2061     int sort_flag;
2062     char termz[IT_MAX_WORD+1];
2063     RSET rset = 0;
2064     int xpath_len;
2065     int xpath_use = 0;
2066     struct xpath_location_step xpath[10];
2067
2068     zebra_maps_attr (zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2069                      rank_type, &complete_flag, &sort_flag);
2070     
2071     yaz_log(LOG_DEBUG, "reg_id=%c", reg_id);
2072     yaz_log(LOG_DEBUG, "complete_flag=%d", complete_flag);
2073     yaz_log(LOG_DEBUG, "search_type=%s", search_type);
2074     yaz_log(LOG_DEBUG, "rank_type=%s", rank_type);
2075
2076     if (zapt_term_to_utf8(zh, zapt, termz))
2077         return 0;
2078
2079     if (sort_flag)
2080         return rpn_sort_spec (zh, zapt, attributeSet, stream, sort_sequence,
2081                               rank_type);
2082     xpath_len = parse_xpath(zh, zapt, attributeSet, xpath, 10, stream);
2083     if (xpath_len >= 0)
2084     {
2085         xpath_use = 1016;
2086         if (xpath[xpath_len-1].part[0] == '@')
2087             xpath_use = 1015;
2088     }
2089
2090     if (!strcmp (search_type, "phrase"))
2091     {
2092         rset = rpn_search_APT_phrase (zh, zapt, termz, attributeSet, stream,
2093                                       reg_id, complete_flag, rank_type,
2094                                       xpath_use,
2095                                       num_bases, basenames, rset_nmem);
2096     }
2097     else if (!strcmp (search_type, "and-list"))
2098     {
2099         rset = rpn_search_APT_and_list (zh, zapt, termz, attributeSet, stream,
2100                                         reg_id, complete_flag, rank_type,
2101                                         xpath_use,
2102                                         num_bases, basenames, rset_nmem);
2103     }
2104     else if (!strcmp (search_type, "or-list"))
2105     {
2106         rset = rpn_search_APT_or_list (zh, zapt, termz, attributeSet, stream,
2107                                        reg_id, complete_flag, rank_type,
2108                                        xpath_use,
2109                                        num_bases, basenames, rset_nmem);
2110     }
2111     else if (!strcmp (search_type, "local"))
2112     {
2113         rset = rpn_search_APT_local (zh, zapt, termz, attributeSet, stream,
2114                                      rank_type, rset_nmem);
2115     }
2116     else if (!strcmp (search_type, "numeric"))
2117     {
2118         rset = rpn_search_APT_numeric (zh, zapt, termz, attributeSet, stream,
2119                                        reg_id, complete_flag, rank_type,
2120                                        xpath_use,
2121                                        num_bases, basenames, rset_nmem);
2122     }
2123     else if (!strcmp (search_type, "always"))
2124     {
2125         rset = 0;
2126     }
2127     else
2128         zh->errCode = 118;
2129     return rpn_search_xpath (zh, attributeSet, num_bases, basenames,
2130                              stream, rank_type, rset, 
2131                              xpath_len, xpath, rset_nmem);
2132 }
2133
2134 static RSET rpn_search_structure (ZebraHandle zh, Z_RPNStructure *zs,
2135                                   oid_value attributeSet, 
2136                                   NMEM stream, NMEM rset_nmem,
2137                                   Z_SortKeySpecList *sort_sequence,
2138                                   int num_bases, char **basenames)
2139 {
2140     RSET r = NULL;
2141     if (zs->which == Z_RPNStructure_complex)
2142     {
2143         Z_Operator *zop = zs->u.complex->roperator;
2144         RSET rsets[2]; /* l and r argument */
2145
2146         rsets[0]=rpn_search_structure (zh, zs->u.complex->s1,
2147                                        attributeSet, stream, rset_nmem,
2148                                        sort_sequence,
2149                                        num_bases, basenames);
2150         if (rsets[0] == NULL)
2151             return NULL;
2152         rsets[1]=rpn_search_structure (zh, zs->u.complex->s2,
2153                                        attributeSet, stream, rset_nmem,
2154                                        sort_sequence,
2155                                        num_bases, basenames);
2156         if (rsets[1] == NULL)
2157         {
2158             rset_delete (rsets[0]);
2159             return NULL;
2160         }
2161
2162         switch (zop->which)
2163         {
2164         case Z_Operator_and:
2165             r=rsmultiand_create(rset_nmem, key_it_ctrl, key_it_ctrl->scope,
2166                         2, rsets);
2167             break;
2168         case Z_Operator_or:
2169             r=rsmultior_create(rset_nmem, key_it_ctrl, key_it_ctrl->scope,
2170                         2, rsets);
2171             break;
2172         case Z_Operator_and_not:
2173             r = rsbool_create_not(rset_nmem,key_it_ctrl, key_it_ctrl->scope,
2174                     rsets[0],rsets[1]);
2175             break;
2176         case Z_Operator_prox:
2177             if (zop->u.prox->which != Z_ProximityOperator_known)
2178             {
2179                 zh->errCode = 132;
2180                 return NULL;
2181             }
2182             if (*zop->u.prox->u.known != Z_ProxUnit_word)
2183             {
2184                 char *val = (char *) nmem_malloc(stream, 16);
2185                 zh->errCode = 132;
2186                 zh->errString = val;
2187                 sprintf (val, "%d", *zop->u.prox->u.known);
2188                 return NULL;
2189             }
2190             else
2191             {
2192                 /* new / old prox */
2193                 r=rsprox_create(rset_nmem,key_it_ctrl,key_it_ctrl->scope,
2194                          2, rsets, 
2195                          *zop->u.prox->ordered,
2196                          (!zop->u.prox->exclusion ? 
2197                               0 : *zop->u.prox->exclusion),
2198                          *zop->u.prox->relationType,
2199                          *zop->u.prox->distance );
2200             }
2201             break;
2202         default:
2203             zh->errCode = 110;
2204             return NULL;
2205         }
2206     }
2207     else if (zs->which == Z_RPNStructure_simple)
2208     {
2209         if (zs->u.simple->which == Z_Operand_APT)
2210         {
2211             yaz_log(LOG_DEBUG, "rpn_search_APT");
2212             r = rpn_search_APT (zh, zs->u.simple->u.attributesPlusTerm,
2213                                 attributeSet, stream, sort_sequence,
2214                                 num_bases, basenames,rset_nmem);
2215         }
2216         else if (zs->u.simple->which == Z_Operand_resultSetId)
2217         {
2218             yaz_log(LOG_DEBUG, "rpn_search_ref");
2219             r = resultSetRef (zh, zs->u.simple->u.resultSetId);
2220             if (!r)
2221             {
2222                 r = rsnull_create (rset_nmem,key_it_ctrl);
2223                 zh->errCode = 30;
2224                 zh->errString =
2225                     nmem_strdup (stream, zs->u.simple->u.resultSetId);
2226                 return 0;
2227             }
2228             else
2229                 rset_dup(r);
2230         }
2231         else
2232         {
2233             zh->errCode = 3;
2234             return 0;
2235         }
2236     }
2237     else
2238     {
2239         zh->errCode = 3;
2240         return 0;
2241     }
2242     return r;
2243 }
2244
2245
2246 RSET rpn_search (ZebraHandle zh, NMEM nmem, NMEM rset_nmem,
2247                  Z_RPNQuery *rpn, int num_bases, char **basenames, 
2248                  const char *setname,
2249                  ZebraSet sset)
2250 {
2251     RSET rset;
2252     oident *attrset;
2253     oid_value attributeSet;
2254     Z_SortKeySpecList *sort_sequence;
2255     int sort_status, i;
2256
2257     zh->errCode = 0;
2258     zh->errString = NULL;
2259     zh->hits = 0;
2260
2261     sort_sequence = (Z_SortKeySpecList *)
2262         nmem_malloc(nmem, sizeof(*sort_sequence));
2263     sort_sequence->num_specs = 10; /* FIXME - Hard-coded number */
2264     sort_sequence->specs = (Z_SortKeySpec **)
2265         nmem_malloc(nmem, sort_sequence->num_specs *
2266                      sizeof(*sort_sequence->specs));
2267     for (i = 0; i<sort_sequence->num_specs; i++)
2268         sort_sequence->specs[i] = 0;
2269     
2270     attrset = oid_getentbyoid (rpn->attributeSetId);
2271     attributeSet = attrset->value;
2272     rset = rpn_search_structure (zh, rpn->RPNStructure, attributeSet,
2273                                  nmem, rset_nmem,
2274                                  sort_sequence, num_bases, basenames);
2275     if (!rset)
2276         return 0;
2277
2278     if (zh->errCode)
2279         yaz_log(LOG_DEBUG, "search error: %d", zh->errCode);
2280     
2281     for (i = 0; sort_sequence->specs[i]; i++)
2282         ;
2283     sort_sequence->num_specs = i;
2284     if (!i)
2285         resultSetRank (zh, sset, rset, rset_nmem);
2286     else
2287     {
2288         yaz_log(LOG_DEBUG, "resultSetSortSingle in rpn_search");
2289         resultSetSortSingle (zh, nmem, sset, rset,
2290                              sort_sequence, &sort_status);
2291         if (zh->errCode)
2292         {
2293             yaz_log(LOG_DEBUG, "resultSetSortSingle status = %d", zh->errCode);
2294         }
2295     }
2296     return rset;
2297 }
2298
2299 struct scan_info_entry {
2300     char *term;
2301     ISAMC_P isam_p;
2302 };
2303
2304 struct scan_info {
2305     struct scan_info_entry *list;
2306     ODR odr;
2307     int before, after;
2308     char prefix[20];
2309 };
2310
2311 static int scan_handle (char *name, const char *info, int pos, void *client)
2312 {
2313     int len_prefix, idx;
2314     struct scan_info *scan_info = (struct scan_info *) client;
2315
2316     len_prefix = strlen(scan_info->prefix);
2317     if (memcmp (name, scan_info->prefix, len_prefix))
2318         return 1;
2319     if (pos > 0)        idx = scan_info->after - pos + scan_info->before;
2320     else
2321         idx = - pos - 1;
2322     scan_info->list[idx].term = (char *)
2323         odr_malloc(scan_info->odr, strlen(name + len_prefix)+1);
2324     strcpy(scan_info->list[idx].term, name + len_prefix);
2325     assert (*info == sizeof(ISAMC_P));
2326     memcpy (&scan_info->list[idx].isam_p, info+1, sizeof(ISAMC_P));
2327     return 0;
2328 }
2329
2330 static void scan_term_untrans (ZebraHandle zh, NMEM stream, int reg_type,
2331                                char **dst, const char *src)
2332 {
2333     char term_src[IT_MAX_WORD];
2334     char term_dst[IT_MAX_WORD];
2335     
2336     term_untrans (zh, reg_type, term_src, src);
2337
2338     if (zh->iconv_from_utf8 != 0)
2339     {
2340         int len;
2341         char *inbuf = term_src;
2342         size_t inleft = strlen(term_src);
2343         char *outbuf = term_dst;
2344         size_t outleft = sizeof(term_dst)-1;
2345         size_t ret;
2346         
2347         ret = yaz_iconv (zh->iconv_from_utf8, &inbuf, &inleft,
2348                          &outbuf, &outleft);
2349         if (ret == (size_t)(-1))
2350             len = 0;
2351         else
2352             len = outbuf - term_dst;
2353         *dst = nmem_malloc(stream, len + 1);
2354         if (len > 0)
2355             memcpy (*dst, term_dst, len);
2356         (*dst)[len] = '\0';
2357     }
2358     else
2359         *dst = nmem_strdup(stream, term_src);
2360 }
2361
2362 static void count_set (RSET r, int *count)
2363 {
2364     zint psysno = 0;
2365     int kno = 0;
2366     struct it_key key;
2367     RSFD rfd;
2368
2369     yaz_log(LOG_DEBUG, "count_set");
2370
2371     *count = 0;
2372     rfd = rset_open (r, RSETF_READ);
2373     while (rset_read (rfd, &key,0 /* never mind terms */))
2374     {
2375         if (key.mem[0] != psysno)
2376         {
2377             psysno = key.mem[0];
2378             (*count)++;
2379         }
2380         kno++;
2381     }
2382     rset_close (rfd);
2383     yaz_log(LOG_DEBUG, "%d keys, %d records", kno, *count);
2384 }
2385
2386 void rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
2387                oid_value attributeset,
2388                int num_bases, char **basenames,
2389                int *position, int *num_entries, ZebraScanEntry **list,
2390                int *is_partial, RSET limit_set, int return_zero)
2391 {
2392     int i;
2393     int pos = *position;
2394     int num = *num_entries;
2395     int before;
2396     int after;
2397     int base_no;
2398     char termz[IT_MAX_WORD+20];
2399     AttrType use;
2400     int use_value;
2401     const char *use_string = 0;
2402     struct scan_info *scan_info_array;
2403     ZebraScanEntry *glist;
2404     int ords[32], ord_no = 0;
2405     int ptr[32];
2406
2407     int bases_ok = 0;     /* no of databases with OK attribute */
2408     int errCode = 0;      /* err code (if any is not OK) */
2409     char *errString = 0;  /* addinfo */
2410
2411     unsigned reg_id;
2412     char *search_type = NULL;
2413     char rank_type[128];
2414     int complete_flag;
2415     int sort_flag;
2416     NMEM rset_nmem=NULL; 
2417
2418     *list = 0;
2419
2420     if (attributeset == VAL_NONE)
2421         attributeset = VAL_BIB1;
2422
2423     if (!limit_set)
2424     {
2425         AttrType termset;
2426         int termset_value_numeric;
2427         const char *termset_value_string;
2428         attr_init (&termset, zapt, 8);
2429         termset_value_numeric =
2430             attr_find_ex (&termset, NULL, &termset_value_string);
2431         if (termset_value_numeric != -1)
2432         {
2433             char resname[32];
2434             const char *termset_name = 0;
2435             
2436             if (termset_value_numeric != -2)
2437             {
2438                 
2439                 sprintf (resname, "%d", termset_value_numeric);
2440                 termset_name = resname;
2441             }
2442             else
2443                 termset_name = termset_value_string;
2444             
2445             limit_set = resultSetRef (zh, termset_name);
2446         }
2447     }
2448         
2449     yaz_log (LOG_DEBUG, "position = %d, num = %d set=%d",
2450              pos, num, attributeset);
2451         
2452     attr_init (&use, zapt, 1);
2453     use_value = attr_find_ex (&use, &attributeset, &use_string);
2454
2455     if (zebra_maps_attr (zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2456                          rank_type, &complete_flag, &sort_flag))
2457     {
2458         *num_entries = 0;
2459         zh->errCode = 113;
2460         return ;
2461     }
2462     yaz_log (LOG_DEBUG, "use_value = %d", use_value);
2463
2464     if (use_value == -1)
2465         use_value = 1016;
2466     for (base_no = 0; base_no < num_bases && ord_no < 32; base_no++)
2467     {
2468         int r;
2469         attent attp;
2470         data1_local_attribute *local_attr;
2471
2472         if ((r=att_getentbyatt (zh, &attp, attributeset, use_value,
2473                                 use_string)))
2474         {
2475             yaz_log(LOG_DEBUG, "att_getentbyatt fail. set=%d use=%d",
2476                   attributeset, use_value);
2477             if (r == -1)
2478             {
2479                 char val_str[32];
2480                 sprintf (val_str, "%d", use_value);
2481                 errCode = 114;
2482                 errString = odr_strdup (stream, val_str);
2483             }   
2484             else
2485                 errCode = 121;
2486             continue;
2487         }
2488         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2489         {
2490             zh->errString = basenames[base_no];
2491             zh->errCode = 109; /* Database unavailable */
2492             *num_entries = 0;
2493             return;
2494         }
2495         bases_ok++;
2496         for (local_attr = attp.local_attributes; local_attr && ord_no < 32;
2497              local_attr = local_attr->next)
2498         {
2499             int ord;
2500
2501             ord = zebraExplain_lookupSU (zh->reg->zei, attp.attset_ordinal,
2502                                          local_attr->local);
2503             if (ord > 0)
2504                 ords[ord_no++] = ord;
2505         }
2506     }
2507     if (!bases_ok && errCode)
2508     {
2509         zh->errCode = errCode;
2510         zh->errString = errString;
2511         *num_entries = 0;
2512     }
2513     if (ord_no == 0)
2514     {
2515         char val_str[32];
2516         sprintf (val_str, "%d", use_value);
2517         zh->errCode = 114;
2518         zh->errString = odr_strdup (stream, val_str);
2519
2520         *num_entries = 0;
2521         return;
2522     }
2523     /* prepare dictionary scanning */
2524     before = pos-1;
2525     after = 1+num-pos;
2526     scan_info_array = (struct scan_info *)
2527         odr_malloc(stream, ord_no * sizeof(*scan_info_array));
2528     for (i = 0; i < ord_no; i++)
2529     {
2530         int j, prefix_len = 0;
2531         int before_tmp = before, after_tmp = after;
2532         struct scan_info *scan_info = scan_info_array + i;
2533         struct rpn_char_map_info rcmi;
2534
2535         rpn_char_map_prepare (zh->reg, reg_id, &rcmi);
2536
2537         scan_info->before = before;
2538         scan_info->after = after;
2539         scan_info->odr = stream;
2540
2541         scan_info->list = (struct scan_info_entry *)
2542             odr_malloc(stream, (before+after) * sizeof(*scan_info->list));
2543         for (j = 0; j<before+after; j++)
2544             scan_info->list[j].term = NULL;
2545
2546         prefix_len += key_SU_encode (ords[i], termz + prefix_len);
2547         termz[prefix_len++] = reg_id;
2548         termz[prefix_len] = 0;
2549         strcpy(scan_info->prefix, termz);
2550
2551         if (trans_scan_term(zh, zapt, termz+prefix_len, reg_id))
2552             return ;
2553                     
2554         dict_scan(zh->reg->dict, termz, &before_tmp, &after_tmp,
2555                   scan_info, scan_handle);
2556     }
2557     glist = (ZebraScanEntry *)
2558         odr_malloc(stream, (before+after)*sizeof(*glist));
2559
2560     rset_nmem = nmem_create();
2561
2562     /* consider terms after main term */
2563     for (i = 0; i < ord_no; i++)
2564         ptr[i] = before;
2565     
2566     *is_partial = 0;
2567     for (i = 0; i<after; i++)
2568     {
2569         int j, j0 = -1;
2570         const char *mterm = NULL;
2571         const char *tst;
2572         RSET rset;
2573         
2574         for (j = 0; j < ord_no; j++)
2575         {
2576             if (ptr[j] < before+after &&
2577                 (tst=scan_info_array[j].list[ptr[j]].term) &&
2578                 (!mterm || strcmp (tst, mterm) < 0))
2579             {
2580                 j0 = j;
2581                 mterm = tst;
2582             }
2583         }
2584         if (j0 == -1)
2585             break;
2586         scan_term_untrans (zh, stream->mem, reg_id,
2587                            &glist[i+before].term, mterm);
2588         rset = rset_trunc(zh, &scan_info_array[j0].list[ptr[j0]].isam_p, 1,
2589                            glist[i+before].term, strlen(glist[i+before].term),
2590                            NULL, 0, zapt->term->which, rset_nmem, 
2591                            key_it_ctrl,key_it_ctrl->scope);
2592         ptr[j0]++;
2593         for (j = j0+1; j<ord_no; j++)
2594         {
2595             if (ptr[j] < before+after &&
2596                 (tst=scan_info_array[j].list[ptr[j]].term) &&
2597                 !strcmp (tst, mterm))
2598             {
2599                 RSET rset2;
2600
2601                 rset2 =
2602                    rset_trunc(zh, &scan_info_array[j].list[ptr[j]].isam_p, 1,
2603                                glist[i+before].term,
2604                                strlen(glist[i+before].term), NULL, 0,
2605                                zapt->term->which,rset_nmem,
2606                                key_it_ctrl, key_it_ctrl->scope);
2607                 rset = rsbool_create_or(rset_nmem,key_it_ctrl,
2608                                key_it_ctrl->scope, rset, rset2);
2609                 /* FIXME - Use a proper multi-or */
2610
2611                 ptr[j]++;
2612             }
2613         }
2614         if (limit_set)
2615             rset = rsbool_create_and(rset_nmem,key_it_ctrl,key_it_ctrl->scope,
2616                             rset, rset_dup(limit_set));
2617         count_set (rset, &glist[i+before].occurrences);
2618         rset_delete (rset);
2619     }
2620     if (i < after)
2621     {
2622         *num_entries -= (after-i);
2623         *is_partial = 1;
2624     }
2625
2626     /* consider terms before main term */
2627     for (i = 0; i<ord_no; i++)
2628         ptr[i] = 0;
2629
2630     for (i = 0; i<before; i++)
2631     {
2632         int j, j0 = -1;
2633         const char *mterm = NULL;
2634         const char *tst;
2635         RSET rset;
2636         
2637         for (j = 0; j <ord_no; j++)
2638         {
2639             if (ptr[j] < before &&
2640                 (tst=scan_info_array[j].list[before-1-ptr[j]].term) &&
2641                 (!mterm || strcmp (tst, mterm) > 0))
2642             {
2643                 j0 = j;
2644                 mterm = tst;
2645             }
2646         }
2647         if (j0 == -1)
2648             break;
2649
2650         scan_term_untrans (zh, stream->mem, reg_id,
2651                            &glist[before-1-i].term, mterm);
2652
2653         rset = rset_trunc
2654                (zh, &scan_info_array[j0].list[before-1-ptr[j0]].isam_p, 1,
2655                 glist[before-1-i].term, strlen(glist[before-1-i].term),
2656                 NULL, 0, zapt->term->which,rset_nmem,
2657                 key_it_ctrl,key_it_ctrl->scope);
2658
2659         ptr[j0]++;
2660
2661         for (j = j0+1; j<ord_no; j++)
2662         {
2663             if (ptr[j] < before &&
2664                 (tst=scan_info_array[j].list[before-1-ptr[j]].term) &&
2665                 !strcmp (tst, mterm))
2666             {
2667                 RSET rset2;
2668
2669                 rset2 = rset_trunc(
2670                     zh,
2671                     &scan_info_array[j].list[before-1-ptr[j]].isam_p, 1,
2672                     glist[before-1-i].term,
2673                     strlen(glist[before-1-i].term), NULL, 0,
2674                     zapt->term->which, rset_nmem,
2675                     key_it_ctrl, key_it_ctrl->scope);
2676                 rset = rsbool_create_and(rset_nmem,key_it_ctrl,
2677                                          key_it_ctrl->scope, rset, rset2);
2678                 /* FIXME - multi-and ?? */
2679                 ptr[j]++;
2680             }
2681         }
2682         if (limit_set)
2683             rset = rsbool_create_and(rset_nmem,key_it_ctrl,key_it_ctrl->scope,
2684                             rset, rset_dup(limit_set));
2685         count_set (rset, &glist[before-1-i].occurrences);
2686         rset_delete (rset);
2687     }
2688     i = before-i;
2689     if (i)
2690     {
2691         *is_partial = 1;
2692         *position -= i;
2693         *num_entries -= i;
2694     }
2695
2696     nmem_destroy(rset_nmem);
2697     *list = glist + i;               /* list is set to first 'real' entry */
2698     
2699     yaz_log(LOG_DEBUG, "position = %d, num_entries = %d",
2700           *position, *num_entries);
2701     if (zh->errCode)
2702         yaz_log(LOG_DEBUG, "scan error: %d", zh->errCode);
2703 }
2704