Handle unknown register type in zebra_term_untrans
[idzebra-moved-to-github.git] / index / zrpn.c
1 /* $Id: zrpn.c,v 1.209 2006-02-20 12:38:42 adam Exp $
2    Copyright (C) 1995-2005
3    Index Data ApS
4
5 This file is part of the Zebra server.
6
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra.  If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA.
21 */
22
23 #include <stdio.h>
24 #include <assert.h>
25 #ifdef WIN32
26 #include <io.h>
27 #endif
28 #if HAVE_UNISTD_H
29 #include <unistd.h>
30 #endif
31 #include <ctype.h>
32
33 #include <yaz/diagbib1.h>
34 #include "index.h"
35 #include <zebra_xpath.h>
36
37 #include <charmap.h>
38 #include <rset.h>
39
40 struct rpn_char_map_info
41 {
42     ZebraMaps zm;
43     int reg_type;
44 };
45
46 typedef struct
47 {
48     int type;
49     int major;
50     int minor;
51     Z_AttributesPlusTerm *zapt;
52 } AttrType;
53
/* Log level of the "rpn" module; looked up on first use in add_isam_p(),
   after which log_level_set is non-zero. */
static int log_level_rpn = 0;
static int log_level_set = 0;
56
57 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
58 {
59     struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
60     const char **out = zebra_maps_input(p->zm, p->reg_type, from, len, 0);
61 #if 0
62     if (out && *out)
63     {
64         const char *outp = *out;
65         yaz_log(YLOG_LOG, "---");
66         while (*outp)
67         {
68             yaz_log(YLOG_LOG, "%02X", *outp);
69             outp++;
70         }
71     }
72 #endif
73     return out;
74 }
75
76 static void rpn_char_map_prepare(struct zebra_register *reg, int reg_type,
77                                   struct rpn_char_map_info *map_info)
78 {
79     map_info->zm = reg->zebra_maps;
80     map_info->reg_type = reg_type;
81     dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
82 }
83
84 static int attr_find_ex(AttrType *src, oid_value *attributeSetP,
85                         const char **string_value)
86 {
87     int num_attributes;
88
89     num_attributes = src->zapt->attributes->num_attributes;
90     while (src->major < num_attributes)
91     {
92         Z_AttributeElement *element;
93
94         element = src->zapt->attributes->attributes[src->major];
95         if (src->type == *element->attributeType)
96         {
97             switch (element->which) 
98             {
99             case Z_AttributeValue_numeric:
100                 ++(src->major);
101                 if (element->attributeSet && attributeSetP)
102                 {
103                     oident *attrset;
104
105                     attrset = oid_getentbyoid(element->attributeSet);
106                     *attributeSetP = attrset->value;
107                 }
108                 return *element->value.numeric;
109                 break;
110             case Z_AttributeValue_complex:
111                 if (src->minor >= element->value.complex->num_list)
112                     break;
113                 if (element->attributeSet && attributeSetP)
114                 {
115                     oident *attrset;
116                     
117                     attrset = oid_getentbyoid(element->attributeSet);
118                     *attributeSetP = attrset->value;
119                 }
120                 if (element->value.complex->list[src->minor]->which ==  
121                     Z_StringOrNumeric_numeric)
122                 {
123                     ++(src->minor);
124                     return
125                         *element->value.complex->list[src->minor-1]->u.numeric;
126                 }
127                 else if (element->value.complex->list[src->minor]->which ==  
128                          Z_StringOrNumeric_string)
129                 {
130                     if (!string_value)
131                         break;
132                     ++(src->minor);
133                     *string_value = 
134                         element->value.complex->list[src->minor-1]->u.string;
135                     return -2;
136                 }
137                 else
138                     break;
139             default:
140                 assert(0);
141             }
142         }
143         ++(src->major);
144     }
145     return -1;
146 }
147
148 static int attr_find(AttrType *src, oid_value *attributeSetP)
149 {
150     return attr_find_ex(src, attributeSetP, 0);
151 }
152
153 static void attr_init(AttrType *src, Z_AttributesPlusTerm *zapt,
154                        int type)
155 {
156     src->zapt = zapt;
157     src->type = type;
158     src->major = 0;
159     src->minor = 0;
160 }
161
162 #define TERM_COUNT        
163        
164 struct grep_info {        
165 #ifdef TERM_COUNT        
166     int *term_no;        
167 #endif        
168     ISAM_P *isam_p_buf;
169     int isam_p_size;        
170     int isam_p_indx;
171     ZebraHandle zh;
172     int reg_type;
173     ZebraSet termset;
174 };        
175
176 void zebra_term_untrans(ZebraHandle zh, int reg_type,
177                         char *dst, const char *src)
178 {
179     int len = 0;
180     while (*src)
181     {
182         const char *cp = zebra_maps_output(zh->reg->zebra_maps,
183                                            reg_type, &src);
184         if (!cp)
185         {
186             if (len < IT_MAX_WORD-1)
187                 dst[len++] = *src;
188             src++;
189         }
190         else
191             while (*cp && len < IT_MAX_WORD-1)
192                 dst[len++] = *cp++;
193     }
194     dst[len] = '\0';
195 }
196
197 static void add_isam_p(const char *name, const char *info,
198                        struct grep_info *p)
199 {
200     if (!log_level_set)
201     {
202         log_level_rpn = yaz_log_module_level("rpn");
203         log_level_set = 1;
204     }
205     if (p->isam_p_indx == p->isam_p_size)
206     {
207         ISAM_P *new_isam_p_buf;
208 #ifdef TERM_COUNT        
209         int *new_term_no;        
210 #endif
211         p->isam_p_size = 2*p->isam_p_size + 100;
212         new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
213                                             p->isam_p_size);
214         if (p->isam_p_buf)
215         {
216             memcpy(new_isam_p_buf, p->isam_p_buf,
217                     p->isam_p_indx * sizeof(*p->isam_p_buf));
218             xfree(p->isam_p_buf);
219         }
220         p->isam_p_buf = new_isam_p_buf;
221
222 #ifdef TERM_COUNT
223         new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
224         if (p->term_no)
225         {
226             memcpy(new_term_no, p->isam_p_buf,
227                     p->isam_p_indx * sizeof(*p->term_no));
228             xfree(p->term_no);
229         }
230         p->term_no = new_term_no;
231 #endif
232     }
233     assert(*info == sizeof(*p->isam_p_buf));
234     memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
235
236 #if 1
237     if (p->termset)
238     {
239         const char *db;
240         int set, use;
241         char term_tmp[IT_MAX_WORD];
242         int ord = 0;
243         int len = key_SU_decode (&ord, (const unsigned char *) name);
244         
245         zebra_term_untrans  (p->zh, p->reg_type, term_tmp, name+len+1);
246         yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
247         zebraExplain_lookup_ord (p->zh->reg->zei,
248                                  ord, 0 /* index_type */, &db, &set, &use);
249         yaz_log(log_level_rpn, "grep:  set=%d use=%d db=%s", set, use, db);
250         
251         resultSetAddTerm(p->zh, p->termset, name[len], db,
252                          set, use, term_tmp);
253     }
254 #endif
255     (p->isam_p_indx)++;
256 }
257
/* Dictionary grep callback: p is the struct grep_info accumulator.
   Records the hit via add_isam_p() and always returns 0. */
static int grep_handle(char *name, const char *info, void *p)
{
    struct grep_info *acc = (struct grep_info *) p;

    add_isam_p(name, info, acc);
    return 0;
}
263
264 static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src,
265                     const char *ct1, const char *ct2, int first)
266 {
267     const char *s1, *s0 = *src;
268     const char **map;
269
270     /* skip white space */
271     while (*s0)
272     {
273         if (ct1 && strchr(ct1, *s0))
274             break;
275         if (ct2 && strchr(ct2, *s0))
276             break;
277         s1 = s0;
278         map = zebra_maps_input(zebra_maps, reg_type, &s1, strlen(s1), first);
279         if (**map != *CHR_SPACE)
280             break;
281         s0 = s1;
282     }
283     *src = s0;
284     return *s0;
285 }
286
287
/* Render in_buf[0..in_size-1] into out_buf as a readable dump, one
   "HH:c  " group per byte (hex value, then the character or '?' when it
   is outside 32..126). out_size must exceed 20; once the output grows
   beyond out_size-20 characters, ".." is appended and the dump stops. */
static void esc_str(char *out_buf, int out_size,
                    const char *in_buf, int in_size)
{
    int idx;

    assert(out_buf);
    assert(in_buf);
    assert(out_size > 20);
    *out_buf = '\0';
    for (idx = 0; idx < in_size; idx++)
    {
        int byte = in_buf[idx] & 0xff;
        int shown = (byte >= 32 && byte <= 126) ? byte : '?';
        char *tail = out_buf + strlen(out_buf);

        sprintf(tail, "%02X:%c  ", byte, shown);
        if (strlen(out_buf) > out_size-20)
        {
            strcat(out_buf, "..");
            return;
        }
    }
}
313
314 #define REGEX_CHARS " []()|.*+?!"
315
316 /* term_100: handle term, where trunc = none(no operators at all) */
317 static int term_100(ZebraMaps zebra_maps, int reg_type,
318                     const char **src, char *dst, int space_split,
319                     char *dst_term)
320 {
321     const char *s0;
322     const char **map;
323     int i = 0;
324     int j = 0;
325
326     const char *space_start = 0;
327     const char *space_end = 0;
328
329     if (!term_pre(zebra_maps, reg_type, src, NULL, NULL, !space_split))
330         return 0;
331     s0 = *src;
332     while (*s0)
333     {
334         const char *s1 = s0;
335         int q_map_match = 0;
336         map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
337                                 &q_map_match);
338         if (space_split)
339         {
340             if (**map == *CHR_SPACE)
341                 break;
342         }
343         else  /* complete subfield only. */
344         {
345             if (**map == *CHR_SPACE)
346             {   /* save space mapping for later  .. */
347                 space_start = s1;
348                 space_end = s0;
349                 continue;
350             }
351             else if (space_start)
352             {   /* reload last space */
353                 while (space_start < space_end)
354                 {
355                     if (strchr(REGEX_CHARS, *space_start))
356                         dst[i++] = '\\';
357                     dst_term[j++] = *space_start;
358                     dst[i++] = *space_start++;
359                 }
360                 /* and reset */
361                 space_start = space_end = 0;
362             }
363         }
364         /* add non-space char */
365         memcpy(dst_term+j, s1, s0 - s1);
366         j += (s0 - s1);
367         if (!q_map_match)
368         {
369             while (s1 < s0)
370             {
371                 if (strchr(REGEX_CHARS, *s1))
372                     dst[i++] = '\\';
373                 dst[i++] = *s1++;
374             }
375         }
376         else
377         {
378             char tmpbuf[80];
379             esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
380             
381             strcpy(dst + i, map[0]);
382             i += strlen(map[0]);
383         }
384     }
385     dst[i] = '\0';
386     dst_term[j] = '\0';
387     *src = s0;
388     return i;
389 }
390
391 /* term_101: handle term, where trunc = Process # */
392 static int term_101(ZebraMaps zebra_maps, int reg_type,
393                     const char **src, char *dst, int space_split,
394                     char *dst_term)
395 {
396     const char *s0;
397     const char **map;
398     int i = 0;
399     int j = 0;
400
401     if (!term_pre(zebra_maps, reg_type, src, "#", "#", !space_split))
402         return 0;
403     s0 = *src;
404     while (*s0)
405     {
406         if (*s0 == '#')
407         {
408             dst[i++] = '.';
409             dst[i++] = '*';
410             dst_term[j++] = *s0++;
411         }
412         else
413         {
414             const char *s1 = s0;
415             int q_map_match = 0;
416             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
417                                     &q_map_match);
418             if (space_split && **map == *CHR_SPACE)
419                 break;
420
421             /* add non-space char */
422             memcpy(dst_term+j, s1, s0 - s1);
423             j += (s0 - s1);
424             if (!q_map_match)
425             {
426                 while (s1 < s0)
427                 {
428                     if (strchr(REGEX_CHARS, *s1))
429                         dst[i++] = '\\';
430                     dst[i++] = *s1++;
431                 }
432             }
433             else
434             {
435                 char tmpbuf[80];
436                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
437                 
438                 strcpy(dst + i, map[0]);
439                 i += strlen(map[0]);
440             }
441         }
442     }
443     dst[i] = '\0';
444     dst_term[j++] = '\0';
445     *src = s0;
446     return i;
447 }
448
449 /* term_103: handle term, where trunc = re-2 (regular expressions) */
450 static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src,
451                     char *dst, int *errors, int space_split,
452                     char *dst_term)
453 {
454     int i = 0;
455     int j = 0;
456     const char *s0;
457     const char **map;
458
459     if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split))
460         return 0;
461     s0 = *src;
462     if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
463         isdigit(((const unsigned char *)s0)[1]))
464     {
465         *errors = s0[1] - '0';
466         s0 += 3;
467         if (*errors > 3)
468             *errors = 3;
469     }
470     while (*s0)
471     {
472         if (strchr("^\\()[].*+?|-", *s0))
473         {
474             dst_term[j++] = *s0;
475             dst[i++] = *s0++;
476         }
477         else
478         {
479             const char *s1 = s0;
480             int q_map_match = 0;
481             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
482                                     &q_map_match);
483             if (space_split && **map == *CHR_SPACE)
484                 break;
485
486             /* add non-space char */
487             memcpy(dst_term+j, s1, s0 - s1);
488             j += (s0 - s1);
489             if (!q_map_match)
490             {
491                 while (s1 < s0)
492                 {
493                     if (strchr(REGEX_CHARS, *s1))
494                         dst[i++] = '\\';
495                     dst[i++] = *s1++;
496                 }
497             }
498             else
499             {
500                 char tmpbuf[80];
501                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
502                 
503                 strcpy(dst + i, map[0]);
504                 i += strlen(map[0]);
505             }
506         }
507     }
508     dst[i] = '\0';
509     dst_term[j] = '\0';
510     *src = s0;
511     
512     return i;
513 }
514
515 /* term_103: handle term, where trunc = re-1 (regular expressions) */
516 static int term_102(ZebraMaps zebra_maps, int reg_type, const char **src,
517                     char *dst, int space_split, char *dst_term)
518 {
519     return term_103(zebra_maps, reg_type, src, dst, NULL, space_split,
520                     dst_term);
521 }
522
523
524 /* term_104: handle term, where trunc = Process # and ! */
525 static int term_104(ZebraMaps zebra_maps, int reg_type,
526                     const char **src, char *dst, int space_split,
527                     char *dst_term)
528 {
529     const char *s0;
530     const char **map;
531     int i = 0;
532     int j = 0;
533
534     if (!term_pre(zebra_maps, reg_type, src, "?*#", "?*#", !space_split))
535         return 0;
536     s0 = *src;
537     while (*s0)
538     {
539         if (*s0 == '?')
540         {
541             dst_term[j++] = *s0++;
542             if (*s0 >= '0' && *s0 <= '9')
543             {
544                 int limit = 0;
545                 while (*s0 >= '0' && *s0 <= '9')
546                 {
547                     limit = limit * 10 + (*s0 - '0');
548                     dst_term[j++] = *s0++;
549                 }
550                 if (limit > 20)
551                     limit = 20;
552                 while (--limit >= 0)
553                 {
554                     dst[i++] = '.';
555                     dst[i++] = '?';
556                 }
557             }
558             else
559             {
560                 dst[i++] = '.';
561                 dst[i++] = '*';
562             }
563         }
564         else if (*s0 == '*')
565         {
566             dst[i++] = '.';
567             dst[i++] = '*';
568             dst_term[j++] = *s0++;
569         }
570         else if (*s0 == '#')
571         {
572             dst[i++] = '.';
573             dst_term[j++] = *s0++;
574         }
575         else
576         {
577             const char *s1 = s0;
578             int q_map_match = 0;
579             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
580                                     &q_map_match);
581             if (space_split && **map == *CHR_SPACE)
582                 break;
583
584             /* add non-space char */
585             memcpy(dst_term+j, s1, s0 - s1);
586             j += (s0 - s1);
587             if (!q_map_match)
588             {
589                 while (s1 < s0)
590                 {
591                     if (strchr(REGEX_CHARS, *s1))
592                         dst[i++] = '\\';
593                     dst[i++] = *s1++;
594                 }
595             }
596             else
597             {
598                 char tmpbuf[80];
599                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
600                 
601                 strcpy(dst + i, map[0]);
602                 i += strlen(map[0]);
603             }
604         }
605     }
606     dst[i] = '\0';
607     dst_term[j++] = '\0';
608     *src = s0;
609     return i;
610 }
611
612 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
613 static int term_105(ZebraMaps zebra_maps, int reg_type,
614                     const char **src, char *dst, int space_split,
615                     char *dst_term, int right_truncate)
616 {
617     const char *s0;
618     const char **map;
619     int i = 0;
620     int j = 0;
621
622     if (!term_pre(zebra_maps, reg_type, src, "*!", "*!", !space_split))
623         return 0;
624     s0 = *src;
625     while (*s0)
626     {
627         if (*s0 == '*')
628         {
629             dst[i++] = '.';
630             dst[i++] = '*';
631             dst_term[j++] = *s0++;
632         }
633         else if (*s0 == '!')
634         {
635             dst[i++] = '.';
636             dst_term[j++] = *s0++;
637         }
638         else
639         {
640             const char *s1 = s0;
641             int q_map_match = 0;
642             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
643                                     &q_map_match);
644             if (space_split && **map == *CHR_SPACE)
645                 break;
646
647             /* add non-space char */
648             memcpy(dst_term+j, s1, s0 - s1);
649             j += (s0 - s1);
650             if (!q_map_match)
651             {
652                 while (s1 < s0)
653                 {
654                     if (strchr(REGEX_CHARS, *s1))
655                         dst[i++] = '\\';
656                     dst[i++] = *s1++;
657                 }
658             }
659             else
660             {
661                 char tmpbuf[80];
662                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
663                 
664                 strcpy(dst + i, map[0]);
665                 i += strlen(map[0]);
666             }
667         }
668     }
669     if (right_truncate)
670     {
671         dst[i++] = '.';
672         dst[i++] = '*';
673     }
674     dst[i] = '\0';
675     
676     dst_term[j++] = '\0';
677     *src = s0;
678     return i;
679 }
680
681
682 /* gen_regular_rel - generate regular expression from relation
683  *  val:     border value (inclusive)
684  *  islt:    1 if <=; 0 if >=.
685  */
686 static void gen_regular_rel(char *dst, int val, int islt)
687 {
688     int dst_p;
689     int w, d, i;
690     int pos = 0;
691     char numstr[20];
692
693     yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
694     if (val >= 0)
695     {
696         if (islt)
697             strcpy(dst, "(-[0-9]+|(");
698         else
699             strcpy(dst, "((");
700     } 
701     else
702     {
703         if (!islt)
704         {
705             strcpy(dst, "([0-9]+|-(");
706             dst_p = strlen(dst);
707             islt = 1;
708         }
709         else
710         {
711             strcpy(dst, "(-(");
712             islt = 0;
713         }
714         val = -val;
715     }
716     dst_p = strlen(dst);
717     sprintf(numstr, "%d", val);
718     for (w = strlen(numstr); --w >= 0; pos++)
719     {
720         d = numstr[w];
721         if (pos > 0)
722         {
723             if (islt)
724             {
725                 if (d == '0')
726                     continue;
727                 d--;
728             } 
729             else
730             {
731                 if (d == '9')
732                     continue;
733                 d++;
734             }
735         }
736         
737         strcpy(dst + dst_p, numstr);
738         dst_p = strlen(dst) - pos - 1;
739
740         if (islt)
741         {
742             if (d != '0')
743             {
744                 dst[dst_p++] = '[';
745                 dst[dst_p++] = '0';
746                 dst[dst_p++] = '-';
747                 dst[dst_p++] = d;
748                 dst[dst_p++] = ']';
749             }
750             else
751                 dst[dst_p++] = d;
752         }
753         else
754         {
755             if (d != '9')
756             { 
757                 dst[dst_p++] = '[';
758                 dst[dst_p++] = d;
759                 dst[dst_p++] = '-';
760                 dst[dst_p++] = '9';
761                 dst[dst_p++] = ']';
762             }
763             else
764                 dst[dst_p++] = d;
765         }
766         for (i = 0; i<pos; i++)
767         {
768             dst[dst_p++] = '[';
769             dst[dst_p++] = '0';
770             dst[dst_p++] = '-';
771             dst[dst_p++] = '9';
772             dst[dst_p++] = ']';
773         }
774         dst[dst_p++] = '|';
775     }
776     dst[dst_p] = '\0';
777     if (islt)
778     {
779         /* match everything less than 10^(pos-1) */
780         strcat(dst, "0*");
781         for (i = 1; i<pos; i++)
782             strcat(dst, "[0-9]?");
783     }
784     else
785     {
786         /* match everything greater than 10^pos */
787         for (i = 0; i <= pos; i++)
788             strcat(dst, "[0-9]");
789         strcat(dst, "[0-9]*");
790     }
791     strcat(dst, "))");
792 }
793
/* Copy one component from src[*indx] to **term_p and advance both.
   A component is a single character, or a backslash together with the
   character that follows it. */
void string_rel_add_char(char **term_p, const char *src, int *indx)
{
    char *out = *term_p;
    int pos = *indx;

    if (src[pos] == '\\')
        *out++ = src[pos++];
    *out++ = src[pos++];

    *term_p = out;
    *indx = pos;
}
800
801 /*
802  *   >  abc     ([b-].*|a[c-].*|ab[d-].*|abc.+)
803  *              ([^-a].*|a[^-b].*ab[^-c].*|abc.+)
804  *   >= abc     ([b-].*|a[c-].*|ab[c-].*)
805  *              ([^-a].*|a[^-b].*|ab[c-].*)
806  *   <  abc     ([-0].*|a[-a].*|ab[-b].*)
807  *              ([^a-].*|a[^b-].*|ab[^c-].*)
808  *   <= abc     ([-0].*|a[-a].*|ab[-b].*|abc)
809  *              ([^a-].*|a[^b-].*|ab[^c-].*|abc)
810  */
811 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
812                            const char **term_sub, char *term_dict,
813                            oid_value attributeSet,
814                            int reg_type, int space_split, char *term_dst,
815                            int *error_code)
816 {
817     AttrType relation;
818     int relation_value;
819     int i;
820     char *term_tmp = term_dict + strlen(term_dict);
821     char term_component[2*IT_MAX_WORD+20];
822
823     attr_init(&relation, zapt, 2);
824     relation_value = attr_find(&relation, NULL);
825
826     *error_code = 0;
827     yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
828     switch (relation_value)
829     {
830     case 1:
831         if (!term_100(zh->reg->zebra_maps, reg_type,
832                       term_sub, term_component,
833                       space_split, term_dst))
834             return 0;
835         yaz_log(log_level_rpn, "Relation <");
836         
837         *term_tmp++ = '(';
838         for (i = 0; term_component[i]; )
839         {
840             int j = 0;
841
842             if (i)
843                 *term_tmp++ = '|';
844             while (j < i)
845                 string_rel_add_char(&term_tmp, term_component, &j);
846
847             *term_tmp++ = '[';
848
849             *term_tmp++ = '^';
850             string_rel_add_char(&term_tmp, term_component, &i);
851             *term_tmp++ = '-';
852
853             *term_tmp++ = ']';
854             *term_tmp++ = '.';
855             *term_tmp++ = '*';
856
857             if ((term_tmp - term_dict) > IT_MAX_WORD)
858                 break;
859         }
860         *term_tmp++ = ')';
861         *term_tmp = '\0';
862         break;
863     case 2:
864         if (!term_100(zh->reg->zebra_maps, reg_type,
865                       term_sub, term_component,
866                       space_split, term_dst))
867             return 0;
868         yaz_log(log_level_rpn, "Relation <=");
869
870         *term_tmp++ = '(';
871         for (i = 0; term_component[i]; )
872         {
873             int j = 0;
874
875             while (j < i)
876                 string_rel_add_char(&term_tmp, term_component, &j);
877             *term_tmp++ = '[';
878
879             *term_tmp++ = '^';
880             string_rel_add_char(&term_tmp, term_component, &i);
881             *term_tmp++ = '-';
882
883             *term_tmp++ = ']';
884             *term_tmp++ = '.';
885             *term_tmp++ = '*';
886
887             *term_tmp++ = '|';
888
889             if ((term_tmp - term_dict) > IT_MAX_WORD)
890                 break;
891         }
892         for (i = 0; term_component[i]; )
893             string_rel_add_char(&term_tmp, term_component, &i);
894         *term_tmp++ = ')';
895         *term_tmp = '\0';
896         break;
897     case 5:
898         if (!term_100 (zh->reg->zebra_maps, reg_type,
899                        term_sub, term_component, space_split, term_dst))
900             return 0;
901         yaz_log(log_level_rpn, "Relation >");
902
903         *term_tmp++ = '(';
904         for (i = 0; term_component[i];)
905         {
906             int j = 0;
907
908             while (j < i)
909                 string_rel_add_char(&term_tmp, term_component, &j);
910             *term_tmp++ = '[';
911             
912             *term_tmp++ = '^';
913             *term_tmp++ = '-';
914             string_rel_add_char(&term_tmp, term_component, &i);
915
916             *term_tmp++ = ']';
917             *term_tmp++ = '.';
918             *term_tmp++ = '*';
919
920             *term_tmp++ = '|';
921
922             if ((term_tmp - term_dict) > IT_MAX_WORD)
923                 break;
924         }
925         for (i = 0; term_component[i];)
926             string_rel_add_char(&term_tmp, term_component, &i);
927         *term_tmp++ = '.';
928         *term_tmp++ = '+';
929         *term_tmp++ = ')';
930         *term_tmp = '\0';
931         break;
932     case 4:
933         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
934                       term_component, space_split, term_dst))
935             return 0;
936         yaz_log(log_level_rpn, "Relation >=");
937
938         *term_tmp++ = '(';
939         for (i = 0; term_component[i];)
940         {
941             int j = 0;
942
943             if (i)
944                 *term_tmp++ = '|';
945             while (j < i)
946                 string_rel_add_char(&term_tmp, term_component, &j);
947             *term_tmp++ = '[';
948
949             if (term_component[i+1])
950             {
951                 *term_tmp++ = '^';
952                 *term_tmp++ = '-';
953                 string_rel_add_char(&term_tmp, term_component, &i);
954             }
955             else
956             {
957                 string_rel_add_char(&term_tmp, term_component, &i);
958                 *term_tmp++ = '-';
959             }
960             *term_tmp++ = ']';
961             *term_tmp++ = '.';
962             *term_tmp++ = '*';
963
964             if ((term_tmp - term_dict) > IT_MAX_WORD)
965                 break;
966         }
967         *term_tmp++ = ')';
968         *term_tmp = '\0';
969         break;
970     case 3:
971     case 102:
972     case -1:
973         yaz_log(log_level_rpn, "Relation =");
974         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
975                       term_component, space_split, term_dst))
976             return 0;
977         strcat(term_tmp, "(");
978         strcat(term_tmp, term_component);
979         strcat(term_tmp, ")");
980         break;
981     default:
982         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
983         return 0;
984     }
985     return 1;
986 }
987
988 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
989                              const char **term_sub, 
990                              oid_value attributeSet, NMEM stream,
991                              struct grep_info *grep_info,
992                              int reg_type, int complete_flag,
993                              int num_bases, char **basenames,
994                              char *term_dst, int xpath_use,
995                              struct ord_list **ol);
996
997 static ZEBRA_RES term_limits_APT(ZebraHandle zh,
998                                  Z_AttributesPlusTerm *zapt,
999                                  zint *hits_limit_value,
1000                                  const char **term_ref_id_str,
1001                                  NMEM nmem)
1002 {
1003     AttrType term_ref_id_attr;
1004     AttrType hits_limit_attr;
1005     int term_ref_id_int;
1006  
1007     attr_init(&hits_limit_attr, zapt, 9);
1008     *hits_limit_value  = attr_find(&hits_limit_attr, NULL);
1009
1010     attr_init(&term_ref_id_attr, zapt, 10);
1011     term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
1012     if (term_ref_id_int >= 0)
1013     {
1014         char *res = nmem_malloc(nmem, 20);
1015         sprintf(res, "%d", term_ref_id_int);
1016         *term_ref_id_str = res;
1017     }
1018
1019     /* no limit given ? */
1020     if (*hits_limit_value == -1)
1021     {
1022         if (*term_ref_id_str)
1023         {
1024             /* use global if term_ref is present */
1025             *hits_limit_value = zh->approx_limit;
1026         }
1027         else
1028         {
1029             /* no counting if term_ref is not present */
1030             *hits_limit_value = 0;
1031         }
1032     }
1033     else if (*hits_limit_value == 0)
1034     {
1035         /* 0 is the same as global limit */
1036         *hits_limit_value = zh->approx_limit;
1037     }
1038     yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
1039             *term_ref_id_str ? *term_ref_id_str : "none",
1040             *hits_limit_value);
1041     return ZEBRA_OK;
1042 }
1043
1044 static ZEBRA_RES term_trunc(ZebraHandle zh,
1045                             Z_AttributesPlusTerm *zapt,
1046                             const char **term_sub, 
1047                             oid_value attributeSet, NMEM stream,
1048                             struct grep_info *grep_info,
1049                             int reg_type, int complete_flag,
1050                             int num_bases, char **basenames,
1051                             char *term_dst,
1052                             const char *rank_type, int xpath_use,
1053                             NMEM rset_nmem,
1054                             RSET *rset,
1055                             struct rset_key_control *kc)
1056 {
1057     ZEBRA_RES res;
1058     struct ord_list *ol;
1059     zint hits_limit_value;
1060     const char *term_ref_id_str = 0;
1061     *rset = 0;
1062
1063     term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
1064                     stream);
1065     grep_info->isam_p_indx = 0;
1066     res = string_term(zh, zapt, term_sub, attributeSet, stream, grep_info,
1067                       reg_type, complete_flag, num_bases, basenames,
1068                       term_dst, xpath_use, &ol);
1069     if (res != ZEBRA_OK)
1070         return res;
1071     if (!*term_sub)  /* no more terms ? */
1072         return res;
1073     yaz_log(log_level_rpn, "term: %s", term_dst);
1074     *rset = rset_trunc(zh, grep_info->isam_p_buf,
1075                        grep_info->isam_p_indx, term_dst,
1076                        strlen(term_dst), rank_type, 1 /* preserve pos */,
1077                        zapt->term->which, rset_nmem,
1078                        kc, kc->scope, ol, reg_type, hits_limit_value,
1079                        term_ref_id_str);
1080     if (!*rset)
1081         return ZEBRA_FAIL;
1082     return ZEBRA_OK;
1083 }
1084
1085 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1086                              const char **term_sub, 
1087                              oid_value attributeSet, NMEM stream,
1088                              struct grep_info *grep_info,
1089                              int reg_type, int complete_flag,
1090                              int num_bases, char **basenames,
1091                              char *term_dst, int xpath_use,
1092                              struct ord_list **ol)
1093 {
1094     char term_dict[2*IT_MAX_WORD+4000];
1095     int j, r, base_no;
1096     AttrType truncation;
1097     int truncation_value;
1098     AttrType use;
1099     int use_value;
1100     const char *use_string = 0;
1101     oid_value curAttributeSet = attributeSet;
1102     const char *termp;
1103     struct rpn_char_map_info rcmi;
1104     int space_split = complete_flag ? 0 : 1;
1105
1106     int bases_ok = 0;     /* no of databases with OK attribute */
1107
1108     *ol = ord_list_create(stream);
1109
1110     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1111     attr_init(&use, zapt, 1);
1112     use_value = attr_find_ex(&use, &curAttributeSet, &use_string);
1113     yaz_log(log_level_rpn, "string_term, use value %d", use_value);
1114     attr_init(&truncation, zapt, 5);
1115     truncation_value = attr_find(&truncation, NULL);
1116     yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1117
1118     if (use_value == -1)    /* no attribute - assumy "any" */
1119         use_value = 1016;
1120     for (base_no = 0; base_no < num_bases; base_no++)
1121     {
1122         int ord = -1;
1123         int attr_ok = 0;
1124         int regex_range = 0;
1125         int init_pos = 0;
1126         attent attp;
1127         data1_local_attribute id_xpath_attr;
1128         data1_local_attribute *local_attr;
1129         int max_pos, prefix_len = 0;
1130         int relation_error;
1131
1132         termp = *term_sub;
1133
1134         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1135         {
1136             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1137                            basenames[base_no]);
1138             return ZEBRA_FAIL;
1139         }
1140         if (xpath_use > 0 && use_value == -2) 
1141         {
1142             /* xpath mode and we have a string attribute */
1143             attp.local_attributes = &id_xpath_attr;
1144             attp.attset_ordinal = VAL_IDXPATH;
1145             id_xpath_attr.next = 0;
1146
1147             use_value = xpath_use;  /* xpath_use as use-attribute now */
1148             id_xpath_attr.local = use_value;
1149         }
1150         else if (curAttributeSet == VAL_IDXPATH && use_value >= 0)
1151         {
1152             /* X-Path attribute, use numeric value directly */
1153             attp.local_attributes = &id_xpath_attr;
1154             attp.attset_ordinal = VAL_IDXPATH;
1155             id_xpath_attr.next = 0;
1156             id_xpath_attr.local = use_value;
1157         }
1158         else if (use_string &&
1159                  (ord = zebraExplain_lookup_attr_str(zh->reg->zei,
1160                                                      reg_type,
1161                                                      use_string)) >= 0)
1162         {
1163             /* we have a match for a raw string attribute */
1164             char ord_buf[32];
1165             int i, ord_len;
1166
1167             if (prefix_len)
1168                 term_dict[prefix_len++] = '|';
1169             else
1170                 term_dict[prefix_len++] = '(';
1171             
1172             ord_len = key_SU_encode (ord, ord_buf);
1173             for (i = 0; i<ord_len; i++)
1174             {
1175                 term_dict[prefix_len++] = 1;
1176                 term_dict[prefix_len++] = ord_buf[i];
1177             }
1178             attp.local_attributes = 0;  /* no more attributes */
1179             *ol = ord_list_append(stream, *ol, ord);
1180         }
1181         else 
1182         {
1183             /* lookup in the .att files . Allow string as well */
1184             if ((r = att_getentbyatt (zh, &attp, curAttributeSet, use_value,
1185                                       use_string)))
1186             {
1187                 yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
1188                         curAttributeSet, use_value, r);
1189                 if (r == -1)
1190                 {
1191                     /* set was found, but value wasn't defined */
1192                     if (use_string)
1193                         zebra_setError(zh, 
1194                                        YAZ_BIB1_UNSUPP_USE_ATTRIBUTE,
1195                                        use_string);
1196                     else
1197                         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_USE_ATTRIBUTE, 
1198                                             use_value);
1199                 }
1200                 else
1201                 {
1202                     int oid[OID_SIZE];
1203                     struct oident oident;
1204                     
1205                     oident.proto = PROTO_Z3950;
1206                     oident.oclass = CLASS_ATTSET;
1207                     oident.value = curAttributeSet;
1208                     oid_ent_to_oid (&oident, oid);
1209                     
1210                     zebra_setError(zh, 
1211                                    YAZ_BIB1_UNSUPP_ATTRIBUTE_SET,
1212                                    oident.desc);
1213                     
1214                 }
1215                 continue;
1216             }
1217         }
1218         for (local_attr = attp.local_attributes; local_attr;
1219              local_attr = local_attr->next)
1220         {
1221             char ord_buf[32];
1222             int i, ord_len;
1223             
1224             ord = zebraExplain_lookup_attr_su(zh->reg->zei,
1225                                               reg_type,
1226                                               attp.attset_ordinal,
1227                                               local_attr->local);
1228             if (ord < 0)
1229                 continue;
1230             *ol = ord_list_append(stream, *ol, ord);
1231             if (prefix_len)
1232                 term_dict[prefix_len++] = '|';
1233             else
1234                 term_dict[prefix_len++] = '(';
1235             
1236             ord_len = key_SU_encode (ord, ord_buf);
1237             for (i = 0; i<ord_len; i++)
1238             {
1239                 term_dict[prefix_len++] = 1;
1240                 term_dict[prefix_len++] = ord_buf[i];
1241             }
1242         }
1243         bases_ok++;
1244         if (prefix_len)
1245             attr_ok = 1;
1246
1247         term_dict[prefix_len++] = ')';
1248         term_dict[prefix_len] = '\0';
1249         j = prefix_len;
1250         switch (truncation_value)
1251         {
1252         case -1:         /* not specified */
1253         case 100:        /* do not truncate */
1254             if (!string_relation (zh, zapt, &termp, term_dict,
1255                                   attributeSet,
1256                                   reg_type, space_split, term_dst,
1257                                   &relation_error))
1258             {
1259                 if (relation_error)
1260                 {
1261                     zebra_setError(zh, relation_error, 0);
1262                     return ZEBRA_FAIL;
1263                 }
1264                 *term_sub = 0;
1265                 return ZEBRA_OK;
1266             }
1267             break;
1268         case 1:          /* right truncation */
1269             term_dict[j++] = '(';
1270             if (!term_100(zh->reg->zebra_maps, reg_type,
1271                           &termp, term_dict + j, space_split, term_dst))
1272             {
1273                 *term_sub = 0;
1274                 return ZEBRA_OK;
1275             }
1276             strcat(term_dict, ".*)");
1277             break;
1278         case 2:          /* keft truncation */
1279             term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1280             if (!term_100(zh->reg->zebra_maps, reg_type,
1281                           &termp, term_dict + j, space_split, term_dst))
1282             {
1283                 *term_sub = 0;
1284                 return ZEBRA_OK;
1285             }
1286             strcat(term_dict, ")");
1287             break;
1288         case 3:          /* left&right truncation */
1289             term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1290             if (!term_100(zh->reg->zebra_maps, reg_type,
1291                           &termp, term_dict + j, space_split, term_dst))
1292             {
1293                 *term_sub = 0;
1294                 return ZEBRA_OK;
1295             }
1296             strcat(term_dict, ".*)");
1297             break;
1298         case 101:        /* process # in term */
1299             term_dict[j++] = '(';
1300             if (!term_101(zh->reg->zebra_maps, reg_type,
1301                           &termp, term_dict + j, space_split, term_dst))
1302             {
1303                 *term_sub = 0;
1304                 return ZEBRA_OK;
1305             }
1306             strcat(term_dict, ")");
1307             break;
1308         case 102:        /* Regexp-1 */
1309             term_dict[j++] = '(';
1310             if (!term_102(zh->reg->zebra_maps, reg_type,
1311                           &termp, term_dict + j, space_split, term_dst))
1312             {
1313                 *term_sub = 0;
1314                 return ZEBRA_OK;
1315             }
1316             strcat(term_dict, ")");
1317             break;
1318         case 103:       /* Regexp-2 */
1319             regex_range = 1;
1320             term_dict[j++] = '(';
1321             init_pos = 2;
1322             if (!term_103(zh->reg->zebra_maps, reg_type,
1323                           &termp, term_dict + j, &regex_range,
1324                           space_split, term_dst))
1325             {
1326                 *term_sub = 0;
1327                 return ZEBRA_OK;
1328             }
1329             strcat(term_dict, ")");
1330             break;
1331         case 104:        /* process # and ! in term */
1332             term_dict[j++] = '(';
1333             if (!term_104(zh->reg->zebra_maps, reg_type,
1334                           &termp, term_dict + j, space_split, term_dst))
1335             {
1336                 *term_sub = 0;
1337                 return ZEBRA_OK;
1338             }
1339             strcat(term_dict, ")");
1340             break;
1341         case 105:        /* process * and ! in term */
1342             term_dict[j++] = '(';
1343             if (!term_105(zh->reg->zebra_maps, reg_type,
1344                           &termp, term_dict + j, space_split, term_dst, 1))
1345             {
1346                 *term_sub = 0;
1347                 return ZEBRA_OK;
1348             }
1349             strcat(term_dict, ")");
1350             break;
1351         case 106:        /* process * and ! in term */
1352             term_dict[j++] = '(';
1353             if (!term_105(zh->reg->zebra_maps, reg_type,
1354                           &termp, term_dict + j, space_split, term_dst, 0))
1355             {
1356                 *term_sub = 0;
1357                 return ZEBRA_OK;
1358             }
1359             strcat(term_dict, ")");
1360             break;
1361         default:
1362             zebra_setError_zint(zh,
1363                                 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1364                                 truncation_value);
1365             return ZEBRA_FAIL;
1366         }
1367         if (attr_ok)
1368         {
1369             char buf[80];
1370             const char *input = term_dict + prefix_len;
1371             esc_str(buf, sizeof(buf), input, strlen(input));
1372         }
1373         if (attr_ok)
1374         {
1375             yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_dict+prefix_len);
1376             r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
1377                                  grep_info, &max_pos, init_pos,
1378                                  grep_handle);
1379             if (r)
1380                 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1381         }
1382     }
1383     if (!bases_ok)
1384         return ZEBRA_FAIL;
1385     *term_sub = termp;
1386     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1387     return ZEBRA_OK;
1388 }
1389
1390
1391 /* convert APT search term to UTF8 */
1392 static ZEBRA_RES zapt_term_to_utf8(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1393                                    char *termz)
1394 {
1395     size_t sizez;
1396     Z_Term *term = zapt->term;
1397
1398     switch (term->which)
1399     {
1400     case Z_Term_general:
1401         if (zh->iconv_to_utf8 != 0)
1402         {
1403             char *inbuf = (char *) term->u.general->buf;
1404             size_t inleft = term->u.general->len;
1405             char *outbuf = termz;
1406             size_t outleft = IT_MAX_WORD-1;
1407             size_t ret;
1408
1409             ret = yaz_iconv(zh->iconv_to_utf8, &inbuf, &inleft,
1410                         &outbuf, &outleft);
1411             if (ret == (size_t)(-1))
1412             {
1413                 ret = yaz_iconv(zh->iconv_to_utf8, 0, 0, 0, 0);
1414                 zebra_setError(
1415                     zh, 
1416                     YAZ_BIB1_QUERY_TERM_INCLUDES_CHARS_THAT_DO_NOT_TRANSLATE_INTO_,
1417                     0);
1418                 return ZEBRA_FAIL;
1419             }
1420             *outbuf = 0;
1421         }
1422         else
1423         {
1424             sizez = term->u.general->len;
1425             if (sizez > IT_MAX_WORD-1)
1426                 sizez = IT_MAX_WORD-1;
1427             memcpy (termz, term->u.general->buf, sizez);
1428             termz[sizez] = '\0';
1429         }
1430         break;
1431     case Z_Term_characterString:
1432         sizez = strlen(term->u.characterString);
1433         if (sizez > IT_MAX_WORD-1)
1434             sizez = IT_MAX_WORD-1;
1435         memcpy (termz, term->u.characterString, sizez);
1436         termz[sizez] = '\0';
1437         break;
1438     default:
1439         zebra_setError(zh, YAZ_BIB1_UNSUPP_CODED_VALUE_FOR_TERM, 0);
1440         return ZEBRA_FAIL;
1441     }
1442     return ZEBRA_OK;
1443 }
1444
1445 /* convert APT SCAN term to internal cmap */
1446 static ZEBRA_RES trans_scan_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1447                                  char *termz, int reg_type)
1448 {
1449     char termz0[IT_MAX_WORD];
1450
1451     if (zapt_term_to_utf8(zh, zapt, termz0) == ZEBRA_FAIL)
1452         return ZEBRA_FAIL;    /* error */
1453     else
1454     {
1455         const char **map;
1456         const char *cp = (const char *) termz0;
1457         const char *cp_end = cp + strlen(cp);
1458         const char *src;
1459         int i = 0;
1460         const char *space_map = NULL;
1461         int len;
1462             
1463         while ((len = (cp_end - cp)) > 0)
1464         {
1465             map = zebra_maps_input(zh->reg->zebra_maps, reg_type, &cp, len, 0);
1466             if (**map == *CHR_SPACE)
1467                 space_map = *map;
1468             else
1469             {
1470                 if (i && space_map)
1471                     for (src = space_map; *src; src++)
1472                         termz[i++] = *src;
1473                 space_map = NULL;
1474                 for (src = *map; *src; src++)
1475                     termz[i++] = *src;
1476             }
1477         }
1478         termz[i] = '\0';
1479     }
1480     return ZEBRA_OK;
1481 }
1482
1483 static void grep_info_delete(struct grep_info *grep_info)
1484 {
1485 #ifdef TERM_COUNT
1486     xfree(grep_info->term_no);
1487 #endif
1488     xfree(grep_info->isam_p_buf);
1489 }
1490
1491 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1492                                    Z_AttributesPlusTerm *zapt,
1493                                    struct grep_info *grep_info,
1494                                    int reg_type)
1495 {
1496     AttrType termset;
1497     int termset_value_numeric;
1498     const char *termset_value_string;
1499
1500 #ifdef TERM_COUNT
1501     grep_info->term_no = 0;
1502 #endif
1503     grep_info->isam_p_size = 0;
1504     grep_info->isam_p_buf = NULL;
1505     grep_info->zh = zh;
1506     grep_info->reg_type = reg_type;
1507     grep_info->termset = 0;
1508
1509     if (!zapt)
1510         return ZEBRA_OK;
1511     attr_init(&termset, zapt, 8);
1512     termset_value_numeric =
1513         attr_find_ex(&termset, NULL, &termset_value_string);
1514     if (termset_value_numeric != -1)
1515     {
1516         char resname[32];
1517         const char *termset_name = 0;
1518         if (termset_value_numeric != -2)
1519         {
1520     
1521             sprintf(resname, "%d", termset_value_numeric);
1522             termset_name = resname;
1523         }
1524         else
1525             termset_name = termset_value_string;
1526         yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1527         grep_info->termset = resultSetAdd(zh, termset_name, 1);
1528         if (!grep_info->termset)
1529         {
1530             zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1531             return ZEBRA_FAIL;
1532         }
1533     }
1534     return ZEBRA_OK;
1535 }
1536                                
1537 /**
1538   \brief Create result set(s) for list of terms
1539   \param zh Zebra Handle
1540   \param termz term as used in query but converted to UTF-8
1541   \param attributeSet default attribute set
1542   \param stream memory for result
1543   \param reg_type register type ('w', 'p',..)
1544   \param complete_flag whether it's phrases or not
1545   \param rank_type term flags for ranking
1546   \param xpath_use use attribute for X-Path (-1 for no X-path)
1547   \param num_bases number of databases
1548   \param basenames array of databases
1549   \param rset_mem memory for result sets
1550   \param result_sets output result set for each term in list (output)
1551   \param number number of output result sets
1552   \param kc rset key control to be used for created result sets
1553 */
1554 static ZEBRA_RES term_list_trunc(ZebraHandle zh,
1555                                  Z_AttributesPlusTerm *zapt,
1556                                  const char *termz,
1557                                  oid_value attributeSet,
1558                                  NMEM stream,
1559                                  int reg_type, int complete_flag,
1560                                  const char *rank_type, int xpath_use,
1561                                  int num_bases, char **basenames, 
1562                                  NMEM rset_nmem,
1563                                  RSET **result_sets, int *num_result_sets,
1564                                  struct rset_key_control *kc)
1565 {
1566     char term_dst[IT_MAX_WORD+1];
1567     struct grep_info grep_info;
1568     const char *termp = termz;
1569     int alloc_sets = 0;
1570
1571     *num_result_sets = 0;
1572     *term_dst = 0;
1573     if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1574         return ZEBRA_FAIL;
1575     while(1)
1576     { 
1577         ZEBRA_RES res;
1578
1579         if (alloc_sets == *num_result_sets)
1580         {
1581             int add = 10;
1582             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1583                                               sizeof(*rnew));
1584             if (alloc_sets)
1585                 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1586             alloc_sets = alloc_sets + add;
1587             *result_sets = rnew;
1588         }
1589         res = term_trunc(zh, zapt, &termp, attributeSet,
1590                          stream, &grep_info,
1591                          reg_type, complete_flag,
1592                          num_bases, basenames,
1593                          term_dst, rank_type,
1594                          xpath_use, rset_nmem,
1595                          &(*result_sets)[*num_result_sets],
1596                          kc);
1597         if (res != ZEBRA_OK)
1598         {
1599             int i;
1600             for (i = 0; i < *num_result_sets; i++)
1601                 rset_delete((*result_sets)[i]);
1602             grep_info_delete (&grep_info);
1603             return res;
1604         }
1605         if ((*result_sets)[*num_result_sets] == 0)
1606             break;
1607         (*num_result_sets)++;
1608     }
1609     grep_info_delete(&grep_info);
1610     return ZEBRA_OK;
1611 }
1612
1613 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1614                                        Z_AttributesPlusTerm *zapt,
1615                                        const char *termz_org,
1616                                        oid_value attributeSet,
1617                                        NMEM stream,
1618                                        int reg_type, int complete_flag,
1619                                        const char *rank_type, int xpath_use,
1620                                        int num_bases, char **basenames, 
1621                                        NMEM rset_nmem,
1622                                        RSET *rset,
1623                                        struct rset_key_control *kc)
1624 {
1625     RSET *result_sets = 0;
1626     int num_result_sets = 0;
1627     ZEBRA_RES res =
1628         term_list_trunc(zh, zapt, termz_org, attributeSet,
1629                         stream, reg_type, complete_flag,
1630                         rank_type, xpath_use,
1631                         num_bases, basenames,
1632                         rset_nmem,
1633                         &result_sets, &num_result_sets, kc);
1634     if (res != ZEBRA_OK)
1635         return res;
1636     if (num_result_sets == 0)
1637         *rset = rsnull_create (rset_nmem, kc, 0); 
1638     else if (num_result_sets == 1)
1639         *rset = result_sets[0];
1640     else
1641         *rset = rsprox_create(rset_nmem, kc, kc->scope,
1642                               num_result_sets, result_sets,
1643                               1 /* ordered */, 0 /* exclusion */,
1644                               3 /* relation */, 1 /* distance */);
1645     if (!*rset)
1646         return ZEBRA_FAIL;
1647     return ZEBRA_OK;
1648 }
1649
1650 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1651                                         Z_AttributesPlusTerm *zapt,
1652                                         const char *termz_org,
1653                                         oid_value attributeSet,
1654                                         NMEM stream,
1655                                         int reg_type, int complete_flag,
1656                                         const char *rank_type,
1657                                         int xpath_use,
1658                                         int num_bases, char **basenames,
1659                                         NMEM rset_nmem,
1660                                         RSET *rset,
1661                                         struct rset_key_control *kc)
1662 {
1663     RSET *result_sets = 0;
1664     int num_result_sets = 0;
1665     ZEBRA_RES res =
1666         term_list_trunc(zh, zapt, termz_org, attributeSet,
1667                         stream, reg_type, complete_flag,
1668                         rank_type, xpath_use,
1669                         num_bases, basenames,
1670                         rset_nmem,
1671                         &result_sets, &num_result_sets, kc);
1672     if (res != ZEBRA_OK)
1673         return res;
1674     if (num_result_sets == 0)
1675         *rset = rsnull_create (rset_nmem, kc, 0); 
1676     else if (num_result_sets == 1)
1677         *rset = result_sets[0];
1678     else
1679         *rset = rsmulti_or_create(rset_nmem, kc, kc->scope, 0 /* termid */,
1680                                   num_result_sets, result_sets);
1681     if (!*rset)
1682         return ZEBRA_FAIL;
1683     return ZEBRA_OK;
1684 }
1685
1686 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1687                                          Z_AttributesPlusTerm *zapt,
1688                                          const char *termz_org,
1689                                          oid_value attributeSet,
1690                                          NMEM stream,
1691                                          int reg_type, int complete_flag,
1692                                          const char *rank_type, 
1693                                          int xpath_use,
1694                                          int num_bases, char **basenames,
1695                                          NMEM rset_nmem,
1696                                          RSET *rset,
1697                                          struct rset_key_control *kc)
1698 {
1699     RSET *result_sets = 0;
1700     int num_result_sets = 0;
1701     ZEBRA_RES res =
1702         term_list_trunc(zh, zapt, termz_org, attributeSet,
1703                         stream, reg_type, complete_flag,
1704                         rank_type, xpath_use,
1705                         num_bases, basenames,
1706                         rset_nmem,
1707                         &result_sets, &num_result_sets,
1708                         kc);
1709     if (res != ZEBRA_OK)
1710         return res;
1711     if (num_result_sets == 0)
1712         *rset = rsnull_create (rset_nmem, kc, 0); 
1713     else if (num_result_sets == 1)
1714         *rset = result_sets[0];
1715     else
1716         *rset = rsmulti_and_create(rset_nmem, kc, kc->scope,
1717                                    num_result_sets, result_sets);
1718     if (!*rset)
1719         return ZEBRA_FAIL;
1720     return ZEBRA_OK;
1721 }
1722
1723 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1724                             const char **term_sub,
1725                             char *term_dict,
1726                             oid_value attributeSet,
1727                             struct grep_info *grep_info,
1728                             int *max_pos,
1729                             int reg_type,
1730                             char *term_dst,
1731                             int *error_code)
1732 {
1733     AttrType relation;
1734     int relation_value;
1735     int term_value;
1736     int r;
1737     char *term_tmp = term_dict + strlen(term_dict);
1738
1739     *error_code = 0;
1740     attr_init(&relation, zapt, 2);
1741     relation_value = attr_find(&relation, NULL);
1742
1743     yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1744
1745     if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1746                   term_dst))
1747         return 0;
1748     term_value = atoi (term_tmp);
1749     switch (relation_value)
1750     {
1751     case 1:
1752         yaz_log(log_level_rpn, "Relation <");
1753         gen_regular_rel(term_tmp, term_value-1, 1);
1754         break;
1755     case 2:
1756         yaz_log(log_level_rpn, "Relation <=");
1757         gen_regular_rel(term_tmp, term_value, 1);
1758         break;
1759     case 4:
1760         yaz_log(log_level_rpn, "Relation >=");
1761         gen_regular_rel(term_tmp, term_value, 0);
1762         break;
1763     case 5:
1764         yaz_log(log_level_rpn, "Relation >");
1765         gen_regular_rel(term_tmp, term_value+1, 0);
1766         break;
1767     case -1:
1768     case 3:
1769         yaz_log(log_level_rpn, "Relation =");
1770         sprintf(term_tmp, "(0*%d)", term_value);
1771         break;
1772     default:
1773         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1774         return 0;
1775     }
1776     yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_tmp);
1777     r = dict_lookup_grep(zh->reg->dict, term_dict, 0, grep_info, max_pos,
1778                           0, grep_handle);
1779     if (r)
1780         yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1781     yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1782     return 1;
1783 }
1784
1785 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1786                               const char **term_sub, 
1787                               oid_value attributeSet,
1788                               struct grep_info *grep_info,
1789                               int reg_type, int complete_flag,
1790                               int num_bases, char **basenames,
1791                               char *term_dst, int xpath_use, NMEM stream)
1792 {
1793     char term_dict[2*IT_MAX_WORD+2];
1794     int r, base_no;
1795     AttrType use;
1796     int use_value;
1797     const char *use_string = 0;
1798     oid_value curAttributeSet = attributeSet;
1799     const char *termp;
1800     struct rpn_char_map_info rcmi;
1801
1802     int bases_ok = 0;     /* no of databases with OK attribute */
1803
1804     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1805     attr_init(&use, zapt, 1);
1806     use_value = attr_find_ex(&use, &curAttributeSet, &use_string);
1807
1808     if (use_value == -1)
1809         use_value = 1016;
1810
1811     for (base_no = 0; base_no < num_bases; base_no++)
1812     {
1813         attent attp;
1814         data1_local_attribute id_xpath_attr;
1815         data1_local_attribute *local_attr;
1816         int max_pos, prefix_len = 0;
1817         int relation_error = 0;
1818
1819         termp = *term_sub;
1820         if (use_value == -2)  /* string attribute (assume IDXPATH/any) */
1821         {
1822             use_value = xpath_use;
1823             attp.local_attributes = &id_xpath_attr;
1824             attp.attset_ordinal = VAL_IDXPATH;
1825             id_xpath_attr.next = 0;
1826             id_xpath_attr.local = use_value;
1827         }
1828         else if (curAttributeSet == VAL_IDXPATH)
1829         {
1830             attp.local_attributes = &id_xpath_attr;
1831             attp.attset_ordinal = VAL_IDXPATH;
1832             id_xpath_attr.next = 0;
1833             id_xpath_attr.local = use_value;
1834         }
1835         else
1836         {
1837             if ((r = att_getentbyatt (zh, &attp, curAttributeSet, use_value,
1838                                       use_string)))
1839             {
1840                 yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
1841                       curAttributeSet, use_value, r);
1842                 if (r == -1)
1843                 {
1844                     if (use_string)
1845                         zebra_setError(zh, 
1846                                        YAZ_BIB1_UNSUPP_USE_ATTRIBUTE,
1847                                        use_string);
1848                     else
1849                         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_USE_ATTRIBUTE, 
1850                                             use_value);
1851                 }
1852                 else
1853                     zebra_setError(zh, YAZ_BIB1_UNSUPP_ATTRIBUTE_SET, 0);
1854                 continue;
1855             }
1856         }
1857         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1858         {
1859             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1860                            basenames[base_no]);
1861             return ZEBRA_FAIL;
1862         }
1863         for (local_attr = attp.local_attributes; local_attr;
1864              local_attr = local_attr->next)
1865         {
1866             int ord;
1867             char ord_buf[32];
1868             int i, ord_len;
1869
1870             ord = zebraExplain_lookup_attr_su(zh->reg->zei,
1871                                               reg_type,
1872                                               attp.attset_ordinal,
1873                                               local_attr->local);
1874             if (ord < 0)
1875                 continue;
1876             if (prefix_len)
1877                 term_dict[prefix_len++] = '|';
1878             else
1879                 term_dict[prefix_len++] = '(';
1880
1881             ord_len = key_SU_encode (ord, ord_buf);
1882             for (i = 0; i<ord_len; i++)
1883             {
1884                 term_dict[prefix_len++] = 1;
1885                 term_dict[prefix_len++] = ord_buf[i];
1886             }
1887         }
1888         if (!prefix_len)
1889         {
1890             zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_USE_ATTRIBUTE, use_value);
1891             continue;
1892         }
1893         bases_ok++;
1894         term_dict[prefix_len++] = ')';
1895         term_dict[prefix_len] = '\0';
1896         if (!numeric_relation(zh, zapt, &termp, term_dict,
1897                               attributeSet, grep_info, &max_pos, reg_type,
1898                               term_dst, &relation_error))
1899         {
1900             if (relation_error)
1901             {
1902                 zebra_setError(zh, relation_error, 0);
1903                 return ZEBRA_FAIL;
1904             }
1905             *term_sub = 0;
1906             return ZEBRA_OK;
1907         }
1908     }
1909     if (!bases_ok)
1910         return ZEBRA_FAIL;
1911     *term_sub = termp;
1912     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1913     return ZEBRA_OK;
1914 }
1915
1916                                  
1917 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1918                                         Z_AttributesPlusTerm *zapt,
1919                                         const char *termz,
1920                                         oid_value attributeSet,
1921                                         NMEM stream,
1922                                         int reg_type, int complete_flag,
1923                                         const char *rank_type, int xpath_use,
1924                                         int num_bases, char **basenames,
1925                                         NMEM rset_nmem,
1926                                         RSET *rset,
1927                                         struct rset_key_control *kc)
1928 {
1929     char term_dst[IT_MAX_WORD+1];
1930     const char *termp = termz;
1931     RSET *result_sets = 0;
1932     int num_result_sets = 0;
1933     ZEBRA_RES res;
1934     struct grep_info grep_info;
1935     int alloc_sets = 0;
1936     zint hits_limit_value;
1937     const char *term_ref_id_str = 0;
1938
1939     term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
1940
1941     yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1942     if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1943         return ZEBRA_FAIL;
1944     while (1)
1945     { 
1946         if (alloc_sets == num_result_sets)
1947         {
1948             int add = 10;
1949             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1950                                               sizeof(*rnew));
1951             if (alloc_sets)
1952                 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1953             alloc_sets = alloc_sets + add;
1954             result_sets = rnew;
1955         }
1956         yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1957         grep_info.isam_p_indx = 0;
1958         res = numeric_term(zh, zapt, &termp, attributeSet, &grep_info,
1959                            reg_type, complete_flag, num_bases, basenames,
1960                            term_dst, xpath_use,
1961                            stream);
1962         if (res == ZEBRA_FAIL || termp == 0)
1963             break;
1964         yaz_log(YLOG_DEBUG, "term: %s", term_dst);
1965         result_sets[num_result_sets] =
1966             rset_trunc(zh, grep_info.isam_p_buf,
1967                        grep_info.isam_p_indx, term_dst,
1968                        strlen(term_dst), rank_type,
1969                        0 /* preserve position */,
1970                        zapt->term->which, rset_nmem, 
1971                        kc, kc->scope, 0, reg_type,
1972                        hits_limit_value,
1973                        term_ref_id_str);
1974         if (!result_sets[num_result_sets])
1975             break;
1976         num_result_sets++;
1977     }
1978     grep_info_delete(&grep_info);
1979     if (termp)
1980     {
1981         int i;
1982         for (i = 0; i<num_result_sets; i++)
1983             rset_delete(result_sets[i]);
1984         return ZEBRA_FAIL;
1985     }
1986     if (num_result_sets == 0)
1987         *rset = rsnull_create(rset_nmem, kc, 0);
1988     if (num_result_sets == 1)
1989         *rset = result_sets[0];
1990     else
1991         *rset = rsmulti_and_create(rset_nmem, kc, kc->scope,
1992                                    num_result_sets, result_sets);
1993     if (!*rset)
1994         return ZEBRA_FAIL;
1995     return ZEBRA_OK;
1996 }
1997
1998 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1999                                       Z_AttributesPlusTerm *zapt,
2000                                       const char *termz,
2001                                       oid_value attributeSet,
2002                                       NMEM stream,
2003                                       const char *rank_type, NMEM rset_nmem,
2004                                       RSET *rset,
2005                                       struct rset_key_control *kc)
2006 {
2007     RSFD rsfd;
2008     struct it_key key;
2009     int sys;
2010     *rset = rstemp_create(rset_nmem, kc, kc->scope,
2011                           res_get (zh->res, "setTmpDir"),0 );
2012     rsfd = rset_open(*rset, RSETF_WRITE);
2013     
2014     sys = atoi(termz);
2015     if (sys <= 0)
2016         sys = 1;
2017     key.mem[0] = sys;
2018     key.mem[1] = 1;
2019     key.len = 2;
2020     rset_write (rsfd, &key);
2021     rset_close (rsfd);
2022     return ZEBRA_OK;
2023 }
2024
2025 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2026                                oid_value attributeSet, NMEM stream,
2027                                Z_SortKeySpecList *sort_sequence,
2028                                const char *rank_type,
2029                                NMEM rset_nmem,
2030                                RSET *rset,
2031                                struct rset_key_control *kc)
2032 {
2033     int i;
2034     int sort_relation_value;
2035     AttrType sort_relation_type;
2036     Z_SortKeySpec *sks;
2037     Z_SortKey *sk;
2038     int oid[OID_SIZE];
2039     oident oe;
2040     char termz[20];
2041     
2042     attr_init(&sort_relation_type, zapt, 7);
2043     sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
2044
2045     if (!sort_sequence->specs)
2046     {
2047         sort_sequence->num_specs = 10;
2048         sort_sequence->specs = (Z_SortKeySpec **)
2049             nmem_malloc(stream, sort_sequence->num_specs *
2050                          sizeof(*sort_sequence->specs));
2051         for (i = 0; i<sort_sequence->num_specs; i++)
2052             sort_sequence->specs[i] = 0;
2053     }
2054     if (zapt->term->which != Z_Term_general)
2055         i = 0;
2056     else
2057         i = atoi_n ((char *) zapt->term->u.general->buf,
2058                     zapt->term->u.general->len);
2059     if (i >= sort_sequence->num_specs)
2060         i = 0;
2061     sprintf(termz, "%d", i);
2062
2063     oe.proto = PROTO_Z3950;
2064     oe.oclass = CLASS_ATTSET;
2065     oe.value = attributeSet;
2066     if (!oid_ent_to_oid (&oe, oid))
2067         return ZEBRA_FAIL;
2068
2069     sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
2070     sks->sortElement = (Z_SortElement *)
2071         nmem_malloc(stream, sizeof(*sks->sortElement));
2072     sks->sortElement->which = Z_SortElement_generic;
2073     sk = sks->sortElement->u.generic = (Z_SortKey *)
2074         nmem_malloc(stream, sizeof(*sk));
2075     sk->which = Z_SortKey_sortAttributes;
2076     sk->u.sortAttributes = (Z_SortAttributes *)
2077         nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
2078
2079     sk->u.sortAttributes->id = oid;
2080     sk->u.sortAttributes->list = zapt->attributes;
2081
2082     sks->sortRelation = (int *)
2083         nmem_malloc(stream, sizeof(*sks->sortRelation));
2084     if (sort_relation_value == 1)
2085         *sks->sortRelation = Z_SortKeySpec_ascending;
2086     else if (sort_relation_value == 2)
2087         *sks->sortRelation = Z_SortKeySpec_descending;
2088     else 
2089         *sks->sortRelation = Z_SortKeySpec_ascending;
2090
2091     sks->caseSensitivity = (int *)
2092         nmem_malloc(stream, sizeof(*sks->caseSensitivity));
2093     *sks->caseSensitivity = 0;
2094
2095     sks->which = Z_SortKeySpec_null;
2096     sks->u.null = odr_nullval ();
2097     sort_sequence->specs[i] = sks;
2098     *rset = rsnull_create (rset_nmem, kc, 0);
2099     return ZEBRA_OK;
2100 }
2101
2102
2103 static int parse_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2104                        oid_value attributeSet,
2105                        struct xpath_location_step *xpath, int max, NMEM mem)
2106 {
2107     oid_value curAttributeSet = attributeSet;
2108     AttrType use;
2109     const char *use_string = 0;
2110     
2111     attr_init(&use, zapt, 1);
2112     attr_find_ex(&use, &curAttributeSet, &use_string);
2113
2114     if (!use_string || *use_string != '/')
2115         return -1;
2116
2117     return zebra_parse_xpath_str(use_string, xpath, max, mem);
2118 }
2119  
2120                
2121
2122 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
2123                         int reg_type, const char *term, int use,
2124                         oid_value curAttributeSet, NMEM rset_nmem,
2125                         struct rset_key_control *kc)
2126 {
2127     RSET rset;
2128     struct grep_info grep_info;
2129     char term_dict[2048];
2130     char ord_buf[32];
2131     int prefix_len = 0;
2132     int ord = zebraExplain_lookup_attr_su(zh->reg->zei, reg_type,
2133                                           curAttributeSet, use);
2134     int ord_len, i, r, max_pos;
2135     int term_type = Z_Term_characterString;
2136     const char *flags = "void";
2137
2138     if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, '0') == ZEBRA_FAIL)
2139         return rsnull_create(rset_nmem, kc, 0);
2140     
2141     if (ord < 0)
2142         return rsnull_create(rset_nmem, kc, 0);
2143     if (prefix_len)
2144         term_dict[prefix_len++] = '|';
2145     else
2146         term_dict[prefix_len++] = '(';
2147     
2148     ord_len = key_SU_encode (ord, ord_buf);
2149     for (i = 0; i<ord_len; i++)
2150     {
2151         term_dict[prefix_len++] = 1;
2152         term_dict[prefix_len++] = ord_buf[i];
2153     }
2154     term_dict[prefix_len++] = ')';
2155     strcpy(term_dict+prefix_len, term);
2156     
2157     grep_info.isam_p_indx = 0;
2158     r = dict_lookup_grep(zh->reg->dict, term_dict, 0,
2159                           &grep_info, &max_pos, 0, grep_handle);
2160     yaz_log(YLOG_DEBUG, "%s %d positions", term,
2161              grep_info.isam_p_indx);
2162     rset = rset_trunc(zh, grep_info.isam_p_buf,
2163                       grep_info.isam_p_indx, term, strlen(term),
2164                       flags, 1, term_type,rset_nmem,
2165                       kc, kc->scope, 0, reg_type, 0 /* hits_limit */,
2166                       0 /* term_ref_id_str */);
2167     grep_info_delete(&grep_info);
2168     return rset;
2169 }
2170
2171 static
2172 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2173                            oid_value attributeSet,
2174                            int num_bases, char **basenames,
2175                            NMEM stream, const char *rank_type, RSET rset,
2176                            int xpath_len, struct xpath_location_step *xpath,
2177                            NMEM rset_nmem,
2178                            RSET *rset_out,
2179                            struct rset_key_control *kc)
2180 {
2181     oid_value curAttributeSet = attributeSet;
2182     int base_no;
2183     int i;
2184
2185     if (xpath_len < 0)
2186     {
2187         *rset_out = rset;
2188         return ZEBRA_OK;
2189     }
2190
2191     yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2192     for (i = 0; i<xpath_len; i++)
2193     {
2194         yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2195
2196     }
2197
2198     curAttributeSet = VAL_IDXPATH;
2199
2200     /*
2201       //a    ->    a/.*
2202       //a/b  ->    b/a/.*
2203       /a     ->    a/
2204       /a/b   ->    b/a/
2205
2206       /      ->    none
2207
2208    a[@attr = value]/b[@other = othervalue]
2209
2210  /e/@a val      range(e/,range(@a,freetext(w,1015,val),@a),e/)
2211  /a/b val       range(b/a/,freetext(w,1016,val),b/a/)
2212  /a/b/@c val    range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2213  /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2214  /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2215  /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
2216       
2217     */
2218
2219     dict_grep_cmap (zh->reg->dict, 0, 0);
2220
2221     for (base_no = 0; base_no < num_bases; base_no++)
2222     {
2223         int level = xpath_len;
2224         int first_path = 1;
2225         
2226         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2227         {
2228             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2229                            basenames[base_no]);
2230             *rset_out = rset;
2231             return ZEBRA_FAIL;
2232         }
2233         while (--level >= 0)
2234         {
2235             char xpath_rev[128];
2236             int i, len;
2237             RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
2238
2239             *xpath_rev = 0;
2240             len = 0;
2241             for (i = level; i >= 1; --i)
2242             {
2243                 const char *cp = xpath[i].part;
2244                 if (*cp)
2245                 {
2246                     for (;*cp; cp++)
2247                         if (*cp == '*')
2248                         {
2249                             memcpy (xpath_rev + len, "[^/]*", 5);
2250                             len += 5;
2251                         }
2252                         else if (*cp == ' ')
2253                         {
2254
2255                             xpath_rev[len++] = 1;
2256                             xpath_rev[len++] = ' ';
2257                         }
2258
2259                         else
2260                             xpath_rev[len++] = *cp;
2261                     xpath_rev[len++] = '/';
2262                 }
2263                 else if (i == 1)  /* // case */
2264                 {
2265                     xpath_rev[len++] = '.';
2266                     xpath_rev[len++] = '*';
2267                 }
2268             }
2269             xpath_rev[len] = 0;
2270
2271             if (xpath[level].predicate &&
2272                 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2273                 xpath[level].predicate->u.relation.name[0])
2274             {
2275                 WRBUF wbuf = wrbuf_alloc();
2276                 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2277                 if (xpath[level].predicate->u.relation.value)
2278                 {
2279                     const char *cp = xpath[level].predicate->u.relation.value;
2280                     wrbuf_putc(wbuf, '=');
2281                     
2282                     while (*cp)
2283                     {
2284                         if (strchr(REGEX_CHARS, *cp))
2285                             wrbuf_putc(wbuf, '\\');
2286                         wrbuf_putc(wbuf, *cp);
2287                         cp++;
2288                     }
2289                 }
2290                 wrbuf_puts(wbuf, "");
2291                 rset_attr = xpath_trunc(
2292                     zh, stream, '0', wrbuf_buf(wbuf), 3, 
2293                     curAttributeSet, rset_nmem, kc);
2294                 wrbuf_free(wbuf, 1);
2295             } 
2296             else 
2297             {
2298                 if (!first_path)
2299                     continue;
2300             }
2301             yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level, xpath_rev);
2302             if (strlen(xpath_rev))
2303             {
2304                 rset_start_tag = xpath_trunc(zh, stream, '0', 
2305                         xpath_rev, 1, curAttributeSet, rset_nmem, kc);
2306             
2307                 rset_end_tag = xpath_trunc(zh, stream, '0', 
2308                         xpath_rev, 2, curAttributeSet, rset_nmem, kc);
2309
2310                 rset = rsbetween_create(rset_nmem, kc, kc->scope,
2311                                         rset_start_tag, rset,
2312                                         rset_end_tag, rset_attr);
2313             }
2314             first_path = 0;
2315         }
2316     }
2317     *rset_out = rset;
2318     return ZEBRA_OK;
2319 }
2320
2321 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2322                                 oid_value attributeSet, NMEM stream,
2323                                 Z_SortKeySpecList *sort_sequence,
2324                                 int num_bases, char **basenames, 
2325                                 NMEM rset_nmem,
2326                                 RSET *rset,
2327                                 struct rset_key_control *kc)
2328 {
2329     ZEBRA_RES res = ZEBRA_OK;
2330     unsigned reg_id;
2331     char *search_type = NULL;
2332     char rank_type[128];
2333     int complete_flag;
2334     int sort_flag;
2335     char termz[IT_MAX_WORD+1];
2336     int xpath_len;
2337     int xpath_use = 0;
2338     struct xpath_location_step xpath[10];
2339
2340     if (!log_level_set)
2341     {
2342         log_level_rpn = yaz_log_module_level("rpn");
2343         log_level_set = 1;
2344     }
2345     zebra_maps_attr(zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2346                     rank_type, &complete_flag, &sort_flag);
2347     
2348     yaz_log(YLOG_DEBUG, "reg_id=%c", reg_id);
2349     yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2350     yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2351     yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2352
2353     if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2354         return ZEBRA_FAIL;
2355
2356     if (sort_flag)
2357         return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2358                              rank_type, rset_nmem, rset, kc);
2359     /* consider if an X-Path query is used */
2360     xpath_len = parse_xpath(zh, zapt, attributeSet, xpath, 10, stream);
2361     if (xpath_len >= 0)
2362     {
2363         xpath_use = 1016;  /* searching for element by default */
2364         if (xpath[xpath_len-1].part[0] == '@') 
2365             xpath_use = 1015;  /* last step an attribute .. */
2366     }
2367
2368     /* search using one of the various search type strategies
2369        termz is our UTF-8 search term
2370        attributeSet is top-level default attribute set 
2371        stream is ODR for search
2372        reg_id is the register type
2373        complete_flag is 1 for complete subfield, 0 for incomplete
2374        xpath_use is use-attribute to be used for X-Path search, 0 for none
2375     */
2376     if (!strcmp(search_type, "phrase"))
2377     {
2378         res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2379                                     reg_id, complete_flag, rank_type,
2380                                     xpath_use,
2381                                     num_bases, basenames, rset_nmem,
2382                                     rset, kc);
2383     }
2384     else if (!strcmp(search_type, "and-list"))
2385     {
2386         res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2387                                       reg_id, complete_flag, rank_type,
2388                                       xpath_use,
2389                                       num_bases, basenames, rset_nmem,
2390                                       rset, kc);
2391     }
2392     else if (!strcmp(search_type, "or-list"))
2393     {
2394         res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2395                                      reg_id, complete_flag, rank_type,
2396                                      xpath_use,
2397                                      num_bases, basenames, rset_nmem,
2398                                      rset, kc);
2399     }
2400     else if (!strcmp(search_type, "local"))
2401     {
2402         res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2403                                    rank_type, rset_nmem, rset, kc);
2404     }
2405     else if (!strcmp(search_type, "numeric"))
2406     {
2407         res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2408                                      reg_id, complete_flag, rank_type,
2409                                      xpath_use,
2410                                      num_bases, basenames, rset_nmem,
2411                                      rset, kc);
2412     }
2413     else
2414     {
2415         zebra_setError(zh, YAZ_BIB1_UNSUPP_STRUCTURE_ATTRIBUTE, 0);
2416         res = ZEBRA_FAIL;
2417     }
2418     if (res != ZEBRA_OK)
2419         return res;
2420     if (!*rset)
2421         return ZEBRA_FAIL;
2422     return rpn_search_xpath(zh, attributeSet, num_bases, basenames,
2423                             stream, rank_type, *rset, 
2424                             xpath_len, xpath, rset_nmem, rset, kc);
2425 }
2426
2427 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2428                                       oid_value attributeSet, 
2429                                       NMEM stream, NMEM rset_nmem,
2430                                       Z_SortKeySpecList *sort_sequence,
2431                                       int num_bases, char **basenames,
2432                                       RSET **result_sets, int *num_result_sets,
2433                                       Z_Operator *parent_op,
2434                                       struct rset_key_control *kc);
2435
2436 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2437                          oid_value attributeSet, 
2438                          NMEM stream, NMEM rset_nmem,
2439                          Z_SortKeySpecList *sort_sequence,
2440                          int num_bases, char **basenames,
2441                          RSET *result_set)
2442 {
2443     RSET *result_sets = 0;
2444     int num_result_sets = 0;
2445     ZEBRA_RES res;
2446     struct rset_key_control *kc = zebra_key_control_create(zh);
2447
2448     res = rpn_search_structure(zh, zs, attributeSet,
2449                                stream, rset_nmem,
2450                                sort_sequence, 
2451                                num_bases, basenames,
2452                                &result_sets, &num_result_sets,
2453                                0 /* no parent op */,
2454                                kc);
2455     if (res != ZEBRA_OK)
2456     {
2457         int i;
2458         for (i = 0; i<num_result_sets; i++)
2459             rset_delete(result_sets[i]);
2460         *result_set = 0;
2461     }
2462     else
2463     {
2464         assert(num_result_sets == 1);
2465         assert(result_sets);
2466         assert(*result_sets);
2467         *result_set = *result_sets;
2468     }
2469     (*kc->dec)(kc);
2470     return res;
2471 }
2472
2473 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2474                                oid_value attributeSet, 
2475                                NMEM stream, NMEM rset_nmem,
2476                                Z_SortKeySpecList *sort_sequence,
2477                                int num_bases, char **basenames,
2478                                RSET **result_sets, int *num_result_sets,
2479                                Z_Operator *parent_op,
2480                                struct rset_key_control *kc)
2481 {
2482     *num_result_sets = 0;
2483     if (zs->which == Z_RPNStructure_complex)
2484     {
2485         ZEBRA_RES res;
2486         Z_Operator *zop = zs->u.complex->roperator;
2487         RSET *result_sets_l = 0;
2488         int num_result_sets_l = 0;
2489         RSET *result_sets_r = 0;
2490         int num_result_sets_r = 0;
2491
2492         res = rpn_search_structure(zh, zs->u.complex->s1,
2493                                    attributeSet, stream, rset_nmem,
2494                                    sort_sequence,
2495                                    num_bases, basenames,
2496                                    &result_sets_l, &num_result_sets_l,
2497                                    zop, kc);
2498         if (res != ZEBRA_OK)
2499         {
2500             int i;
2501             for (i = 0; i<num_result_sets_l; i++)
2502                 rset_delete(result_sets_l[i]);
2503             return res;
2504         }
2505         res = rpn_search_structure(zh, zs->u.complex->s2,
2506                                    attributeSet, stream, rset_nmem,
2507                                    sort_sequence,
2508                                    num_bases, basenames,
2509                                    &result_sets_r, &num_result_sets_r,
2510                                    zop, kc);
2511         if (res != ZEBRA_OK)
2512         {
2513             int i;
2514             for (i = 0; i<num_result_sets_l; i++)
2515                 rset_delete(result_sets_l[i]);
2516             for (i = 0; i<num_result_sets_r; i++)
2517                 rset_delete(result_sets_r[i]);
2518             return res;
2519         }
2520
2521         /* make a new list of result for all children */
2522         *num_result_sets = num_result_sets_l + num_result_sets_r;
2523         *result_sets = nmem_malloc(stream, *num_result_sets * 
2524                                    sizeof(**result_sets));
2525         memcpy(*result_sets, result_sets_l, 
2526                num_result_sets_l * sizeof(**result_sets));
2527         memcpy(*result_sets + num_result_sets_l, result_sets_r, 
2528                num_result_sets_r * sizeof(**result_sets));
2529
2530         if (!parent_op || parent_op->which != zop->which
2531             || (zop->which != Z_Operator_and &&
2532                 zop->which != Z_Operator_or))
2533         {
2534             /* parent node different from this one (or non-present) */
2535             /* we must combine result sets now */
2536             RSET rset;
2537             switch (zop->which)
2538             {
2539             case Z_Operator_and:
2540                 rset = rsmulti_and_create(rset_nmem, kc,
2541                                           kc->scope,
2542                                           *num_result_sets, *result_sets);
2543                 break;
2544             case Z_Operator_or:
2545                 rset = rsmulti_or_create(rset_nmem, kc,
2546                                          kc->scope, 0, /* termid */
2547                                          *num_result_sets, *result_sets);
2548                 break;
2549             case Z_Operator_and_not:
2550                 rset = rsbool_create_not(rset_nmem, kc,
2551                                          kc->scope,
2552                                          (*result_sets)[0],
2553                                          (*result_sets)[1]);
2554                 break;
2555             case Z_Operator_prox:
2556                 if (zop->u.prox->which != Z_ProximityOperator_known)
2557                 {
2558                     zebra_setError(zh, 
2559                                    YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2560                                    0);
2561                     return ZEBRA_FAIL;
2562                 }
2563                 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2564                 {
2565                     zebra_setError_zint(zh,
2566                                         YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2567                                         *zop->u.prox->u.known);
2568                     return ZEBRA_FAIL;
2569                 }
2570                 else
2571                 {
2572                     rset = rsprox_create(rset_nmem, kc,
2573                                          kc->scope,
2574                                          *num_result_sets, *result_sets, 
2575                                          *zop->u.prox->ordered,
2576                                          (!zop->u.prox->exclusion ? 
2577                                           0 : *zop->u.prox->exclusion),
2578                                          *zop->u.prox->relationType,
2579                                          *zop->u.prox->distance );
2580                 }
2581                 break;
2582             default:
2583                 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
2584                 return ZEBRA_FAIL;
2585             }
2586             *num_result_sets = 1;
2587             *result_sets = nmem_malloc(stream, *num_result_sets * 
2588                                        sizeof(**result_sets));
2589             (*result_sets)[0] = rset;
2590         }
2591     }
2592     else if (zs->which == Z_RPNStructure_simple)
2593     {
2594         RSET rset;
2595         ZEBRA_RES res;
2596
2597         if (zs->u.simple->which == Z_Operand_APT)
2598         {
2599             yaz_log(YLOG_DEBUG, "rpn_search_APT");
2600             res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2601                                  attributeSet, stream, sort_sequence,
2602                                  num_bases, basenames, rset_nmem, &rset,
2603                                  kc);
2604             if (res != ZEBRA_OK)
2605                 return res;
2606         }
2607         else if (zs->u.simple->which == Z_Operand_resultSetId)
2608         {
2609             yaz_log(YLOG_DEBUG, "rpn_search_ref");
2610             rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2611             if (!rset)
2612             {
2613                 zebra_setError(zh, 
2614                                YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2615                                zs->u.simple->u.resultSetId);
2616                 return ZEBRA_FAIL;
2617             }
2618             rset_dup(rset);
2619         }
2620         else
2621         {
2622             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2623             return ZEBRA_FAIL;
2624         }
2625         *num_result_sets = 1;
2626         *result_sets = nmem_malloc(stream, *num_result_sets * 
2627                                    sizeof(**result_sets));
2628         (*result_sets)[0] = rset;
2629     }
2630     else
2631     {
2632         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2633         return ZEBRA_FAIL;
2634     }
2635     return ZEBRA_OK;
2636 }
2637
2638 struct scan_info_entry {
2639     char *term;
2640     ISAM_P isam_p;
2641 };
2642
2643 struct scan_info {
2644     struct scan_info_entry *list;
2645     ODR odr;
2646     int before, after;
2647     char prefix[20];
2648 };
2649
2650 static int scan_handle (char *name, const char *info, int pos, void *client)
2651 {
2652     int len_prefix, idx;
2653     struct scan_info *scan_info = (struct scan_info *) client;
2654
2655     len_prefix = strlen(scan_info->prefix);
2656     if (memcmp (name, scan_info->prefix, len_prefix))
2657         return 1;
2658     if (pos > 0)
2659         idx = scan_info->after - pos + scan_info->before;
2660     else
2661         idx = - pos - 1;
2662
2663     if (idx < 0)
2664         return 0;
2665     scan_info->list[idx].term = (char *)
2666         odr_malloc(scan_info->odr, strlen(name + len_prefix)+1);
2667     strcpy(scan_info->list[idx].term, name + len_prefix);
2668     assert (*info == sizeof(ISAM_P));
2669     memcpy (&scan_info->list[idx].isam_p, info+1, sizeof(ISAM_P));
2670     return 0;
2671 }
2672
2673 void zebra_term_untrans_iconv(ZebraHandle zh, NMEM stream, int reg_type,
2674                               char **dst, const char *src)
2675 {
2676     char term_src[IT_MAX_WORD];
2677     char term_dst[IT_MAX_WORD];
2678     
2679     zebra_term_untrans (zh, reg_type, term_src, src);
2680
2681     if (zh->iconv_from_utf8 != 0)
2682     {
2683         int len;
2684         char *inbuf = term_src;
2685         size_t inleft = strlen(term_src);
2686         char *outbuf = term_dst;
2687         size_t outleft = sizeof(term_dst)-1;
2688         size_t ret;
2689         
2690         ret = yaz_iconv (zh->iconv_from_utf8, &inbuf, &inleft,
2691                          &outbuf, &outleft);
2692         if (ret == (size_t)(-1))
2693             len = 0;
2694         else
2695             len = outbuf - term_dst;
2696         *dst = nmem_malloc(stream, len + 1);
2697         if (len > 0)
2698             memcpy (*dst, term_dst, len);
2699         (*dst)[len] = '\0';
2700     }
2701     else
2702         *dst = nmem_strdup(stream, term_src);
2703 }
2704
2705 static void count_set(ZebraHandle zh, RSET rset, zint *count)
2706 {
2707     zint psysno = 0;
2708     struct it_key key;
2709     RSFD rfd;
2710
2711     yaz_log(YLOG_DEBUG, "count_set");
2712
2713     rset->hits_limit = zh->approx_limit;
2714
2715     *count = 0;
2716     rfd = rset_open(rset, RSETF_READ);
2717     while (rset_read(rfd, &key,0 /* never mind terms */))
2718     {
2719         if (key.mem[0] != psysno)
2720         {
2721             psysno = key.mem[0];
2722             if (rfd->counted_items >= rset->hits_limit)
2723                 break;
2724         }
2725     }
2726     rset_close (rfd);
2727     *count = rset->hits_count;
2728 }
2729
2730 ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
2731                    oid_value attributeset,
2732                    int num_bases, char **basenames,
2733                    int *position, int *num_entries, ZebraScanEntry **list,
2734                    int *is_partial, RSET limit_set, int return_zero)
2735 {
2736     int i;
2737     int pos = *position;
2738     int num = *num_entries;
2739     int before;
2740     int after;
2741     int base_no;
2742     char termz[IT_MAX_WORD+20];
2743     AttrType use;
2744     int use_value;
2745     const char *use_string = 0;
2746     struct scan_info *scan_info_array;
2747     ZebraScanEntry *glist;
2748     int ords[32], ord_no = 0;
2749     int ptr[32];
2750
2751     int bases_ok = 0;     /* no of databases with OK attribute */
2752     int errCode = 0;      /* err code (if any is not OK) */
2753     char *errString = 0;  /* addinfo */
2754
2755     unsigned reg_id;
2756     char *search_type = NULL;
2757     char rank_type[128];
2758     int complete_flag;
2759     int sort_flag;
2760     NMEM rset_nmem = NULL; 
2761     struct rset_key_control *kc = 0;
2762
2763     *list = 0;
2764     *is_partial = 0;
2765
2766     if (attributeset == VAL_NONE)
2767         attributeset = VAL_BIB1;
2768
2769     if (!limit_set)
2770     {
2771         AttrType termset;
2772         int termset_value_numeric;
2773         const char *termset_value_string;
2774         attr_init(&termset, zapt, 8);
2775         termset_value_numeric =
2776             attr_find_ex(&termset, NULL, &termset_value_string);
2777         if (termset_value_numeric != -1)
2778         {
2779             char resname[32];
2780             const char *termset_name = 0;
2781             
2782             if (termset_value_numeric != -2)
2783             {
2784                 
2785                 sprintf(resname, "%d", termset_value_numeric);
2786                 termset_name = resname;
2787             }
2788             else
2789                 termset_name = termset_value_string;
2790             
2791             limit_set = resultSetRef (zh, termset_name);
2792         }
2793     }
2794         
2795     yaz_log(YLOG_DEBUG, "position = %d, num = %d set=%d",
2796             pos, num, attributeset);
2797         
2798     attr_init(&use, zapt, 1);
2799     use_value = attr_find_ex(&use, &attributeset, &use_string);
2800
2801     if (zebra_maps_attr(zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2802                         rank_type, &complete_flag, &sort_flag))
2803     {
2804         *num_entries = 0;
2805         zebra_setError(zh, YAZ_BIB1_UNSUPP_ATTRIBUTE_TYPE, 0);
2806         return ZEBRA_FAIL;
2807     }
2808     yaz_log(YLOG_DEBUG, "use_value = %d", use_value);
2809
2810     if (use_value == -1)
2811         use_value = 1016;
2812     for (base_no = 0; base_no < num_bases && ord_no < 32; base_no++)
2813     {
2814         data1_local_attribute *local_attr;
2815         attent attp;
2816         int ord;
2817
2818         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2819         {
2820             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2821                            basenames[base_no]);
2822             *num_entries = 0;
2823             return ZEBRA_FAIL;
2824         }
2825
2826         if (use_string &&
2827             (ord = zebraExplain_lookup_attr_str(zh->reg->zei, reg_id,
2828                                                 use_string)) >= 0)
2829         {
2830             /* we have a match for a raw string attribute */
2831             if (ord > 0)
2832                 ords[ord_no++] = ord;
2833             attp.local_attributes = 0;  /* no more attributes */
2834         }
2835         else
2836         {
2837             int r;
2838             
2839             if ((r = att_getentbyatt (zh, &attp, attributeset, use_value,
2840                                       use_string)))
2841             {
2842                 yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d",
2843                         attributeset, use_value);
2844                 if (r == -1)
2845                 {
2846                     errCode = YAZ_BIB1_UNSUPP_USE_ATTRIBUTE;
2847                     if (use_string)
2848                         zebra_setError(zh, YAZ_BIB1_UNSUPP_USE_ATTRIBUTE,
2849                                        use_string);
2850                     else
2851                         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_USE_ATTRIBUTE,
2852                                             use_value);
2853                 }   
2854                 else
2855                 {
2856                     zebra_setError(zh, YAZ_BIB1_UNSUPP_ATTRIBUTE_SET, 0);
2857                 }
2858                 continue;
2859             }
2860         }
2861         bases_ok++;
2862         for (local_attr = attp.local_attributes; local_attr && ord_no < 32;
2863              local_attr = local_attr->next)
2864         {
2865             ord = zebraExplain_lookup_attr_su(zh->reg->zei, reg_id,
2866                                               attp.attset_ordinal,
2867                                               local_attr->local);
2868             if (ord > 0)
2869                 ords[ord_no++] = ord;
2870         }
2871     }
2872     if (!bases_ok && errCode)
2873     {
2874         zebra_setError(zh, errCode, errString);
2875         *num_entries = 0;
2876         return ZEBRA_FAIL;
2877     }
2878     if (ord_no == 0)
2879     {
2880         *num_entries = 0;
2881         return ZEBRA_OK;
2882     }
2883     /* prepare dictionary scanning */
2884     if (num < 1)
2885     {
2886         *num_entries = 0;
2887         return ZEBRA_OK;
2888     }
2889     before = pos-1;
2890     if (before < 0)
2891         before = 0;
2892     after = 1+num-pos;
2893     if (after < 0)
2894         after = 0;
2895     yaz_log(YLOG_DEBUG, "rpn_scan pos=%d num=%d before=%d "
2896             "after=%d before+after=%d",
2897             pos, num, before, after, before+after);
2898     scan_info_array = (struct scan_info *)
2899         odr_malloc(stream, ord_no * sizeof(*scan_info_array));
2900     for (i = 0; i < ord_no; i++)
2901     {
2902         int j, prefix_len = 0;
2903         int before_tmp = before, after_tmp = after;
2904         struct scan_info *scan_info = scan_info_array + i;
2905         struct rpn_char_map_info rcmi;
2906
2907         rpn_char_map_prepare (zh->reg, reg_id, &rcmi);
2908
2909         scan_info->before = before;
2910         scan_info->after = after;
2911         scan_info->odr = stream;
2912
2913         scan_info->list = (struct scan_info_entry *)
2914             odr_malloc(stream, (before+after) * sizeof(*scan_info->list));
2915         for (j = 0; j<before+after; j++)
2916             scan_info->list[j].term = NULL;
2917
2918         prefix_len += key_SU_encode (ords[i], termz + prefix_len);
2919         termz[prefix_len] = 0;
2920         strcpy(scan_info->prefix, termz);
2921
2922         if (trans_scan_term(zh, zapt, termz+prefix_len, reg_id) == ZEBRA_FAIL)
2923             return ZEBRA_FAIL;
2924         
2925         dict_scan(zh->reg->dict, termz, &before_tmp, &after_tmp,
2926                   scan_info, scan_handle);
2927     }
2928     glist = (ZebraScanEntry *)
2929         odr_malloc(stream, (before+after)*sizeof(*glist));
2930
2931     rset_nmem = nmem_create();
2932     kc = zebra_key_control_create(zh);
2933
2934     /* consider terms after main term */
2935     for (i = 0; i < ord_no; i++)
2936         ptr[i] = before;
2937     
2938     *is_partial = 0;
2939     for (i = 0; i<after; i++)
2940     {
2941         int j, j0 = -1;
2942         const char *mterm = NULL;
2943         const char *tst;
2944         RSET rset = 0;
2945         int lo = i + pos-1; /* offset in result list */
2946
2947         /* find: j0 is the first of the minimal values */
2948         for (j = 0; j < ord_no; j++)
2949         {
2950             if (ptr[j] < before+after && ptr[j] >= 0 &&
2951                 (tst = scan_info_array[j].list[ptr[j]].term) &&
2952                 (!mterm || strcmp (tst, mterm) < 0))
2953             {
2954                 j0 = j;
2955                 mterm = tst;
2956             }
2957         }
2958         if (j0 == -1)
2959             break;  /* no value found, stop */
2960
2961         /* get result set for first one , but only if it's within bounds */
2962         if (lo >= 0)
2963         {
2964             /* get result set for first term */
2965             zebra_term_untrans_iconv(zh, stream->mem, reg_id,
2966                                      &glist[lo].term, mterm);
2967             rset = rset_trunc(zh, &scan_info_array[j0].list[ptr[j0]].isam_p, 1,
2968                               glist[lo].term, strlen(glist[lo].term),
2969                               NULL, 0, zapt->term->which, rset_nmem, 
2970                               kc, kc->scope, 0, reg_id, 0 /* hits_limit */,
2971                               0 /* term_ref_id_str */);
2972         }
2973         ptr[j0]++; /* move index for this set .. */
2974         /* get result set for remaining scan terms */
2975         for (j = j0+1; j<ord_no; j++)
2976         {
2977             if (ptr[j] < before+after && ptr[j] >= 0 &&
2978                 (tst = scan_info_array[j].list[ptr[j]].term) &&
2979                 !strcmp (tst, mterm))
2980             {
2981                 if (lo >= 0)
2982                 {
2983                     RSET rsets[2];
2984                     
2985                     rsets[0] = rset;
2986                     rsets[1] =
2987                         rset_trunc(
2988                             zh, &scan_info_array[j].list[ptr[j]].isam_p, 1,
2989                             glist[lo].term,
2990                             strlen(glist[lo].term), NULL, 0,
2991                             zapt->term->which,rset_nmem,
2992                             kc, kc->scope, 0, reg_id, 0 /* hits_limit */,
2993                             0 /* term_ref_id_str */ );
2994                     rset = rsmulti_or_create(rset_nmem, kc,
2995                                              kc->scope, 0 /* termid */,
2996                                              2, rsets);
2997                 }
2998                 ptr[j]++;
2999             }
3000         }
3001         if (lo >= 0)
3002         {
3003             zint count;
3004             /* merge with limit_set if given */
3005             if (limit_set)
3006             {
3007                 RSET rsets[2];
3008                 rsets[0] = rset;
3009                 rsets[1] = rset_dup(limit_set);
3010                 
3011                 rset = rsmulti_and_create(rset_nmem, kc,
3012                                           kc->scope,
3013                                           2, rsets);
3014             }
3015             /* count it */
3016             count_set(zh, rset, &count);
3017             glist[lo].occurrences = count;
3018             rset_delete(rset);
3019         }
3020     }
3021     if (i < after)
3022     {
3023         *num_entries -= (after-i);
3024         *is_partial = 1;
3025         if (*num_entries < 0)
3026         {
3027             (*kc->dec)(kc);
3028             nmem_destroy(rset_nmem);
3029             *num_entries = 0;
3030             return ZEBRA_OK;
3031         }
3032     }
3033     /* consider terms before main term */
3034     for (i = 0; i<ord_no; i++)
3035         ptr[i] = 0;
3036     
3037     for (i = 0; i<before; i++)
3038     {
3039         int j, j0 = -1;
3040         const char *mterm = NULL;
3041         const char *tst;
3042         RSET rset;
3043         int lo = before-1-i; /* offset in result list */
3044         zint count;
3045         
3046         for (j = 0; j <ord_no; j++)
3047         {
3048             if (ptr[j] < before && ptr[j] >= 0 &&
3049                 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
3050                 (!mterm || strcmp (tst, mterm) > 0))
3051             {
3052                 j0 = j;
3053                     mterm = tst;
3054             }
3055         }
3056         if (j0 == -1)
3057             break;
3058         
3059         zebra_term_untrans_iconv(zh, stream->mem, reg_id,
3060                                  &glist[lo].term, mterm);
3061         
3062         rset = rset_trunc
3063             (zh, &scan_info_array[j0].list[before-1-ptr[j0]].isam_p, 1,
3064              glist[lo].term, strlen(glist[lo].term),
3065              NULL, 0, zapt->term->which, rset_nmem,
3066              kc, kc->scope, 0, reg_id, 0 /* hits_limit */,
3067              0 /* term_ref_id_str */);
3068         
3069         ptr[j0]++;
3070         
3071         for (j = j0+1; j<ord_no; j++)
3072         {
3073             if (ptr[j] < before && ptr[j] >= 0 &&
3074                 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
3075                 !strcmp (tst, mterm))
3076             {
3077                 RSET rsets[2];
3078                 
3079                 rsets[0] = rset;
3080                 rsets[1] = rset_trunc(
3081                     zh,
3082                     &scan_info_array[j].list[before-1-ptr[j]].isam_p, 1,
3083                     glist[lo].term,
3084                     strlen(glist[lo].term), NULL, 0,
3085                     zapt->term->which, rset_nmem,
3086                     kc, kc->scope, 0, reg_id, 0 /* hits_limit */,
3087                     0 /* term_ref_id_str */);
3088                 rset = rsmulti_or_create(rset_nmem, kc,
3089                                          kc->scope, 0 /* termid */, 2, rsets);
3090                 
3091                 ptr[j]++;
3092             }
3093         }
3094         if (limit_set)
3095         {
3096             RSET rsets[2];
3097             rsets[0] = rset;
3098             rsets[1] = rset_dup(limit_set);
3099             
3100             rset = rsmulti_and_create(rset_nmem, kc,
3101                                       kc->scope, 2, rsets);
3102         }
3103         count_set(zh, rset, &count);
3104         glist[lo].occurrences = count;
3105         rset_delete (rset);
3106     }
3107     (*kc->dec)(kc);
3108     nmem_destroy(rset_nmem);
3109     i = before-i;
3110     if (i)
3111     {
3112         *is_partial = 1;
3113         *position -= i;
3114         *num_entries -= i;
3115         if (*num_entries <= 0)
3116         {
3117             *num_entries = 0;
3118             return ZEBRA_OK;
3119         }
3120     }
3121     
3122     *list = glist + i;               /* list is set to first 'real' entry */
3123     
3124     yaz_log(YLOG_DEBUG, "position = %d, num_entries = %d",
3125             *position, *num_entries);
3126     return ZEBRA_OK;
3127 }
3128