Align Zebra API more so that ZEBRA_RES is used to signal error/OK.
[idzebra-moved-to-github.git] / index / zrpn.c
1 /* $Id: zrpn.c,v 1.191 2005-05-11 12:39:37 adam Exp $
2    Copyright (C) 1995-2005
3    Index Data ApS
4
5 This file is part of the Zebra server.
6
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra.  If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA.
21 */
22
23 #include <stdio.h>
24 #include <assert.h>
25 #ifdef WIN32
26 #include <io.h>
27 #else
28 #include <unistd.h>
29 #endif
30 #include <ctype.h>
31
32 #include <yaz/diagbib1.h>
33 #include "index.h"
34 #include <zebra_xpath.h>
35
36 #include <charmap.h>
37 #include <rset.h>
38
/* Context handed to rpn_char_map_handler() through dict_grep_cmap():
   identifies which character maps and which register type the handler
   should pass to zebra_maps_input(). */
struct rpn_char_map_info
{
    ZebraMaps zm;   /* character maps; set from reg->zebra_maps */
    int reg_type;   /* register type to map input for */
};
44
/* Cursor over the attribute list of a term.  attr_init() resets it and
   each attr_find_ex() call advances it, so repeated calls enumerate all
   values of one attribute type. */
typedef struct
{
    int type;   /* attribute type being looked for */
    int major;  /* index of the next attribute element to examine */
    int minor;  /* index of the next entry inside a complex attribute */
    Z_AttributesPlusTerm *zapt;  /* term whose attributes are scanned */
} AttrType;
52
53
/* Non-zero once log_level_rpn has been looked up (done in add_isam_p()). */
static int log_level_set = 0;
/* yaz_log level for the "rpn" log module; 0 until it has been looked up. */
static int log_level_rpn = 0;
56
57 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
58 {
59     struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
60     const char **out = zebra_maps_input(p->zm, p->reg_type, from, len, 0);
61 #if 0
62     if (out && *out)
63     {
64         const char *outp = *out;
65         yaz_log(YLOG_LOG, "---");
66         while (*outp)
67         {
68             yaz_log(YLOG_LOG, "%02X", *outp);
69             outp++;
70         }
71     }
72 #endif
73     return out;
74 }
75
76 static void rpn_char_map_prepare(struct zebra_register *reg, int reg_type,
77                                   struct rpn_char_map_info *map_info)
78 {
79     map_info->zm = reg->zebra_maps;
80     map_info->reg_type = reg_type;
81     dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
82 }
83
84 static int attr_find_ex(AttrType *src, oid_value *attributeSetP,
85                          const char **string_value)
86 {
87     int num_attributes;
88
89     num_attributes = src->zapt->attributes->num_attributes;
90     while (src->major < num_attributes)
91     {
92         Z_AttributeElement *element;
93
94         element = src->zapt->attributes->attributes[src->major];
95         if (src->type == *element->attributeType)
96         {
97             switch (element->which) 
98             {
99             case Z_AttributeValue_numeric:
100                 ++(src->major);
101                 if (element->attributeSet && attributeSetP)
102                 {
103                     oident *attrset;
104
105                     attrset = oid_getentbyoid(element->attributeSet);
106                     *attributeSetP = attrset->value;
107                 }
108                 return *element->value.numeric;
109                 break;
110             case Z_AttributeValue_complex:
111                 if (src->minor >= element->value.complex->num_list)
112                     break;
113                 if (element->attributeSet && attributeSetP)
114                 {
115                     oident *attrset;
116                     
117                     attrset = oid_getentbyoid(element->attributeSet);
118                     *attributeSetP = attrset->value;
119                 }
120                 if (element->value.complex->list[src->minor]->which ==  
121                     Z_StringOrNumeric_numeric)
122                 {
123                     ++(src->minor);
124                     return
125                         *element->value.complex->list[src->minor-1]->u.numeric;
126                 }
127                 else if (element->value.complex->list[src->minor]->which ==  
128                          Z_StringOrNumeric_string)
129                 {
130                     if (!string_value)
131                         break;
132                     ++(src->minor);
133                     *string_value = 
134                         element->value.complex->list[src->minor-1]->u.string;
135                     return -2;
136                 }
137                 else
138                     break;
139             default:
140                 assert(0);
141             }
142         }
143         ++(src->major);
144     }
145     return -1;
146 }
147
148 static int attr_find(AttrType *src, oid_value *attributeSetP)
149 {
150     return attr_find_ex(src, attributeSetP, 0);
151 }
152
153 static void attr_init(AttrType *src, Z_AttributesPlusTerm *zapt,
154                        int type)
155 {
156     src->zapt = zapt;
157     src->type = type;
158     src->major = 0;
159     src->minor = 0;
160 }
161
/* When defined, struct grep_info carries the term_no array. */
#define TERM_COUNT

/* State collected while matching dictionary entries; filled by
   add_isam_p(), which is called for each match via grep_handle(). */
struct grep_info {
#ifdef TERM_COUNT
    int *term_no;        /* array grown in step with isam_p_buf */
#endif
    ISAM_P *isam_p_buf;  /* ISAM positions of the matched entries */
    int isam_p_size;     /* number of entries allocated in isam_p_buf */
    int isam_p_indx;     /* number of entries in use */
    ZebraHandle zh;      /* handle used for term lookup and untranslation */
    int reg_type;        /* register type used when untranslating terms */
    ZebraSet termset;    /* if non-NULL, every matched term is added here */
};
175
176 static void term_untrans(ZebraHandle zh, int reg_type,
177                            char *dst, const char *src)
178 {
179     int len = 0;
180     while (*src)
181     {
182         const char *cp = zebra_maps_output(zh->reg->zebra_maps,
183                                            reg_type, &src);
184         if (!cp && len < IT_MAX_WORD-1)
185             dst[len++] = *src++;
186         else
187             while (*cp && len < IT_MAX_WORD-1)
188                 dst[len++] = *cp++;
189     }
190     dst[len] = '\0';
191 }
192
193 static void add_isam_p(const char *name, const char *info,
194                        struct grep_info *p)
195 {
196     if (!log_level_set)
197     {
198         log_level_rpn = yaz_log_module_level("rpn");
199         log_level_set = 1;
200     }
201     if (p->isam_p_indx == p->isam_p_size)
202     {
203         ISAM_P *new_isam_p_buf;
204 #ifdef TERM_COUNT        
205         int *new_term_no;        
206 #endif
207         p->isam_p_size = 2*p->isam_p_size + 100;
208         new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
209                                             p->isam_p_size);
210         if (p->isam_p_buf)
211         {
212             memcpy(new_isam_p_buf, p->isam_p_buf,
213                     p->isam_p_indx * sizeof(*p->isam_p_buf));
214             xfree(p->isam_p_buf);
215         }
216         p->isam_p_buf = new_isam_p_buf;
217
218 #ifdef TERM_COUNT
219         new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
220         if (p->term_no)
221         {
222             memcpy(new_term_no, p->isam_p_buf,
223                     p->isam_p_indx * sizeof(*p->term_no));
224             xfree(p->term_no);
225         }
226         p->term_no = new_term_no;
227 #endif
228     }
229     assert(*info == sizeof(*p->isam_p_buf));
230     memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
231
232 #if 1
233     if (p->termset)
234     {
235         const char *db;
236         int set, use;
237         char term_tmp[IT_MAX_WORD];
238         int su_code = 0;
239         int len = key_SU_decode (&su_code, name);
240         
241         term_untrans  (p->zh, p->reg_type, term_tmp, name+len+1);
242         yaz_log(log_level_rpn, "grep: %d %c %s", su_code, name[len], term_tmp);
243         zebraExplain_lookup_ord (p->zh->reg->zei,
244                                  su_code, &db, &set, &use);
245         yaz_log(log_level_rpn, "grep:  set=%d use=%d db=%s", set, use, db);
246         
247         resultSetAddTerm(p->zh, p->termset, name[len], db,
248                          set, use, term_tmp);
249     }
250 #endif
251     (p->isam_p_indx)++;
252 }
253
/* Match callback: forwards the entry to add_isam_p(), with p being the
 * struct grep_info to collect into.  Always returns 0.
 */
static int grep_handle(char *name, const char *info, void *p)
{
    struct grep_info *collector = (struct grep_info *) p;

    add_isam_p(name, info, collector);
    return 0;
}
259
260 static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src,
261                     const char *ct1, const char *ct2, int first)
262 {
263     const char *s1, *s0 = *src;
264     const char **map;
265
266     /* skip white space */
267     while (*s0)
268     {
269         if (ct1 && strchr(ct1, *s0))
270             break;
271         if (ct2 && strchr(ct2, *s0))
272             break;
273         s1 = s0;
274         map = zebra_maps_input(zebra_maps, reg_type, &s1, strlen(s1), first);
275         if (**map != *CHR_SPACE)
276             break;
277         s0 = s1;
278     }
279     *src = s0;
280     return *s0;
281 }
282
283
/* Render a byte string as a readable dump for diagnostics.
 *
 * Every input byte becomes "XX:c  " -- its value in upper-case hex, a
 * colon, the character itself ('?' when outside 32..126) and two
 * spaces.  As soon as fewer than 20 bytes of room remain, ".." is
 * appended and the rest of the input is dropped.
 *
 * out_buf   receives the NUL-terminated dump
 * out_size  capacity of out_buf; must exceed 20
 * in_buf    bytes to dump
 * in_size   number of bytes in in_buf
 */
static void esc_str(char *out_buf, int out_size,
                    const char *in_buf, int in_size)
{
    size_t used = 0;    /* length of the dump written so far */
    int k;

    assert(out_buf);
    assert(in_buf);
    assert(out_size > 20);

    out_buf[0] = '\0';
    for (k = 0; k < in_size; k++)
    {
        const int c = in_buf[k] & 0xff;
        const int pc = (c >= 32 && c <= 126) ? c : '?';

        used += sprintf(out_buf + used, "%02X:%c  ", c, pc);
        if (used > out_size-20)
        {
            strcpy(out_buf + used, "..");
            break;
        }
    }
}
309
310 #define REGEX_CHARS " []()|.*+?!"
311
312 /* term_100: handle term, where trunc = none(no operators at all) */
313 static int term_100(ZebraMaps zebra_maps, int reg_type,
314                     const char **src, char *dst, int space_split,
315                     char *dst_term)
316 {
317     const char *s0;
318     const char **map;
319     int i = 0;
320     int j = 0;
321
322     const char *space_start = 0;
323     const char *space_end = 0;
324
325     if (!term_pre(zebra_maps, reg_type, src, NULL, NULL, !space_split))
326         return 0;
327     s0 = *src;
328     while (*s0)
329     {
330         const char *s1 = s0;
331         int q_map_match = 0;
332         map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
333                                 &q_map_match);
334         if (space_split)
335         {
336             if (**map == *CHR_SPACE)
337                 break;
338         }
339         else  /* complete subfield only. */
340         {
341             if (**map == *CHR_SPACE)
342             {   /* save space mapping for later  .. */
343                 space_start = s1;
344                 space_end = s0;
345                 continue;
346             }
347             else if (space_start)
348             {   /* reload last space */
349                 while (space_start < space_end)
350                 {
351                     if (strchr(REGEX_CHARS, *space_start))
352                         dst[i++] = '\\';
353                     dst_term[j++] = *space_start;
354                     dst[i++] = *space_start++;
355                 }
356                 /* and reset */
357                 space_start = space_end = 0;
358             }
359         }
360         /* add non-space char */
361         memcpy(dst_term+j, s1, s0 - s1);
362         j += (s0 - s1);
363         if (!q_map_match)
364         {
365             while (s1 < s0)
366             {
367                 if (strchr(REGEX_CHARS, *s1))
368                     dst[i++] = '\\';
369                 dst[i++] = *s1++;
370             }
371         }
372         else
373         {
374             char tmpbuf[80];
375             esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
376             
377             strcpy(dst + i, map[0]);
378             i += strlen(map[0]);
379         }
380     }
381     dst[i] = '\0';
382     dst_term[j] = '\0';
383     *src = s0;
384     return i;
385 }
386
387 /* term_101: handle term, where trunc = Process # */
388 static int term_101(ZebraMaps zebra_maps, int reg_type,
389                     const char **src, char *dst, int space_split,
390                     char *dst_term)
391 {
392     const char *s0;
393     const char **map;
394     int i = 0;
395     int j = 0;
396
397     if (!term_pre(zebra_maps, reg_type, src, "#", "#", !space_split))
398         return 0;
399     s0 = *src;
400     while (*s0)
401     {
402         if (*s0 == '#')
403         {
404             dst[i++] = '.';
405             dst[i++] = '*';
406             dst_term[j++] = *s0++;
407         }
408         else
409         {
410             const char *s1 = s0;
411             int q_map_match = 0;
412             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
413                                     &q_map_match);
414             if (space_split && **map == *CHR_SPACE)
415                 break;
416
417             /* add non-space char */
418             memcpy(dst_term+j, s1, s0 - s1);
419             j += (s0 - s1);
420             if (!q_map_match)
421             {
422                 while (s1 < s0)
423                 {
424                     if (strchr(REGEX_CHARS, *s1))
425                         dst[i++] = '\\';
426                     dst[i++] = *s1++;
427                 }
428             }
429             else
430             {
431                 char tmpbuf[80];
432                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
433                 
434                 strcpy(dst + i, map[0]);
435                 i += strlen(map[0]);
436             }
437         }
438     }
439     dst[i] = '\0';
440     dst_term[j++] = '\0';
441     *src = s0;
442     return i;
443 }
444
445 /* term_103: handle term, where trunc = re-2 (regular expressions) */
446 static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src,
447                     char *dst, int *errors, int space_split,
448                     char *dst_term)
449 {
450     int i = 0;
451     int j = 0;
452     const char *s0;
453     const char **map;
454
455     if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split))
456         return 0;
457     s0 = *src;
458     if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
459         isdigit(((const unsigned char *)s0)[1]))
460     {
461         *errors = s0[1] - '0';
462         s0 += 3;
463         if (*errors > 3)
464             *errors = 3;
465     }
466     while (*s0)
467     {
468         if (strchr("^\\()[].*+?|-", *s0))
469         {
470             dst_term[j++] = *s0;
471             dst[i++] = *s0++;
472         }
473         else
474         {
475             const char *s1 = s0;
476             int q_map_match = 0;
477             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
478                                     &q_map_match);
479             if (space_split && **map == *CHR_SPACE)
480                 break;
481
482             /* add non-space char */
483             memcpy(dst_term+j, s1, s0 - s1);
484             j += (s0 - s1);
485             if (!q_map_match)
486             {
487                 while (s1 < s0)
488                 {
489                     if (strchr(REGEX_CHARS, *s1))
490                         dst[i++] = '\\';
491                     dst[i++] = *s1++;
492                 }
493             }
494             else
495             {
496                 char tmpbuf[80];
497                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
498                 
499                 strcpy(dst + i, map[0]);
500                 i += strlen(map[0]);
501             }
502         }
503     }
504     dst[i] = '\0';
505     dst_term[j] = '\0';
506     *src = s0;
507     
508     return i;
509 }
510
511 /* term_103: handle term, where trunc = re-1 (regular expressions) */
512 static int term_102(ZebraMaps zebra_maps, int reg_type, const char **src,
513                     char *dst, int space_split, char *dst_term)
514 {
515     return term_103(zebra_maps, reg_type, src, dst, NULL, space_split,
516                     dst_term);
517 }
518
519
520 /* term_104: handle term, where trunc = Process # and ! */
521 static int term_104(ZebraMaps zebra_maps, int reg_type,
522                     const char **src, char *dst, int space_split,
523                     char *dst_term)
524 {
525     const char *s0;
526     const char **map;
527     int i = 0;
528     int j = 0;
529
530     if (!term_pre(zebra_maps, reg_type, src, "?*#", "?*#", !space_split))
531         return 0;
532     s0 = *src;
533     while (*s0)
534     {
535         if (*s0 == '?')
536         {
537             dst_term[j++] = *s0++;
538             if (*s0 >= '0' && *s0 <= '9')
539             {
540                 int limit = 0;
541                 while (*s0 >= '0' && *s0 <= '9')
542                 {
543                     limit = limit * 10 + (*s0 - '0');
544                     dst_term[j++] = *s0++;
545                 }
546                 if (limit > 20)
547                     limit = 20;
548                 while (--limit >= 0)
549                 {
550                     dst[i++] = '.';
551                     dst[i++] = '?';
552                 }
553             }
554             else
555             {
556                 dst[i++] = '.';
557                 dst[i++] = '*';
558             }
559         }
560         else if (*s0 == '*')
561         {
562             dst[i++] = '.';
563             dst[i++] = '*';
564             dst_term[j++] = *s0++;
565         }
566         else if (*s0 == '#')
567         {
568             dst[i++] = '.';
569             dst_term[j++] = *s0++;
570         }
571         else
572         {
573             const char *s1 = s0;
574             int q_map_match = 0;
575             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
576                                     &q_map_match);
577             if (space_split && **map == *CHR_SPACE)
578                 break;
579
580             /* add non-space char */
581             memcpy(dst_term+j, s1, s0 - s1);
582             j += (s0 - s1);
583             if (!q_map_match)
584             {
585                 while (s1 < s0)
586                 {
587                     if (strchr(REGEX_CHARS, *s1))
588                         dst[i++] = '\\';
589                     dst[i++] = *s1++;
590                 }
591             }
592             else
593             {
594                 char tmpbuf[80];
595                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
596                 
597                 strcpy(dst + i, map[0]);
598                 i += strlen(map[0]);
599             }
600         }
601     }
602     dst[i] = '\0';
603     dst_term[j++] = '\0';
604     *src = s0;
605     return i;
606 }
607
608 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
609 static int term_105(ZebraMaps zebra_maps, int reg_type,
610                     const char **src, char *dst, int space_split,
611                     char *dst_term, int right_truncate)
612 {
613     const char *s0;
614     const char **map;
615     int i = 0;
616     int j = 0;
617
618     if (!term_pre(zebra_maps, reg_type, src, "*!", "*!", !space_split))
619         return 0;
620     s0 = *src;
621     while (*s0)
622     {
623         if (*s0 == '*')
624         {
625             dst[i++] = '.';
626             dst[i++] = '*';
627             dst_term[j++] = *s0++;
628         }
629         else if (*s0 == '!')
630         {
631             dst[i++] = '.';
632             dst_term[j++] = *s0++;
633         }
634         else
635         {
636             const char *s1 = s0;
637             int q_map_match = 0;
638             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
639                                     &q_map_match);
640             if (space_split && **map == *CHR_SPACE)
641                 break;
642
643             /* add non-space char */
644             memcpy(dst_term+j, s1, s0 - s1);
645             j += (s0 - s1);
646             if (!q_map_match)
647             {
648                 while (s1 < s0)
649                 {
650                     if (strchr(REGEX_CHARS, *s1))
651                         dst[i++] = '\\';
652                     dst[i++] = *s1++;
653                 }
654             }
655             else
656             {
657                 char tmpbuf[80];
658                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
659                 
660                 strcpy(dst + i, map[0]);
661                 i += strlen(map[0]);
662             }
663         }
664     }
665     if (right_truncate)
666     {
667         dst[i++] = '.';
668         dst[i++] = '*';
669     }
670     dst[i] = '\0';
671     
672     dst_term[j++] = '\0';
673     *src = s0;
674     return i;
675 }
676
677
678 /* gen_regular_rel - generate regular expression from relation
679  *  val:     border value (inclusive)
680  *  islt:    1 if <=; 0 if >=.
681  */
682 static void gen_regular_rel(char *dst, int val, int islt)
683 {
684     int dst_p;
685     int w, d, i;
686     int pos = 0;
687     char numstr[20];
688
689     yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
690     if (val >= 0)
691     {
692         if (islt)
693             strcpy(dst, "(-[0-9]+|(");
694         else
695             strcpy(dst, "((");
696     } 
697     else
698     {
699         if (!islt)
700         {
701             strcpy(dst, "([0-9]+|-(");
702             dst_p = strlen(dst);
703             islt = 1;
704         }
705         else
706         {
707             strcpy(dst, "(-(");
708             islt = 0;
709         }
710         val = -val;
711     }
712     dst_p = strlen(dst);
713     sprintf(numstr, "%d", val);
714     for (w = strlen(numstr); --w >= 0; pos++)
715     {
716         d = numstr[w];
717         if (pos > 0)
718         {
719             if (islt)
720             {
721                 if (d == '0')
722                     continue;
723                 d--;
724             } 
725             else
726             {
727                 if (d == '9')
728                     continue;
729                 d++;
730             }
731         }
732         
733         strcpy(dst + dst_p, numstr);
734         dst_p = strlen(dst) - pos - 1;
735
736         if (islt)
737         {
738             if (d != '0')
739             {
740                 dst[dst_p++] = '[';
741                 dst[dst_p++] = '0';
742                 dst[dst_p++] = '-';
743                 dst[dst_p++] = d;
744                 dst[dst_p++] = ']';
745             }
746             else
747                 dst[dst_p++] = d;
748         }
749         else
750         {
751             if (d != '9')
752             { 
753                 dst[dst_p++] = '[';
754                 dst[dst_p++] = d;
755                 dst[dst_p++] = '-';
756                 dst[dst_p++] = '9';
757                 dst[dst_p++] = ']';
758             }
759             else
760                 dst[dst_p++] = d;
761         }
762         for (i = 0; i<pos; i++)
763         {
764             dst[dst_p++] = '[';
765             dst[dst_p++] = '0';
766             dst[dst_p++] = '-';
767             dst[dst_p++] = '9';
768             dst[dst_p++] = ']';
769         }
770         dst[dst_p++] = '|';
771     }
772     dst[dst_p] = '\0';
773     if (islt)
774     {
775         /* match everything less than 10^(pos-1) */
776         strcat(dst, "0*");
777         for (i = 1; i<pos; i++)
778             strcat(dst, "[0-9]?");
779     }
780     else
781     {
782         /* match everything greater than 10^pos */
783         for (i = 0; i <= pos; i++)
784             strcat(dst, "[0-9]");
785         strcat(dst, "[0-9]*");
786     }
787     strcat(dst, "))");
788 }
789
/* Copy one logical character from src[*indx] to **term_p.
 *
 * A backslash counts as one unit together with the character after it,
 * so either one or two bytes are copied.  Both the output pointer
 * *term_p and the input index *indx are advanced past what was copied.
 */
void string_rel_add_char(char **term_p, const char *src, int *indx)
{
    char *out = *term_p;
    int at = *indx;

    if (src[at] == '\\')
        *out++ = src[at++];
    *out++ = src[at++];

    *term_p = out;
    *indx = at;
}
796
797 /*
798  *   >  abc     ([b-].*|a[c-].*|ab[d-].*|abc.+)
799  *              ([^-a].*|a[^-b].*ab[^-c].*|abc.+)
800  *   >= abc     ([b-].*|a[c-].*|ab[c-].*)
801  *              ([^-a].*|a[^-b].*|ab[c-].*)
802  *   <  abc     ([-0].*|a[-a].*|ab[-b].*)
803  *              ([^a-].*|a[^b-].*|ab[^c-].*)
804  *   <= abc     ([-0].*|a[-a].*|ab[-b].*|abc)
805  *              ([^a-].*|a[^b-].*|ab[^c-].*|abc)
806  */
807 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
808                            const char **term_sub, char *term_dict,
809                            oid_value attributeSet,
810                            int reg_type, int space_split, char *term_dst,
811                            int *error_code)
812 {
813     AttrType relation;
814     int relation_value;
815     int i;
816     char *term_tmp = term_dict + strlen(term_dict);
817     char term_component[2*IT_MAX_WORD+20];
818
819     attr_init(&relation, zapt, 2);
820     relation_value = attr_find(&relation, NULL);
821
822     *error_code = 0;
823     yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
824     switch (relation_value)
825     {
826     case 1:
827         if (!term_100(zh->reg->zebra_maps, reg_type,
828                       term_sub, term_component,
829                       space_split, term_dst))
830             return 0;
831         yaz_log(log_level_rpn, "Relation <");
832         
833         *term_tmp++ = '(';
834         for (i = 0; term_component[i]; )
835         {
836             int j = 0;
837
838             if (i)
839                 *term_tmp++ = '|';
840             while (j < i)
841                 string_rel_add_char(&term_tmp, term_component, &j);
842
843             *term_tmp++ = '[';
844
845             *term_tmp++ = '^';
846             string_rel_add_char(&term_tmp, term_component, &i);
847             *term_tmp++ = '-';
848
849             *term_tmp++ = ']';
850             *term_tmp++ = '.';
851             *term_tmp++ = '*';
852
853             if ((term_tmp - term_dict) > IT_MAX_WORD)
854                 break;
855         }
856         *term_tmp++ = ')';
857         *term_tmp = '\0';
858         break;
859     case 2:
860         if (!term_100(zh->reg->zebra_maps, reg_type,
861                       term_sub, term_component,
862                       space_split, term_dst))
863             return 0;
864         yaz_log(log_level_rpn, "Relation <=");
865
866         *term_tmp++ = '(';
867         for (i = 0; term_component[i]; )
868         {
869             int j = 0;
870
871             while (j < i)
872                 string_rel_add_char(&term_tmp, term_component, &j);
873             *term_tmp++ = '[';
874
875             *term_tmp++ = '^';
876             string_rel_add_char(&term_tmp, term_component, &i);
877             *term_tmp++ = '-';
878
879             *term_tmp++ = ']';
880             *term_tmp++ = '.';
881             *term_tmp++ = '*';
882
883             *term_tmp++ = '|';
884
885             if ((term_tmp - term_dict) > IT_MAX_WORD)
886                 break;
887         }
888         for (i = 0; term_component[i]; )
889             string_rel_add_char(&term_tmp, term_component, &i);
890         *term_tmp++ = ')';
891         *term_tmp = '\0';
892         break;
893     case 5:
894         if (!term_100 (zh->reg->zebra_maps, reg_type,
895                        term_sub, term_component, space_split, term_dst))
896             return 0;
897         yaz_log(log_level_rpn, "Relation >");
898
899         *term_tmp++ = '(';
900         for (i = 0; term_component[i];)
901         {
902             int j = 0;
903
904             while (j < i)
905                 string_rel_add_char(&term_tmp, term_component, &j);
906             *term_tmp++ = '[';
907             
908             *term_tmp++ = '^';
909             *term_tmp++ = '-';
910             string_rel_add_char(&term_tmp, term_component, &i);
911
912             *term_tmp++ = ']';
913             *term_tmp++ = '.';
914             *term_tmp++ = '*';
915
916             *term_tmp++ = '|';
917
918             if ((term_tmp - term_dict) > IT_MAX_WORD)
919                 break;
920         }
921         for (i = 0; term_component[i];)
922             string_rel_add_char(&term_tmp, term_component, &i);
923         *term_tmp++ = '.';
924         *term_tmp++ = '+';
925         *term_tmp++ = ')';
926         *term_tmp = '\0';
927         break;
928     case 4:
929         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
930                       term_component, space_split, term_dst))
931             return 0;
932         yaz_log(log_level_rpn, "Relation >=");
933
934         *term_tmp++ = '(';
935         for (i = 0; term_component[i];)
936         {
937             int j = 0;
938
939             if (i)
940                 *term_tmp++ = '|';
941             while (j < i)
942                 string_rel_add_char(&term_tmp, term_component, &j);
943             *term_tmp++ = '[';
944
945             if (term_component[i+1])
946             {
947                 *term_tmp++ = '^';
948                 *term_tmp++ = '-';
949                 string_rel_add_char(&term_tmp, term_component, &i);
950             }
951             else
952             {
953                 string_rel_add_char(&term_tmp, term_component, &i);
954                 *term_tmp++ = '-';
955             }
956             *term_tmp++ = ']';
957             *term_tmp++ = '.';
958             *term_tmp++ = '*';
959
960             if ((term_tmp - term_dict) > IT_MAX_WORD)
961                 break;
962         }
963         *term_tmp++ = ')';
964         *term_tmp = '\0';
965         break;
966     case 3:
967     case 102:
968     case -1:
969         yaz_log(log_level_rpn, "Relation =");
970         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
971                       term_component, space_split, term_dst))
972             return 0;
973         strcat(term_tmp, "(");
974         strcat(term_tmp, term_component);
975         strcat(term_tmp, ")");
976         break;
977     default:
978         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
979         return 0;
980     }
981     return 1;
982 }
983
984 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
985                              const char **term_sub, 
986                              oid_value attributeSet, NMEM stream,
987                              struct grep_info *grep_info,
988                              int reg_type, int complete_flag,
989                              int num_bases, char **basenames,
990                              char *term_dst, int xpath_use);
991
992 static ZEBRA_RES term_trunc(ZebraHandle zh,
993                             Z_AttributesPlusTerm *zapt,
994                             const char **term_sub, 
995                             oid_value attributeSet, NMEM stream,
996                             struct grep_info *grep_info,
997                             int reg_type, int complete_flag,
998                             int num_bases, char **basenames,
999                             char *term_dst,
1000                             const char *rank_type, int xpath_use,
1001                             NMEM rset_nmem,
1002                             RSET *rset,
1003                             struct rset_key_control *kc)
1004 {
1005     ZEBRA_RES res;
1006     *rset = 0;
1007     grep_info->isam_p_indx = 0;
1008     res = string_term(zh, zapt, term_sub, attributeSet, stream, grep_info,
1009                       reg_type, complete_flag, num_bases, basenames,
1010                       term_dst, xpath_use);
1011     if (res != ZEBRA_OK)
1012         return res;
1013     if (!*term_sub)  /* no more terms ? */
1014         return res;
1015     yaz_log(log_level_rpn, "term: %s", term_dst);
1016     *rset = rset_trunc(zh, grep_info->isam_p_buf,
1017                        grep_info->isam_p_indx, term_dst,
1018                        strlen(term_dst), rank_type, 1 /* preserve pos */,
1019                        zapt->term->which, rset_nmem,
1020                        kc, kc->scope);
1021     if (!*rset)
1022         return ZEBRA_FAIL;
1023     return ZEBRA_OK;
1024 }
1025
1026 static char *nmem_strdup_i(NMEM nmem, int v)
1027 {
1028     char val_str[64];
1029     sprintf(val_str, "%d", v);
1030     return nmem_strdup(nmem, val_str);
1031 }
1032
1033 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1034                              const char **term_sub, 
1035                              oid_value attributeSet, NMEM stream,
1036                              struct grep_info *grep_info,
1037                              int reg_type, int complete_flag,
1038                              int num_bases, char **basenames,
1039                              char *term_dst, int xpath_use)
1040 {
1041     char term_dict[2*IT_MAX_WORD+4000];
1042     int j, r, base_no;
1043     AttrType truncation;
1044     int truncation_value;
1045     AttrType use;
1046     int use_value;
1047     const char *use_string = 0;
1048     oid_value curAttributeSet = attributeSet;
1049     const char *termp;
1050     struct rpn_char_map_info rcmi;
1051     int space_split = complete_flag ? 0 : 1;
1052
1053     int bases_ok = 0;     /* no of databases with OK attribute */
1054     int errCode = 0;      /* err code (if any is not OK) */
1055     char *errString = 0;  /* addinfo */
1056
1057     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1058     attr_init(&use, zapt, 1);
1059     use_value = attr_find_ex(&use, &curAttributeSet, &use_string);
1060     yaz_log(log_level_rpn, "string_term, use value %d", use_value);
1061     attr_init(&truncation, zapt, 5);
1062     truncation_value = attr_find(&truncation, NULL);
1063     yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1064
1065     if (use_value == -1)    /* no attribute - assumy "any" */
1066         use_value = 1016;
1067     for (base_no = 0; base_no < num_bases; base_no++)
1068     {
1069         int ord = -1;
1070         int attr_ok = 0;
1071         int regex_range = 0;
1072         int init_pos = 0;
1073         attent attp;
1074         data1_local_attribute id_xpath_attr;
1075         data1_local_attribute *local_attr;
1076         int max_pos, prefix_len = 0;
1077         int relation_error;
1078
1079         termp = *term_sub;
1080
1081         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1082         {
1083             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1084                            basenames[base_no]);
1085             return ZEBRA_FAIL;
1086         }
1087         if (xpath_use > 0 && use_value == -2) 
1088         {
1089             /* xpath mode and we have a string attribute */
1090             attp.local_attributes = &id_xpath_attr;
1091             attp.attset_ordinal = VAL_IDXPATH;
1092             id_xpath_attr.next = 0;
1093
1094             use_value = xpath_use;  /* xpath_use as use-attribute now */
1095             id_xpath_attr.local = use_value;
1096         }
1097         else if (curAttributeSet == VAL_IDXPATH && use_value >= 0)
1098         {
1099             /* X-Path attribute, use numeric value directly */
1100             attp.local_attributes = &id_xpath_attr;
1101             attp.attset_ordinal = VAL_IDXPATH;
1102             id_xpath_attr.next = 0;
1103             id_xpath_attr.local = use_value;
1104         }
1105         else if (use_string &&
1106                  (ord = zebraExplain_lookup_attr_str(zh->reg->zei,
1107                                                      use_string)) >= 0)
1108         {
1109             /* we have a match for a raw string attribute */
1110             char ord_buf[32];
1111             int i, ord_len;
1112
1113             if (prefix_len)
1114                 term_dict[prefix_len++] = '|';
1115             else
1116                 term_dict[prefix_len++] = '(';
1117             
1118             ord_len = key_SU_encode (ord, ord_buf);
1119             for (i = 0; i<ord_len; i++)
1120             {
1121                 term_dict[prefix_len++] = 1;
1122                 term_dict[prefix_len++] = ord_buf[i];
1123             }
1124             attp.local_attributes = 0;  /* no more attributes */
1125         }
1126         else 
1127         {
1128             /* lookup in the .att files . Allow string as well */
1129             if ((r = att_getentbyatt (zh, &attp, curAttributeSet, use_value,
1130                                       use_string)))
1131             {
1132                 yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
1133                         curAttributeSet, use_value, r);
1134                 if (r == -1)
1135                 {
1136                     /* set was found, but value wasn't defined */
1137                     errCode = YAZ_BIB1_UNSUPP_USE_ATTRIBUTE;
1138                     if (use_string)
1139                         errString = nmem_strdup(stream, use_string);
1140                     else
1141                         errString = nmem_strdup_i (stream, use_value);
1142                 }
1143                 else
1144                 {
1145                     int oid[OID_SIZE];
1146                     struct oident oident;
1147                     
1148                     oident.proto = PROTO_Z3950;
1149                     oident.oclass = CLASS_ATTSET;
1150                     oident.value = curAttributeSet;
1151                     oid_ent_to_oid (&oident, oid);
1152                     
1153                     errCode = YAZ_BIB1_UNSUPP_ATTRIBUTE_SET;
1154                     errString = nmem_strdup(stream, oident.desc);
1155                 }
1156                 continue;
1157             }
1158         }
1159         for (local_attr = attp.local_attributes; local_attr;
1160              local_attr = local_attr->next)
1161         {
1162             char ord_buf[32];
1163             int i, ord_len;
1164             
1165             ord = zebraExplain_lookup_attr_su(zh->reg->zei,
1166                                               attp.attset_ordinal,
1167                                               local_attr->local);
1168             if (ord < 0)
1169                 continue;
1170             if (prefix_len)
1171                 term_dict[prefix_len++] = '|';
1172             else
1173                 term_dict[prefix_len++] = '(';
1174             
1175             ord_len = key_SU_encode (ord, ord_buf);
1176             for (i = 0; i<ord_len; i++)
1177             {
1178                 term_dict[prefix_len++] = 1;
1179                 term_dict[prefix_len++] = ord_buf[i];
1180             }
1181         }
1182         bases_ok++;
1183         if (prefix_len)
1184             attr_ok = 1;
1185
1186         term_dict[prefix_len++] = ')';
1187         term_dict[prefix_len++] = 1;
1188         term_dict[prefix_len++] = reg_type;
1189         yaz_log(log_level_rpn, "reg_type = %d", term_dict[prefix_len-1]);
1190         term_dict[prefix_len] = '\0';
1191         j = prefix_len;
1192         switch (truncation_value)
1193         {
1194         case -1:         /* not specified */
1195         case 100:        /* do not truncate */
1196             if (!string_relation (zh, zapt, &termp, term_dict,
1197                                   attributeSet,
1198                                   reg_type, space_split, term_dst,
1199                                   &relation_error))
1200             {
1201                 if (relation_error)
1202                 {
1203                     zebra_setError(zh, relation_error, 0);
1204                     return ZEBRA_FAIL;
1205                 }
1206                 *term_sub = 0;
1207                 return ZEBRA_OK;
1208             }
1209             break;
1210         case 1:          /* right truncation */
1211             term_dict[j++] = '(';
1212             if (!term_100(zh->reg->zebra_maps, reg_type,
1213                           &termp, term_dict + j, space_split, term_dst))
1214             {
1215                 *term_sub = 0;
1216                 return ZEBRA_OK;
1217             }
1218             strcat(term_dict, ".*)");
1219             break;
1220         case 2:          /* keft truncation */
1221             term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1222             if (!term_100(zh->reg->zebra_maps, reg_type,
1223                           &termp, term_dict + j, space_split, term_dst))
1224             {
1225                 *term_sub = 0;
1226                 return ZEBRA_OK;
1227             }
1228             strcat(term_dict, ")");
1229             break;
1230         case 3:          /* left&right truncation */
1231             term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1232             if (!term_100(zh->reg->zebra_maps, reg_type,
1233                           &termp, term_dict + j, space_split, term_dst))
1234             {
1235                 *term_sub = 0;
1236                 return ZEBRA_OK;
1237             }
1238             strcat(term_dict, ".*)");
1239             break;
1240         case 101:        /* process # in term */
1241             term_dict[j++] = '(';
1242             if (!term_101(zh->reg->zebra_maps, reg_type,
1243                           &termp, term_dict + j, space_split, term_dst))
1244             {
1245                 *term_sub = 0;
1246                 return ZEBRA_OK;
1247             }
1248             strcat(term_dict, ")");
1249             break;
1250         case 102:        /* Regexp-1 */
1251             term_dict[j++] = '(';
1252             if (!term_102(zh->reg->zebra_maps, reg_type,
1253                           &termp, term_dict + j, space_split, term_dst))
1254             {
1255                 *term_sub = 0;
1256                 return ZEBRA_OK;
1257             }
1258             strcat(term_dict, ")");
1259             break;
1260         case 103:       /* Regexp-2 */
1261             regex_range = 1;
1262             term_dict[j++] = '(';
1263             init_pos = 2;
1264             if (!term_103(zh->reg->zebra_maps, reg_type,
1265                           &termp, term_dict + j, &regex_range,
1266                           space_split, term_dst))
1267             {
1268                 *term_sub = 0;
1269                 return ZEBRA_OK;
1270             }
1271             strcat(term_dict, ")");
1272             break;
1273         case 104:        /* process # and ! in term */
1274             term_dict[j++] = '(';
1275             if (!term_104(zh->reg->zebra_maps, reg_type,
1276                           &termp, term_dict + j, space_split, term_dst))
1277             {
1278                 *term_sub = 0;
1279                 return ZEBRA_OK;
1280             }
1281             strcat(term_dict, ")");
1282             break;
1283         case 105:        /* process * and ! in term */
1284             term_dict[j++] = '(';
1285             if (!term_105(zh->reg->zebra_maps, reg_type,
1286                           &termp, term_dict + j, space_split, term_dst, 1))
1287             {
1288                 *term_sub = 0;
1289                 return ZEBRA_OK;
1290             }
1291             strcat(term_dict, ")");
1292             break;
1293         case 106:        /* process * and ! in term */
1294             term_dict[j++] = '(';
1295             if (!term_105(zh->reg->zebra_maps, reg_type,
1296                           &termp, term_dict + j, space_split, term_dst, 0))
1297             {
1298                 *term_sub = 0;
1299                 return ZEBRA_OK;
1300             }
1301             strcat(term_dict, ")");
1302             break;
1303         default:
1304             zebra_setError_zint(zh,
1305                                 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1306                                 truncation_value);
1307             return ZEBRA_FAIL;
1308         }
1309         if (attr_ok)
1310         {
1311             char buf[80];
1312             const char *input = term_dict + prefix_len;
1313             esc_str(buf, sizeof(buf), input, strlen(input));
1314         }
1315         if (attr_ok)
1316         {
1317             yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_dict+prefix_len);
1318             r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
1319                                  grep_info, &max_pos, init_pos,
1320                                  grep_handle);
1321             if (r)
1322                 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1323         }
1324     }
1325     if (!bases_ok)
1326     {
1327         zebra_setError(zh, errCode, errString);
1328         return ZEBRA_FAIL;
1329     }
1330     *term_sub = termp;
1331     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1332     return ZEBRA_OK;
1333 }
1334
1335
1336 /* convert APT search term to UTF8 */
1337 static ZEBRA_RES zapt_term_to_utf8(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1338                                    char *termz)
1339 {
1340     size_t sizez;
1341     Z_Term *term = zapt->term;
1342
1343     switch (term->which)
1344     {
1345     case Z_Term_general:
1346         if (zh->iconv_to_utf8 != 0)
1347         {
1348             char *inbuf = term->u.general->buf;
1349             size_t inleft = term->u.general->len;
1350             char *outbuf = termz;
1351             size_t outleft = IT_MAX_WORD-1;
1352             size_t ret;
1353
1354             ret = yaz_iconv(zh->iconv_to_utf8, &inbuf, &inleft,
1355                         &outbuf, &outleft);
1356             if (ret == (size_t)(-1))
1357             {
1358                 ret = yaz_iconv(zh->iconv_to_utf8, 0, 0, 0, 0);
1359                 zebra_setError(
1360                     zh, 
1361                     YAZ_BIB1_QUERY_TERM_INCLUDES_CHARS_THAT_DO_NOT_TRANSLATE_INTO_,
1362                     0);
1363                 return ZEBRA_FAIL;
1364             }
1365             *outbuf = 0;
1366         }
1367         else
1368         {
1369             sizez = term->u.general->len;
1370             if (sizez > IT_MAX_WORD-1)
1371                 sizez = IT_MAX_WORD-1;
1372             memcpy (termz, term->u.general->buf, sizez);
1373             termz[sizez] = '\0';
1374         }
1375         break;
1376     case Z_Term_characterString:
1377         sizez = strlen(term->u.characterString);
1378         if (sizez > IT_MAX_WORD-1)
1379             sizez = IT_MAX_WORD-1;
1380         memcpy (termz, term->u.characterString, sizez);
1381         termz[sizez] = '\0';
1382         break;
1383     default:
1384         zebra_setError(zh, YAZ_BIB1_UNSUPP_CODED_VALUE_FOR_TERM, 0);
1385         return ZEBRA_FAIL;
1386     }
1387     return ZEBRA_OK;
1388 }
1389
1390 /* convert APT SCAN term to internal cmap */
1391 static ZEBRA_RES trans_scan_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1392                                  char *termz, int reg_type)
1393 {
1394     char termz0[IT_MAX_WORD];
1395
1396     if (zapt_term_to_utf8(zh, zapt, termz0) == ZEBRA_FAIL)
1397         return ZEBRA_FAIL;    /* error */
1398     else
1399     {
1400         const char **map;
1401         const char *cp = (const char *) termz0;
1402         const char *cp_end = cp + strlen(cp);
1403         const char *src;
1404         int i = 0;
1405         const char *space_map = NULL;
1406         int len;
1407             
1408         while ((len = (cp_end - cp)) > 0)
1409         {
1410             map = zebra_maps_input(zh->reg->zebra_maps, reg_type, &cp, len, 0);
1411             if (**map == *CHR_SPACE)
1412                 space_map = *map;
1413             else
1414             {
1415                 if (i && space_map)
1416                     for (src = space_map; *src; src++)
1417                         termz[i++] = *src;
1418                 space_map = NULL;
1419                 for (src = *map; *src; src++)
1420                     termz[i++] = *src;
1421             }
1422         }
1423         termz[i] = '\0';
1424     }
1425     return ZEBRA_OK;
1426 }
1427
1428 char *normalize_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1429                      const char *termz, NMEM stream, unsigned reg_id)
1430 {
1431     WRBUF wrbuf = 0;
1432     AttrType truncation;
1433     int truncation_value;
1434     char *ex_list = 0;
1435
1436     attr_init(&truncation, zapt, 5);
1437     truncation_value = attr_find(&truncation, NULL);
1438
1439     switch (truncation_value)
1440     {
1441     default:
1442         ex_list = "";
1443         break;
1444     case 101:
1445         ex_list = "#";
1446         break;
1447     case 102:
1448     case 103:
1449         ex_list = 0;
1450         break;
1451     case 104:
1452         ex_list = "!#";
1453         break;
1454     case 105:
1455         ex_list = "!*";
1456         break;
1457     }
1458     if (ex_list)
1459         wrbuf = zebra_replace(zh->reg->zebra_maps, reg_id, ex_list,
1460                               termz, strlen(termz));
1461     if (!wrbuf)
1462         return nmem_strdup(stream, termz);
1463     else
1464     {
1465         char *buf = (char*) nmem_malloc(stream, wrbuf_len(wrbuf)+1);
1466         memcpy (buf, wrbuf_buf(wrbuf), wrbuf_len(wrbuf));
1467         buf[wrbuf_len(wrbuf)] = '\0';
1468         return buf;
1469     }
1470 }
1471
1472 static void grep_info_delete(struct grep_info *grep_info)
1473 {
1474 #ifdef TERM_COUNT
1475     xfree(grep_info->term_no);
1476 #endif
1477     xfree(grep_info->isam_p_buf);
1478 }
1479
1480 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1481                                    Z_AttributesPlusTerm *zapt,
1482                                    struct grep_info *grep_info,
1483                                    int reg_type)
1484 {
1485     AttrType termset;
1486     int termset_value_numeric;
1487     const char *termset_value_string;
1488
1489 #ifdef TERM_COUNT
1490     grep_info->term_no = 0;
1491 #endif
1492     grep_info->isam_p_size = 0;
1493     grep_info->isam_p_buf = NULL;
1494     grep_info->zh = zh;
1495     grep_info->reg_type = reg_type;
1496     grep_info->termset = 0;
1497
1498     if (!zapt)
1499         return ZEBRA_OK;
1500     attr_init(&termset, zapt, 8);
1501     termset_value_numeric =
1502         attr_find_ex(&termset, NULL, &termset_value_string);
1503     if (termset_value_numeric != -1)
1504     {
1505         char resname[32];
1506         const char *termset_name = 0;
1507         if (termset_value_numeric != -2)
1508         {
1509     
1510             sprintf(resname, "%d", termset_value_numeric);
1511             termset_name = resname;
1512         }
1513         else
1514             termset_name = termset_value_string;
1515         yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1516         grep_info->termset = resultSetAdd(zh, termset_name, 1);
1517         if (!grep_info->termset)
1518         {
1519             zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1520             return ZEBRA_FAIL;
1521         }
1522     }
1523     return ZEBRA_OK;
1524 }
1525                                
1526
1527 static ZEBRA_RES term_list_trunc(ZebraHandle zh,
1528                                  Z_AttributesPlusTerm *zapt,
1529                                  const char *termz_org,
1530                                  oid_value attributeSet,
1531                                  NMEM stream,
1532                                  int reg_type, int complete_flag,
1533                                  const char *rank_type, int xpath_use,
1534                                  int num_bases, char **basenames, 
1535                                  NMEM rset_nmem,
1536                                  RSET **result_sets, int *num_result_sets,
1537                                  struct rset_key_control *kc)
1538 {
1539     char term_dst[IT_MAX_WORD+1];
1540     struct grep_info grep_info;
1541     char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type);
1542     const char *termp = termz;
1543     int alloc_sets = 0;
1544
1545     *num_result_sets = 0;
1546     *term_dst = 0;
1547     if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1548         return ZEBRA_FAIL;
1549     while(1)
1550     { 
1551         ZEBRA_RES res;
1552
1553         if (alloc_sets == *num_result_sets)
1554         {
1555             int add = 10;
1556             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1557                                               sizeof(*rnew));
1558             if (alloc_sets)
1559                 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1560             alloc_sets = alloc_sets + add;
1561             *result_sets = rnew;
1562         }
1563         res = term_trunc(zh, zapt, &termp, attributeSet,
1564                          stream, &grep_info,
1565                          reg_type, complete_flag,
1566                          num_bases, basenames,
1567                          term_dst, rank_type,
1568                          xpath_use, rset_nmem,
1569                          &(*result_sets)[*num_result_sets],
1570                          kc);
1571         if (res != ZEBRA_OK)
1572         {
1573             int i;
1574             for (i = 0; i < *num_result_sets; i++)
1575                 rset_delete((*result_sets)[i]);
1576             grep_info_delete (&grep_info);
1577             return res;
1578         }
1579         if ((*result_sets)[*num_result_sets] == 0)
1580             break;
1581         (*num_result_sets)++;
1582     }
1583     grep_info_delete(&grep_info);
1584     return ZEBRA_OK;
1585 }
1586
1587 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1588                                        Z_AttributesPlusTerm *zapt,
1589                                        const char *termz_org,
1590                                        oid_value attributeSet,
1591                                        NMEM stream,
1592                                        int reg_type, int complete_flag,
1593                                        const char *rank_type, int xpath_use,
1594                                        int num_bases, char **basenames, 
1595                                        NMEM rset_nmem,
1596                                        RSET *rset,
1597                                        struct rset_key_control *kc)
1598 {
1599     RSET *result_sets = 0;
1600     int num_result_sets = 0;
1601     ZEBRA_RES res =
1602         term_list_trunc(zh, zapt, termz_org, attributeSet,
1603                         stream, reg_type, complete_flag,
1604                         rank_type, xpath_use,
1605                         num_bases, basenames,
1606                         rset_nmem,
1607                         &result_sets, &num_result_sets, kc);
1608     if (res != ZEBRA_OK)
1609         return res;
1610     if (num_result_sets == 0)
1611         *rset = rsnull_create (rset_nmem, kc); 
1612     else if (num_result_sets == 1)
1613         *rset = result_sets[0];
1614     else
1615         *rset = rsprox_create(rset_nmem, kc, kc->scope,
1616                               num_result_sets, result_sets,
1617                               1 /* ordered */, 0 /* exclusion */,
1618                               3 /* relation */, 1 /* distance */);
1619     if (!*rset)
1620         return ZEBRA_FAIL;
1621     return ZEBRA_OK;
1622 }
1623
1624 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1625                                         Z_AttributesPlusTerm *zapt,
1626                                         const char *termz_org,
1627                                         oid_value attributeSet,
1628                                         NMEM stream,
1629                                         int reg_type, int complete_flag,
1630                                         const char *rank_type,
1631                                         int xpath_use,
1632                                         int num_bases, char **basenames,
1633                                         NMEM rset_nmem,
1634                                         RSET *rset,
1635                                         struct rset_key_control *kc)
1636 {
1637     RSET *result_sets = 0;
1638     int num_result_sets = 0;
1639     ZEBRA_RES res =
1640         term_list_trunc(zh, zapt, termz_org, attributeSet,
1641                         stream, reg_type, complete_flag,
1642                         rank_type, xpath_use,
1643                         num_bases, basenames,
1644                         rset_nmem,
1645                         &result_sets, &num_result_sets, kc);
1646     if (res != ZEBRA_OK)
1647         return res;
1648     if (num_result_sets == 0)
1649         *rset = rsnull_create (rset_nmem, kc); 
1650     else if (num_result_sets == 1)
1651         *rset = result_sets[0];
1652     else
1653         *rset = rsmulti_or_create(rset_nmem, kc, kc->scope,
1654                                   num_result_sets, result_sets);
1655     if (!*rset)
1656         return ZEBRA_FAIL;
1657     return ZEBRA_OK;
1658 }
1659
1660 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1661                                          Z_AttributesPlusTerm *zapt,
1662                                          const char *termz_org,
1663                                          oid_value attributeSet,
1664                                          NMEM stream,
1665                                          int reg_type, int complete_flag,
1666                                          const char *rank_type, 
1667                                          int xpath_use,
1668                                          int num_bases, char **basenames,
1669                                          NMEM rset_nmem,
1670                                          RSET *rset,
1671                                          struct rset_key_control *kc)
1672 {
1673     RSET *result_sets = 0;
1674     int num_result_sets = 0;
1675     ZEBRA_RES res =
1676         term_list_trunc(zh, zapt, termz_org, attributeSet,
1677                         stream, reg_type, complete_flag,
1678                         rank_type, xpath_use,
1679                         num_bases, basenames,
1680                         rset_nmem,
1681                         &result_sets, &num_result_sets,
1682                         kc);
1683     if (res != ZEBRA_OK)
1684         return res;
1685     if (num_result_sets == 0)
1686         *rset = rsnull_create (rset_nmem, kc); 
1687     else if (num_result_sets == 1)
1688         *rset = result_sets[0];
1689     else
1690         *rset = rsmulti_and_create(rset_nmem, kc, kc->scope,
1691                                    num_result_sets, result_sets);
1692     if (!*rset)
1693         return ZEBRA_FAIL;
1694     return ZEBRA_OK;
1695 }
1696
1697 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1698                             const char **term_sub,
1699                             char *term_dict,
1700                             oid_value attributeSet,
1701                             struct grep_info *grep_info,
1702                             int *max_pos,
1703                             int reg_type,
1704                             char *term_dst,
1705                             int *error_code)
1706 {
1707     AttrType relation;
1708     int relation_value;
1709     int term_value;
1710     int r;
1711     char *term_tmp = term_dict + strlen(term_dict);
1712
1713     *error_code = 0;
1714     attr_init(&relation, zapt, 2);
1715     relation_value = attr_find(&relation, NULL);
1716
1717     yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1718
1719     if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1720                   term_dst))
1721         return 0;
1722     term_value = atoi (term_tmp);
1723     switch (relation_value)
1724     {
1725     case 1:
1726         yaz_log(log_level_rpn, "Relation <");
1727         gen_regular_rel(term_tmp, term_value-1, 1);
1728         break;
1729     case 2:
1730         yaz_log(log_level_rpn, "Relation <=");
1731         gen_regular_rel(term_tmp, term_value, 1);
1732         break;
1733     case 4:
1734         yaz_log(log_level_rpn, "Relation >=");
1735         gen_regular_rel(term_tmp, term_value, 0);
1736         break;
1737     case 5:
1738         yaz_log(log_level_rpn, "Relation >");
1739         gen_regular_rel(term_tmp, term_value+1, 0);
1740         break;
1741     case -1:
1742     case 3:
1743         yaz_log(log_level_rpn, "Relation =");
1744         sprintf(term_tmp, "(0*%d)", term_value);
1745         break;
1746     default:
1747         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1748         return 0;
1749     }
1750     yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_tmp);
1751     r = dict_lookup_grep(zh->reg->dict, term_dict, 0, grep_info, max_pos,
1752                           0, grep_handle);
1753     if (r)
1754         yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1755     yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1756     return 1;
1757 }
1758
1759 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1760                               const char **term_sub, 
1761                               oid_value attributeSet,
1762                               struct grep_info *grep_info,
1763                               int reg_type, int complete_flag,
1764                               int num_bases, char **basenames,
1765                               char *term_dst, int xpath_use, NMEM stream)
1766 {
1767     char term_dict[2*IT_MAX_WORD+2];
1768     int r, base_no;
1769     AttrType use;
1770     int use_value;
1771     const char *use_string = 0;
1772     oid_value curAttributeSet = attributeSet;
1773     const char *termp;
1774     struct rpn_char_map_info rcmi;
1775
1776     int bases_ok = 0;     /* no of databases with OK attribute */
1777     int errCode = 0;      /* err code (if any is not OK) */
1778     char *errString = 0;  /* addinfo */
1779
1780     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1781     attr_init(&use, zapt, 1);
1782     use_value = attr_find_ex(&use, &curAttributeSet, &use_string);
1783
1784     if (use_value == -1)
1785         use_value = 1016;
1786
1787     for (base_no = 0; base_no < num_bases; base_no++)
1788     {
1789         attent attp;
1790         data1_local_attribute id_xpath_attr;
1791         data1_local_attribute *local_attr;
1792         int max_pos, prefix_len = 0;
1793         int relation_error = 0;
1794
1795         termp = *term_sub;
1796         if (use_value == -2)  /* string attribute (assume IDXPATH/any) */
1797         {
1798             use_value = xpath_use;
1799             attp.local_attributes = &id_xpath_attr;
1800             attp.attset_ordinal = VAL_IDXPATH;
1801             id_xpath_attr.next = 0;
1802             id_xpath_attr.local = use_value;
1803         }
1804         else if (curAttributeSet == VAL_IDXPATH)
1805         {
1806             attp.local_attributes = &id_xpath_attr;
1807             attp.attset_ordinal = VAL_IDXPATH;
1808             id_xpath_attr.next = 0;
1809             id_xpath_attr.local = use_value;
1810         }
1811         else
1812         {
1813             if ((r = att_getentbyatt (zh, &attp, curAttributeSet, use_value,
1814                                             use_string)))
1815             {
1816                 yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
1817                       curAttributeSet, use_value, r);
1818                 if (r == -1)
1819                 {
1820                     errCode = YAZ_BIB1_UNSUPP_USE_ATTRIBUTE;
1821                     if (use_string)
1822                         errString = nmem_strdup(stream, use_string);
1823                     else
1824                         errString = nmem_strdup_i (stream, use_value);
1825                 }
1826                 else
1827                     errCode = YAZ_BIB1_UNSUPP_ATTRIBUTE_SET;
1828                 continue;
1829             }
1830         }
1831         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1832         {
1833             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1834                            basenames[base_no]);
1835             return ZEBRA_FAIL;
1836         }
1837         for (local_attr = attp.local_attributes; local_attr;
1838              local_attr = local_attr->next)
1839         {
1840             int ord;
1841             char ord_buf[32];
1842             int i, ord_len;
1843
1844             ord = zebraExplain_lookup_attr_su(zh->reg->zei,
1845                                               attp.attset_ordinal,
1846                                               local_attr->local);
1847             if (ord < 0)
1848                 continue;
1849             if (prefix_len)
1850                 term_dict[prefix_len++] = '|';
1851             else
1852                 term_dict[prefix_len++] = '(';
1853
1854             ord_len = key_SU_encode (ord, ord_buf);
1855             for (i = 0; i<ord_len; i++)
1856             {
1857                 term_dict[prefix_len++] = 1;
1858                 term_dict[prefix_len++] = ord_buf[i];
1859             }
1860         }
1861         if (!prefix_len)
1862         {
1863             zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_USE_ATTRIBUTE, use_value);
1864             continue;
1865         }
1866         bases_ok++;
1867         term_dict[prefix_len++] = ')';        
1868         term_dict[prefix_len++] = 1;
1869         term_dict[prefix_len++] = reg_type;
1870         yaz_log(YLOG_DEBUG, "reg_type = %d", term_dict[prefix_len-1]);
1871         term_dict[prefix_len] = '\0';
1872         if (!numeric_relation(zh, zapt, &termp, term_dict,
1873                               attributeSet, grep_info, &max_pos, reg_type,
1874                               term_dst, &relation_error))
1875         {
1876             if (relation_error)
1877             {
1878                 zebra_setError(zh, relation_error, 0);
1879                 return ZEBRA_FAIL;
1880             }
1881             *term_sub = 0;
1882             return ZEBRA_OK;
1883         }
1884     }
1885     if (!bases_ok)
1886     {
1887         zebra_setError(zh, errCode, errString);
1888         return ZEBRA_FAIL;
1889     }
1890     *term_sub = termp;
1891     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1892     return ZEBRA_OK;
1893 }
1894
1895 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1896                                         Z_AttributesPlusTerm *zapt,
1897                                         const char *termz,
1898                                         oid_value attributeSet,
1899                                         NMEM stream,
1900                                         int reg_type, int complete_flag,
1901                                         const char *rank_type, int xpath_use,
1902                                         int num_bases, char **basenames,
1903                                         NMEM rset_nmem,
1904                                         RSET *rset,
1905                                         struct rset_key_control *kc)
1906 {
1907     char term_dst[IT_MAX_WORD+1];
1908     const char *termp = termz;
1909     RSET *result_sets = 0;
1910     int num_result_sets = 0;
1911     ZEBRA_RES res;
1912     struct grep_info grep_info;
1913     int alloc_sets = 0;
1914
1915     yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1916     if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1917         return ZEBRA_FAIL;
1918     while (1)
1919     { 
1920         if (alloc_sets == num_result_sets)
1921         {
1922             int add = 10;
1923             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1924                                               sizeof(*rnew));
1925             if (alloc_sets)
1926                 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1927             alloc_sets = alloc_sets + add;
1928             result_sets = rnew;
1929         }
1930         yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1931         grep_info.isam_p_indx = 0;
1932         res = numeric_term(zh, zapt, &termp, attributeSet, &grep_info,
1933                            reg_type, complete_flag, num_bases, basenames,
1934                            term_dst, xpath_use,
1935                            stream);
1936         if (res == ZEBRA_FAIL || termp == 0)
1937             break;
1938         yaz_log(YLOG_DEBUG, "term: %s", term_dst);
1939         result_sets[num_result_sets] =
1940             rset_trunc(zh, grep_info.isam_p_buf,
1941                        grep_info.isam_p_indx, term_dst,
1942                        strlen(term_dst), rank_type,
1943                        0 /* preserve position */,
1944                        zapt->term->which, rset_nmem, 
1945                        kc, kc->scope);
1946         if (!result_sets[num_result_sets])
1947             break;
1948         num_result_sets++;
1949     }
1950     grep_info_delete(&grep_info);
1951     if (termp)
1952     {
1953         int i;
1954         for (i = 0; i<num_result_sets; i++)
1955             rset_delete(result_sets[i]);
1956         return ZEBRA_FAIL;
1957     }
1958     if (num_result_sets == 0)
1959         *rset = rsnull_create(rset_nmem, kc);
1960     if (num_result_sets == 1)
1961         *rset = result_sets[0];
1962     else
1963         *rset = rsmulti_and_create(rset_nmem, kc, kc->scope,
1964                                    num_result_sets, result_sets);
1965     if (!*rset)
1966         return ZEBRA_FAIL;
1967     return ZEBRA_OK;
1968 }
1969
1970 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1971                                       Z_AttributesPlusTerm *zapt,
1972                                       const char *termz,
1973                                       oid_value attributeSet,
1974                                       NMEM stream,
1975                                       const char *rank_type, NMEM rset_nmem,
1976                                       RSET *rset,
1977                                       struct rset_key_control *kc)
1978 {
1979     RSFD rsfd;
1980     struct it_key key;
1981     int sys;
1982     *rset = rstemp_create(rset_nmem, kc, kc->scope,
1983                           res_get (zh->res, "setTmpDir"),0 );
1984     rsfd = rset_open(*rset, RSETF_WRITE);
1985     
1986     sys = atoi(termz);
1987     if (sys <= 0)
1988         sys = 1;
1989     key.mem[0] = sys;
1990     key.mem[1] = 1;
1991     key.len = 2;
1992     rset_write (rsfd, &key);
1993     rset_close (rsfd);
1994     return ZEBRA_OK;
1995 }
1996
1997 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1998                                oid_value attributeSet, NMEM stream,
1999                                Z_SortKeySpecList *sort_sequence,
2000                                const char *rank_type,
2001                                RSET *rset,
2002                                struct rset_key_control *kc)
2003 {
2004     int i;
2005     int sort_relation_value;
2006     AttrType sort_relation_type;
2007     Z_SortKeySpec *sks;
2008     Z_SortKey *sk;
2009     int oid[OID_SIZE];
2010     oident oe;
2011     char termz[20];
2012     
2013     attr_init(&sort_relation_type, zapt, 7);
2014     sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
2015
2016     if (!sort_sequence->specs)
2017     {
2018         sort_sequence->num_specs = 10;
2019         sort_sequence->specs = (Z_SortKeySpec **)
2020             nmem_malloc(stream, sort_sequence->num_specs *
2021                          sizeof(*sort_sequence->specs));
2022         for (i = 0; i<sort_sequence->num_specs; i++)
2023             sort_sequence->specs[i] = 0;
2024     }
2025     if (zapt->term->which != Z_Term_general)
2026         i = 0;
2027     else
2028         i = atoi_n ((char *) zapt->term->u.general->buf,
2029                     zapt->term->u.general->len);
2030     if (i >= sort_sequence->num_specs)
2031         i = 0;
2032     sprintf(termz, "%d", i);
2033
2034     oe.proto = PROTO_Z3950;
2035     oe.oclass = CLASS_ATTSET;
2036     oe.value = attributeSet;
2037     if (!oid_ent_to_oid (&oe, oid))
2038         return ZEBRA_FAIL;
2039
2040     sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
2041     sks->sortElement = (Z_SortElement *)
2042         nmem_malloc(stream, sizeof(*sks->sortElement));
2043     sks->sortElement->which = Z_SortElement_generic;
2044     sk = sks->sortElement->u.generic = (Z_SortKey *)
2045         nmem_malloc(stream, sizeof(*sk));
2046     sk->which = Z_SortKey_sortAttributes;
2047     sk->u.sortAttributes = (Z_SortAttributes *)
2048         nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
2049
2050     sk->u.sortAttributes->id = oid;
2051     sk->u.sortAttributes->list = zapt->attributes;
2052
2053     sks->sortRelation = (int *)
2054         nmem_malloc(stream, sizeof(*sks->sortRelation));
2055     if (sort_relation_value == 1)
2056         *sks->sortRelation = Z_SortKeySpec_ascending;
2057     else if (sort_relation_value == 2)
2058         *sks->sortRelation = Z_SortKeySpec_descending;
2059     else 
2060         *sks->sortRelation = Z_SortKeySpec_ascending;
2061
2062     sks->caseSensitivity = (int *)
2063         nmem_malloc(stream, sizeof(*sks->caseSensitivity));
2064     *sks->caseSensitivity = 0;
2065
2066     sks->which = Z_SortKeySpec_null;
2067     sks->u.null = odr_nullval ();
2068     sort_sequence->specs[i] = sks;
2069     *rset = rsnull_create (NULL, kc);
2070     return ZEBRA_OK;
2071 }
2072
2073
2074 static int parse_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2075                        oid_value attributeSet,
2076                        struct xpath_location_step *xpath, int max, NMEM mem)
2077 {
2078     oid_value curAttributeSet = attributeSet;
2079     AttrType use;
2080     const char *use_string = 0;
2081     
2082     attr_init(&use, zapt, 1);
2083     attr_find_ex(&use, &curAttributeSet, &use_string);
2084
2085     if (!use_string || *use_string != '/')
2086         return -1;
2087
2088     return zebra_parse_xpath_str(use_string, xpath, max, mem);
2089 }
2090  
2091                
2092
2093 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
2094                         int reg_type, const char *term, int use,
2095                         oid_value curAttributeSet, NMEM rset_nmem,
2096                         struct rset_key_control *kc)
2097 {
2098     RSET rset;
2099     struct grep_info grep_info;
2100     char term_dict[2048];
2101     char ord_buf[32];
2102     int prefix_len = 0;
2103     int ord = zebraExplain_lookup_attr_su(zh->reg->zei, curAttributeSet, use);
2104     int ord_len, i, r, max_pos;
2105     int term_type = Z_Term_characterString;
2106     const char *flags = "void";
2107
2108     if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, '0') == ZEBRA_FAIL)
2109         return rsnull_create(rset_nmem, kc);
2110     
2111     if (ord < 0)
2112         return rsnull_create(rset_nmem, kc);
2113     if (prefix_len)
2114         term_dict[prefix_len++] = '|';
2115     else
2116         term_dict[prefix_len++] = '(';
2117     
2118     ord_len = key_SU_encode (ord, ord_buf);
2119     for (i = 0; i<ord_len; i++)
2120     {
2121         term_dict[prefix_len++] = 1;
2122         term_dict[prefix_len++] = ord_buf[i];
2123     }
2124     term_dict[prefix_len++] = ')';
2125     term_dict[prefix_len++] = 1;
2126     term_dict[prefix_len++] = reg_type;
2127     
2128     strcpy(term_dict+prefix_len, term);
2129     
2130     grep_info.isam_p_indx = 0;
2131     r = dict_lookup_grep(zh->reg->dict, term_dict, 0,
2132                           &grep_info, &max_pos, 0, grep_handle);
2133     yaz_log(YLOG_DEBUG, "%s %d positions", term,
2134              grep_info.isam_p_indx);
2135     rset = rset_trunc(zh, grep_info.isam_p_buf,
2136                       grep_info.isam_p_indx, term, strlen(term),
2137                       flags, 1, term_type,rset_nmem,
2138                       kc, kc->scope);
2139     grep_info_delete(&grep_info);
2140     return rset;
2141 }
2142
2143 static
2144 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2145                            oid_value attributeSet,
2146                            int num_bases, char **basenames,
2147                            NMEM stream, const char *rank_type, RSET rset,
2148                            int xpath_len, struct xpath_location_step *xpath,
2149                            NMEM rset_nmem,
2150                            RSET *rset_out,
2151                            struct rset_key_control *kc)
2152 {
2153     oid_value curAttributeSet = attributeSet;
2154     int base_no;
2155     int i;
2156
2157     if (xpath_len < 0)
2158     {
2159         *rset_out = rset;
2160         return ZEBRA_OK;
2161     }
2162
2163     yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2164     for (i = 0; i<xpath_len; i++)
2165     {
2166         yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2167
2168     }
2169
2170     curAttributeSet = VAL_IDXPATH;
2171
2172     /*
2173       //a    ->    a/.*
2174       //a/b  ->    b/a/.*
2175       /a     ->    a/
2176       /a/b   ->    b/a/
2177
2178       /      ->    none
2179
2180    a[@attr = value]/b[@other = othervalue]
2181
2182  /e/@a val      range(e/,range(@a,freetext(w,1015,val),@a),e/)
2183  /a/b val       range(b/a/,freetext(w,1016,val),b/a/)
2184  /a/b/@c val    range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2185  /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2186  /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2187  /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
2188       
2189     */
2190
2191     dict_grep_cmap (zh->reg->dict, 0, 0);
2192
2193     for (base_no = 0; base_no < num_bases; base_no++)
2194     {
2195         int level = xpath_len;
2196         int first_path = 1;
2197         
2198         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2199         {
2200             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2201                            basenames[base_no]);
2202             *rset_out = rset;
2203             return ZEBRA_FAIL;
2204         }
2205         while (--level >= 0)
2206         {
2207             char xpath_rev[128];
2208             int i, len;
2209             RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
2210
2211             *xpath_rev = 0;
2212             len = 0;
2213             for (i = level; i >= 1; --i)
2214             {
2215                 const char *cp = xpath[i].part;
2216                 if (*cp)
2217                 {
2218                     for (;*cp; cp++)
2219                         if (*cp == '*')
2220                         {
2221                             memcpy (xpath_rev + len, "[^/]*", 5);
2222                             len += 5;
2223                         }
2224                         else if (*cp == ' ')
2225                         {
2226
2227                             xpath_rev[len++] = 1;
2228                             xpath_rev[len++] = ' ';
2229                         }
2230
2231                         else
2232                             xpath_rev[len++] = *cp;
2233                     xpath_rev[len++] = '/';
2234                 }
2235                 else if (i == 1)  /* // case */
2236                 {
2237                     xpath_rev[len++] = '.';
2238                     xpath_rev[len++] = '*';
2239                 }
2240             }
2241             xpath_rev[len] = 0;
2242
2243             if (xpath[level].predicate &&
2244                 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2245                 xpath[level].predicate->u.relation.name[0])
2246             {
2247                 WRBUF wbuf = wrbuf_alloc();
2248                 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2249                 if (xpath[level].predicate->u.relation.value)
2250                 {
2251                     const char *cp = xpath[level].predicate->u.relation.value;
2252                     wrbuf_putc(wbuf, '=');
2253                     
2254                     while (*cp)
2255                     {
2256                         if (strchr(REGEX_CHARS, *cp))
2257                             wrbuf_putc(wbuf, '\\');
2258                         wrbuf_putc(wbuf, *cp);
2259                         cp++;
2260                     }
2261                 }
2262                 wrbuf_puts(wbuf, "");
2263                 rset_attr = xpath_trunc(
2264                     zh, stream, '0', wrbuf_buf(wbuf), 3, 
2265                     curAttributeSet, rset_nmem, kc);
2266                 wrbuf_free(wbuf, 1);
2267             } 
2268             else 
2269             {
2270                 if (!first_path)
2271                     continue;
2272             }
2273             yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level, xpath_rev);
2274             if (strlen(xpath_rev))
2275             {
2276                 rset_start_tag = xpath_trunc(zh, stream, '0', 
2277                         xpath_rev, 1, curAttributeSet, rset_nmem, kc);
2278             
2279                 rset_end_tag = xpath_trunc(zh, stream, '0', 
2280                         xpath_rev, 2, curAttributeSet, rset_nmem, kc);
2281
2282                 rset = rsbetween_create(rset_nmem, kc, kc->scope,
2283                                         rset_start_tag, rset,
2284                                         rset_end_tag, rset_attr);
2285             }
2286             first_path = 0;
2287         }
2288     }
2289     *rset_out = rset;
2290     return ZEBRA_OK;
2291 }
2292
2293 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2294                                 oid_value attributeSet, NMEM stream,
2295                                 Z_SortKeySpecList *sort_sequence,
2296                                 int num_bases, char **basenames, 
2297                                 NMEM rset_nmem,
2298                                 RSET *rset,
2299                                 struct rset_key_control *kc)
2300 {
2301     ZEBRA_RES res = ZEBRA_OK;
2302     unsigned reg_id;
2303     char *search_type = NULL;
2304     char rank_type[128];
2305     int complete_flag;
2306     int sort_flag;
2307     char termz[IT_MAX_WORD+1];
2308     int xpath_len;
2309     int xpath_use = 0;
2310     struct xpath_location_step xpath[10];
2311
2312     if (!log_level_set)
2313     {
2314         log_level_rpn = yaz_log_module_level("rpn");
2315         log_level_set = 1;
2316     }
2317     zebra_maps_attr(zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2318                     rank_type, &complete_flag, &sort_flag);
2319     
2320     yaz_log(YLOG_DEBUG, "reg_id=%c", reg_id);
2321     yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2322     yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2323     yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2324
2325     if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2326         return ZEBRA_FAIL;
2327
2328     if (sort_flag)
2329         return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2330                              rank_type, rset, kc);
2331     xpath_len = parse_xpath(zh, zapt, attributeSet, xpath, 10, stream);
2332     if (xpath_len >= 0)
2333     {
2334         xpath_use = 1016;
2335         if (xpath[xpath_len-1].part[0] == '@')
2336             xpath_use = 1015;
2337     }
2338
2339     if (!strcmp(search_type, "phrase"))
2340     {
2341         res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2342                                     reg_id, complete_flag, rank_type,
2343                                     xpath_use,
2344                                     num_bases, basenames, rset_nmem,
2345                                     rset, kc);
2346     }
2347     else if (!strcmp(search_type, "and-list"))
2348     {
2349         res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2350                                       reg_id, complete_flag, rank_type,
2351                                       xpath_use,
2352                                       num_bases, basenames, rset_nmem,
2353                                       rset, kc);
2354     }
2355     else if (!strcmp(search_type, "or-list"))
2356     {
2357         res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2358                                      reg_id, complete_flag, rank_type,
2359                                      xpath_use,
2360                                      num_bases, basenames, rset_nmem,
2361                                      rset, kc);
2362     }
2363     else if (!strcmp(search_type, "local"))
2364     {
2365         res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2366                                    rank_type, rset_nmem, rset, kc);
2367     }
2368     else if (!strcmp(search_type, "numeric"))
2369     {
2370         res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2371                                      reg_id, complete_flag, rank_type,
2372                                      xpath_use,
2373                                      num_bases, basenames, rset_nmem,
2374                                      rset, kc);
2375     }
2376     else
2377     {
2378         zebra_setError(zh, YAZ_BIB1_UNSUPP_STRUCTURE_ATTRIBUTE, 0);
2379         res = ZEBRA_FAIL;
2380     }
2381     if (res != ZEBRA_OK)
2382         return res;
2383     if (!*rset)
2384         return ZEBRA_FAIL;
2385     return rpn_search_xpath(zh, attributeSet, num_bases, basenames,
2386                             stream, rank_type, *rset, 
2387                             xpath_len, xpath, rset_nmem, rset, kc);
2388 }
2389
2390 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2391                                       oid_value attributeSet, 
2392                                       NMEM stream, NMEM rset_nmem,
2393                                       Z_SortKeySpecList *sort_sequence,
2394                                       int num_bases, char **basenames,
2395                                       RSET **result_sets, int *num_result_sets,
2396                                       Z_Operator *parent_op,
2397                                       struct rset_key_control *kc);
2398
2399 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2400                          oid_value attributeSet, 
2401                          NMEM stream, NMEM rset_nmem,
2402                          Z_SortKeySpecList *sort_sequence,
2403                          int num_bases, char **basenames,
2404                          RSET *result_set)
2405 {
2406     RSET *result_sets = 0;
2407     int num_result_sets = 0;
2408     ZEBRA_RES res;
2409     struct rset_key_control *kc = zebra_key_control_create(zh);
2410
2411     res = rpn_search_structure(zh, zs, attributeSet,
2412                                stream, rset_nmem,
2413                                sort_sequence, 
2414                                num_bases, basenames,
2415                                &result_sets, &num_result_sets,
2416                                0 /* no parent op */,
2417                                kc);
2418     if (res != ZEBRA_OK)
2419     {
2420         int i;
2421         for (i = 0; i<num_result_sets; i++)
2422             rset_delete(result_sets[i]);
2423         *result_set = 0;
2424     }
2425     else
2426     {
2427         assert(num_result_sets == 1);
2428         assert(result_sets);
2429         assert(*result_sets);
2430         *result_set = *result_sets;
2431     }
2432     (*kc->dec)(kc);
2433     return res;
2434 }
2435
2436 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2437                                oid_value attributeSet, 
2438                                NMEM stream, NMEM rset_nmem,
2439                                Z_SortKeySpecList *sort_sequence,
2440                                int num_bases, char **basenames,
2441                                RSET **result_sets, int *num_result_sets,
2442                                Z_Operator *parent_op,
2443                                struct rset_key_control *kc)
2444 {
2445     *num_result_sets = 0;
2446     if (zs->which == Z_RPNStructure_complex)
2447     {
2448         ZEBRA_RES res;
2449         Z_Operator *zop = zs->u.complex->roperator;
2450         RSET *result_sets_l = 0;
2451         int num_result_sets_l = 0;
2452         RSET *result_sets_r = 0;
2453         int num_result_sets_r = 0;
2454
2455         res = rpn_search_structure(zh, zs->u.complex->s1,
2456                                    attributeSet, stream, rset_nmem,
2457                                    sort_sequence,
2458                                    num_bases, basenames,
2459                                    &result_sets_l, &num_result_sets_l,
2460                                    zop, kc);
2461         if (res != ZEBRA_OK)
2462         {
2463             int i;
2464             for (i = 0; i<num_result_sets_l; i++)
2465                 rset_delete(result_sets_l[i]);
2466             return res;
2467         }
2468         res = rpn_search_structure(zh, zs->u.complex->s2,
2469                                    attributeSet, stream, rset_nmem,
2470                                    sort_sequence,
2471                                    num_bases, basenames,
2472                                    &result_sets_r, &num_result_sets_r,
2473                                    zop, kc);
2474         if (res != ZEBRA_OK)
2475         {
2476             int i;
2477             for (i = 0; i<num_result_sets_l; i++)
2478                 rset_delete(result_sets_l[i]);
2479             for (i = 0; i<num_result_sets_r; i++)
2480                 rset_delete(result_sets_r[i]);
2481             return res;
2482         }
2483
2484         /* make a new list of result for all children */
2485         *num_result_sets = num_result_sets_l + num_result_sets_r;
2486         *result_sets = nmem_malloc(stream, *num_result_sets * 
2487                                    sizeof(**result_sets));
2488         memcpy(*result_sets, result_sets_l, 
2489                num_result_sets_l * sizeof(**result_sets));
2490         memcpy(*result_sets + num_result_sets_l, result_sets_r, 
2491                num_result_sets_r * sizeof(**result_sets));
2492
2493         if (!parent_op || parent_op->which != zop->which
2494             || (zop->which != Z_Operator_and &&
2495                 zop->which != Z_Operator_or))
2496         {
2497             /* parent node different from this one (or non-present) */
2498             /* we must combine result sets now */
2499             RSET rset;
2500             switch (zop->which)
2501             {
2502             case Z_Operator_and:
2503                 rset = rsmulti_and_create(rset_nmem, kc,
2504                                           kc->scope,
2505                                           *num_result_sets, *result_sets);
2506                 break;
2507             case Z_Operator_or:
2508                 rset = rsmulti_or_create(rset_nmem, kc,
2509                                          kc->scope,
2510                                          *num_result_sets, *result_sets);
2511                 break;
2512             case Z_Operator_and_not:
2513                 rset = rsbool_create_not(rset_nmem, kc,
2514                                          kc->scope,
2515                                          (*result_sets)[0],
2516                                          (*result_sets)[1]);
2517                 break;
2518             case Z_Operator_prox:
2519                 if (zop->u.prox->which != Z_ProximityOperator_known)
2520                 {
2521                     zebra_setError(zh, 
2522                                    YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2523                                    0);
2524                     return ZEBRA_FAIL;
2525                 }
2526                 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2527                 {
2528                     zebra_setError_zint(zh,
2529                                         YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2530                                         *zop->u.prox->u.known);
2531                     return ZEBRA_FAIL;
2532                 }
2533                 else
2534                 {
2535                     rset = rsprox_create(rset_nmem, kc,
2536                                          kc->scope,
2537                                          *num_result_sets, *result_sets, 
2538                                          *zop->u.prox->ordered,
2539                                          (!zop->u.prox->exclusion ? 
2540                                           0 : *zop->u.prox->exclusion),
2541                                          *zop->u.prox->relationType,
2542                                          *zop->u.prox->distance );
2543                 }
2544                 break;
2545             default:
2546                 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
2547                 return ZEBRA_FAIL;
2548             }
2549             *num_result_sets = 1;
2550             *result_sets = nmem_malloc(stream, *num_result_sets * 
2551                                        sizeof(**result_sets));
2552             (*result_sets)[0] = rset;
2553         }
2554     }
2555     else if (zs->which == Z_RPNStructure_simple)
2556     {
2557         RSET rset;
2558         ZEBRA_RES res;
2559
2560         if (zs->u.simple->which == Z_Operand_APT)
2561         {
2562             yaz_log(YLOG_DEBUG, "rpn_search_APT");
2563             res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2564                                  attributeSet, stream, sort_sequence,
2565                                  num_bases, basenames, rset_nmem, &rset,
2566                                  kc);
2567             if (res != ZEBRA_OK)
2568                 return res;
2569         }
2570         else if (zs->u.simple->which == Z_Operand_resultSetId)
2571         {
2572             yaz_log(YLOG_DEBUG, "rpn_search_ref");
2573             rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2574             if (!rset)
2575             {
2576                 zebra_setError(zh, 
2577                                YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2578                                zs->u.simple->u.resultSetId);
2579                 return ZEBRA_FAIL;
2580             }
2581             rset_dup(rset);
2582         }
2583         else
2584         {
2585             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2586             return ZEBRA_FAIL;
2587         }
2588         *num_result_sets = 1;
2589         *result_sets = nmem_malloc(stream, *num_result_sets * 
2590                                    sizeof(**result_sets));
2591         (*result_sets)[0] = rset;
2592     }
2593     else
2594     {
2595         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2596         return ZEBRA_FAIL;
2597     }
2598     return ZEBRA_OK;
2599 }
2600
2601 struct scan_info_entry {
2602     char *term;
2603     ISAM_P isam_p;
2604 };
2605
2606 struct scan_info {
2607     struct scan_info_entry *list;
2608     ODR odr;
2609     int before, after;
2610     char prefix[20];
2611 };
2612
2613 static int scan_handle (char *name, const char *info, int pos, void *client)
2614 {
2615     int len_prefix, idx;
2616     struct scan_info *scan_info = (struct scan_info *) client;
2617
2618     len_prefix = strlen(scan_info->prefix);
2619     if (memcmp (name, scan_info->prefix, len_prefix))
2620         return 1;
2621     if (pos > 0)
2622         idx = scan_info->after - pos + scan_info->before;
2623     else
2624         idx = - pos - 1;
2625
2626     if (idx < 0)
2627         return 0;
2628     scan_info->list[idx].term = (char *)
2629         odr_malloc(scan_info->odr, strlen(name + len_prefix)+1);
2630     strcpy(scan_info->list[idx].term, name + len_prefix);
2631     assert (*info == sizeof(ISAM_P));
2632     memcpy (&scan_info->list[idx].isam_p, info+1, sizeof(ISAM_P));
2633     return 0;
2634 }
2635
2636 static void scan_term_untrans (ZebraHandle zh, NMEM stream, int reg_type,
2637                                char **dst, const char *src)
2638 {
2639     char term_src[IT_MAX_WORD];
2640     char term_dst[IT_MAX_WORD];
2641     
2642     term_untrans (zh, reg_type, term_src, src);
2643
2644     if (zh->iconv_from_utf8 != 0)
2645     {
2646         int len;
2647         char *inbuf = term_src;
2648         size_t inleft = strlen(term_src);
2649         char *outbuf = term_dst;
2650         size_t outleft = sizeof(term_dst)-1;
2651         size_t ret;
2652         
2653         ret = yaz_iconv (zh->iconv_from_utf8, &inbuf, &inleft,
2654                          &outbuf, &outleft);
2655         if (ret == (size_t)(-1))
2656             len = 0;
2657         else
2658             len = outbuf - term_dst;
2659         *dst = nmem_malloc(stream, len + 1);
2660         if (len > 0)
2661             memcpy (*dst, term_dst, len);
2662         (*dst)[len] = '\0';
2663     }
2664     else
2665         *dst = nmem_strdup(stream, term_src);
2666 }
2667
2668 static void count_set (RSET r, int *count)
2669 {
2670     zint psysno = 0;
2671     int kno = 0;
2672     struct it_key key;
2673     RSFD rfd;
2674
2675     yaz_log(YLOG_DEBUG, "count_set");
2676
2677     *count = 0;
2678     rfd = rset_open (r, RSETF_READ);
2679     while (rset_read (rfd, &key,0 /* never mind terms */))
2680     {
2681         if (key.mem[0] != psysno)
2682         {
2683             psysno = key.mem[0];
2684             (*count)++;
2685         }
2686         kno++;
2687     }
2688     rset_close (rfd);
2689     yaz_log(YLOG_DEBUG, "%d keys, %d records", kno, *count);
2690 }
2691
2692 ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
2693                    oid_value attributeset,
2694                    int num_bases, char **basenames,
2695                    int *position, int *num_entries, ZebraScanEntry **list,
2696                    int *is_partial, RSET limit_set, int return_zero)
2697 {
2698     int i;
2699     int pos = *position;
2700     int num = *num_entries;
2701     int before;
2702     int after;
2703     int base_no;
2704     char termz[IT_MAX_WORD+20];
2705     AttrType use;
2706     int use_value;
2707     const char *use_string = 0;
2708     struct scan_info *scan_info_array;
2709     ZebraScanEntry *glist;
2710     int ords[32], ord_no = 0;
2711     int ptr[32];
2712
2713     int bases_ok = 0;     /* no of databases with OK attribute */
2714     int errCode = 0;      /* err code (if any is not OK) */
2715     char *errString = 0;  /* addinfo */
2716
2717     unsigned reg_id;
2718     char *search_type = NULL;
2719     char rank_type[128];
2720     int complete_flag;
2721     int sort_flag;
2722     NMEM rset_nmem = NULL; 
2723     struct rset_key_control *kc = 0;
2724
2725     *list = 0;
2726     *is_partial = 0;
2727
2728     if (attributeset == VAL_NONE)
2729         attributeset = VAL_BIB1;
2730
2731     if (!limit_set)
2732     {
2733         AttrType termset;
2734         int termset_value_numeric;
2735         const char *termset_value_string;
2736         attr_init(&termset, zapt, 8);
2737         termset_value_numeric =
2738             attr_find_ex(&termset, NULL, &termset_value_string);
2739         if (termset_value_numeric != -1)
2740         {
2741             char resname[32];
2742             const char *termset_name = 0;
2743             
2744             if (termset_value_numeric != -2)
2745             {
2746                 
2747                 sprintf(resname, "%d", termset_value_numeric);
2748                 termset_name = resname;
2749             }
2750             else
2751                 termset_name = termset_value_string;
2752             
2753             limit_set = resultSetRef (zh, termset_name);
2754         }
2755     }
2756         
2757     yaz_log(YLOG_DEBUG, "position = %d, num = %d set=%d",
2758             pos, num, attributeset);
2759         
2760     attr_init(&use, zapt, 1);
2761     use_value = attr_find_ex(&use, &attributeset, &use_string);
2762
2763     if (zebra_maps_attr(zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2764                         rank_type, &complete_flag, &sort_flag))
2765     {
2766         *num_entries = 0;
2767         zebra_setError(zh, YAZ_BIB1_UNSUPP_ATTRIBUTE_TYPE, 0);
2768         return ZEBRA_FAIL;
2769     }
2770     yaz_log(YLOG_DEBUG, "use_value = %d", use_value);
2771
2772     if (use_value == -1)
2773         use_value = 1016;
2774     for (base_no = 0; base_no < num_bases && ord_no < 32; base_no++)
2775     {
2776         data1_local_attribute *local_attr;
2777         attent attp;
2778         int ord;
2779
2780         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2781         {
2782             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2783                            basenames[base_no]);
2784             *num_entries = 0;
2785             return ZEBRA_FAIL;
2786         }
2787
2788         if (use_string &&
2789             (ord = zebraExplain_lookup_attr_str(zh->reg->zei,
2790                                                 use_string)) >= 0)
2791         {
2792             /* we have a match for a raw string attribute */
2793             if (ord > 0)
2794                 ords[ord_no++] = ord;
2795             attp.local_attributes = 0;  /* no more attributes */
2796         }
2797         else
2798         {
2799             int r;
2800             
2801             if ((r = att_getentbyatt (zh, &attp, attributeset, use_value,
2802                                       use_string)))
2803             {
2804                 yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d",
2805                         attributeset, use_value);
2806                 if (r == -1)
2807                 {
2808                     errCode = YAZ_BIB1_UNSUPP_USE_ATTRIBUTE;
2809                     if (use_string)
2810                         zebra_setError(zh, YAZ_BIB1_UNSUPP_USE_ATTRIBUTE,
2811                                        use_string);
2812                     else
2813                         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_USE_ATTRIBUTE,
2814                                             use_value);
2815                 }   
2816                 else
2817                 {
2818                     zebra_setError(zh, YAZ_BIB1_UNSUPP_ATTRIBUTE_SET, 0);
2819                 }
2820                 continue;
2821             }
2822         }
2823         bases_ok++;
2824         for (local_attr = attp.local_attributes; local_attr && ord_no < 32;
2825              local_attr = local_attr->next)
2826         {
2827             ord = zebraExplain_lookup_attr_su(zh->reg->zei,
2828                                               attp.attset_ordinal,
2829                                               local_attr->local);
2830             if (ord > 0)
2831                 ords[ord_no++] = ord;
2832         }
2833     }
2834     if (!bases_ok && errCode)
2835     {
2836         zebra_setError(zh, errCode, errString);
2837         *num_entries = 0;
2838         return ZEBRA_FAIL;
2839     }
2840     if (ord_no == 0)
2841     {
2842         *num_entries = 0;
2843         return ZEBRA_OK;
2844     }
2845     /* prepare dictionary scanning */
2846     if (num < 1)
2847     {
2848         *num_entries = 0;
2849         return ZEBRA_OK;
2850     }
2851     before = pos-1;
2852     if (before < 0)
2853         before = 0;
2854     after = 1+num-pos;
2855     if (after < 0)
2856         after = 0;
2857     yaz_log(YLOG_DEBUG, "rpn_scan pos=%d num=%d before=%d "
2858             "after=%d before+after=%d",
2859             pos, num, before, after, before+after);
2860     scan_info_array = (struct scan_info *)
2861         odr_malloc(stream, ord_no * sizeof(*scan_info_array));
2862     for (i = 0; i < ord_no; i++)
2863     {
2864         int j, prefix_len = 0;
2865         int before_tmp = before, after_tmp = after;
2866         struct scan_info *scan_info = scan_info_array + i;
2867         struct rpn_char_map_info rcmi;
2868
2869         rpn_char_map_prepare (zh->reg, reg_id, &rcmi);
2870
2871         scan_info->before = before;
2872         scan_info->after = after;
2873         scan_info->odr = stream;
2874
2875         scan_info->list = (struct scan_info_entry *)
2876             odr_malloc(stream, (before+after) * sizeof(*scan_info->list));
2877         for (j = 0; j<before+after; j++)
2878             scan_info->list[j].term = NULL;
2879
2880         prefix_len += key_SU_encode (ords[i], termz + prefix_len);
2881         termz[prefix_len++] = reg_id;
2882         termz[prefix_len] = 0;
2883         strcpy(scan_info->prefix, termz);
2884
2885         if (trans_scan_term(zh, zapt, termz+prefix_len, reg_id) == ZEBRA_FAIL)
2886             return ZEBRA_FAIL;
2887         
2888         dict_scan(zh->reg->dict, termz, &before_tmp, &after_tmp,
2889                   scan_info, scan_handle);
2890     }
2891     glist = (ZebraScanEntry *)
2892         odr_malloc(stream, (before+after)*sizeof(*glist));
2893
2894     rset_nmem = nmem_create();
2895     kc = zebra_key_control_create(zh);
2896
2897     /* consider terms after main term */
2898     for (i = 0; i < ord_no; i++)
2899         ptr[i] = before;
2900     
2901     *is_partial = 0;
2902     for (i = 0; i<after; i++)
2903     {
2904         int j, j0 = -1;
2905         const char *mterm = NULL;
2906         const char *tst;
2907         RSET rset = 0;
2908         int lo = i + pos-1; /* offset in result list */
2909
2910         /* find: j0 is the first of the minimal values */
2911         for (j = 0; j < ord_no; j++)
2912         {
2913             if (ptr[j] < before+after && ptr[j] >= 0 &&
2914                 (tst = scan_info_array[j].list[ptr[j]].term) &&
2915                 (!mterm || strcmp (tst, mterm) < 0))
2916             {
2917                 j0 = j;
2918                 mterm = tst;
2919             }
2920         }
2921         if (j0 == -1)
2922             break;  /* no value found, stop */
2923
2924         /* get result set for first one , but only if it's within bounds */
2925         if (lo >= 0)
2926         {
2927             /* get result set for first term */
2928             scan_term_untrans(zh, stream->mem, reg_id,
2929                               &glist[lo].term, mterm);
2930             rset = rset_trunc(zh, &scan_info_array[j0].list[ptr[j0]].isam_p, 1,
2931                               glist[lo].term, strlen(glist[lo].term),
2932                               NULL, 0, zapt->term->which, rset_nmem, 
2933                               kc, kc->scope);
2934         }
2935         ptr[j0]++; /* move index for this set .. */
2936         /* get result set for remaining scan terms */
2937         for (j = j0+1; j<ord_no; j++)
2938         {
2939             if (ptr[j] < before+after && ptr[j] >= 0 &&
2940                 (tst = scan_info_array[j].list[ptr[j]].term) &&
2941                 !strcmp (tst, mterm))
2942             {
2943                 if (lo >= 0)
2944                 {
2945                     RSET rsets[2];
2946                     
2947                     rsets[0] = rset;
2948                     rsets[1] =
2949                         rset_trunc(
2950                             zh, &scan_info_array[j].list[ptr[j]].isam_p, 1,
2951                             glist[lo].term,
2952                             strlen(glist[lo].term), NULL, 0,
2953                             zapt->term->which,rset_nmem,
2954                             kc, kc->scope);
2955                     rset = rsmulti_or_create(rset_nmem, kc,
2956                                              2, kc->scope, rsets);
2957                 }
2958                 ptr[j]++;
2959             }
2960         }
2961         if (lo >= 0)
2962         {
2963             /* merge with limit_set if given */
2964             if (limit_set)
2965             {
2966                 RSET rsets[2];
2967                 rsets[0] = rset;
2968                 rsets[1] = rset_dup(limit_set);
2969                 
2970                 rset = rsmulti_and_create(rset_nmem, kc,
2971                                           kc->scope, 2, rsets);
2972             }
2973             /* count it */
2974             count_set(rset, &glist[lo].occurrences);
2975             rset_delete(rset);
2976         }
2977     }
2978     if (i < after)
2979     {
2980         *num_entries -= (after-i);
2981         *is_partial = 1;
2982         if (*num_entries < 0)
2983         {
2984             (*kc->dec)(kc);
2985             nmem_destroy(rset_nmem);
2986             *num_entries = 0;
2987             return ZEBRA_OK;
2988         }
2989     }
2990     /* consider terms before main term */
2991     for (i = 0; i<ord_no; i++)
2992         ptr[i] = 0;
2993     
2994     for (i = 0; i<before; i++)
2995     {
2996         int j, j0 = -1;
2997         const char *mterm = NULL;
2998         const char *tst;
2999         RSET rset;
3000         int lo = before-1-i; /* offset in result list */
3001         
3002         for (j = 0; j <ord_no; j++)
3003         {
3004             if (ptr[j] < before && ptr[j] >= 0 &&
3005                 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
3006                 (!mterm || strcmp (tst, mterm) > 0))
3007             {
3008                 j0 = j;
3009                     mterm = tst;
3010             }
3011         }
3012         if (j0 == -1)
3013             break;
3014         
3015         scan_term_untrans (zh, stream->mem, reg_id,
3016                            &glist[lo].term, mterm);
3017         
3018         rset = rset_trunc
3019             (zh, &scan_info_array[j0].list[before-1-ptr[j0]].isam_p, 1,
3020              glist[lo].term, strlen(glist[lo].term),
3021              NULL, 0, zapt->term->which,rset_nmem,
3022              kc, kc->scope);
3023         
3024         ptr[j0]++;
3025         
3026         for (j = j0+1; j<ord_no; j++)
3027         {
3028             if (ptr[j] < before && ptr[j] >= 0 &&
3029                 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
3030                 !strcmp (tst, mterm))
3031             {
3032                 RSET rsets[2];
3033                 
3034                 rsets[0] = rset;
3035                 rsets[1] = rset_trunc(
3036                     zh,
3037                     &scan_info_array[j].list[before-1-ptr[j]].isam_p, 1,
3038                     glist[lo].term,
3039                     strlen(glist[lo].term), NULL, 0,
3040                     zapt->term->which, rset_nmem,
3041                     kc, kc->scope);
3042                 rset = rsmulti_or_create(rset_nmem, kc,
3043                                          2, kc->scope, rsets);
3044                 
3045                 ptr[j]++;
3046             }
3047         }
3048         if (limit_set)
3049         {
3050             RSET rsets[2];
3051             rsets[0] = rset;
3052             rsets[1] = rset_dup(limit_set);
3053             
3054             rset = rsmulti_and_create(rset_nmem, kc,
3055                                       kc->scope, 2, rsets);
3056         }
3057         count_set (rset, &glist[lo].occurrences);
3058         rset_delete (rset);
3059     }
3060     (*kc->dec)(kc);
3061     nmem_destroy(rset_nmem);
3062     i = before-i;
3063     if (i)
3064     {
3065         *is_partial = 1;
3066         *position -= i;
3067         *num_entries -= i;
3068         if (*num_entries <= 0)
3069         {
3070             *num_entries = 0;
3071             return ZEBRA_OK;
3072         }
3073     }
3074     
3075     *list = glist + i;               /* list is set to first 'real' entry */
3076     
3077     yaz_log(YLOG_DEBUG, "position = %d, num_entries = %d",
3078             *position, *num_entries);
3079     return ZEBRA_OK;
3080 }
3081