Save ordinal (ord) index IDs so that snippets can be generated properly.
[idzebra-moved-to-github.git] / index / zrpn.c
1 /* $Id: zrpn.c,v 1.195 2005-06-06 21:31:08 adam Exp $
2    Copyright (C) 1995-2005
3    Index Data ApS
4
5 This file is part of the Zebra server.
6
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra.  If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA.
21 */
22
23 #include <stdio.h>
24 #include <assert.h>
25 #ifdef WIN32
26 #include <io.h>
27 #else
28 #include <unistd.h>
29 #endif
30 #include <ctype.h>
31
32 #include <yaz/diagbib1.h>
33 #include "index.h"
34 #include <zebra_xpath.h>
35
36 #include <charmap.h>
37 #include <rset.h>
38
39 struct rpn_char_map_info
40 {
41     ZebraMaps zm;
42     int reg_type;
43 };
44
45 typedef struct
46 {
47     int type;
48     int major;
49     int minor;
50     Z_AttributesPlusTerm *zapt;
51 } AttrType;
52
53 static struct ord_list *ord_list_create(NMEM nmem)
54 {
55     return 0;
56 }
57
58 static struct ord_list *ord_list_append(NMEM nmem, struct ord_list *list,
59                                         int ord)
60 {
61     struct ord_list *n = nmem_malloc(nmem, sizeof(*n));
62     n->ord = ord;
63     n->next = list;
64     return n;
65 }
66
/* Log level of the "rpn" module; looked up on first use in add_isam_p(). */
static int log_level_rpn;
/* Non-zero once log_level_rpn has been looked up. */
static int log_level_set;
69
70 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
71 {
72     struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
73     const char **out = zebra_maps_input(p->zm, p->reg_type, from, len, 0);
74 #if 0
75     if (out && *out)
76     {
77         const char *outp = *out;
78         yaz_log(YLOG_LOG, "---");
79         while (*outp)
80         {
81             yaz_log(YLOG_LOG, "%02X", *outp);
82             outp++;
83         }
84     }
85 #endif
86     return out;
87 }
88
89 static void rpn_char_map_prepare(struct zebra_register *reg, int reg_type,
90                                   struct rpn_char_map_info *map_info)
91 {
92     map_info->zm = reg->zebra_maps;
93     map_info->reg_type = reg_type;
94     dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
95 }
96
97 static int attr_find_ex(AttrType *src, oid_value *attributeSetP,
98                          const char **string_value)
99 {
100     int num_attributes;
101
102     num_attributes = src->zapt->attributes->num_attributes;
103     while (src->major < num_attributes)
104     {
105         Z_AttributeElement *element;
106
107         element = src->zapt->attributes->attributes[src->major];
108         if (src->type == *element->attributeType)
109         {
110             switch (element->which) 
111             {
112             case Z_AttributeValue_numeric:
113                 ++(src->major);
114                 if (element->attributeSet && attributeSetP)
115                 {
116                     oident *attrset;
117
118                     attrset = oid_getentbyoid(element->attributeSet);
119                     *attributeSetP = attrset->value;
120                 }
121                 return *element->value.numeric;
122                 break;
123             case Z_AttributeValue_complex:
124                 if (src->minor >= element->value.complex->num_list)
125                     break;
126                 if (element->attributeSet && attributeSetP)
127                 {
128                     oident *attrset;
129                     
130                     attrset = oid_getentbyoid(element->attributeSet);
131                     *attributeSetP = attrset->value;
132                 }
133                 if (element->value.complex->list[src->minor]->which ==  
134                     Z_StringOrNumeric_numeric)
135                 {
136                     ++(src->minor);
137                     return
138                         *element->value.complex->list[src->minor-1]->u.numeric;
139                 }
140                 else if (element->value.complex->list[src->minor]->which ==  
141                          Z_StringOrNumeric_string)
142                 {
143                     if (!string_value)
144                         break;
145                     ++(src->minor);
146                     *string_value = 
147                         element->value.complex->list[src->minor-1]->u.string;
148                     return -2;
149                 }
150                 else
151                     break;
152             default:
153                 assert(0);
154             }
155         }
156         ++(src->major);
157     }
158     return -1;
159 }
160
161 static int attr_find(AttrType *src, oid_value *attributeSetP)
162 {
163     return attr_find_ex(src, attributeSetP, 0);
164 }
165
166 static void attr_init(AttrType *src, Z_AttributesPlusTerm *zapt,
167                        int type)
168 {
169     src->zapt = zapt;
170     src->type = type;
171     src->major = 0;
172     src->minor = 0;
173 }
174
175 #define TERM_COUNT        
176        
177 struct grep_info {        
178 #ifdef TERM_COUNT        
179     int *term_no;        
180 #endif        
181     ISAM_P *isam_p_buf;
182     int isam_p_size;        
183     int isam_p_indx;
184     ZebraHandle zh;
185     int reg_type;
186     ZebraSet termset;
187 };        
188
189 void zebra_term_untrans(ZebraHandle zh, int reg_type,
190                         char *dst, const char *src)
191 {
192     int len = 0;
193     while (*src)
194     {
195         const char *cp = zebra_maps_output(zh->reg->zebra_maps,
196                                            reg_type, &src);
197         if (!cp && len < IT_MAX_WORD-1)
198             dst[len++] = *src++;
199         else
200             while (*cp && len < IT_MAX_WORD-1)
201                 dst[len++] = *cp++;
202     }
203     dst[len] = '\0';
204 }
205
206 static void add_isam_p(const char *name, const char *info,
207                        struct grep_info *p)
208 {
209     if (!log_level_set)
210     {
211         log_level_rpn = yaz_log_module_level("rpn");
212         log_level_set = 1;
213     }
214     if (p->isam_p_indx == p->isam_p_size)
215     {
216         ISAM_P *new_isam_p_buf;
217 #ifdef TERM_COUNT        
218         int *new_term_no;        
219 #endif
220         p->isam_p_size = 2*p->isam_p_size + 100;
221         new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
222                                             p->isam_p_size);
223         if (p->isam_p_buf)
224         {
225             memcpy(new_isam_p_buf, p->isam_p_buf,
226                     p->isam_p_indx * sizeof(*p->isam_p_buf));
227             xfree(p->isam_p_buf);
228         }
229         p->isam_p_buf = new_isam_p_buf;
230
231 #ifdef TERM_COUNT
232         new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
233         if (p->term_no)
234         {
235             memcpy(new_term_no, p->isam_p_buf,
236                     p->isam_p_indx * sizeof(*p->term_no));
237             xfree(p->term_no);
238         }
239         p->term_no = new_term_no;
240 #endif
241     }
242     assert(*info == sizeof(*p->isam_p_buf));
243     memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
244
245 #if 1
246     if (p->termset)
247     {
248         const char *db;
249         int set, use;
250         char term_tmp[IT_MAX_WORD];
251         int su_code = 0;
252         int len = key_SU_decode (&su_code, name);
253         
254         zebra_term_untrans  (p->zh, p->reg_type, term_tmp, name+len+1);
255         yaz_log(log_level_rpn, "grep: %d %c %s", su_code, name[len], term_tmp);
256         zebraExplain_lookup_ord (p->zh->reg->zei,
257                                  su_code, &db, &set, &use);
258         yaz_log(log_level_rpn, "grep:  set=%d use=%d db=%s", set, use, db);
259         
260         resultSetAddTerm(p->zh, p->termset, name[len], db,
261                          set, use, term_tmp);
262     }
263 #endif
264     (p->isam_p_indx)++;
265 }
266
/* Dictionary grep callback: p is the struct grep_info that collects the
   hits.  Always returns 0. */
static int grep_handle(char *name, const char *info, void *p)
{
    struct grep_info *collector = (struct grep_info *) p;

    add_isam_p(name, info, collector);
    return 0;
}
272
273 static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src,
274                     const char *ct1, const char *ct2, int first)
275 {
276     const char *s1, *s0 = *src;
277     const char **map;
278
279     /* skip white space */
280     while (*s0)
281     {
282         if (ct1 && strchr(ct1, *s0))
283             break;
284         if (ct2 && strchr(ct2, *s0))
285             break;
286         s1 = s0;
287         map = zebra_maps_input(zebra_maps, reg_type, &s1, strlen(s1), first);
288         if (**map != *CHR_SPACE)
289             break;
290         s0 = s1;
291     }
292     *src = s0;
293     return *s0;
294 }
295
296
/* esc_str: render a byte buffer in readable form for diagnostics.
 *  out_buf:   receives a NUL-terminated string
 *  out_size:  size of out_buf; must exceed 20
 *  in_buf:    bytes to render
 *  in_size:   number of bytes in in_buf
 * Each byte becomes "XX:c  " (hex value, then the character itself, or
 * '?' when it is outside the range 32..126).  Once fewer than 20 bytes
 * of out_buf remain, ".." is appended and the rest is omitted.
 */
static void esc_str(char *out_buf, int out_size,
                    const char *in_buf, int in_size)
{
    int used = 0;   /* characters written so far, excluding the NUL */
    int k;

    assert(out_buf);
    assert(in_buf);
    assert(out_size > 20);
    *out_buf = '\0';
    for (k = 0; k<in_size; k++)
    {
        int c = in_buf[k] & 0xff;
        int pc = (c < 32 || c > 126) ? '?' : c;

        /* keep a running length instead of rescanning with strlen() */
        used += sprintf(out_buf + used, "%02X:%c  ", c, pc);
        if (used > out_size-20)
        {
            strcpy(out_buf + used, "..");
            break;
        }
    }
}
322
323 #define REGEX_CHARS " []()|.*+?!"
324
325 /* term_100: handle term, where trunc = none(no operators at all) */
326 static int term_100(ZebraMaps zebra_maps, int reg_type,
327                     const char **src, char *dst, int space_split,
328                     char *dst_term)
329 {
330     const char *s0;
331     const char **map;
332     int i = 0;
333     int j = 0;
334
335     const char *space_start = 0;
336     const char *space_end = 0;
337
338     if (!term_pre(zebra_maps, reg_type, src, NULL, NULL, !space_split))
339         return 0;
340     s0 = *src;
341     while (*s0)
342     {
343         const char *s1 = s0;
344         int q_map_match = 0;
345         map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
346                                 &q_map_match);
347         if (space_split)
348         {
349             if (**map == *CHR_SPACE)
350                 break;
351         }
352         else  /* complete subfield only. */
353         {
354             if (**map == *CHR_SPACE)
355             {   /* save space mapping for later  .. */
356                 space_start = s1;
357                 space_end = s0;
358                 continue;
359             }
360             else if (space_start)
361             {   /* reload last space */
362                 while (space_start < space_end)
363                 {
364                     if (strchr(REGEX_CHARS, *space_start))
365                         dst[i++] = '\\';
366                     dst_term[j++] = *space_start;
367                     dst[i++] = *space_start++;
368                 }
369                 /* and reset */
370                 space_start = space_end = 0;
371             }
372         }
373         /* add non-space char */
374         memcpy(dst_term+j, s1, s0 - s1);
375         j += (s0 - s1);
376         if (!q_map_match)
377         {
378             while (s1 < s0)
379             {
380                 if (strchr(REGEX_CHARS, *s1))
381                     dst[i++] = '\\';
382                 dst[i++] = *s1++;
383             }
384         }
385         else
386         {
387             char tmpbuf[80];
388             esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
389             
390             strcpy(dst + i, map[0]);
391             i += strlen(map[0]);
392         }
393     }
394     dst[i] = '\0';
395     dst_term[j] = '\0';
396     *src = s0;
397     return i;
398 }
399
400 /* term_101: handle term, where trunc = Process # */
401 static int term_101(ZebraMaps zebra_maps, int reg_type,
402                     const char **src, char *dst, int space_split,
403                     char *dst_term)
404 {
405     const char *s0;
406     const char **map;
407     int i = 0;
408     int j = 0;
409
410     if (!term_pre(zebra_maps, reg_type, src, "#", "#", !space_split))
411         return 0;
412     s0 = *src;
413     while (*s0)
414     {
415         if (*s0 == '#')
416         {
417             dst[i++] = '.';
418             dst[i++] = '*';
419             dst_term[j++] = *s0++;
420         }
421         else
422         {
423             const char *s1 = s0;
424             int q_map_match = 0;
425             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
426                                     &q_map_match);
427             if (space_split && **map == *CHR_SPACE)
428                 break;
429
430             /* add non-space char */
431             memcpy(dst_term+j, s1, s0 - s1);
432             j += (s0 - s1);
433             if (!q_map_match)
434             {
435                 while (s1 < s0)
436                 {
437                     if (strchr(REGEX_CHARS, *s1))
438                         dst[i++] = '\\';
439                     dst[i++] = *s1++;
440                 }
441             }
442             else
443             {
444                 char tmpbuf[80];
445                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
446                 
447                 strcpy(dst + i, map[0]);
448                 i += strlen(map[0]);
449             }
450         }
451     }
452     dst[i] = '\0';
453     dst_term[j++] = '\0';
454     *src = s0;
455     return i;
456 }
457
458 /* term_103: handle term, where trunc = re-2 (regular expressions) */
459 static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src,
460                     char *dst, int *errors, int space_split,
461                     char *dst_term)
462 {
463     int i = 0;
464     int j = 0;
465     const char *s0;
466     const char **map;
467
468     if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split))
469         return 0;
470     s0 = *src;
471     if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
472         isdigit(((const unsigned char *)s0)[1]))
473     {
474         *errors = s0[1] - '0';
475         s0 += 3;
476         if (*errors > 3)
477             *errors = 3;
478     }
479     while (*s0)
480     {
481         if (strchr("^\\()[].*+?|-", *s0))
482         {
483             dst_term[j++] = *s0;
484             dst[i++] = *s0++;
485         }
486         else
487         {
488             const char *s1 = s0;
489             int q_map_match = 0;
490             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
491                                     &q_map_match);
492             if (space_split && **map == *CHR_SPACE)
493                 break;
494
495             /* add non-space char */
496             memcpy(dst_term+j, s1, s0 - s1);
497             j += (s0 - s1);
498             if (!q_map_match)
499             {
500                 while (s1 < s0)
501                 {
502                     if (strchr(REGEX_CHARS, *s1))
503                         dst[i++] = '\\';
504                     dst[i++] = *s1++;
505                 }
506             }
507             else
508             {
509                 char tmpbuf[80];
510                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
511                 
512                 strcpy(dst + i, map[0]);
513                 i += strlen(map[0]);
514             }
515         }
516     }
517     dst[i] = '\0';
518     dst_term[j] = '\0';
519     *src = s0;
520     
521     return i;
522 }
523
524 /* term_103: handle term, where trunc = re-1 (regular expressions) */
525 static int term_102(ZebraMaps zebra_maps, int reg_type, const char **src,
526                     char *dst, int space_split, char *dst_term)
527 {
528     return term_103(zebra_maps, reg_type, src, dst, NULL, space_split,
529                     dst_term);
530 }
531
532
533 /* term_104: handle term, where trunc = Process # and ! */
534 static int term_104(ZebraMaps zebra_maps, int reg_type,
535                     const char **src, char *dst, int space_split,
536                     char *dst_term)
537 {
538     const char *s0;
539     const char **map;
540     int i = 0;
541     int j = 0;
542
543     if (!term_pre(zebra_maps, reg_type, src, "?*#", "?*#", !space_split))
544         return 0;
545     s0 = *src;
546     while (*s0)
547     {
548         if (*s0 == '?')
549         {
550             dst_term[j++] = *s0++;
551             if (*s0 >= '0' && *s0 <= '9')
552             {
553                 int limit = 0;
554                 while (*s0 >= '0' && *s0 <= '9')
555                 {
556                     limit = limit * 10 + (*s0 - '0');
557                     dst_term[j++] = *s0++;
558                 }
559                 if (limit > 20)
560                     limit = 20;
561                 while (--limit >= 0)
562                 {
563                     dst[i++] = '.';
564                     dst[i++] = '?';
565                 }
566             }
567             else
568             {
569                 dst[i++] = '.';
570                 dst[i++] = '*';
571             }
572         }
573         else if (*s0 == '*')
574         {
575             dst[i++] = '.';
576             dst[i++] = '*';
577             dst_term[j++] = *s0++;
578         }
579         else if (*s0 == '#')
580         {
581             dst[i++] = '.';
582             dst_term[j++] = *s0++;
583         }
584         else
585         {
586             const char *s1 = s0;
587             int q_map_match = 0;
588             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
589                                     &q_map_match);
590             if (space_split && **map == *CHR_SPACE)
591                 break;
592
593             /* add non-space char */
594             memcpy(dst_term+j, s1, s0 - s1);
595             j += (s0 - s1);
596             if (!q_map_match)
597             {
598                 while (s1 < s0)
599                 {
600                     if (strchr(REGEX_CHARS, *s1))
601                         dst[i++] = '\\';
602                     dst[i++] = *s1++;
603                 }
604             }
605             else
606             {
607                 char tmpbuf[80];
608                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
609                 
610                 strcpy(dst + i, map[0]);
611                 i += strlen(map[0]);
612             }
613         }
614     }
615     dst[i] = '\0';
616     dst_term[j++] = '\0';
617     *src = s0;
618     return i;
619 }
620
621 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
622 static int term_105(ZebraMaps zebra_maps, int reg_type,
623                     const char **src, char *dst, int space_split,
624                     char *dst_term, int right_truncate)
625 {
626     const char *s0;
627     const char **map;
628     int i = 0;
629     int j = 0;
630
631     if (!term_pre(zebra_maps, reg_type, src, "*!", "*!", !space_split))
632         return 0;
633     s0 = *src;
634     while (*s0)
635     {
636         if (*s0 == '*')
637         {
638             dst[i++] = '.';
639             dst[i++] = '*';
640             dst_term[j++] = *s0++;
641         }
642         else if (*s0 == '!')
643         {
644             dst[i++] = '.';
645             dst_term[j++] = *s0++;
646         }
647         else
648         {
649             const char *s1 = s0;
650             int q_map_match = 0;
651             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
652                                     &q_map_match);
653             if (space_split && **map == *CHR_SPACE)
654                 break;
655
656             /* add non-space char */
657             memcpy(dst_term+j, s1, s0 - s1);
658             j += (s0 - s1);
659             if (!q_map_match)
660             {
661                 while (s1 < s0)
662                 {
663                     if (strchr(REGEX_CHARS, *s1))
664                         dst[i++] = '\\';
665                     dst[i++] = *s1++;
666                 }
667             }
668             else
669             {
670                 char tmpbuf[80];
671                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
672                 
673                 strcpy(dst + i, map[0]);
674                 i += strlen(map[0]);
675             }
676         }
677     }
678     if (right_truncate)
679     {
680         dst[i++] = '.';
681         dst[i++] = '*';
682     }
683     dst[i] = '\0';
684     
685     dst_term[j++] = '\0';
686     *src = s0;
687     return i;
688 }
689
690
691 /* gen_regular_rel - generate regular expression from relation
692  *  val:     border value (inclusive)
693  *  islt:    1 if <=; 0 if >=.
694  */
695 static void gen_regular_rel(char *dst, int val, int islt)
696 {
697     int dst_p;
698     int w, d, i;
699     int pos = 0;
700     char numstr[20];
701
702     yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
703     if (val >= 0)
704     {
705         if (islt)
706             strcpy(dst, "(-[0-9]+|(");
707         else
708             strcpy(dst, "((");
709     } 
710     else
711     {
712         if (!islt)
713         {
714             strcpy(dst, "([0-9]+|-(");
715             dst_p = strlen(dst);
716             islt = 1;
717         }
718         else
719         {
720             strcpy(dst, "(-(");
721             islt = 0;
722         }
723         val = -val;
724     }
725     dst_p = strlen(dst);
726     sprintf(numstr, "%d", val);
727     for (w = strlen(numstr); --w >= 0; pos++)
728     {
729         d = numstr[w];
730         if (pos > 0)
731         {
732             if (islt)
733             {
734                 if (d == '0')
735                     continue;
736                 d--;
737             } 
738             else
739             {
740                 if (d == '9')
741                     continue;
742                 d++;
743             }
744         }
745         
746         strcpy(dst + dst_p, numstr);
747         dst_p = strlen(dst) - pos - 1;
748
749         if (islt)
750         {
751             if (d != '0')
752             {
753                 dst[dst_p++] = '[';
754                 dst[dst_p++] = '0';
755                 dst[dst_p++] = '-';
756                 dst[dst_p++] = d;
757                 dst[dst_p++] = ']';
758             }
759             else
760                 dst[dst_p++] = d;
761         }
762         else
763         {
764             if (d != '9')
765             { 
766                 dst[dst_p++] = '[';
767                 dst[dst_p++] = d;
768                 dst[dst_p++] = '-';
769                 dst[dst_p++] = '9';
770                 dst[dst_p++] = ']';
771             }
772             else
773                 dst[dst_p++] = d;
774         }
775         for (i = 0; i<pos; i++)
776         {
777             dst[dst_p++] = '[';
778             dst[dst_p++] = '0';
779             dst[dst_p++] = '-';
780             dst[dst_p++] = '9';
781             dst[dst_p++] = ']';
782         }
783         dst[dst_p++] = '|';
784     }
785     dst[dst_p] = '\0';
786     if (islt)
787     {
788         /* match everything less than 10^(pos-1) */
789         strcat(dst, "0*");
790         for (i = 1; i<pos; i++)
791             strcat(dst, "[0-9]?");
792     }
793     else
794     {
795         /* match everything greater than 10^pos */
796         for (i = 0; i <= pos; i++)
797             strcat(dst, "[0-9]");
798         strcat(dst, "[0-9]*");
799     }
800     strcat(dst, "))");
801 }
802
/* string_rel_add_char: copy one pattern character from src to *term_p.
 *  term_p:  output cursor; advanced past what was written
 *  src:     pattern being read
 *  indx:    read position in src; advanced past what was read
 * A backslash is copied together with the character that follows it.
 */
void string_rel_add_char(char **term_p, const char *src, int *indx)
{
    char *out = *term_p;
    int pos = *indx;

    if (src[pos] == '\\')
        *out++ = src[pos++];
    *out++ = src[pos++];

    *term_p = out;
    *indx = pos;
}
809
810 /*
811  *   >  abc     ([b-].*|a[c-].*|ab[d-].*|abc.+)
812  *              ([^-a].*|a[^-b].*ab[^-c].*|abc.+)
813  *   >= abc     ([b-].*|a[c-].*|ab[c-].*)
814  *              ([^-a].*|a[^-b].*|ab[c-].*)
815  *   <  abc     ([-0].*|a[-a].*|ab[-b].*)
816  *              ([^a-].*|a[^b-].*|ab[^c-].*)
817  *   <= abc     ([-0].*|a[-a].*|ab[-b].*|abc)
818  *              ([^a-].*|a[^b-].*|ab[^c-].*|abc)
819  */
820 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
821                            const char **term_sub, char *term_dict,
822                            oid_value attributeSet,
823                            int reg_type, int space_split, char *term_dst,
824                            int *error_code)
825 {
826     AttrType relation;
827     int relation_value;
828     int i;
829     char *term_tmp = term_dict + strlen(term_dict);
830     char term_component[2*IT_MAX_WORD+20];
831
832     attr_init(&relation, zapt, 2);
833     relation_value = attr_find(&relation, NULL);
834
835     *error_code = 0;
836     yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
837     switch (relation_value)
838     {
839     case 1:
840         if (!term_100(zh->reg->zebra_maps, reg_type,
841                       term_sub, term_component,
842                       space_split, term_dst))
843             return 0;
844         yaz_log(log_level_rpn, "Relation <");
845         
846         *term_tmp++ = '(';
847         for (i = 0; term_component[i]; )
848         {
849             int j = 0;
850
851             if (i)
852                 *term_tmp++ = '|';
853             while (j < i)
854                 string_rel_add_char(&term_tmp, term_component, &j);
855
856             *term_tmp++ = '[';
857
858             *term_tmp++ = '^';
859             string_rel_add_char(&term_tmp, term_component, &i);
860             *term_tmp++ = '-';
861
862             *term_tmp++ = ']';
863             *term_tmp++ = '.';
864             *term_tmp++ = '*';
865
866             if ((term_tmp - term_dict) > IT_MAX_WORD)
867                 break;
868         }
869         *term_tmp++ = ')';
870         *term_tmp = '\0';
871         break;
872     case 2:
873         if (!term_100(zh->reg->zebra_maps, reg_type,
874                       term_sub, term_component,
875                       space_split, term_dst))
876             return 0;
877         yaz_log(log_level_rpn, "Relation <=");
878
879         *term_tmp++ = '(';
880         for (i = 0; term_component[i]; )
881         {
882             int j = 0;
883
884             while (j < i)
885                 string_rel_add_char(&term_tmp, term_component, &j);
886             *term_tmp++ = '[';
887
888             *term_tmp++ = '^';
889             string_rel_add_char(&term_tmp, term_component, &i);
890             *term_tmp++ = '-';
891
892             *term_tmp++ = ']';
893             *term_tmp++ = '.';
894             *term_tmp++ = '*';
895
896             *term_tmp++ = '|';
897
898             if ((term_tmp - term_dict) > IT_MAX_WORD)
899                 break;
900         }
901         for (i = 0; term_component[i]; )
902             string_rel_add_char(&term_tmp, term_component, &i);
903         *term_tmp++ = ')';
904         *term_tmp = '\0';
905         break;
906     case 5:
907         if (!term_100 (zh->reg->zebra_maps, reg_type,
908                        term_sub, term_component, space_split, term_dst))
909             return 0;
910         yaz_log(log_level_rpn, "Relation >");
911
912         *term_tmp++ = '(';
913         for (i = 0; term_component[i];)
914         {
915             int j = 0;
916
917             while (j < i)
918                 string_rel_add_char(&term_tmp, term_component, &j);
919             *term_tmp++ = '[';
920             
921             *term_tmp++ = '^';
922             *term_tmp++ = '-';
923             string_rel_add_char(&term_tmp, term_component, &i);
924
925             *term_tmp++ = ']';
926             *term_tmp++ = '.';
927             *term_tmp++ = '*';
928
929             *term_tmp++ = '|';
930
931             if ((term_tmp - term_dict) > IT_MAX_WORD)
932                 break;
933         }
934         for (i = 0; term_component[i];)
935             string_rel_add_char(&term_tmp, term_component, &i);
936         *term_tmp++ = '.';
937         *term_tmp++ = '+';
938         *term_tmp++ = ')';
939         *term_tmp = '\0';
940         break;
941     case 4:
942         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
943                       term_component, space_split, term_dst))
944             return 0;
945         yaz_log(log_level_rpn, "Relation >=");
946
947         *term_tmp++ = '(';
948         for (i = 0; term_component[i];)
949         {
950             int j = 0;
951
952             if (i)
953                 *term_tmp++ = '|';
954             while (j < i)
955                 string_rel_add_char(&term_tmp, term_component, &j);
956             *term_tmp++ = '[';
957
958             if (term_component[i+1])
959             {
960                 *term_tmp++ = '^';
961                 *term_tmp++ = '-';
962                 string_rel_add_char(&term_tmp, term_component, &i);
963             }
964             else
965             {
966                 string_rel_add_char(&term_tmp, term_component, &i);
967                 *term_tmp++ = '-';
968             }
969             *term_tmp++ = ']';
970             *term_tmp++ = '.';
971             *term_tmp++ = '*';
972
973             if ((term_tmp - term_dict) > IT_MAX_WORD)
974                 break;
975         }
976         *term_tmp++ = ')';
977         *term_tmp = '\0';
978         break;
979     case 3:
980     case 102:
981     case -1:
982         yaz_log(log_level_rpn, "Relation =");
983         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
984                       term_component, space_split, term_dst))
985             return 0;
986         strcat(term_tmp, "(");
987         strcat(term_tmp, term_component);
988         strcat(term_tmp, ")");
989         break;
990     default:
991         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
992         return 0;
993     }
994     return 1;
995 }
996
997 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
998                              const char **term_sub, 
999                              oid_value attributeSet, NMEM stream,
1000                              struct grep_info *grep_info,
1001                              int reg_type, int complete_flag,
1002                              int num_bases, char **basenames,
1003                              char *term_dst, int xpath_use,
1004                              struct ord_list **ol);
1005
1006 static ZEBRA_RES term_trunc(ZebraHandle zh,
1007                             Z_AttributesPlusTerm *zapt,
1008                             const char **term_sub, 
1009                             oid_value attributeSet, NMEM stream,
1010                             struct grep_info *grep_info,
1011                             int reg_type, int complete_flag,
1012                             int num_bases, char **basenames,
1013                             char *term_dst,
1014                             const char *rank_type, int xpath_use,
1015                             NMEM rset_nmem,
1016                             RSET *rset,
1017                             struct rset_key_control *kc)
1018 {
1019     ZEBRA_RES res;
1020     struct ord_list *ol;
1021     *rset = 0;
1022     grep_info->isam_p_indx = 0;
1023     res = string_term(zh, zapt, term_sub, attributeSet, stream, grep_info,
1024                       reg_type, complete_flag, num_bases, basenames,
1025                       term_dst, xpath_use, &ol);
1026     if (res != ZEBRA_OK)
1027         return res;
1028     if (!*term_sub)  /* no more terms ? */
1029         return res;
1030     yaz_log(log_level_rpn, "term: %s", term_dst);
1031     *rset = rset_trunc(zh, grep_info->isam_p_buf,
1032                        grep_info->isam_p_indx, term_dst,
1033                        strlen(term_dst), rank_type, 1 /* preserve pos */,
1034                        zapt->term->which, rset_nmem,
1035                        kc, kc->scope, ol);
1036     if (!*rset)
1037         return ZEBRA_FAIL;
1038     return ZEBRA_OK;
1039 }
1040
1041 static char *nmem_strdup_i(NMEM nmem, int v)
1042 {
1043     char val_str[64];
1044     sprintf(val_str, "%d", v);
1045     return nmem_strdup(nmem, val_str);
1046 }
1047
1048 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1049                              const char **term_sub, 
1050                              oid_value attributeSet, NMEM stream,
1051                              struct grep_info *grep_info,
1052                              int reg_type, int complete_flag,
1053                              int num_bases, char **basenames,
1054                              char *term_dst, int xpath_use,
1055                              struct ord_list **ol)
1056 {
1057     char term_dict[2*IT_MAX_WORD+4000];
1058     int j, r, base_no;
1059     AttrType truncation;
1060     int truncation_value;
1061     AttrType use;
1062     int use_value;
1063     const char *use_string = 0;
1064     oid_value curAttributeSet = attributeSet;
1065     const char *termp;
1066     struct rpn_char_map_info rcmi;
1067     int space_split = complete_flag ? 0 : 1;
1068
1069     int bases_ok = 0;     /* no of databases with OK attribute */
1070     int errCode = 0;      /* err code (if any is not OK) */
1071     char *errString = 0;  /* addinfo */
1072
1073
1074     *ol = ord_list_create(stream);
1075
1076     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1077     attr_init(&use, zapt, 1);
1078     use_value = attr_find_ex(&use, &curAttributeSet, &use_string);
1079     yaz_log(log_level_rpn, "string_term, use value %d", use_value);
1080     attr_init(&truncation, zapt, 5);
1081     truncation_value = attr_find(&truncation, NULL);
1082     yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1083
1084     if (use_value == -1)    /* no attribute - assumy "any" */
1085         use_value = 1016;
1086     for (base_no = 0; base_no < num_bases; base_no++)
1087     {
1088         int ord = -1;
1089         int attr_ok = 0;
1090         int regex_range = 0;
1091         int init_pos = 0;
1092         attent attp;
1093         data1_local_attribute id_xpath_attr;
1094         data1_local_attribute *local_attr;
1095         int max_pos, prefix_len = 0;
1096         int relation_error;
1097
1098         termp = *term_sub;
1099
1100         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1101         {
1102             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1103                            basenames[base_no]);
1104             return ZEBRA_FAIL;
1105         }
1106         if (xpath_use > 0 && use_value == -2) 
1107         {
1108             /* xpath mode and we have a string attribute */
1109             attp.local_attributes = &id_xpath_attr;
1110             attp.attset_ordinal = VAL_IDXPATH;
1111             id_xpath_attr.next = 0;
1112
1113             use_value = xpath_use;  /* xpath_use as use-attribute now */
1114             id_xpath_attr.local = use_value;
1115         }
1116         else if (curAttributeSet == VAL_IDXPATH && use_value >= 0)
1117         {
1118             /* X-Path attribute, use numeric value directly */
1119             attp.local_attributes = &id_xpath_attr;
1120             attp.attset_ordinal = VAL_IDXPATH;
1121             id_xpath_attr.next = 0;
1122             id_xpath_attr.local = use_value;
1123         }
1124         else if (use_string &&
1125                  (ord = zebraExplain_lookup_attr_str(zh->reg->zei,
1126                                                      use_string)) >= 0)
1127         {
1128             /* we have a match for a raw string attribute */
1129             char ord_buf[32];
1130             int i, ord_len;
1131
1132             if (prefix_len)
1133                 term_dict[prefix_len++] = '|';
1134             else
1135                 term_dict[prefix_len++] = '(';
1136             
1137             ord_len = key_SU_encode (ord, ord_buf);
1138             for (i = 0; i<ord_len; i++)
1139             {
1140                 term_dict[prefix_len++] = 1;
1141                 term_dict[prefix_len++] = ord_buf[i];
1142             }
1143             attp.local_attributes = 0;  /* no more attributes */
1144             *ol = ord_list_append(stream, *ol, ord);
1145         }
1146         else 
1147         {
1148             /* lookup in the .att files . Allow string as well */
1149             if ((r = att_getentbyatt (zh, &attp, curAttributeSet, use_value,
1150                                       use_string)))
1151             {
1152                 yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
1153                         curAttributeSet, use_value, r);
1154                 if (r == -1)
1155                 {
1156                     /* set was found, but value wasn't defined */
1157                     errCode = YAZ_BIB1_UNSUPP_USE_ATTRIBUTE;
1158                     if (use_string)
1159                         errString = nmem_strdup(stream, use_string);
1160                     else
1161                         errString = nmem_strdup_i (stream, use_value);
1162                 }
1163                 else
1164                 {
1165                     int oid[OID_SIZE];
1166                     struct oident oident;
1167                     
1168                     oident.proto = PROTO_Z3950;
1169                     oident.oclass = CLASS_ATTSET;
1170                     oident.value = curAttributeSet;
1171                     oid_ent_to_oid (&oident, oid);
1172                     
1173                     errCode = YAZ_BIB1_UNSUPP_ATTRIBUTE_SET;
1174                     errString = nmem_strdup(stream, oident.desc);
1175                 }
1176                 continue;
1177             }
1178         }
1179         for (local_attr = attp.local_attributes; local_attr;
1180              local_attr = local_attr->next)
1181         {
1182             char ord_buf[32];
1183             int i, ord_len;
1184             
1185             ord = zebraExplain_lookup_attr_su(zh->reg->zei,
1186                                               attp.attset_ordinal,
1187                                               local_attr->local);
1188             if (ord < 0)
1189                 continue;
1190             *ol = ord_list_append(stream, *ol, ord);
1191             if (prefix_len)
1192                 term_dict[prefix_len++] = '|';
1193             else
1194                 term_dict[prefix_len++] = '(';
1195             
1196             ord_len = key_SU_encode (ord, ord_buf);
1197             for (i = 0; i<ord_len; i++)
1198             {
1199                 term_dict[prefix_len++] = 1;
1200                 term_dict[prefix_len++] = ord_buf[i];
1201             }
1202         }
1203         bases_ok++;
1204         if (prefix_len)
1205             attr_ok = 1;
1206
1207         term_dict[prefix_len++] = ')';
1208         term_dict[prefix_len++] = 1;
1209         term_dict[prefix_len++] = reg_type;
1210         yaz_log(log_level_rpn, "reg_type = %d", term_dict[prefix_len-1]);
1211         term_dict[prefix_len] = '\0';
1212         j = prefix_len;
1213         switch (truncation_value)
1214         {
1215         case -1:         /* not specified */
1216         case 100:        /* do not truncate */
1217             if (!string_relation (zh, zapt, &termp, term_dict,
1218                                   attributeSet,
1219                                   reg_type, space_split, term_dst,
1220                                   &relation_error))
1221             {
1222                 if (relation_error)
1223                 {
1224                     zebra_setError(zh, relation_error, 0);
1225                     return ZEBRA_FAIL;
1226                 }
1227                 *term_sub = 0;
1228                 return ZEBRA_OK;
1229             }
1230             break;
1231         case 1:          /* right truncation */
1232             term_dict[j++] = '(';
1233             if (!term_100(zh->reg->zebra_maps, reg_type,
1234                           &termp, term_dict + j, space_split, term_dst))
1235             {
1236                 *term_sub = 0;
1237                 return ZEBRA_OK;
1238             }
1239             strcat(term_dict, ".*)");
1240             break;
1241         case 2:          /* keft truncation */
1242             term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1243             if (!term_100(zh->reg->zebra_maps, reg_type,
1244                           &termp, term_dict + j, space_split, term_dst))
1245             {
1246                 *term_sub = 0;
1247                 return ZEBRA_OK;
1248             }
1249             strcat(term_dict, ")");
1250             break;
1251         case 3:          /* left&right truncation */
1252             term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1253             if (!term_100(zh->reg->zebra_maps, reg_type,
1254                           &termp, term_dict + j, space_split, term_dst))
1255             {
1256                 *term_sub = 0;
1257                 return ZEBRA_OK;
1258             }
1259             strcat(term_dict, ".*)");
1260             break;
1261         case 101:        /* process # in term */
1262             term_dict[j++] = '(';
1263             if (!term_101(zh->reg->zebra_maps, reg_type,
1264                           &termp, term_dict + j, space_split, term_dst))
1265             {
1266                 *term_sub = 0;
1267                 return ZEBRA_OK;
1268             }
1269             strcat(term_dict, ")");
1270             break;
1271         case 102:        /* Regexp-1 */
1272             term_dict[j++] = '(';
1273             if (!term_102(zh->reg->zebra_maps, reg_type,
1274                           &termp, term_dict + j, space_split, term_dst))
1275             {
1276                 *term_sub = 0;
1277                 return ZEBRA_OK;
1278             }
1279             strcat(term_dict, ")");
1280             break;
1281         case 103:       /* Regexp-2 */
1282             regex_range = 1;
1283             term_dict[j++] = '(';
1284             init_pos = 2;
1285             if (!term_103(zh->reg->zebra_maps, reg_type,
1286                           &termp, term_dict + j, &regex_range,
1287                           space_split, term_dst))
1288             {
1289                 *term_sub = 0;
1290                 return ZEBRA_OK;
1291             }
1292             strcat(term_dict, ")");
1293             break;
1294         case 104:        /* process # and ! in term */
1295             term_dict[j++] = '(';
1296             if (!term_104(zh->reg->zebra_maps, reg_type,
1297                           &termp, term_dict + j, space_split, term_dst))
1298             {
1299                 *term_sub = 0;
1300                 return ZEBRA_OK;
1301             }
1302             strcat(term_dict, ")");
1303             break;
1304         case 105:        /* process * and ! in term */
1305             term_dict[j++] = '(';
1306             if (!term_105(zh->reg->zebra_maps, reg_type,
1307                           &termp, term_dict + j, space_split, term_dst, 1))
1308             {
1309                 *term_sub = 0;
1310                 return ZEBRA_OK;
1311             }
1312             strcat(term_dict, ")");
1313             break;
1314         case 106:        /* process * and ! in term */
1315             term_dict[j++] = '(';
1316             if (!term_105(zh->reg->zebra_maps, reg_type,
1317                           &termp, term_dict + j, space_split, term_dst, 0))
1318             {
1319                 *term_sub = 0;
1320                 return ZEBRA_OK;
1321             }
1322             strcat(term_dict, ")");
1323             break;
1324         default:
1325             zebra_setError_zint(zh,
1326                                 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1327                                 truncation_value);
1328             return ZEBRA_FAIL;
1329         }
1330         if (attr_ok)
1331         {
1332             char buf[80];
1333             const char *input = term_dict + prefix_len;
1334             esc_str(buf, sizeof(buf), input, strlen(input));
1335         }
1336         if (attr_ok)
1337         {
1338             yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_dict+prefix_len);
1339             r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
1340                                  grep_info, &max_pos, init_pos,
1341                                  grep_handle);
1342             if (r)
1343                 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1344         }
1345     }
1346     if (!bases_ok)
1347     {
1348         zebra_setError(zh, errCode, errString);
1349         return ZEBRA_FAIL;
1350     }
1351     *term_sub = termp;
1352     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1353     return ZEBRA_OK;
1354 }
1355
1356
1357 /* convert APT search term to UTF8 */
1358 static ZEBRA_RES zapt_term_to_utf8(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1359                                    char *termz)
1360 {
1361     size_t sizez;
1362     Z_Term *term = zapt->term;
1363
1364     switch (term->which)
1365     {
1366     case Z_Term_general:
1367         if (zh->iconv_to_utf8 != 0)
1368         {
1369             char *inbuf = term->u.general->buf;
1370             size_t inleft = term->u.general->len;
1371             char *outbuf = termz;
1372             size_t outleft = IT_MAX_WORD-1;
1373             size_t ret;
1374
1375             ret = yaz_iconv(zh->iconv_to_utf8, &inbuf, &inleft,
1376                         &outbuf, &outleft);
1377             if (ret == (size_t)(-1))
1378             {
1379                 ret = yaz_iconv(zh->iconv_to_utf8, 0, 0, 0, 0);
1380                 zebra_setError(
1381                     zh, 
1382                     YAZ_BIB1_QUERY_TERM_INCLUDES_CHARS_THAT_DO_NOT_TRANSLATE_INTO_,
1383                     0);
1384                 return ZEBRA_FAIL;
1385             }
1386             *outbuf = 0;
1387         }
1388         else
1389         {
1390             sizez = term->u.general->len;
1391             if (sizez > IT_MAX_WORD-1)
1392                 sizez = IT_MAX_WORD-1;
1393             memcpy (termz, term->u.general->buf, sizez);
1394             termz[sizez] = '\0';
1395         }
1396         break;
1397     case Z_Term_characterString:
1398         sizez = strlen(term->u.characterString);
1399         if (sizez > IT_MAX_WORD-1)
1400             sizez = IT_MAX_WORD-1;
1401         memcpy (termz, term->u.characterString, sizez);
1402         termz[sizez] = '\0';
1403         break;
1404     default:
1405         zebra_setError(zh, YAZ_BIB1_UNSUPP_CODED_VALUE_FOR_TERM, 0);
1406         return ZEBRA_FAIL;
1407     }
1408     return ZEBRA_OK;
1409 }
1410
1411 /* convert APT SCAN term to internal cmap */
1412 static ZEBRA_RES trans_scan_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1413                                  char *termz, int reg_type)
1414 {
1415     char termz0[IT_MAX_WORD];
1416
1417     if (zapt_term_to_utf8(zh, zapt, termz0) == ZEBRA_FAIL)
1418         return ZEBRA_FAIL;    /* error */
1419     else
1420     {
1421         const char **map;
1422         const char *cp = (const char *) termz0;
1423         const char *cp_end = cp + strlen(cp);
1424         const char *src;
1425         int i = 0;
1426         const char *space_map = NULL;
1427         int len;
1428             
1429         while ((len = (cp_end - cp)) > 0)
1430         {
1431             map = zebra_maps_input(zh->reg->zebra_maps, reg_type, &cp, len, 0);
1432             if (**map == *CHR_SPACE)
1433                 space_map = *map;
1434             else
1435             {
1436                 if (i && space_map)
1437                     for (src = space_map; *src; src++)
1438                         termz[i++] = *src;
1439                 space_map = NULL;
1440                 for (src = *map; *src; src++)
1441                     termz[i++] = *src;
1442             }
1443         }
1444         termz[i] = '\0';
1445     }
1446     return ZEBRA_OK;
1447 }
1448
1449 char *normalize_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1450                      const char *termz, NMEM stream, unsigned reg_id)
1451 {
1452     WRBUF wrbuf = 0;
1453     AttrType truncation;
1454     int truncation_value;
1455     char *ex_list = 0;
1456
1457     attr_init(&truncation, zapt, 5);
1458     truncation_value = attr_find(&truncation, NULL);
1459
1460     switch (truncation_value)
1461     {
1462     default:
1463         ex_list = "";
1464         break;
1465     case 101:
1466         ex_list = "#";
1467         break;
1468     case 102:
1469     case 103:
1470         ex_list = 0;
1471         break;
1472     case 104:
1473         ex_list = "!#";
1474         break;
1475     case 105:
1476         ex_list = "!*";
1477         break;
1478     }
1479     if (ex_list)
1480         wrbuf = zebra_replace(zh->reg->zebra_maps, reg_id, ex_list,
1481                               termz, strlen(termz));
1482     if (!wrbuf)
1483         return nmem_strdup(stream, termz);
1484     else
1485     {
1486         char *buf = (char*) nmem_malloc(stream, wrbuf_len(wrbuf)+1);
1487         memcpy (buf, wrbuf_buf(wrbuf), wrbuf_len(wrbuf));
1488         buf[wrbuf_len(wrbuf)] = '\0';
1489         return buf;
1490     }
1491 }
1492
1493 static void grep_info_delete(struct grep_info *grep_info)
1494 {
1495 #ifdef TERM_COUNT
1496     xfree(grep_info->term_no);
1497 #endif
1498     xfree(grep_info->isam_p_buf);
1499 }
1500
1501 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1502                                    Z_AttributesPlusTerm *zapt,
1503                                    struct grep_info *grep_info,
1504                                    int reg_type)
1505 {
1506     AttrType termset;
1507     int termset_value_numeric;
1508     const char *termset_value_string;
1509
1510 #ifdef TERM_COUNT
1511     grep_info->term_no = 0;
1512 #endif
1513     grep_info->isam_p_size = 0;
1514     grep_info->isam_p_buf = NULL;
1515     grep_info->zh = zh;
1516     grep_info->reg_type = reg_type;
1517     grep_info->termset = 0;
1518
1519     if (!zapt)
1520         return ZEBRA_OK;
1521     attr_init(&termset, zapt, 8);
1522     termset_value_numeric =
1523         attr_find_ex(&termset, NULL, &termset_value_string);
1524     if (termset_value_numeric != -1)
1525     {
1526         char resname[32];
1527         const char *termset_name = 0;
1528         if (termset_value_numeric != -2)
1529         {
1530     
1531             sprintf(resname, "%d", termset_value_numeric);
1532             termset_name = resname;
1533         }
1534         else
1535             termset_name = termset_value_string;
1536         yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1537         grep_info->termset = resultSetAdd(zh, termset_name, 1);
1538         if (!grep_info->termset)
1539         {
1540             zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1541             return ZEBRA_FAIL;
1542         }
1543     }
1544     return ZEBRA_OK;
1545 }
1546                                
1547 /**
1548   \brief Create result set(s) for list of terms
1549   \param zh Zebra Handle
1550   \param termz_org term as used in query but converted to UTF-8
1551   \param attributeSet default attribute set
1552   \param stream memory for result
1553   \param reg_type register type ('w', 'p',..)
1554   \param complete_flag whether it's phrases or not
1555   \param rank_type term flags for ranking
1556   \param xpath_use use attribute for X-Path (-1 for no X-path)
1557   \param num_bases number of databases
1558   \param basenames array of databases
1559   \param rset_mem memory for result sets
1560   \param result_sets output result set for each term in list (output)
1561   \param number number of output result sets
1562   \param kc rset key control to be used for created result sets
1563 */
1564 static ZEBRA_RES term_list_trunc(ZebraHandle zh,
1565                                  Z_AttributesPlusTerm *zapt,
1566                                  const char *termz_org,
1567                                  oid_value attributeSet,
1568                                  NMEM stream,
1569                                  int reg_type, int complete_flag,
1570                                  const char *rank_type, int xpath_use,
1571                                  int num_bases, char **basenames, 
1572                                  NMEM rset_nmem,
1573                                  RSET **result_sets, int *num_result_sets,
1574                                  struct rset_key_control *kc)
1575 {
1576     char term_dst[IT_MAX_WORD+1];
1577     struct grep_info grep_info;
1578     char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type);
1579     const char *termp = termz;
1580     int alloc_sets = 0;
1581
1582     *num_result_sets = 0;
1583     *term_dst = 0;
1584     if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1585         return ZEBRA_FAIL;
1586     while(1)
1587     { 
1588         ZEBRA_RES res;
1589
1590         if (alloc_sets == *num_result_sets)
1591         {
1592             int add = 10;
1593             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1594                                               sizeof(*rnew));
1595             if (alloc_sets)
1596                 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1597             alloc_sets = alloc_sets + add;
1598             *result_sets = rnew;
1599         }
1600         res = term_trunc(zh, zapt, &termp, attributeSet,
1601                          stream, &grep_info,
1602                          reg_type, complete_flag,
1603                          num_bases, basenames,
1604                          term_dst, rank_type,
1605                          xpath_use, rset_nmem,
1606                          &(*result_sets)[*num_result_sets],
1607                          kc);
1608         if (res != ZEBRA_OK)
1609         {
1610             int i;
1611             for (i = 0; i < *num_result_sets; i++)
1612                 rset_delete((*result_sets)[i]);
1613             grep_info_delete (&grep_info);
1614             return res;
1615         }
1616         if ((*result_sets)[*num_result_sets] == 0)
1617             break;
1618         (*num_result_sets)++;
1619     }
1620     grep_info_delete(&grep_info);
1621     return ZEBRA_OK;
1622 }
1623
1624 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1625                                        Z_AttributesPlusTerm *zapt,
1626                                        const char *termz_org,
1627                                        oid_value attributeSet,
1628                                        NMEM stream,
1629                                        int reg_type, int complete_flag,
1630                                        const char *rank_type, int xpath_use,
1631                                        int num_bases, char **basenames, 
1632                                        NMEM rset_nmem,
1633                                        RSET *rset,
1634                                        struct rset_key_control *kc)
1635 {
1636     RSET *result_sets = 0;
1637     int num_result_sets = 0;
1638     ZEBRA_RES res =
1639         term_list_trunc(zh, zapt, termz_org, attributeSet,
1640                         stream, reg_type, complete_flag,
1641                         rank_type, xpath_use,
1642                         num_bases, basenames,
1643                         rset_nmem,
1644                         &result_sets, &num_result_sets, kc);
1645     if (res != ZEBRA_OK)
1646         return res;
1647     if (num_result_sets == 0)
1648         *rset = rsnull_create (rset_nmem, kc, 0); 
1649     else if (num_result_sets == 1)
1650         *rset = result_sets[0];
1651     else
1652         *rset = rsprox_create(rset_nmem, kc, kc->scope,
1653                               num_result_sets, result_sets,
1654                               1 /* ordered */, 0 /* exclusion */,
1655                               3 /* relation */, 1 /* distance */);
1656     if (!*rset)
1657         return ZEBRA_FAIL;
1658     return ZEBRA_OK;
1659 }
1660
1661 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1662                                         Z_AttributesPlusTerm *zapt,
1663                                         const char *termz_org,
1664                                         oid_value attributeSet,
1665                                         NMEM stream,
1666                                         int reg_type, int complete_flag,
1667                                         const char *rank_type,
1668                                         int xpath_use,
1669                                         int num_bases, char **basenames,
1670                                         NMEM rset_nmem,
1671                                         RSET *rset,
1672                                         struct rset_key_control *kc)
1673 {
1674     RSET *result_sets = 0;
1675     int num_result_sets = 0;
1676     ZEBRA_RES res =
1677         term_list_trunc(zh, zapt, termz_org, attributeSet,
1678                         stream, reg_type, complete_flag,
1679                         rank_type, xpath_use,
1680                         num_bases, basenames,
1681                         rset_nmem,
1682                         &result_sets, &num_result_sets, kc);
1683     if (res != ZEBRA_OK)
1684         return res;
1685     if (num_result_sets == 0)
1686         *rset = rsnull_create (rset_nmem, kc, 0); 
1687     else if (num_result_sets == 1)
1688         *rset = result_sets[0];
1689     else
1690         *rset = rsmulti_or_create(rset_nmem, kc, kc->scope, 0 /* termid */,
1691                                   num_result_sets, result_sets);
1692     if (!*rset)
1693         return ZEBRA_FAIL;
1694     return ZEBRA_OK;
1695 }
1696
1697 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1698                                          Z_AttributesPlusTerm *zapt,
1699                                          const char *termz_org,
1700                                          oid_value attributeSet,
1701                                          NMEM stream,
1702                                          int reg_type, int complete_flag,
1703                                          const char *rank_type, 
1704                                          int xpath_use,
1705                                          int num_bases, char **basenames,
1706                                          NMEM rset_nmem,
1707                                          RSET *rset,
1708                                          struct rset_key_control *kc)
1709 {
1710     RSET *result_sets = 0;
1711     int num_result_sets = 0;
1712     ZEBRA_RES res =
1713         term_list_trunc(zh, zapt, termz_org, attributeSet,
1714                         stream, reg_type, complete_flag,
1715                         rank_type, xpath_use,
1716                         num_bases, basenames,
1717                         rset_nmem,
1718                         &result_sets, &num_result_sets,
1719                         kc);
1720     if (res != ZEBRA_OK)
1721         return res;
1722     if (num_result_sets == 0)
1723         *rset = rsnull_create (rset_nmem, kc, 0); 
1724     else if (num_result_sets == 1)
1725         *rset = result_sets[0];
1726     else
1727         *rset = rsmulti_and_create(rset_nmem, kc, kc->scope,
1728                                    num_result_sets, result_sets);
1729     if (!*rset)
1730         return ZEBRA_FAIL;
1731     return ZEBRA_OK;
1732 }
1733
1734 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1735                             const char **term_sub,
1736                             char *term_dict,
1737                             oid_value attributeSet,
1738                             struct grep_info *grep_info,
1739                             int *max_pos,
1740                             int reg_type,
1741                             char *term_dst,
1742                             int *error_code)
1743 {
1744     AttrType relation;
1745     int relation_value;
1746     int term_value;
1747     int r;
1748     char *term_tmp = term_dict + strlen(term_dict);
1749
1750     *error_code = 0;
1751     attr_init(&relation, zapt, 2);
1752     relation_value = attr_find(&relation, NULL);
1753
1754     yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1755
1756     if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1757                   term_dst))
1758         return 0;
1759     term_value = atoi (term_tmp);
1760     switch (relation_value)
1761     {
1762     case 1:
1763         yaz_log(log_level_rpn, "Relation <");
1764         gen_regular_rel(term_tmp, term_value-1, 1);
1765         break;
1766     case 2:
1767         yaz_log(log_level_rpn, "Relation <=");
1768         gen_regular_rel(term_tmp, term_value, 1);
1769         break;
1770     case 4:
1771         yaz_log(log_level_rpn, "Relation >=");
1772         gen_regular_rel(term_tmp, term_value, 0);
1773         break;
1774     case 5:
1775         yaz_log(log_level_rpn, "Relation >");
1776         gen_regular_rel(term_tmp, term_value+1, 0);
1777         break;
1778     case -1:
1779     case 3:
1780         yaz_log(log_level_rpn, "Relation =");
1781         sprintf(term_tmp, "(0*%d)", term_value);
1782         break;
1783     default:
1784         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1785         return 0;
1786     }
1787     yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_tmp);
1788     r = dict_lookup_grep(zh->reg->dict, term_dict, 0, grep_info, max_pos,
1789                           0, grep_handle);
1790     if (r)
1791         yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1792     yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1793     return 1;
1794 }
1795
1796 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1797                               const char **term_sub, 
1798                               oid_value attributeSet,
1799                               struct grep_info *grep_info,
1800                               int reg_type, int complete_flag,
1801                               int num_bases, char **basenames,
1802                               char *term_dst, int xpath_use, NMEM stream)
1803 {
1804     char term_dict[2*IT_MAX_WORD+2];
1805     int r, base_no;
1806     AttrType use;
1807     int use_value;
1808     const char *use_string = 0;
1809     oid_value curAttributeSet = attributeSet;
1810     const char *termp;
1811     struct rpn_char_map_info rcmi;
1812
1813     int bases_ok = 0;     /* no of databases with OK attribute */
1814     int errCode = 0;      /* err code (if any is not OK) */
1815     char *errString = 0;  /* addinfo */
1816
1817     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1818     attr_init(&use, zapt, 1);
1819     use_value = attr_find_ex(&use, &curAttributeSet, &use_string);
1820
1821     if (use_value == -1)
1822         use_value = 1016;
1823
1824     for (base_no = 0; base_no < num_bases; base_no++)
1825     {
1826         attent attp;
1827         data1_local_attribute id_xpath_attr;
1828         data1_local_attribute *local_attr;
1829         int max_pos, prefix_len = 0;
1830         int relation_error = 0;
1831
1832         termp = *term_sub;
1833         if (use_value == -2)  /* string attribute (assume IDXPATH/any) */
1834         {
1835             use_value = xpath_use;
1836             attp.local_attributes = &id_xpath_attr;
1837             attp.attset_ordinal = VAL_IDXPATH;
1838             id_xpath_attr.next = 0;
1839             id_xpath_attr.local = use_value;
1840         }
1841         else if (curAttributeSet == VAL_IDXPATH)
1842         {
1843             attp.local_attributes = &id_xpath_attr;
1844             attp.attset_ordinal = VAL_IDXPATH;
1845             id_xpath_attr.next = 0;
1846             id_xpath_attr.local = use_value;
1847         }
1848         else
1849         {
1850             if ((r = att_getentbyatt (zh, &attp, curAttributeSet, use_value,
1851                                             use_string)))
1852             {
1853                 yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
1854                       curAttributeSet, use_value, r);
1855                 if (r == -1)
1856                 {
1857                     errCode = YAZ_BIB1_UNSUPP_USE_ATTRIBUTE;
1858                     if (use_string)
1859                         errString = nmem_strdup(stream, use_string);
1860                     else
1861                         errString = nmem_strdup_i (stream, use_value);
1862                 }
1863                 else
1864                     errCode = YAZ_BIB1_UNSUPP_ATTRIBUTE_SET;
1865                 continue;
1866             }
1867         }
1868         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1869         {
1870             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1871                            basenames[base_no]);
1872             return ZEBRA_FAIL;
1873         }
1874         for (local_attr = attp.local_attributes; local_attr;
1875              local_attr = local_attr->next)
1876         {
1877             int ord;
1878             char ord_buf[32];
1879             int i, ord_len;
1880
1881             ord = zebraExplain_lookup_attr_su(zh->reg->zei,
1882                                               attp.attset_ordinal,
1883                                               local_attr->local);
1884             if (ord < 0)
1885                 continue;
1886             if (prefix_len)
1887                 term_dict[prefix_len++] = '|';
1888             else
1889                 term_dict[prefix_len++] = '(';
1890
1891             ord_len = key_SU_encode (ord, ord_buf);
1892             for (i = 0; i<ord_len; i++)
1893             {
1894                 term_dict[prefix_len++] = 1;
1895                 term_dict[prefix_len++] = ord_buf[i];
1896             }
1897         }
1898         if (!prefix_len)
1899         {
1900             zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_USE_ATTRIBUTE, use_value);
1901             continue;
1902         }
1903         bases_ok++;
1904         term_dict[prefix_len++] = ')';        
1905         term_dict[prefix_len++] = 1;
1906         term_dict[prefix_len++] = reg_type;
1907         yaz_log(YLOG_DEBUG, "reg_type = %d", term_dict[prefix_len-1]);
1908         term_dict[prefix_len] = '\0';
1909         if (!numeric_relation(zh, zapt, &termp, term_dict,
1910                               attributeSet, grep_info, &max_pos, reg_type,
1911                               term_dst, &relation_error))
1912         {
1913             if (relation_error)
1914             {
1915                 zebra_setError(zh, relation_error, 0);
1916                 return ZEBRA_FAIL;
1917             }
1918             *term_sub = 0;
1919             return ZEBRA_OK;
1920         }
1921     }
1922     if (!bases_ok)
1923     {
1924         zebra_setError(zh, errCode, errString);
1925         return ZEBRA_FAIL;
1926     }
1927     *term_sub = termp;
1928     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1929     return ZEBRA_OK;
1930 }
1931
1932 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1933                                         Z_AttributesPlusTerm *zapt,
1934                                         const char *termz,
1935                                         oid_value attributeSet,
1936                                         NMEM stream,
1937                                         int reg_type, int complete_flag,
1938                                         const char *rank_type, int xpath_use,
1939                                         int num_bases, char **basenames,
1940                                         NMEM rset_nmem,
1941                                         RSET *rset,
1942                                         struct rset_key_control *kc)
1943 {
1944     char term_dst[IT_MAX_WORD+1];
1945     const char *termp = termz;
1946     RSET *result_sets = 0;
1947     int num_result_sets = 0;
1948     ZEBRA_RES res;
1949     struct grep_info grep_info;
1950     int alloc_sets = 0;
1951
1952     yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1953     if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1954         return ZEBRA_FAIL;
1955     while (1)
1956     { 
1957         if (alloc_sets == num_result_sets)
1958         {
1959             int add = 10;
1960             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1961                                               sizeof(*rnew));
1962             if (alloc_sets)
1963                 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1964             alloc_sets = alloc_sets + add;
1965             result_sets = rnew;
1966         }
1967         yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1968         grep_info.isam_p_indx = 0;
1969         res = numeric_term(zh, zapt, &termp, attributeSet, &grep_info,
1970                            reg_type, complete_flag, num_bases, basenames,
1971                            term_dst, xpath_use,
1972                            stream);
1973         if (res == ZEBRA_FAIL || termp == 0)
1974             break;
1975         yaz_log(YLOG_DEBUG, "term: %s", term_dst);
1976         result_sets[num_result_sets] =
1977             rset_trunc(zh, grep_info.isam_p_buf,
1978                        grep_info.isam_p_indx, term_dst,
1979                        strlen(term_dst), rank_type,
1980                        0 /* preserve position */,
1981                        zapt->term->which, rset_nmem, 
1982                        kc, kc->scope, 0);
1983         if (!result_sets[num_result_sets])
1984             break;
1985         num_result_sets++;
1986     }
1987     grep_info_delete(&grep_info);
1988     if (termp)
1989     {
1990         int i;
1991         for (i = 0; i<num_result_sets; i++)
1992             rset_delete(result_sets[i]);
1993         return ZEBRA_FAIL;
1994     }
1995     if (num_result_sets == 0)
1996         *rset = rsnull_create(rset_nmem, kc, 0);
1997     if (num_result_sets == 1)
1998         *rset = result_sets[0];
1999     else
2000         *rset = rsmulti_and_create(rset_nmem, kc, kc->scope,
2001                                    num_result_sets, result_sets);
2002     if (!*rset)
2003         return ZEBRA_FAIL;
2004     return ZEBRA_OK;
2005 }
2006
2007 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
2008                                       Z_AttributesPlusTerm *zapt,
2009                                       const char *termz,
2010                                       oid_value attributeSet,
2011                                       NMEM stream,
2012                                       const char *rank_type, NMEM rset_nmem,
2013                                       RSET *rset,
2014                                       struct rset_key_control *kc)
2015 {
2016     RSFD rsfd;
2017     struct it_key key;
2018     int sys;
2019     *rset = rstemp_create(rset_nmem, kc, kc->scope,
2020                           res_get (zh->res, "setTmpDir"),0 );
2021     rsfd = rset_open(*rset, RSETF_WRITE);
2022     
2023     sys = atoi(termz);
2024     if (sys <= 0)
2025         sys = 1;
2026     key.mem[0] = sys;
2027     key.mem[1] = 1;
2028     key.len = 2;
2029     rset_write (rsfd, &key);
2030     rset_close (rsfd);
2031     return ZEBRA_OK;
2032 }
2033
2034 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2035                                oid_value attributeSet, NMEM stream,
2036                                Z_SortKeySpecList *sort_sequence,
2037                                const char *rank_type,
2038                                NMEM rset_nmem,
2039                                RSET *rset,
2040                                struct rset_key_control *kc)
2041 {
2042     int i;
2043     int sort_relation_value;
2044     AttrType sort_relation_type;
2045     Z_SortKeySpec *sks;
2046     Z_SortKey *sk;
2047     int oid[OID_SIZE];
2048     oident oe;
2049     char termz[20];
2050     
2051     attr_init(&sort_relation_type, zapt, 7);
2052     sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
2053
2054     if (!sort_sequence->specs)
2055     {
2056         sort_sequence->num_specs = 10;
2057         sort_sequence->specs = (Z_SortKeySpec **)
2058             nmem_malloc(stream, sort_sequence->num_specs *
2059                          sizeof(*sort_sequence->specs));
2060         for (i = 0; i<sort_sequence->num_specs; i++)
2061             sort_sequence->specs[i] = 0;
2062     }
2063     if (zapt->term->which != Z_Term_general)
2064         i = 0;
2065     else
2066         i = atoi_n ((char *) zapt->term->u.general->buf,
2067                     zapt->term->u.general->len);
2068     if (i >= sort_sequence->num_specs)
2069         i = 0;
2070     sprintf(termz, "%d", i);
2071
2072     oe.proto = PROTO_Z3950;
2073     oe.oclass = CLASS_ATTSET;
2074     oe.value = attributeSet;
2075     if (!oid_ent_to_oid (&oe, oid))
2076         return ZEBRA_FAIL;
2077
2078     sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
2079     sks->sortElement = (Z_SortElement *)
2080         nmem_malloc(stream, sizeof(*sks->sortElement));
2081     sks->sortElement->which = Z_SortElement_generic;
2082     sk = sks->sortElement->u.generic = (Z_SortKey *)
2083         nmem_malloc(stream, sizeof(*sk));
2084     sk->which = Z_SortKey_sortAttributes;
2085     sk->u.sortAttributes = (Z_SortAttributes *)
2086         nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
2087
2088     sk->u.sortAttributes->id = oid;
2089     sk->u.sortAttributes->list = zapt->attributes;
2090
2091     sks->sortRelation = (int *)
2092         nmem_malloc(stream, sizeof(*sks->sortRelation));
2093     if (sort_relation_value == 1)
2094         *sks->sortRelation = Z_SortKeySpec_ascending;
2095     else if (sort_relation_value == 2)
2096         *sks->sortRelation = Z_SortKeySpec_descending;
2097     else 
2098         *sks->sortRelation = Z_SortKeySpec_ascending;
2099
2100     sks->caseSensitivity = (int *)
2101         nmem_malloc(stream, sizeof(*sks->caseSensitivity));
2102     *sks->caseSensitivity = 0;
2103
2104     sks->which = Z_SortKeySpec_null;
2105     sks->u.null = odr_nullval ();
2106     sort_sequence->specs[i] = sks;
2107     *rset = rsnull_create (rset_nmem, kc, 0);
2108     return ZEBRA_OK;
2109 }
2110
2111
2112 static int parse_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2113                        oid_value attributeSet,
2114                        struct xpath_location_step *xpath, int max, NMEM mem)
2115 {
2116     oid_value curAttributeSet = attributeSet;
2117     AttrType use;
2118     const char *use_string = 0;
2119     
2120     attr_init(&use, zapt, 1);
2121     attr_find_ex(&use, &curAttributeSet, &use_string);
2122
2123     if (!use_string || *use_string != '/')
2124         return -1;
2125
2126     return zebra_parse_xpath_str(use_string, xpath, max, mem);
2127 }
2128  
2129                
2130
2131 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
2132                         int reg_type, const char *term, int use,
2133                         oid_value curAttributeSet, NMEM rset_nmem,
2134                         struct rset_key_control *kc)
2135 {
2136     RSET rset;
2137     struct grep_info grep_info;
2138     char term_dict[2048];
2139     char ord_buf[32];
2140     int prefix_len = 0;
2141     int ord = zebraExplain_lookup_attr_su(zh->reg->zei, curAttributeSet, use);
2142     int ord_len, i, r, max_pos;
2143     int term_type = Z_Term_characterString;
2144     const char *flags = "void";
2145
2146     if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, '0') == ZEBRA_FAIL)
2147         return rsnull_create(rset_nmem, kc, 0);
2148     
2149     if (ord < 0)
2150         return rsnull_create(rset_nmem, kc, 0);
2151     if (prefix_len)
2152         term_dict[prefix_len++] = '|';
2153     else
2154         term_dict[prefix_len++] = '(';
2155     
2156     ord_len = key_SU_encode (ord, ord_buf);
2157     for (i = 0; i<ord_len; i++)
2158     {
2159         term_dict[prefix_len++] = 1;
2160         term_dict[prefix_len++] = ord_buf[i];
2161     }
2162     term_dict[prefix_len++] = ')';
2163     term_dict[prefix_len++] = 1;
2164     term_dict[prefix_len++] = reg_type;
2165     
2166     strcpy(term_dict+prefix_len, term);
2167     
2168     grep_info.isam_p_indx = 0;
2169     r = dict_lookup_grep(zh->reg->dict, term_dict, 0,
2170                           &grep_info, &max_pos, 0, grep_handle);
2171     yaz_log(YLOG_DEBUG, "%s %d positions", term,
2172              grep_info.isam_p_indx);
2173     rset = rset_trunc(zh, grep_info.isam_p_buf,
2174                       grep_info.isam_p_indx, term, strlen(term),
2175                       flags, 1, term_type,rset_nmem,
2176                       kc, kc->scope, 0);
2177     grep_info_delete(&grep_info);
2178     return rset;
2179 }
2180
2181 static
2182 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2183                            oid_value attributeSet,
2184                            int num_bases, char **basenames,
2185                            NMEM stream, const char *rank_type, RSET rset,
2186                            int xpath_len, struct xpath_location_step *xpath,
2187                            NMEM rset_nmem,
2188                            RSET *rset_out,
2189                            struct rset_key_control *kc)
2190 {
2191     oid_value curAttributeSet = attributeSet;
2192     int base_no;
2193     int i;
2194
2195     if (xpath_len < 0)
2196     {
2197         *rset_out = rset;
2198         return ZEBRA_OK;
2199     }
2200
2201     yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2202     for (i = 0; i<xpath_len; i++)
2203     {
2204         yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2205
2206     }
2207
2208     curAttributeSet = VAL_IDXPATH;
2209
2210     /*
2211       //a    ->    a/.*
2212       //a/b  ->    b/a/.*
2213       /a     ->    a/
2214       /a/b   ->    b/a/
2215
2216       /      ->    none
2217
2218    a[@attr = value]/b[@other = othervalue]
2219
2220  /e/@a val      range(e/,range(@a,freetext(w,1015,val),@a),e/)
2221  /a/b val       range(b/a/,freetext(w,1016,val),b/a/)
2222  /a/b/@c val    range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2223  /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2224  /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2225  /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
2226       
2227     */
2228
2229     dict_grep_cmap (zh->reg->dict, 0, 0);
2230
2231     for (base_no = 0; base_no < num_bases; base_no++)
2232     {
2233         int level = xpath_len;
2234         int first_path = 1;
2235         
2236         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2237         {
2238             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2239                            basenames[base_no]);
2240             *rset_out = rset;
2241             return ZEBRA_FAIL;
2242         }
2243         while (--level >= 0)
2244         {
2245             char xpath_rev[128];
2246             int i, len;
2247             RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
2248
2249             *xpath_rev = 0;
2250             len = 0;
2251             for (i = level; i >= 1; --i)
2252             {
2253                 const char *cp = xpath[i].part;
2254                 if (*cp)
2255                 {
2256                     for (;*cp; cp++)
2257                         if (*cp == '*')
2258                         {
2259                             memcpy (xpath_rev + len, "[^/]*", 5);
2260                             len += 5;
2261                         }
2262                         else if (*cp == ' ')
2263                         {
2264
2265                             xpath_rev[len++] = 1;
2266                             xpath_rev[len++] = ' ';
2267                         }
2268
2269                         else
2270                             xpath_rev[len++] = *cp;
2271                     xpath_rev[len++] = '/';
2272                 }
2273                 else if (i == 1)  /* // case */
2274                 {
2275                     xpath_rev[len++] = '.';
2276                     xpath_rev[len++] = '*';
2277                 }
2278             }
2279             xpath_rev[len] = 0;
2280
2281             if (xpath[level].predicate &&
2282                 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2283                 xpath[level].predicate->u.relation.name[0])
2284             {
2285                 WRBUF wbuf = wrbuf_alloc();
2286                 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2287                 if (xpath[level].predicate->u.relation.value)
2288                 {
2289                     const char *cp = xpath[level].predicate->u.relation.value;
2290                     wrbuf_putc(wbuf, '=');
2291                     
2292                     while (*cp)
2293                     {
2294                         if (strchr(REGEX_CHARS, *cp))
2295                             wrbuf_putc(wbuf, '\\');
2296                         wrbuf_putc(wbuf, *cp);
2297                         cp++;
2298                     }
2299                 }
2300                 wrbuf_puts(wbuf, "");
2301                 rset_attr = xpath_trunc(
2302                     zh, stream, '0', wrbuf_buf(wbuf), 3, 
2303                     curAttributeSet, rset_nmem, kc);
2304                 wrbuf_free(wbuf, 1);
2305             } 
2306             else 
2307             {
2308                 if (!first_path)
2309                     continue;
2310             }
2311             yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level, xpath_rev);
2312             if (strlen(xpath_rev))
2313             {
2314                 rset_start_tag = xpath_trunc(zh, stream, '0', 
2315                         xpath_rev, 1, curAttributeSet, rset_nmem, kc);
2316             
2317                 rset_end_tag = xpath_trunc(zh, stream, '0', 
2318                         xpath_rev, 2, curAttributeSet, rset_nmem, kc);
2319
2320                 rset = rsbetween_create(rset_nmem, kc, kc->scope,
2321                                         rset_start_tag, rset,
2322                                         rset_end_tag, rset_attr);
2323             }
2324             first_path = 0;
2325         }
2326     }
2327     *rset_out = rset;
2328     return ZEBRA_OK;
2329 }
2330
2331 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2332                                 oid_value attributeSet, NMEM stream,
2333                                 Z_SortKeySpecList *sort_sequence,
2334                                 int num_bases, char **basenames, 
2335                                 NMEM rset_nmem,
2336                                 RSET *rset,
2337                                 struct rset_key_control *kc)
2338 {
2339     ZEBRA_RES res = ZEBRA_OK;
2340     unsigned reg_id;
2341     char *search_type = NULL;
2342     char rank_type[128];
2343     int complete_flag;
2344     int sort_flag;
2345     char termz[IT_MAX_WORD+1];
2346     int xpath_len;
2347     int xpath_use = 0;
2348     struct xpath_location_step xpath[10];
2349
2350     if (!log_level_set)
2351     {
2352         log_level_rpn = yaz_log_module_level("rpn");
2353         log_level_set = 1;
2354     }
2355     zebra_maps_attr(zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2356                     rank_type, &complete_flag, &sort_flag);
2357     
2358     yaz_log(YLOG_DEBUG, "reg_id=%c", reg_id);
2359     yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2360     yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2361     yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2362
2363     if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2364         return ZEBRA_FAIL;
2365
2366     if (sort_flag)
2367         return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2368                              rank_type, rset_nmem, rset, kc);
2369     /* consider if an X-Path query is used */
2370     xpath_len = parse_xpath(zh, zapt, attributeSet, xpath, 10, stream);
2371     if (xpath_len >= 0)
2372     {
2373         xpath_use = 1016;  /* searching for element by default */
2374         if (xpath[xpath_len-1].part[0] == '@') 
2375             xpath_use = 1015;  /* last step an attribute .. */
2376     }
2377
2378     /* search using one of the various search type strategies
2379        termz is our UTF-8 search term
2380        attributeSet is top-level default attribute set 
2381        stream is ODR for search
2382        reg_id is the register type
2383        complete_flag is 1 for complete subfield, 0 for incomplete
2384        xpath_use is use-attribute to be used for X-Path search, 0 for none
2385     */
2386     if (!strcmp(search_type, "phrase"))
2387     {
2388         res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2389                                     reg_id, complete_flag, rank_type,
2390                                     xpath_use,
2391                                     num_bases, basenames, rset_nmem,
2392                                     rset, kc);
2393     }
2394     else if (!strcmp(search_type, "and-list"))
2395     {
2396         res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2397                                       reg_id, complete_flag, rank_type,
2398                                       xpath_use,
2399                                       num_bases, basenames, rset_nmem,
2400                                       rset, kc);
2401     }
2402     else if (!strcmp(search_type, "or-list"))
2403     {
2404         res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2405                                      reg_id, complete_flag, rank_type,
2406                                      xpath_use,
2407                                      num_bases, basenames, rset_nmem,
2408                                      rset, kc);
2409     }
2410     else if (!strcmp(search_type, "local"))
2411     {
2412         res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2413                                    rank_type, rset_nmem, rset, kc);
2414     }
2415     else if (!strcmp(search_type, "numeric"))
2416     {
2417         res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2418                                      reg_id, complete_flag, rank_type,
2419                                      xpath_use,
2420                                      num_bases, basenames, rset_nmem,
2421                                      rset, kc);
2422     }
2423     else
2424     {
2425         zebra_setError(zh, YAZ_BIB1_UNSUPP_STRUCTURE_ATTRIBUTE, 0);
2426         res = ZEBRA_FAIL;
2427     }
2428     if (res != ZEBRA_OK)
2429         return res;
2430     if (!*rset)
2431         return ZEBRA_FAIL;
2432     return rpn_search_xpath(zh, attributeSet, num_bases, basenames,
2433                             stream, rank_type, *rset, 
2434                             xpath_len, xpath, rset_nmem, rset, kc);
2435 }
2436
2437 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2438                                       oid_value attributeSet, 
2439                                       NMEM stream, NMEM rset_nmem,
2440                                       Z_SortKeySpecList *sort_sequence,
2441                                       int num_bases, char **basenames,
2442                                       RSET **result_sets, int *num_result_sets,
2443                                       Z_Operator *parent_op,
2444                                       struct rset_key_control *kc);
2445
2446 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2447                          oid_value attributeSet, 
2448                          NMEM stream, NMEM rset_nmem,
2449                          Z_SortKeySpecList *sort_sequence,
2450                          int num_bases, char **basenames,
2451                          RSET *result_set)
2452 {
2453     RSET *result_sets = 0;
2454     int num_result_sets = 0;
2455     ZEBRA_RES res;
2456     struct rset_key_control *kc = zebra_key_control_create(zh);
2457
2458     res = rpn_search_structure(zh, zs, attributeSet,
2459                                stream, rset_nmem,
2460                                sort_sequence, 
2461                                num_bases, basenames,
2462                                &result_sets, &num_result_sets,
2463                                0 /* no parent op */,
2464                                kc);
2465     if (res != ZEBRA_OK)
2466     {
2467         int i;
2468         for (i = 0; i<num_result_sets; i++)
2469             rset_delete(result_sets[i]);
2470         *result_set = 0;
2471     }
2472     else
2473     {
2474         assert(num_result_sets == 1);
2475         assert(result_sets);
2476         assert(*result_sets);
2477         *result_set = *result_sets;
2478     }
2479     (*kc->dec)(kc);
2480     return res;
2481 }
2482
2483 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2484                                oid_value attributeSet, 
2485                                NMEM stream, NMEM rset_nmem,
2486                                Z_SortKeySpecList *sort_sequence,
2487                                int num_bases, char **basenames,
2488                                RSET **result_sets, int *num_result_sets,
2489                                Z_Operator *parent_op,
2490                                struct rset_key_control *kc)
2491 {
2492     *num_result_sets = 0;
2493     if (zs->which == Z_RPNStructure_complex)
2494     {
2495         ZEBRA_RES res;
2496         Z_Operator *zop = zs->u.complex->roperator;
2497         RSET *result_sets_l = 0;
2498         int num_result_sets_l = 0;
2499         RSET *result_sets_r = 0;
2500         int num_result_sets_r = 0;
2501
2502         res = rpn_search_structure(zh, zs->u.complex->s1,
2503                                    attributeSet, stream, rset_nmem,
2504                                    sort_sequence,
2505                                    num_bases, basenames,
2506                                    &result_sets_l, &num_result_sets_l,
2507                                    zop, kc);
2508         if (res != ZEBRA_OK)
2509         {
2510             int i;
2511             for (i = 0; i<num_result_sets_l; i++)
2512                 rset_delete(result_sets_l[i]);
2513             return res;
2514         }
2515         res = rpn_search_structure(zh, zs->u.complex->s2,
2516                                    attributeSet, stream, rset_nmem,
2517                                    sort_sequence,
2518                                    num_bases, basenames,
2519                                    &result_sets_r, &num_result_sets_r,
2520                                    zop, kc);
2521         if (res != ZEBRA_OK)
2522         {
2523             int i;
2524             for (i = 0; i<num_result_sets_l; i++)
2525                 rset_delete(result_sets_l[i]);
2526             for (i = 0; i<num_result_sets_r; i++)
2527                 rset_delete(result_sets_r[i]);
2528             return res;
2529         }
2530
2531         /* make a new list of result for all children */
2532         *num_result_sets = num_result_sets_l + num_result_sets_r;
2533         *result_sets = nmem_malloc(stream, *num_result_sets * 
2534                                    sizeof(**result_sets));
2535         memcpy(*result_sets, result_sets_l, 
2536                num_result_sets_l * sizeof(**result_sets));
2537         memcpy(*result_sets + num_result_sets_l, result_sets_r, 
2538                num_result_sets_r * sizeof(**result_sets));
2539
2540         if (!parent_op || parent_op->which != zop->which
2541             || (zop->which != Z_Operator_and &&
2542                 zop->which != Z_Operator_or))
2543         {
2544             /* parent node different from this one (or non-present) */
2545             /* we must combine result sets now */
2546             RSET rset;
2547             switch (zop->which)
2548             {
2549             case Z_Operator_and:
2550                 rset = rsmulti_and_create(rset_nmem, kc,
2551                                           kc->scope,
2552                                           *num_result_sets, *result_sets);
2553                 break;
2554             case Z_Operator_or:
2555                 rset = rsmulti_or_create(rset_nmem, kc,
2556                                          kc->scope, 0, /* termid */
2557                                          *num_result_sets, *result_sets);
2558                 break;
2559             case Z_Operator_and_not:
2560                 rset = rsbool_create_not(rset_nmem, kc,
2561                                          kc->scope,
2562                                          (*result_sets)[0],
2563                                          (*result_sets)[1]);
2564                 break;
2565             case Z_Operator_prox:
2566                 if (zop->u.prox->which != Z_ProximityOperator_known)
2567                 {
2568                     zebra_setError(zh, 
2569                                    YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2570                                    0);
2571                     return ZEBRA_FAIL;
2572                 }
2573                 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2574                 {
2575                     zebra_setError_zint(zh,
2576                                         YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2577                                         *zop->u.prox->u.known);
2578                     return ZEBRA_FAIL;
2579                 }
2580                 else
2581                 {
2582                     rset = rsprox_create(rset_nmem, kc,
2583                                          kc->scope,
2584                                          *num_result_sets, *result_sets, 
2585                                          *zop->u.prox->ordered,
2586                                          (!zop->u.prox->exclusion ? 
2587                                           0 : *zop->u.prox->exclusion),
2588                                          *zop->u.prox->relationType,
2589                                          *zop->u.prox->distance );
2590                 }
2591                 break;
2592             default:
2593                 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
2594                 return ZEBRA_FAIL;
2595             }
2596             *num_result_sets = 1;
2597             *result_sets = nmem_malloc(stream, *num_result_sets * 
2598                                        sizeof(**result_sets));
2599             (*result_sets)[0] = rset;
2600         }
2601     }
2602     else if (zs->which == Z_RPNStructure_simple)
2603     {
2604         RSET rset;
2605         ZEBRA_RES res;
2606
2607         if (zs->u.simple->which == Z_Operand_APT)
2608         {
2609             yaz_log(YLOG_DEBUG, "rpn_search_APT");
2610             res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2611                                  attributeSet, stream, sort_sequence,
2612                                  num_bases, basenames, rset_nmem, &rset,
2613                                  kc);
2614             if (res != ZEBRA_OK)
2615                 return res;
2616         }
2617         else if (zs->u.simple->which == Z_Operand_resultSetId)
2618         {
2619             yaz_log(YLOG_DEBUG, "rpn_search_ref");
2620             rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2621             if (!rset)
2622             {
2623                 zebra_setError(zh, 
2624                                YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2625                                zs->u.simple->u.resultSetId);
2626                 return ZEBRA_FAIL;
2627             }
2628             rset_dup(rset);
2629         }
2630         else
2631         {
2632             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2633             return ZEBRA_FAIL;
2634         }
2635         *num_result_sets = 1;
2636         *result_sets = nmem_malloc(stream, *num_result_sets * 
2637                                    sizeof(**result_sets));
2638         (*result_sets)[0] = rset;
2639     }
2640     else
2641     {
2642         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2643         return ZEBRA_FAIL;
2644     }
2645     return ZEBRA_OK;
2646 }
2647
2648 struct scan_info_entry {
2649     char *term;
2650     ISAM_P isam_p;
2651 };
2652
2653 struct scan_info {
2654     struct scan_info_entry *list;
2655     ODR odr;
2656     int before, after;
2657     char prefix[20];
2658 };
2659
2660 static int scan_handle (char *name, const char *info, int pos, void *client)
2661 {
2662     int len_prefix, idx;
2663     struct scan_info *scan_info = (struct scan_info *) client;
2664
2665     len_prefix = strlen(scan_info->prefix);
2666     if (memcmp (name, scan_info->prefix, len_prefix))
2667         return 1;
2668     if (pos > 0)
2669         idx = scan_info->after - pos + scan_info->before;
2670     else
2671         idx = - pos - 1;
2672
2673     if (idx < 0)
2674         return 0;
2675     scan_info->list[idx].term = (char *)
2676         odr_malloc(scan_info->odr, strlen(name + len_prefix)+1);
2677     strcpy(scan_info->list[idx].term, name + len_prefix);
2678     assert (*info == sizeof(ISAM_P));
2679     memcpy (&scan_info->list[idx].isam_p, info+1, sizeof(ISAM_P));
2680     return 0;
2681 }
2682
2683 void zebra_term_untrans_iconv(ZebraHandle zh, NMEM stream, int reg_type,
2684                               char **dst, const char *src)
2685 {
2686     char term_src[IT_MAX_WORD];
2687     char term_dst[IT_MAX_WORD];
2688     
2689     zebra_term_untrans (zh, reg_type, term_src, src);
2690
2691     if (zh->iconv_from_utf8 != 0)
2692     {
2693         int len;
2694         char *inbuf = term_src;
2695         size_t inleft = strlen(term_src);
2696         char *outbuf = term_dst;
2697         size_t outleft = sizeof(term_dst)-1;
2698         size_t ret;
2699         
2700         ret = yaz_iconv (zh->iconv_from_utf8, &inbuf, &inleft,
2701                          &outbuf, &outleft);
2702         if (ret == (size_t)(-1))
2703             len = 0;
2704         else
2705             len = outbuf - term_dst;
2706         *dst = nmem_malloc(stream, len + 1);
2707         if (len > 0)
2708             memcpy (*dst, term_dst, len);
2709         (*dst)[len] = '\0';
2710     }
2711     else
2712         *dst = nmem_strdup(stream, term_src);
2713 }
2714
2715 static void count_set (RSET r, int *count)
2716 {
2717     zint psysno = 0;
2718     int kno = 0;
2719     struct it_key key;
2720     RSFD rfd;
2721
2722     yaz_log(YLOG_DEBUG, "count_set");
2723
2724     *count = 0;
2725     rfd = rset_open (r, RSETF_READ);
2726     while (rset_read (rfd, &key,0 /* never mind terms */))
2727     {
2728         if (key.mem[0] != psysno)
2729         {
2730             psysno = key.mem[0];
2731             (*count)++;
2732         }
2733         kno++;
2734     }
2735     rset_close (rfd);
2736     yaz_log(YLOG_DEBUG, "%d keys, %d records", kno, *count);
2737 }
2738
2739 ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
2740                    oid_value attributeset,
2741                    int num_bases, char **basenames,
2742                    int *position, int *num_entries, ZebraScanEntry **list,
2743                    int *is_partial, RSET limit_set, int return_zero)
2744 {
2745     int i;
2746     int pos = *position;
2747     int num = *num_entries;
2748     int before;
2749     int after;
2750     int base_no;
2751     char termz[IT_MAX_WORD+20];
2752     AttrType use;
2753     int use_value;
2754     const char *use_string = 0;
2755     struct scan_info *scan_info_array;
2756     ZebraScanEntry *glist;
2757     int ords[32], ord_no = 0;
2758     int ptr[32];
2759
2760     int bases_ok = 0;     /* no of databases with OK attribute */
2761     int errCode = 0;      /* err code (if any is not OK) */
2762     char *errString = 0;  /* addinfo */
2763
2764     unsigned reg_id;
2765     char *search_type = NULL;
2766     char rank_type[128];
2767     int complete_flag;
2768     int sort_flag;
2769     NMEM rset_nmem = NULL; 
2770     struct rset_key_control *kc = 0;
2771
2772     *list = 0;
2773     *is_partial = 0;
2774
2775     if (attributeset == VAL_NONE)
2776         attributeset = VAL_BIB1;
2777
2778     if (!limit_set)
2779     {
2780         AttrType termset;
2781         int termset_value_numeric;
2782         const char *termset_value_string;
2783         attr_init(&termset, zapt, 8);
2784         termset_value_numeric =
2785             attr_find_ex(&termset, NULL, &termset_value_string);
2786         if (termset_value_numeric != -1)
2787         {
2788             char resname[32];
2789             const char *termset_name = 0;
2790             
2791             if (termset_value_numeric != -2)
2792             {
2793                 
2794                 sprintf(resname, "%d", termset_value_numeric);
2795                 termset_name = resname;
2796             }
2797             else
2798                 termset_name = termset_value_string;
2799             
2800             limit_set = resultSetRef (zh, termset_name);
2801         }
2802     }
2803         
2804     yaz_log(YLOG_DEBUG, "position = %d, num = %d set=%d",
2805             pos, num, attributeset);
2806         
2807     attr_init(&use, zapt, 1);
2808     use_value = attr_find_ex(&use, &attributeset, &use_string);
2809
2810     if (zebra_maps_attr(zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2811                         rank_type, &complete_flag, &sort_flag))
2812     {
2813         *num_entries = 0;
2814         zebra_setError(zh, YAZ_BIB1_UNSUPP_ATTRIBUTE_TYPE, 0);
2815         return ZEBRA_FAIL;
2816     }
2817     yaz_log(YLOG_DEBUG, "use_value = %d", use_value);
2818
2819     if (use_value == -1)
2820         use_value = 1016;
2821     for (base_no = 0; base_no < num_bases && ord_no < 32; base_no++)
2822     {
2823         data1_local_attribute *local_attr;
2824         attent attp;
2825         int ord;
2826
2827         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2828         {
2829             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2830                            basenames[base_no]);
2831             *num_entries = 0;
2832             return ZEBRA_FAIL;
2833         }
2834
2835         if (use_string &&
2836             (ord = zebraExplain_lookup_attr_str(zh->reg->zei,
2837                                                 use_string)) >= 0)
2838         {
2839             /* we have a match for a raw string attribute */
2840             if (ord > 0)
2841                 ords[ord_no++] = ord;
2842             attp.local_attributes = 0;  /* no more attributes */
2843         }
2844         else
2845         {
2846             int r;
2847             
2848             if ((r = att_getentbyatt (zh, &attp, attributeset, use_value,
2849                                       use_string)))
2850             {
2851                 yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d",
2852                         attributeset, use_value);
2853                 if (r == -1)
2854                 {
2855                     errCode = YAZ_BIB1_UNSUPP_USE_ATTRIBUTE;
2856                     if (use_string)
2857                         zebra_setError(zh, YAZ_BIB1_UNSUPP_USE_ATTRIBUTE,
2858                                        use_string);
2859                     else
2860                         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_USE_ATTRIBUTE,
2861                                             use_value);
2862                 }   
2863                 else
2864                 {
2865                     zebra_setError(zh, YAZ_BIB1_UNSUPP_ATTRIBUTE_SET, 0);
2866                 }
2867                 continue;
2868             }
2869         }
2870         bases_ok++;
2871         for (local_attr = attp.local_attributes; local_attr && ord_no < 32;
2872              local_attr = local_attr->next)
2873         {
2874             ord = zebraExplain_lookup_attr_su(zh->reg->zei,
2875                                               attp.attset_ordinal,
2876                                               local_attr->local);
2877             if (ord > 0)
2878                 ords[ord_no++] = ord;
2879         }
2880     }
2881     if (!bases_ok && errCode)
2882     {
2883         zebra_setError(zh, errCode, errString);
2884         *num_entries = 0;
2885         return ZEBRA_FAIL;
2886     }
2887     if (ord_no == 0)
2888     {
2889         *num_entries = 0;
2890         return ZEBRA_OK;
2891     }
2892     /* prepare dictionary scanning */
2893     if (num < 1)
2894     {
2895         *num_entries = 0;
2896         return ZEBRA_OK;
2897     }
2898     before = pos-1;
2899     if (before < 0)
2900         before = 0;
2901     after = 1+num-pos;
2902     if (after < 0)
2903         after = 0;
2904     yaz_log(YLOG_DEBUG, "rpn_scan pos=%d num=%d before=%d "
2905             "after=%d before+after=%d",
2906             pos, num, before, after, before+after);
2907     scan_info_array = (struct scan_info *)
2908         odr_malloc(stream, ord_no * sizeof(*scan_info_array));
2909     for (i = 0; i < ord_no; i++)
2910     {
2911         int j, prefix_len = 0;
2912         int before_tmp = before, after_tmp = after;
2913         struct scan_info *scan_info = scan_info_array + i;
2914         struct rpn_char_map_info rcmi;
2915
2916         rpn_char_map_prepare (zh->reg, reg_id, &rcmi);
2917
2918         scan_info->before = before;
2919         scan_info->after = after;
2920         scan_info->odr = stream;
2921
2922         scan_info->list = (struct scan_info_entry *)
2923             odr_malloc(stream, (before+after) * sizeof(*scan_info->list));
2924         for (j = 0; j<before+after; j++)
2925             scan_info->list[j].term = NULL;
2926
2927         prefix_len += key_SU_encode (ords[i], termz + prefix_len);
2928         termz[prefix_len++] = reg_id;
2929         termz[prefix_len] = 0;
2930         strcpy(scan_info->prefix, termz);
2931
2932         if (trans_scan_term(zh, zapt, termz+prefix_len, reg_id) == ZEBRA_FAIL)
2933             return ZEBRA_FAIL;
2934         
2935         dict_scan(zh->reg->dict, termz, &before_tmp, &after_tmp,
2936                   scan_info, scan_handle);
2937     }
2938     glist = (ZebraScanEntry *)
2939         odr_malloc(stream, (before+after)*sizeof(*glist));
2940
2941     rset_nmem = nmem_create();
2942     kc = zebra_key_control_create(zh);
2943
2944     /* consider terms after main term */
2945     for (i = 0; i < ord_no; i++)
2946         ptr[i] = before;
2947     
2948     *is_partial = 0;
2949     for (i = 0; i<after; i++)
2950     {
2951         int j, j0 = -1;
2952         const char *mterm = NULL;
2953         const char *tst;
2954         RSET rset = 0;
2955         int lo = i + pos-1; /* offset in result list */
2956
2957         /* find: j0 is the first of the minimal values */
2958         for (j = 0; j < ord_no; j++)
2959         {
2960             if (ptr[j] < before+after && ptr[j] >= 0 &&
2961                 (tst = scan_info_array[j].list[ptr[j]].term) &&
2962                 (!mterm || strcmp (tst, mterm) < 0))
2963             {
2964                 j0 = j;
2965                 mterm = tst;
2966             }
2967         }
2968         if (j0 == -1)
2969             break;  /* no value found, stop */
2970
2971         /* get result set for first one , but only if it's within bounds */
2972         if (lo >= 0)
2973         {
2974             /* get result set for first term */
2975             zebra_term_untrans_iconv(zh, stream->mem, reg_id,
2976                                      &glist[lo].term, mterm);
2977             rset = rset_trunc(zh, &scan_info_array[j0].list[ptr[j0]].isam_p, 1,
2978                               glist[lo].term, strlen(glist[lo].term),
2979                               NULL, 0, zapt->term->which, rset_nmem, 
2980                               kc, kc->scope, 0);
2981         }
2982         ptr[j0]++; /* move index for this set .. */
2983         /* get result set for remaining scan terms */
2984         for (j = j0+1; j<ord_no; j++)
2985         {
2986             if (ptr[j] < before+after && ptr[j] >= 0 &&
2987                 (tst = scan_info_array[j].list[ptr[j]].term) &&
2988                 !strcmp (tst, mterm))
2989             {
2990                 if (lo >= 0)
2991                 {
2992                     RSET rsets[2];
2993                     
2994                     rsets[0] = rset;
2995                     rsets[1] =
2996                         rset_trunc(
2997                             zh, &scan_info_array[j].list[ptr[j]].isam_p, 1,
2998                             glist[lo].term,
2999                             strlen(glist[lo].term), NULL, 0,
3000                             zapt->term->which,rset_nmem,
3001                             kc, kc->scope, 0);
3002                     rset = rsmulti_or_create(rset_nmem, kc,
3003                                              kc->scope, 0 /* termid */,
3004                                              2, rsets);
3005                 }
3006                 ptr[j]++;
3007             }
3008         }
3009         if (lo >= 0)
3010         {
3011             /* merge with limit_set if given */
3012             if (limit_set)
3013             {
3014                 RSET rsets[2];
3015                 rsets[0] = rset;
3016                 rsets[1] = rset_dup(limit_set);
3017                 
3018                 rset = rsmulti_and_create(rset_nmem, kc,
3019                                           kc->scope,
3020                                           2, rsets);
3021             }
3022             /* count it */
3023             count_set(rset, &glist[lo].occurrences);
3024             rset_delete(rset);
3025         }
3026     }
3027     if (i < after)
3028     {
3029         *num_entries -= (after-i);
3030         *is_partial = 1;
3031         if (*num_entries < 0)
3032         {
3033             (*kc->dec)(kc);
3034             nmem_destroy(rset_nmem);
3035             *num_entries = 0;
3036             return ZEBRA_OK;
3037         }
3038     }
3039     /* consider terms before main term */
3040     for (i = 0; i<ord_no; i++)
3041         ptr[i] = 0;
3042     
3043     for (i = 0; i<before; i++)
3044     {
3045         int j, j0 = -1;
3046         const char *mterm = NULL;
3047         const char *tst;
3048         RSET rset;
3049         int lo = before-1-i; /* offset in result list */
3050         
3051         for (j = 0; j <ord_no; j++)
3052         {
3053             if (ptr[j] < before && ptr[j] >= 0 &&
3054                 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
3055                 (!mterm || strcmp (tst, mterm) > 0))
3056             {
3057                 j0 = j;
3058                     mterm = tst;
3059             }
3060         }
3061         if (j0 == -1)
3062             break;
3063         
3064         zebra_term_untrans_iconv(zh, stream->mem, reg_id,
3065                                  &glist[lo].term, mterm);
3066         
3067         rset = rset_trunc
3068             (zh, &scan_info_array[j0].list[before-1-ptr[j0]].isam_p, 1,
3069              glist[lo].term, strlen(glist[lo].term),
3070              NULL, 0, zapt->term->which, rset_nmem,
3071              kc, kc->scope, 0);
3072         
3073         ptr[j0]++;
3074         
3075         for (j = j0+1; j<ord_no; j++)
3076         {
3077             if (ptr[j] < before && ptr[j] >= 0 &&
3078                 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
3079                 !strcmp (tst, mterm))
3080             {
3081                 RSET rsets[2];
3082                 
3083                 rsets[0] = rset;
3084                 rsets[1] = rset_trunc(
3085                     zh,
3086                     &scan_info_array[j].list[before-1-ptr[j]].isam_p, 1,
3087                     glist[lo].term,
3088                     strlen(glist[lo].term), NULL, 0,
3089                     zapt->term->which, rset_nmem,
3090                     kc, kc->scope, 0);
3091                 rset = rsmulti_or_create(rset_nmem, kc,
3092                                          kc->scope, 0 /* termid */, 2, rsets);
3093                 
3094                 ptr[j]++;
3095             }
3096         }
3097         if (limit_set)
3098         {
3099             RSET rsets[2];
3100             rsets[0] = rset;
3101             rsets[1] = rset_dup(limit_set);
3102             
3103             rset = rsmulti_and_create(rset_nmem, kc,
3104                                       kc->scope, 2, rsets);
3105         }
3106         count_set (rset, &glist[lo].occurrences);
3107         rset_delete (rset);
3108     }
3109     (*kc->dec)(kc);
3110     nmem_destroy(rset_nmem);
3111     i = before-i;
3112     if (i)
3113     {
3114         *is_partial = 1;
3115         *position -= i;
3116         *num_entries -= i;
3117         if (*num_entries <= 0)
3118         {
3119             *num_entries = 0;
3120             return ZEBRA_OK;
3121         }
3122     }
3123     
3124     *list = glist + i;               /* list is set to first 'real' entry */
3125     
3126     yaz_log(YLOG_DEBUG, "position = %d, num_entries = %d",
3127             *position, *num_entries);
3128     return ZEBRA_OK;
3129 }
3130