Fix bug #171. Change logging a bit
[idzebra-moved-to-github.git] / index / zrpn.c
1 /* $Id: zrpn.c,v 1.141.2.2 2004-11-15 21:53:45 adam Exp $
2    Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004
3    Index Data Aps
4
5 This file is part of the Zebra server.
6
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra.  If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA.
21 */
22
23
24 #include <stdio.h>
25 #include <assert.h>
26 #ifdef WIN32
27 #include <io.h>
28 #else
29 #include <unistd.h>
30 #endif
31 #include <ctype.h>
32
33 #include "index.h"
34 #include <zebra_xpath.h>
35
36 #include <charmap.h>
37 #include <rstemp.h>
38 #include <rsnull.h>
39 #include <rsbool.h>
40 #include <rsbetween.h>
41 #include <rsprox.h>
42
/* State handed to rpn_char_map_handler through dict_grep_cmap's
 * opaque pointer; filled in by rpn_char_map_prepare. */
struct rpn_char_map_info {
    ZebraMaps zm;      /* maps passed on to zebra_maps_input */
    int reg_type;      /* register type passed on to zebra_maps_input */
};
47
/* Cursor used by attr_init / attr_find_ex to walk the attribute list
 * of a Z_AttributesPlusTerm looking for one attribute type. */
typedef struct {
    int type;                    /* attribute type being searched for */
    int major;                   /* index of next element in zapt->attributes */
    int minor;                   /* index of next item inside a complex value */
    Z_AttributesPlusTerm *zapt;  /* the term whose attributes are scanned */
} AttrType;
54
55
56 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
57 {
58     struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
59     const char **out = zebra_maps_input(p->zm, p->reg_type, from, len, 0);
60 #if 0
61     if (out && *out)
62     {
63         const char *outp = *out;
64         yaz_log(LOG_LOG, "---");
65         while (*outp)
66         {
67             yaz_log(LOG_LOG, "%02X", *outp);
68             outp++;
69         }
70     }
71 #endif
72     return out;
73 }
74
75 static void rpn_char_map_prepare(struct zebra_register *reg, int reg_type,
76                                   struct rpn_char_map_info *map_info)
77 {
78     map_info->zm = reg->zebra_maps;
79     map_info->reg_type = reg_type;
80     dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
81 }
82
83 static int attr_find_ex(AttrType *src, oid_value *attributeSetP,
84                          const char **string_value)
85 {
86     int num_attributes;
87
88     num_attributes = src->zapt->attributes->num_attributes;
89     while (src->major < num_attributes)
90     {
91         Z_AttributeElement *element;
92
93         element = src->zapt->attributes->attributes[src->major];
94         if (src->type == *element->attributeType)
95         {
96             switch (element->which) 
97             {
98             case Z_AttributeValue_numeric:
99                 ++(src->major);
100                 if (element->attributeSet && attributeSetP)
101                 {
102                     oident *attrset;
103
104                     attrset = oid_getentbyoid(element->attributeSet);
105                     *attributeSetP = attrset->value;
106                 }
107                 return *element->value.numeric;
108                 break;
109             case Z_AttributeValue_complex:
110                 if (src->minor >= element->value.complex->num_list)
111                     break;
112                 if (element->attributeSet && attributeSetP)
113                 {
114                     oident *attrset;
115                     
116                     attrset = oid_getentbyoid(element->attributeSet);
117                     *attributeSetP = attrset->value;
118                 }
119                 if (element->value.complex->list[src->minor]->which ==  
120                     Z_StringOrNumeric_numeric)
121                 {
122                     ++(src->minor);
123                     return
124                         *element->value.complex->list[src->minor-1]->u.numeric;
125                 }
126                 else if (element->value.complex->list[src->minor]->which ==  
127                          Z_StringOrNumeric_string)
128                 {
129                     if (!string_value)
130                         break;
131                     ++(src->minor);
132                     *string_value = 
133                         element->value.complex->list[src->minor-1]->u.string;
134                     return -2;
135                 }
136                 else
137                     break;
138             default:
139                 assert(0);
140             }
141         }
142         ++(src->major);
143     }
144     return -1;
145 }
146
147 static int attr_find(AttrType *src, oid_value *attributeSetP)
148 {
149     return attr_find_ex(src, attributeSetP, 0);
150 }
151
152 static void attr_init(AttrType *src, Z_AttributesPlusTerm *zapt,
153                        int type)
154 {
155     src->zapt = zapt;
156     src->type = type;
157     src->major = 0;
158     src->minor = 0;
159 }
160
/* When defined, grep_info carries a term_no array that add_isam_p grows
 * in step with isam_p_buf. */
#define TERM_COUNT        
       
/* Accumulator filled by add_isam_p for each dictionary hit. */
struct grep_info {        
#ifdef TERM_COUNT        
    int *term_no;          /* allocated with the same capacity as isam_p_buf */
#endif        
    ISAMS_P *isam_p_buf;   /* growable array of ISAM positions */
    int isam_p_size;       /* allocated capacity of isam_p_buf (elements) */
    int isam_p_indx;       /* number of entries currently stored */
    ZebraHandle zh;        /* handle used for term decoding / explain lookup */
    int reg_type;          /* register type used when un-translating terms */
    ZebraSet termset;      /* if non-NULL, each hit's term is added to it */
};        
174
175 static void term_untrans(ZebraHandle zh, int reg_type,
176                            char *dst, const char *src)
177 {
178     int len = 0;
179     while (*src)
180     {
181         const char *cp = zebra_maps_output(zh->reg->zebra_maps,
182                                             reg_type, &src);
183         if (!cp && len < IT_MAX_WORD-1)
184             dst[len++] = *src++;
185         else
186             while (*cp && len < IT_MAX_WORD-1)
187                 dst[len++] = *cp++;
188     }
189     dst[len] = '\0';
190 }
191
192 static void add_isam_p(const char *name, const char *info,
193                         struct grep_info *p)
194 {
195     if (p->isam_p_indx == p->isam_p_size)
196     {
197         ISAMS_P *new_isam_p_buf;
198 #ifdef TERM_COUNT        
199         int *new_term_no;        
200 #endif
201         p->isam_p_size = 2*p->isam_p_size + 100;
202         new_isam_p_buf = (ISAMS_P *) xmalloc(sizeof(*new_isam_p_buf) *
203                                              p->isam_p_size);
204         if (p->isam_p_buf)
205         {
206             memcpy(new_isam_p_buf, p->isam_p_buf,
207                     p->isam_p_indx * sizeof(*p->isam_p_buf));
208             xfree(p->isam_p_buf);
209         }
210         p->isam_p_buf = new_isam_p_buf;
211
212 #ifdef TERM_COUNT
213         new_term_no = (int *) xmalloc(sizeof(*new_term_no) *
214                                        p->isam_p_size);
215         if (p->term_no)
216         {
217             memcpy(new_term_no, p->isam_p_buf,
218                     p->isam_p_indx * sizeof(*p->term_no));
219             xfree(p->term_no);
220         }
221         p->term_no = new_term_no;
222 #endif
223     }
224     assert(*info == sizeof(*p->isam_p_buf));
225     memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
226
227 #if 1
228     if (p->termset)
229     {
230         const char *db;
231         int set, use;
232         char term_tmp[IT_MAX_WORD];
233         int su_code = 0;
234         int len = key_SU_decode(&su_code, name);
235         
236         term_untrans(p->zh, p->reg_type, term_tmp, name+len+1);
237         yaz_log(LOG_LOG, "grep: %d %c %s", su_code, name[len], term_tmp);
238         zebraExplain_lookup_ord(p->zh->reg->zei,
239                                  su_code, &db, &set, &use);
240         yaz_log(LOG_LOG, "grep:  set=%d use=%d db=%s", set, use, db);
241         
242         resultSetAddTerm(p->zh, p->termset, name[len], db,
243                           set, use, term_tmp);
244     }
245 #endif
246     (p->isam_p_indx)++;
247 }
248
/*
 * grep_handle: hit callback; records the hit in the grep_info passed
 * as p.  Always returns 0.
 */
static int grep_handle(char *name, const char *info, void *p)
{
    struct grep_info *acc = (struct grep_info *) p;

    add_isam_p(name, info, acc);
    return 0;
}
254
255 static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src,
256                      const char *ct1, const char *ct2, int first)
257 {
258     const char *s1, *s0 = *src;
259     const char **map;
260
261     /* skip white space */
262     while (*s0)
263     {
264         if (ct1 && strchr(ct1, *s0))
265             break;
266         if (ct2 && strchr(ct2, *s0))
267             break;
268         s1 = s0;
269         map = zebra_maps_input(zebra_maps, reg_type, &s1, strlen(s1), first);
270         if (**map != *CHR_SPACE)
271             break;
272         s0 = s1;
273     }
274     *src = s0;
275     return *s0;
276 }
277
/* Characters the term_1xx functions backslash-escape when copying
 * literal term text into a dictionary regular expression. */
#define REGEX_CHARS " []()|.*+?!"
279
280 /* term_100: handle term, where trunc=none(no operators at all) */
281 static int term_100(ZebraMaps zebra_maps, int reg_type,
282                      const char **src, char *dst, int space_split,
283                      char *dst_term)
284 {
285     const char *s0, *s1;
286     const char **map;
287     int i = 0;
288     int j = 0;
289
290     const char *space_start = 0;
291     const char *space_end = 0;
292
293     if (!term_pre(zebra_maps, reg_type, src, NULL, NULL, !space_split))
294         return 0;
295     s0 = *src;
296     while (*s0)
297     {
298         s1 = s0;
299         map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0);
300         if (space_split)
301         {
302             if (**map == *CHR_SPACE)
303                 break;
304         }
305         else  /* complete subfield only. */
306         {
307             if (**map == *CHR_SPACE)
308             {   /* save space mapping for later  .. */
309                 space_start = s1;
310                 space_end = s0;
311                 continue;
312             }
313             else if (space_start)
314             {   /* reload last space */
315                 while (space_start < space_end)
316                 {
317                     if (strchr(REGEX_CHARS, *space_start))
318                         dst[i++] = '\\';
319                     dst_term[j++] = *space_start;
320                     dst[i++] = *space_start++;
321                 }
322                 /* and reset */
323                 space_start = space_end = 0;
324             }
325         }
326         /* add non-space char */
327         while (s1 < s0)
328         {
329             if (strchr(REGEX_CHARS, *s1))
330                 dst[i++] = '\\';
331             dst_term[j++] = *s1;
332             dst[i++] = *s1++;
333         }
334     }
335     dst[i] = '\0';
336     dst_term[j] = '\0';
337     *src = s0;
338     return i;
339 }
340
341 /* term_101: handle term, where trunc=Process # */
342 static int term_101(ZebraMaps zebra_maps, int reg_type,
343                      const char **src, char *dst, int space_split,
344                      char *dst_term)
345 {
346     const char *s0, *s1;
347     const char **map;
348     int i = 0;
349     int j = 0;
350
351     if (!term_pre(zebra_maps, reg_type, src, "#", "#", !space_split))
352         return 0;
353     s0 = *src;
354     while (*s0)
355     {
356         if (*s0 == '#')
357         {
358             dst[i++] = '.';
359             dst[i++] = '*';
360             dst_term[j++] = *s0++;
361         }
362         else
363         {
364             s1 = s0;
365             map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0);
366             if (space_split && **map == *CHR_SPACE)
367                 break;
368             while (s1 < s0)
369             {
370                 if (strchr(REGEX_CHARS, *s1))
371                     dst[i++] = '\\';
372                 dst_term[j++] = *s1;
373                 dst[i++] = *s1++;
374             }
375         }
376     }
377     dst[i] = '\0';
378     dst_term[j++] = '\0';
379     *src = s0;
380     return i;
381 }
382
383 /* term_103: handle term, where trunc=re-2 (regular expressions) */
384 static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src,
385                      char *dst, int *errors, int space_split,
386                      char *dst_term)
387 {
388     int i = 0;
389     int j = 0;
390     const char *s0, *s1;
391     const char **map;
392
393     if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split))
394         return 0;
395     s0 = *src;
396     if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
397         isdigit(s0[1]))
398     {
399         *errors = s0[1] - '0';
400         s0 += 3;
401         if (*errors > 3)
402             *errors = 3;
403     }
404     while (*s0)
405     {
406         if (strchr("^\\()[].*+?|-", *s0))
407         {
408             dst_term[j++] = *s0;
409             dst[i++] = *s0++;
410         }
411         else
412         {
413             s1 = s0;
414             map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0);
415             if (**map == *CHR_SPACE)
416                 break;
417             while (s1 < s0)
418             {
419                 if (strchr(REGEX_CHARS, *s1))
420                     dst[i++] = '\\';
421                 dst_term[j++] = *s1;
422                 dst[i++] = *s1++;
423             }
424         }
425     }
426     dst[i] = '\0';
427     dst_term[j] = '\0';
428     *src = s0;
429     return i;
430 }
431
432 /* term_103: handle term, where trunc=re-1 (regular expressions) */
433 static int term_102 (ZebraMaps zebra_maps, int reg_type, const char **src,
434                      char *dst, int space_split, char *dst_term)
435 {
436     return term_103(zebra_maps, reg_type, src, dst, NULL, space_split,
437                      dst_term);
438 }
439
440
441 /* term_104: handle term, where trunc=Process # and ! */
442 static int term_104(ZebraMaps zebra_maps, int reg_type,
443                      const char **src, char *dst, int space_split,
444                      char *dst_term)
445 {
446     const char *s0, *s1;
447     const char **map;
448     int i = 0;
449     int j = 0;
450
451     if (!term_pre(zebra_maps, reg_type, src, "?*#", "?*#", !space_split))
452         return 0;
453     s0 = *src;
454     while (*s0)
455     {
456         if (*s0 == '?')
457         {
458             dst_term[j++] = *s0++;
459             if (*s0 >= '0' && *s0 <= '9')
460             {
461                 int limit = 0;
462                 while (*s0 >= '0' && *s0 <= '9')
463                 {
464                     limit = limit * 10 + (*s0 - '0');
465                     dst_term[j++] = *s0++;
466                 }
467                 if (limit > 20)
468                     limit = 20;
469                 while (--limit >= 0)
470                 {
471                     dst[i++] = '.';
472                     dst[i++] = '?';
473                 }
474             }
475             else
476             {
477                 dst[i++] = '.';
478                 dst[i++] = '*';
479             }
480         }
481         else if (*s0 == '*')
482         {
483             dst[i++] = '.';
484             dst[i++] = '*';
485             dst_term[j++] = *s0++;
486         }
487         else if (*s0 == '#')
488         {
489             dst[i++] = '.';
490             dst_term[j++] = *s0++;
491         }
492         {
493             s1 = s0;
494             map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0);
495             if (space_split && **map == *CHR_SPACE)
496                 break;
497             while (s1 < s0)
498             {
499                 if (strchr(REGEX_CHARS, *s1))
500                     dst[i++] = '\\';
501                 dst_term[j++] = *s1;
502                 dst[i++] = *s1++;
503             }
504         }
505     }
506     dst[i] = '\0';
507     dst_term[j++] = '\0';
508     *src = s0;
509     return i;
510 }
511
512 /* term_105/106: handle term, where trunc=Process * and ! and right trunc */
513 static int term_105 (ZebraMaps zebra_maps, int reg_type,
514                      const char **src, char *dst, int space_split,
515                      char *dst_term, int right_truncate)
516 {
517     const char *s0, *s1;
518     const char **map;
519     int i = 0;
520     int j = 0;
521
522     if (!term_pre(zebra_maps, reg_type, src, "*!", "*!", !space_split))
523         return 0;
524     s0 = *src;
525     while (*s0)
526     {
527         if (*s0 == '*')
528         {
529             dst[i++] = '.';
530             dst[i++] = '*';
531             dst_term[j++] = *s0++;
532         }
533         else if (*s0 == '!')
534         {
535             dst[i++] = '.';
536             dst_term[j++] = *s0++;
537         }
538         {
539             s1 = s0;
540             map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0);
541             if (space_split && **map == *CHR_SPACE)
542                 break;
543             while (s1 < s0)
544             {
545                 if (strchr(REGEX_CHARS, *s1))
546                     dst[i++] = '\\';
547                 dst_term[j++] = *s1;
548                 dst[i++] = *s1++;
549             }
550         }
551     }
552     if (right_truncate)
553     {
554         dst[i++] = '.';
555         dst[i++] = '*';
556     }
557     dst[i] = '\0';
558     
559     dst_term[j++] = '\0';
560     *src = s0;
561     return i;
562 }
563
564
/* gen_regular_rel - generate regular expression from relation
 *  dst:     output buffer; written with strcpy/strcat and direct stores,
 *           no bounds checking is done here
 *  val:     border value (inclusive)
 *  islt:    1 if <=; 0 if >=.
 *
 * The expression is built one alternative per decimal digit of |val|,
 * working from the least significant digit upwards.
 * Example: val=123, islt=1 yields
 *   (-[0-9]+|(12[0-3]|1[0-1][0-9]|0[0-9][0-9]|0*[0-9]?[0-9]?))
 */
static void gen_regular_rel(char *dst, int val, int islt)
{
    int dst_p;
    int w, d, i;
    int pos = 0;          /* digit position, 0 = least significant */
    char numstr[20];

    yaz_log(LOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
    if (val >= 0)
    {
        if (islt)
            /* <= non-negative: any negative number also qualifies */
            strcpy(dst, "(-[0-9]+|(");
        else
            strcpy(dst, "((");
    } 
    else
    {
        /* negative border: work on |val| with the comparison flipped */
        if (!islt)
        {
            /* >= negative: any non-negative number also qualifies */
            strcpy(dst, "([0-9]+|-(");
            dst_p = strlen(dst);
            islt = 1;
        }
        else
        {
            strcpy(dst, "(-(");
            islt = 0;
        }
        val = -val;
    }
    dst_p = strlen(dst);
    sprintf(numstr, "%d", val);
    for (w = strlen(numstr); --w >= 0; pos++)
    {
        d = numstr[w];
        if (pos > 0)
        {
            /* above the last digit the bound is strict: step the digit
               by one, or skip the alternative if that is impossible */
            if (islt)
            {
                if (d == '0')
                    continue;
                d--;
            } 
            else
            {
                if (d == '9')
                    continue;
                d++;
            }
        }
        
        /* write the full number, then back up to digit w so that the
           leading digits stay and the rest is overwritten below */
        strcpy(dst + dst_p, numstr);
        dst_p = strlen(dst) - pos - 1;

        if (islt)
        {
            if (d != '0')
            {
                dst[dst_p++] = '[';
                dst[dst_p++] = '0';
                dst[dst_p++] = '-';
                dst[dst_p++] = d;
                dst[dst_p++] = ']';
            }
            else
                dst[dst_p++] = d;
        }
        else
        {
            if (d != '9')
            { 
                dst[dst_p++] = '[';
                dst[dst_p++] = d;
                dst[dst_p++] = '-';
                dst[dst_p++] = '9';
                dst[dst_p++] = ']';
            }
            else
                dst[dst_p++] = d;
        }
        /* digits below this position may be anything */
        for (i = 0; i<pos; i++)
        {
            dst[dst_p++] = '[';
            dst[dst_p++] = '0';
            dst[dst_p++] = '-';
            dst[dst_p++] = '9';
            dst[dst_p++] = ']';
        }
        dst[dst_p++] = '|';
    }
    dst[dst_p] = '\0';
    if (islt)
    {
        /* match everything less than 10^(pos-1) */
        strcat(dst, "0*");
        for (i = 1; i<pos; i++)
            strcat(dst, "[0-9]?");
    }
    else
    {
        /* match everything greater than 10^pos */
        for (i = 0; i <= pos; i++)
            strcat(dst, "[0-9]");
        strcat(dst, "[0-9]*");
    }
    strcat(dst, "))");
}
676
/*
 * string_rel_add_char: copy one (possibly backslash-escaped) character.
 *  term_p:  in/out write position; advanced past the bytes written
 *  src:     source string
 *  indx:    in/out read index into src; advanced past the bytes read
 * A backslash is copied together with the character that follows it.
 */
void string_rel_add_char(char **term_p, const char *src, int *indx)
{
    char *out = *term_p;
    int pos = *indx;

    if (src[pos] == '\\')
        *out++ = src[pos++];
    *out++ = src[pos++];

    *term_p = out;
    *indx = pos;
}
683
684 /*
685  *   >  abc     ([b-].*|a[c-].*|ab[d-].*|abc.+)
686  *              ([^-a].*|a[^-b].*ab[^-c].*|abc.+)
687  *   >= abc     ([b-].*|a[c-].*|ab[c-].*)
688  *              ([^-a].*|a[^-b].*|ab[c-].*)
689  *   <  abc     ([-0].*|a[-a].*|ab[-b].*)
690  *              ([^a-].*|a[^b-].*|ab[^c-].*)
691  *   <= abc     ([-0].*|a[-a].*|ab[-b].*|abc)
692  *              ([^a-].*|a[^b-].*|ab[^c-].*|abc)
693  */
694 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
695                             const char **term_sub, char *term_dict,
696                             oid_value attributeSet,
697                             int reg_type, int space_split, char *term_dst)
698 {
699     AttrType relation;
700     int relation_value;
701     int i;
702     char *term_tmp = term_dict + strlen(term_dict);
703     char term_component[2*IT_MAX_WORD+20];
704
705     attr_init(&relation, zapt, 2);
706     relation_value = attr_find(&relation, NULL);
707
708     yaz_log(LOG_DEBUG, "string relation value=%d", relation_value);
709     switch (relation_value)
710     {
711     case 1:
712         if (!term_100 (zh->reg->zebra_maps, reg_type,
713                        term_sub, term_component,
714                        space_split, term_dst))
715             return 0;
716         yaz_log(LOG_DEBUG, "Relation <");
717         
718         *term_tmp++ = '(';
719         for (i = 0; term_component[i]; )
720         {
721             int j = 0;
722
723             if (i)
724                 *term_tmp++ = '|';
725             while (j < i)
726                 string_rel_add_char (&term_tmp, term_component, &j);
727
728             *term_tmp++ = '[';
729
730             *term_tmp++ = '^';
731             string_rel_add_char (&term_tmp, term_component, &i);
732             *term_tmp++ = '-';
733
734             *term_tmp++ = ']';
735             *term_tmp++ = '.';
736             *term_tmp++ = '*';
737
738             if ((term_tmp - term_dict) > IT_MAX_WORD)
739                 break;
740         }
741         *term_tmp++ = ')';
742         *term_tmp = '\0';
743         break;
744     case 2:
745         if (!term_100 (zh->reg->zebra_maps, reg_type,
746                        term_sub, term_component,
747                        space_split, term_dst))
748             return 0;
749         yaz_log(LOG_DEBUG, "Relation <=");
750
751         *term_tmp++ = '(';
752         for (i = 0; term_component[i]; )
753         {
754             int j = 0;
755
756             while (j < i)
757                 string_rel_add_char (&term_tmp, term_component, &j);
758             *term_tmp++ = '[';
759
760             *term_tmp++ = '^';
761             string_rel_add_char (&term_tmp, term_component, &i);
762             *term_tmp++ = '-';
763
764             *term_tmp++ = ']';
765             *term_tmp++ = '.';
766             *term_tmp++ = '*';
767
768             *term_tmp++ = '|';
769
770             if ((term_tmp - term_dict) > IT_MAX_WORD)
771                 break;
772         }
773         for (i = 0; term_component[i]; )
774             string_rel_add_char (&term_tmp, term_component, &i);
775         *term_tmp++ = ')';
776         *term_tmp = '\0';
777         break;
778     case 5:
779         if (!term_100 (zh->reg->zebra_maps, reg_type,
780                        term_sub, term_component, space_split, term_dst))
781             return 0;
782         yaz_log(LOG_DEBUG, "Relation >");
783
784         *term_tmp++ = '(';
785         for (i = 0; term_component[i];)
786         {
787             int j = 0;
788
789             while (j < i)
790                 string_rel_add_char (&term_tmp, term_component, &j);
791             *term_tmp++ = '[';
792             
793             *term_tmp++ = '^';
794             *term_tmp++ = '-';
795             string_rel_add_char (&term_tmp, term_component, &i);
796
797             *term_tmp++ = ']';
798             *term_tmp++ = '.';
799             *term_tmp++ = '*';
800
801             *term_tmp++ = '|';
802
803             if ((term_tmp - term_dict) > IT_MAX_WORD)
804                 break;
805         }
806         for (i = 0; term_component[i];)
807             string_rel_add_char (&term_tmp, term_component, &i);
808         *term_tmp++ = '.';
809         *term_tmp++ = '+';
810         *term_tmp++ = ')';
811         *term_tmp = '\0';
812         break;
813     case 4:
814         if (!term_100 (zh->reg->zebra_maps, reg_type, term_sub,
815                        term_component, space_split, term_dst))
816             return 0;
817         yaz_log(LOG_DEBUG, "Relation >=");
818
819         *term_tmp++ = '(';
820         for (i = 0; term_component[i];)
821         {
822             int j = 0;
823
824             if (i)
825                 *term_tmp++ = '|';
826             while (j < i)
827                 string_rel_add_char (&term_tmp, term_component, &j);
828             *term_tmp++ = '[';
829
830             if (term_component[i+1])
831             {
832                 *term_tmp++ = '^';
833                 *term_tmp++ = '-';
834                 string_rel_add_char (&term_tmp, term_component, &i);
835             }
836             else
837             {
838                 string_rel_add_char (&term_tmp, term_component, &i);
839                 *term_tmp++ = '-';
840             }
841             *term_tmp++ = ']';
842             *term_tmp++ = '.';
843             *term_tmp++ = '*';
844
845             if ((term_tmp - term_dict) > IT_MAX_WORD)
846                 break;
847         }
848         *term_tmp++ = ')';
849         *term_tmp = '\0';
850         break;
851     case 3:
852     default:
853         yaz_log(LOG_DEBUG, "Relation =");
854         if (!term_100 (zh->reg->zebra_maps, reg_type, term_sub,
855                        term_component, space_split, term_dst))
856             return 0;
857         strcat (term_tmp, "(");
858         strcat (term_tmp, term_component);
859         strcat (term_tmp, ")");
860     }
861     return 1;
862 }
863
/* Forward declaration: string_term is defined further down but is
 * called by term_trunc. */
static int string_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
                        const char **term_sub, 
                        oid_value attributeSet, NMEM stream,
                        struct grep_info *grep_info,
                        int reg_type, int complete_flag,
                        int num_bases, char **basenames,
                        char *term_dst, int xpath_use);
871
872 static RSET term_trunc (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
873                         const char **term_sub, 
874                         oid_value attributeSet, NMEM stream,
875                         struct grep_info *grep_info,
876                         int reg_type, int complete_flag,
877                         int num_bases, char **basenames,
878                         char *term_dst,
879                         const char *rank_type, int xpath_use)
880 {
881     int r;
882     grep_info->isam_p_indx = 0;
883     r = string_term (zh, zapt, term_sub, attributeSet, stream, grep_info,
884                      reg_type, complete_flag, num_bases, basenames,
885                      term_dst, xpath_use);
886     if (r < 1)
887         return 0;
888     yaz_log(LOG_DEBUG, "term: %s", term_dst);
889     return rset_trunc (zh, grep_info->isam_p_buf,
890                        grep_info->isam_p_indx, term_dst,
891                        strlen(term_dst), rank_type, 1 /* preserve pos */,
892                        zapt->term->which);
893 }
894
895 static char *nmem_strdup_i(NMEM nmem, int v)
896 {
897     char val_str[64];
898     sprintf (val_str, "%d", v);
899     return nmem_strdup (nmem, val_str);
900 }
901     
902 static int string_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
903                         const char **term_sub, 
904                         oid_value attributeSet, NMEM stream,
905                         struct grep_info *grep_info,
906                         int reg_type, int complete_flag,
907                         int num_bases, char **basenames,
908                         char *term_dst, int xpath_use)
909 {
910     char term_dict[2*IT_MAX_WORD+4000];
911     int j, r, base_no;
912     AttrType truncation;
913     int truncation_value;
914     AttrType use;
915     int use_value;
916     const char *use_string = 0;
917     oid_value curAttributeSet = attributeSet;
918     const char *termp;
919     struct rpn_char_map_info rcmi;
920     int space_split = complete_flag ? 0 : 1;
921
922     int bases_ok = 0;     /* no of databases with OK attribute */
923     int errCode = 0;      /* err code (if any is not OK) */
924     char *errString = 0;  /* addinfo */
925
926     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
927     attr_init (&use, zapt, 1);
928     use_value = attr_find_ex (&use, &curAttributeSet, &use_string);
929     yaz_log(LOG_DEBUG, "string_term, use value %d", use_value);
930     attr_init (&truncation, zapt, 5);
931     truncation_value = attr_find (&truncation, NULL);
932     yaz_log(LOG_DEBUG, "truncation value %d", truncation_value);
933
934     if (use_value == -1)    /* no attribute - assumy "any" */
935         use_value = 1016;
936     for (base_no = 0; base_no < num_bases; base_no++)
937     {
938         int attr_ok = 0;
939         int regex_range = 0;
940         attent attp;
941         data1_local_attribute id_xpath_attr;
942         data1_local_attribute *local_attr;
943         int max_pos, prefix_len = 0;
944
945         termp = *term_sub;
946
947         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
948         {
949             zh->errCode = 109; /* Database unavailable */
950             zh->errString = basenames[base_no];
951             return -1;
952         }
953         if (xpath_use > 0 && use_value == -2) 
954         {
955             use_value = xpath_use;
956             attp.local_attributes = &id_xpath_attr;
957             attp.attset_ordinal = VAL_IDXPATH;
958             id_xpath_attr.next = 0;
959             id_xpath_attr.local = use_value;
960         }
961         else if (curAttributeSet == VAL_IDXPATH)
962         {
963             attp.local_attributes = &id_xpath_attr;
964             attp.attset_ordinal = VAL_IDXPATH;
965             id_xpath_attr.next = 0;
966             id_xpath_attr.local = use_value;
967         }
968         else
969         {
970             if ((r=att_getentbyatt (zh, &attp, curAttributeSet, use_value,
971                                             use_string)))
972             {
973                 yaz_log(LOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
974                       curAttributeSet, use_value, r);
975                 if (r == -1)
976                 {
977                     /* set was found, but value wasn't defined */
978                     errCode = 114;
979                     if (use_string)
980                         errString = nmem_strdup(stream, use_string);
981                     else
982                         errString = nmem_strdup_i(stream, use_value);
983                 }
984                 else
985                 {
986                     int oid[OID_SIZE];
987                     struct oident oident;
988                     
989                     oident.proto = PROTO_Z3950;
990                     oident.oclass = CLASS_ATTSET;
991                     oident.value = curAttributeSet;
992                     oid_ent_to_oid (&oident, oid);
993                     
994                     errCode = 121;
995                     errString = nmem_strdup (stream, oident.desc);
996                 }
997                 continue;
998             }
999         }
1000         for (local_attr = attp.local_attributes; local_attr;
1001              local_attr = local_attr->next)
1002         {
1003             int ord;
1004             char ord_buf[32];
1005             int i, ord_len;
1006             
1007             ord = zebraExplain_lookupSU (zh->reg->zei, attp.attset_ordinal,
1008                                          local_attr->local);
1009             if (ord < 0)
1010                 continue;
1011             if (prefix_len)
1012                 term_dict[prefix_len++] = '|';
1013             else
1014                 term_dict[prefix_len++] = '(';
1015             
1016             ord_len = key_SU_encode (ord, ord_buf);
1017             for (i = 0; i<ord_len; i++)
1018             {
1019                 term_dict[prefix_len++] = 1;
1020                 term_dict[prefix_len++] = ord_buf[i];
1021             }
1022         }
1023         if (!prefix_len)
1024         {
1025 #if 1
1026             bases_ok++;
1027 #else
1028             errCode = 114;
1029             errString = nmem_strdup_i(stream, use_value);
1030             continue;
1031 #endif
1032         }
1033         else
1034         {
1035             attr_ok = 1;
1036             bases_ok++; /* this has OK attributes */
1037         }
1038
1039         term_dict[prefix_len++] = ')';
1040         term_dict[prefix_len++] = 1;
1041         term_dict[prefix_len++] = reg_type;
1042         yaz_log(LOG_DEBUG, "reg_type = %d", term_dict[prefix_len-1]);
1043         term_dict[prefix_len] = '\0';
1044         j = prefix_len;
1045         switch (truncation_value)
1046         {
1047         case -1:         /* not specified */
1048         case 100:        /* do not truncate */
1049             if (!string_relation (zh, zapt, &termp, term_dict,
1050                                   attributeSet,
1051                                   reg_type, space_split, term_dst))
1052                 return 0;
1053             break;
1054         case 1:          /* right truncation */
1055             term_dict[j++] = '(';
1056             if (!term_100 (zh->reg->zebra_maps, reg_type,
1057                            &termp, term_dict + j, space_split, term_dst))
1058                 return 0;
1059             strcat (term_dict, ".*)");
1060             break;
1061         case 2:          /* keft truncation */
1062             term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1063             if (!term_100 (zh->reg->zebra_maps, reg_type,
1064                            &termp, term_dict + j, space_split, term_dst))
1065                 return 0;
1066             strcat (term_dict, ")");
1067             break;
1068         case 3:          /* left&right truncation */
1069             term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1070             if (!term_100 (zh->reg->zebra_maps, reg_type,
1071                            &termp, term_dict + j, space_split, term_dst))
1072                 return 0;
1073             strcat (term_dict, ".*)");
1074             break;
1075         case 101:        /* process # in term */
1076             term_dict[j++] = '(';
1077             if (!term_101 (zh->reg->zebra_maps, reg_type,
1078                            &termp, term_dict + j, space_split, term_dst))
1079                 return 0;
1080             strcat (term_dict, ")");
1081             break;
1082         case 102:        /* Regexp-1 */
1083             term_dict[j++] = '(';
1084             if (!term_102 (zh->reg->zebra_maps, reg_type,
1085                            &termp, term_dict + j, space_split, term_dst))
1086                 return 0;
1087             strcat (term_dict, ")");
1088             break;
1089         case 103:       /* Regexp-2 */
1090             r = 1;
1091             term_dict[j++] = '(';
1092             if (!term_103 (zh->reg->zebra_maps, reg_type,
1093                            &termp, term_dict + j, &r, space_split, term_dst))
1094                 return 0;
1095             strcat (term_dict, ")");
1096             regex_range = 2;
1097             break;
1098         case 104:        /* process # and ! in term */
1099             term_dict[j++] = '(';
1100             if (!term_104 (zh->reg->zebra_maps, reg_type,
1101                            &termp, term_dict + j, space_split, term_dst))
1102                 return 0;
1103             strcat (term_dict, ")");
1104             break;
1105         case 105:        /* process * and ! in term */
1106             term_dict[j++] = '(';
1107             if (!term_105 (zh->reg->zebra_maps, reg_type,
1108                            &termp, term_dict + j, space_split, term_dst, 1))
1109                 return 0;
1110             strcat (term_dict, ")");
1111             break;
1112         case 106:        /* process * and ! in term */
1113             term_dict[j++] = '(';
1114             if (!term_105 (zh->reg->zebra_maps, reg_type,
1115                            &termp, term_dict + j, space_split, term_dst, 0))
1116                 return 0;
1117             strcat (term_dict, ")");
1118             break;
1119         default:
1120             zh->errCode = 120;
1121             zh->errString = nmem_strdup_i(stream, truncation_value);
1122             return -1;
1123         }
1124         if (attr_ok)
1125         {
1126             yaz_log(LOG_DEBUG, "dict_lookup_grep: %s", term_dict+prefix_len);
1127             r = dict_lookup_grep (zh->reg->dict, term_dict, 0,
1128                                   grep_info, &max_pos, regex_range,
1129                                   grep_handle);
1130             if (r)
1131                 yaz_log(LOG_WARN, "dict_lookup_grep fail %d", r);
1132         }
1133     }
1134     if (!bases_ok)
1135     {
1136         zh->errCode = errCode;
1137         zh->errString = errString;
1138         return -1;
1139     }
1140     *term_sub = termp;
1141     yaz_log(LOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1142     return 1;
1143 }
1144
1145
1146 /* convert APT search term to UTF8 */
1147 static int zapt_term_to_utf8 (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1148                               char *termz)
1149 {
1150     size_t sizez;
1151     Z_Term *term = zapt->term;
1152
1153     switch (term->which)
1154     {
1155     case Z_Term_general:
1156         if (zh->iconv_to_utf8 != 0)
1157         {
1158             char *inbuf = term->u.general->buf;
1159             size_t inleft = term->u.general->len;
1160             char *outbuf = termz;
1161             size_t outleft = IT_MAX_WORD-1;
1162             size_t ret;
1163
1164             ret = yaz_iconv(zh->iconv_to_utf8, &inbuf, &inleft,
1165                         &outbuf, &outleft);
1166             if (ret == (size_t)(-1))
1167             {
1168                 ret = yaz_iconv(zh->iconv_to_utf8, 0, 0, 0, 0);
1169                 zh->errCode = 125;
1170                 return -1;
1171             }
1172             *outbuf = 0;
1173         }
1174         else
1175         {
1176             sizez = term->u.general->len;
1177             if (sizez > IT_MAX_WORD-1)
1178                 sizez = IT_MAX_WORD-1;
1179             memcpy (termz, term->u.general->buf, sizez);
1180             termz[sizez] = '\0';
1181         }
1182         break;
1183     case Z_Term_characterString:
1184         sizez = strlen(term->u.characterString);
1185         if (sizez > IT_MAX_WORD-1)
1186             sizez = IT_MAX_WORD-1;
1187         memcpy (termz, term->u.characterString, sizez);
1188         termz[sizez] = '\0';
1189         break;
1190     default:
1191         zh->errCode = 124;
1192         return -1;
1193     }
1194     return 0;
1195 }
1196
1197 /* convert APT SCAN term to internal cmap */
1198 static int trans_scan_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1199                             char *termz, int reg_type)
1200 {
1201     char termz0[IT_MAX_WORD];
1202
1203     if (zapt_term_to_utf8(zh, zapt, termz0))
1204         return -1;    /* error */
1205     else
1206     {
1207         const char **map;
1208         const char *cp = (const char *) termz0;
1209         const char *cp_end = cp + strlen(cp);
1210         const char *src;
1211         int i = 0;
1212         const char *space_map = NULL;
1213         int len;
1214             
1215         while ((len = (cp_end - cp)) > 0)
1216         {
1217             map = zebra_maps_input (zh->reg->zebra_maps, reg_type, &cp, len, 0);
1218             if (**map == *CHR_SPACE)
1219                 space_map = *map;
1220             else
1221             {
1222                 if (i && space_map)
1223                     for (src = space_map; *src; src++)
1224                         termz[i++] = *src;
1225                 space_map = NULL;
1226                 for (src = *map; *src; src++)
1227                     termz[i++] = *src;
1228             }
1229         }
1230         termz[i] = '\0';
1231     }
1232     return 0;
1233 }
1234
1235 char *normalize_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1236                      const char *termz, NMEM stream, unsigned reg_id)
1237 {
1238     WRBUF wrbuf = 0;
1239     AttrType truncation;
1240     int truncation_value;
1241     char *ex_list = 0;
1242
1243     attr_init (&truncation, zapt, 5);
1244     truncation_value = attr_find (&truncation, NULL);
1245
1246     switch (truncation_value)
1247     {
1248     default:
1249         ex_list = "";
1250         break;
1251     case 101:
1252         ex_list = "#";
1253         break;
1254     case 102:
1255     case 103:
1256         ex_list = 0;
1257         break;
1258     case 104:
1259         ex_list = "!#";
1260         break;
1261     case 105:
1262         ex_list = "!*";
1263         break;
1264     }
1265     if (ex_list)
1266         wrbuf = zebra_replace(zh->reg->zebra_maps, reg_id, ex_list,
1267                               termz, strlen(termz));
1268     if (!wrbuf)
1269         return nmem_strdup(stream, termz);
1270     else
1271     {
1272         char *buf = (char*) nmem_malloc (stream, wrbuf_len(wrbuf)+1);
1273         memcpy (buf, wrbuf_buf(wrbuf), wrbuf_len(wrbuf));
1274         buf[wrbuf_len(wrbuf)] = '\0';
1275         return buf;
1276     }
1277 }
1278
1279 static void grep_info_delete (struct grep_info *grep_info)
1280 {
1281 #ifdef TERM_COUNT
1282     xfree(grep_info->term_no);
1283 #endif
1284     xfree (grep_info->isam_p_buf);
1285 }
1286
1287 static int grep_info_prepare (ZebraHandle zh,
1288                               Z_AttributesPlusTerm *zapt,
1289                               struct grep_info *grep_info,
1290                               int reg_type,
1291                               NMEM stream)
1292 {
1293     AttrType termset;
1294     int termset_value_numeric;
1295     const char *termset_value_string;
1296
1297 #ifdef TERM_COUNT
1298     grep_info->term_no = 0;
1299 #endif
1300     grep_info->isam_p_size = 0;
1301     grep_info->isam_p_buf = NULL;
1302     grep_info->zh = zh;
1303     grep_info->reg_type = reg_type;
1304     grep_info->termset = 0;
1305
1306     if (!zapt)
1307         return 0;
1308     attr_init (&termset, zapt, 8);
1309     termset_value_numeric =
1310         attr_find_ex (&termset, NULL, &termset_value_string);
1311     if (termset_value_numeric != -1)
1312     {
1313         char resname[32];
1314         const char *termset_name = 0;
1315         if (termset_value_numeric != -2)
1316         {
1317             sprintf (resname, "%d", termset_value_numeric);
1318             termset_name = resname;
1319         }
1320         else
1321             termset_name = termset_value_string;
1322         yaz_log(LOG_LOG, "creating termset set %s", termset_name);
1323         grep_info->termset = resultSetAdd (zh, termset_name, 1);
1324         if (!grep_info->termset)
1325         {
1326             zh->errCode = 128;
1327             zh->errString = nmem_strdup (stream, termset_name);
1328             return -1;
1329         }
1330     }
1331     return 0;
1332 }
1333                                
1334
1335 static RSET rpn_search_APT_phrase (ZebraHandle zh,
1336                                    Z_AttributesPlusTerm *zapt,
1337                                    const char *termz_org,
1338                                    oid_value attributeSet,
1339                                    NMEM stream,
1340                                    int reg_type, int complete_flag,
1341                                    const char *rank_type, int xpath_use,
1342                                    int num_bases, char **basenames)
1343 {
1344     char term_dst[IT_MAX_WORD+1];
1345     RSET rset[60], result;
1346     int  rset_no = 0;
1347     struct grep_info grep_info;
1348     char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type);
1349     const char *termp = termz;
1350
1351     *term_dst = 0;
1352     if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream))
1353         return 0;
1354     while (1)
1355     { 
1356         yaz_log(LOG_DEBUG, "APT_phrase termp=%s", termp);
1357         rset[rset_no] = term_trunc (zh, zapt, &termp, attributeSet,
1358                                     stream, &grep_info,
1359                                     reg_type, complete_flag,
1360                                     num_bases, basenames,
1361                                     term_dst, rank_type,
1362                                     xpath_use);
1363         if (!rset[rset_no])
1364             break;
1365         if (++rset_no >= (int) (sizeof(rset)/sizeof(*rset)))
1366             break;
1367     }
1368     grep_info_delete (&grep_info);
1369     if (rset_no == 0)
1370     {
1371         rset_null_parms parms;
1372         
1373         parms.rset_term = rset_term_create (termz, -1, rank_type,
1374                                             zapt->term->which);
1375         return rset_create (rset_kind_null, &parms);
1376     }
1377     else if (rset_no == 1)
1378         return (rset[0]);
1379     else
1380     {
1381         /* new / old prox */
1382         rset_prox_parms parms;
1383         
1384         parms.rset = rset;
1385         parms.rset_no = rset_no;
1386         parms.ordered = 1;
1387         parms.exclusion = 0;
1388         parms.relation = 3;
1389         parms.distance = 1;
1390         parms.key_size = sizeof(struct it_key);
1391         parms.cmp = key_compare_it;
1392         parms.getseq = key_get_seq;
1393         parms.log_item = key_logdump_txt;
1394         result = rset_create(rset_kind_prox, &parms);
1395     }
1396     return result;
1397 }
1398
1399 static RSET rpn_search_APT_or_list (ZebraHandle zh,
1400                                     Z_AttributesPlusTerm *zapt,
1401                                     const char *termz_org,
1402                                     oid_value attributeSet,
1403                                     NMEM stream,
1404                                     int reg_type, int complete_flag,
1405                                     const char *rank_type,
1406                                     int xpath_use,
1407                                     int num_bases, char **basenames)
1408 {
1409     char term_dst[IT_MAX_WORD+1];
1410     RSET rset[60], result;
1411     int i, rset_no = 0;
1412     struct grep_info grep_info;
1413     char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type);
1414     const char *termp = termz;
1415
1416     if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream))
1417         return 0;
1418     while (1)
1419     { 
1420         yaz_log(LOG_DEBUG, "APT_or_list termp=%s", termp);
1421         rset[rset_no] = term_trunc (zh, zapt, &termp, attributeSet,
1422                                     stream, &grep_info,
1423                                     reg_type, complete_flag,
1424                                     num_bases, basenames,
1425                                     term_dst, rank_type,
1426                                     xpath_use);
1427         if (!rset[rset_no])
1428             break;
1429         if (++rset_no >= (int) (sizeof(rset)/sizeof(*rset)))
1430             break;
1431     }
1432     grep_info_delete (&grep_info);
1433     if (rset_no == 0)
1434     {
1435         rset_null_parms parms;
1436         
1437         parms.rset_term = rset_term_create (termz, -1, rank_type,
1438                                             zapt->term->which);
1439         return rset_create (rset_kind_null, &parms);
1440     }
1441     result = rset[0];
1442     for (i = 1; i<rset_no; i++)
1443     {
1444         rset_bool_parms bool_parms;
1445
1446         bool_parms.rset_l = result;
1447         bool_parms.rset_r = rset[i];
1448         bool_parms.key_size = sizeof(struct it_key);
1449         bool_parms.cmp = key_compare_it;
1450         bool_parms.log_item = key_logdump_txt;
1451         result = rset_create (rset_kind_or, &bool_parms);
1452     }
1453     return result;
1454 }
1455
1456 static RSET rpn_search_APT_and_list (ZebraHandle zh,
1457                                      Z_AttributesPlusTerm *zapt,
1458                                      const char *termz_org,
1459                                      oid_value attributeSet,
1460                                      NMEM stream,
1461                                      int reg_type, int complete_flag,
1462                                      const char *rank_type, 
1463                                      int xpath_use,
1464                                      int num_bases, char **basenames)
1465 {
1466     char term_dst[IT_MAX_WORD+1];
1467     RSET rset[60], result;
1468     int i, rset_no = 0;
1469     struct grep_info grep_info;
1470     char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type);
1471     const char *termp = termz;
1472
1473     if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream))
1474         return 0;
1475     while (1)
1476     { 
1477         yaz_log(LOG_DEBUG, "APT_and_list termp=%s", termp);
1478         rset[rset_no] = term_trunc (zh, zapt, &termp, attributeSet,
1479                                     stream, &grep_info,
1480                                     reg_type, complete_flag,
1481                                     num_bases, basenames,
1482                                     term_dst, rank_type,
1483                                     xpath_use);
1484         if (!rset[rset_no])
1485             break;
1486         assert (rset[rset_no]);
1487         if (++rset_no >= (int) (sizeof(rset)/sizeof(*rset)))
1488             break;
1489     }
1490     grep_info_delete (&grep_info);
1491     if (rset_no == 0)
1492     {
1493         rset_null_parms parms;
1494         
1495         parms.rset_term = rset_term_create (termz, -1, rank_type,
1496                                             zapt->term->which);
1497         return rset_create (rset_kind_null, &parms);
1498     }
1499     result = rset[0];
1500     for (i = 1; i<rset_no; i++)
1501     {
1502         rset_bool_parms bool_parms;
1503
1504         bool_parms.rset_l = result;
1505         bool_parms.rset_r = rset[i];
1506         bool_parms.key_size = sizeof(struct it_key);
1507         bool_parms.cmp = key_compare_it;
1508         bool_parms.log_item = key_logdump_txt;
1509         result = rset_create (rset_kind_and, &bool_parms);
1510     }
1511     return result;
1512 }
1513
1514 static int numeric_relation (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1515                              const char **term_sub,
1516                              char *term_dict,
1517                              oid_value attributeSet,
1518                              struct grep_info *grep_info,
1519                              int *max_pos,
1520                              int reg_type,
1521                              char *term_dst)
1522 {
1523     AttrType relation;
1524     int relation_value;
1525     int term_value;
1526     int r;
1527     char *term_tmp = term_dict + strlen(term_dict);
1528
1529     attr_init (&relation, zapt, 2);
1530     relation_value = attr_find (&relation, NULL);
1531
1532     yaz_log(LOG_DEBUG, "numeric relation value=%d", relation_value);
1533
1534     if (!term_100 (zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1535                    term_dst))
1536         return 0;
1537     term_value = atoi (term_tmp);
1538     switch (relation_value)
1539     {
1540     case 1:
1541         yaz_log(LOG_DEBUG, "Relation <");
1542         gen_regular_rel (term_tmp, term_value-1, 1);
1543         break;
1544     case 2:
1545         yaz_log(LOG_DEBUG, "Relation <=");
1546         gen_regular_rel (term_tmp, term_value, 1);
1547         break;
1548     case 4:
1549         yaz_log(LOG_DEBUG, "Relation >=");
1550         gen_regular_rel (term_tmp, term_value, 0);
1551         break;
1552     case 5:
1553         yaz_log(LOG_DEBUG, "Relation >");
1554         gen_regular_rel (term_tmp, term_value+1, 0);
1555         break;
1556     case 3:
1557     default:
1558         yaz_log(LOG_DEBUG, "Relation =");
1559         sprintf (term_tmp, "(0*%d)", term_value);
1560     }
1561     yaz_log(LOG_DEBUG, "dict_lookup_grep: %s", term_tmp);
1562     r = dict_lookup_grep (zh->reg->dict, term_dict, 0, grep_info, max_pos,
1563                           0, grep_handle);
1564     if (r)
1565         yaz_log(LOG_WARN, "dict_lookup_grep fail, rel=gt: %d", r);
1566     yaz_log(LOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1567     return 1;
1568 }
1569
1570 static int numeric_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1571                          const char **term_sub, 
1572                          oid_value attributeSet, struct grep_info *grep_info,
1573                          int reg_type, int complete_flag,
1574                          int num_bases, char **basenames,
1575                          char *term_dst, int xpath_use, NMEM stream)
1576 {
1577     char term_dict[2*IT_MAX_WORD+2];
1578     int r, base_no;
1579     AttrType use;
1580     int use_value;
1581     const char *use_string = 0;
1582     oid_value curAttributeSet = attributeSet;
1583     const char *termp;
1584     struct rpn_char_map_info rcmi;
1585
1586     int bases_ok = 0;     /* no of databases with OK attribute */
1587     int errCode = 0;      /* err code (if any is not OK) */
1588     char *errString = 0;  /* addinfo */
1589
1590     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1591     attr_init (&use, zapt, 1);
1592     use_value = attr_find_ex (&use, &curAttributeSet, &use_string);
1593
1594     if (use_value == -1)
1595         use_value = 1016;
1596
1597     for (base_no = 0; base_no < num_bases; base_no++)
1598     {
1599         attent attp;
1600         data1_local_attribute id_xpath_attr;
1601         data1_local_attribute *local_attr;
1602         int max_pos, prefix_len = 0;
1603
1604         termp = *term_sub;
1605         if (use_value == -2)  /* string attribute (assume IDXPATH/any) */
1606         {
1607             use_value = xpath_use;
1608             attp.local_attributes = &id_xpath_attr;
1609             attp.attset_ordinal = VAL_IDXPATH;
1610             id_xpath_attr.next = 0;
1611             id_xpath_attr.local = use_value;
1612         }
1613         else if (curAttributeSet == VAL_IDXPATH)
1614         {
1615             attp.local_attributes = &id_xpath_attr;
1616             attp.attset_ordinal = VAL_IDXPATH;
1617             id_xpath_attr.next = 0;
1618             id_xpath_attr.local = use_value;
1619         }
1620         else
1621         {
1622             if ((r=att_getentbyatt (zh, &attp, curAttributeSet, use_value,
1623                                             use_string)))
1624             {
1625                 yaz_log(LOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
1626                       curAttributeSet, use_value, r);
1627                 if (r == -1)
1628                 {
1629                     errString = nmem_strdup_i (stream, use_value);
1630                     errCode = 114;
1631                 }
1632                 else
1633                     errCode = 121;
1634                 continue;
1635             }
1636         }
1637         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1638         {
1639             zh->errCode = 109; /* Database unavailable */
1640             zh->errString = basenames[base_no];
1641             return -1;
1642         }
1643         for (local_attr = attp.local_attributes; local_attr;
1644              local_attr = local_attr->next)
1645         {
1646             int ord;
1647             char ord_buf[32];
1648             int i, ord_len;
1649
1650             ord = zebraExplain_lookupSU (zh->reg->zei, attp.attset_ordinal,
1651                                           local_attr->local);
1652             if (ord < 0)
1653                 continue;
1654             if (prefix_len)
1655                 term_dict[prefix_len++] = '|';
1656             else
1657                 term_dict[prefix_len++] = '(';
1658
1659             ord_len = key_SU_encode (ord, ord_buf);
1660             for (i = 0; i<ord_len; i++)
1661             {
1662                 term_dict[prefix_len++] = 1;
1663                 term_dict[prefix_len++] = ord_buf[i];
1664             }
1665         }
1666         if (!prefix_len)
1667         {
1668             char val_str[32];
1669             sprintf (val_str, "%d", use_value);
1670             errCode = 114;
1671             errString = nmem_strdup (stream, val_str);
1672             continue;
1673         }
1674         bases_ok++;
1675         term_dict[prefix_len++] = ')';        
1676         term_dict[prefix_len++] = 1;
1677         term_dict[prefix_len++] = reg_type;
1678         yaz_log(LOG_DEBUG, "reg_type = %d", term_dict[prefix_len-1]);
1679         term_dict[prefix_len] = '\0';
1680         if (!numeric_relation (zh, zapt, &termp, term_dict,
1681                                attributeSet, grep_info, &max_pos, reg_type,
1682                                term_dst))
1683             return 0;
1684     }
1685     if (!bases_ok)
1686     {
1687         zh->errCode = errCode;
1688         zh->errString = errString;
1689         return -1;
1690     }
1691     *term_sub = termp;
1692     yaz_log(LOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1693     return 1;
1694 }
1695
1696 static RSET rpn_search_APT_numeric (ZebraHandle zh,
1697                                     Z_AttributesPlusTerm *zapt,
1698                                     const char *termz,
1699                                     oid_value attributeSet,
1700                                     NMEM stream,
1701                                     int reg_type, int complete_flag,
1702                                     const char *rank_type, int xpath_use,
1703                                     int num_bases, char **basenames)
1704 {
1705     char term_dst[IT_MAX_WORD+1];
1706     const char *termp = termz;
1707     RSET rset[60], result;
1708     int i, r, rset_no = 0;
1709     struct grep_info grep_info;
1710
1711     if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream))
1712         return 0;
1713     while (1)
1714     { 
1715         yaz_log(LOG_DEBUG, "APT_numeric termp=%s", termp);
1716         grep_info.isam_p_indx = 0;
1717         r = numeric_term (zh, zapt, &termp, attributeSet, &grep_info,
1718                           reg_type, complete_flag, num_bases, basenames,
1719                           term_dst, xpath_use,
1720                           stream);
1721         if (r < 1)
1722             break;
1723         yaz_log(LOG_DEBUG, "term: %s", term_dst);
1724         rset[rset_no] = rset_trunc (zh, grep_info.isam_p_buf,
1725                                     grep_info.isam_p_indx, term_dst,
1726                                     strlen(term_dst), rank_type,
1727                                     0 /* preserve position */,
1728                                     zapt->term->which);
1729         assert (rset[rset_no]);
1730         if (++rset_no >= (int) (sizeof(rset)/sizeof(*rset)))
1731             break;
1732     }
1733     grep_info_delete (&grep_info);
1734     if (rset_no == 0)
1735     {
1736         rset_null_parms parms;
1737         
1738         parms.rset_term = rset_term_create (term_dst, -1, rank_type,
1739                                             zapt->term->which);
1740         return rset_create (rset_kind_null, &parms);
1741     }
1742     result = rset[0];
1743     for (i = 1; i<rset_no; i++)
1744     {
1745         rset_bool_parms bool_parms;
1746
1747         bool_parms.rset_l = result;
1748         bool_parms.rset_r = rset[i];
1749         bool_parms.key_size = sizeof(struct it_key);
1750         bool_parms.cmp = key_compare_it;
1751         bool_parms.log_item = key_logdump_txt;
1752         result = rset_create (rset_kind_and, &bool_parms);
1753     }
1754     return result;
1755 }
1756
1757 static RSET rpn_search_APT_local (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1758                                   const char *termz,
1759                                   oid_value attributeSet,
1760                                   NMEM stream,
1761                                   const char *rank_type)
1762 {
1763     RSET result;
1764     RSFD rsfd;
1765     struct it_key key;
1766     rset_temp_parms parms;
1767
1768     parms.rset_term = rset_term_create (termz, -1, rank_type,
1769                                         zapt->term->which);
1770     parms.cmp = key_compare_it;
1771     parms.key_size = sizeof (struct it_key);
1772     parms.temp_path = res_get (zh->res, "setTmpDir");
1773     result = rset_create (rset_kind_temp, &parms);
1774     rsfd = rset_open (result, RSETF_WRITE);
1775
1776     key.sysno = atoi (termz);
1777     key.seqno = 1;
1778     if (key.sysno <= 0)
1779         key.sysno = 1;
1780     rset_write (result, rsfd, &key);
1781     rset_close (result, rsfd);
1782     return result;
1783 }
1784
1785 static RSET rpn_sort_spec (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1786                            oid_value attributeSet, NMEM stream,
1787                            Z_SortKeySpecList *sort_sequence,
1788                            const char *rank_type)
1789 {
1790     rset_null_parms parms;    
1791     int i;
1792     int sort_relation_value;
1793     AttrType sort_relation_type;
1794     int use_value;
1795     AttrType use_type;
1796     Z_SortKeySpec *sks;
1797     Z_SortKey *sk;
1798     Z_AttributeElement *ae;
1799     int oid[OID_SIZE];
1800     oident oe;
1801     char termz[20];
1802     
1803     attr_init (&sort_relation_type, zapt, 7);
1804     sort_relation_value = attr_find (&sort_relation_type, &attributeSet);
1805
1806     attr_init (&use_type, zapt, 1);
1807     use_value = attr_find (&use_type, &attributeSet);
1808
1809     if (!sort_sequence->specs)
1810     {
1811         sort_sequence->num_specs = 10;
1812         sort_sequence->specs = (Z_SortKeySpec **)
1813             nmem_malloc (stream, sort_sequence->num_specs *
1814                          sizeof(*sort_sequence->specs));
1815         for (i = 0; i<sort_sequence->num_specs; i++)
1816             sort_sequence->specs[i] = 0;
1817     }
1818     if (zapt->term->which != Z_Term_general)
1819         i = 0;
1820     else
1821         i = atoi_n ((char *) zapt->term->u.general->buf,
1822                     zapt->term->u.general->len);
1823     if (i >= sort_sequence->num_specs)
1824         i = 0;
1825     sprintf (termz, "%d", i);
1826
1827     oe.proto = PROTO_Z3950;
1828     oe.oclass = CLASS_ATTSET;
1829     oe.value = attributeSet;
1830     if (!oid_ent_to_oid (&oe, oid))
1831         return 0;
1832
1833     sks = (Z_SortKeySpec *) nmem_malloc (stream, sizeof(*sks));
1834     sks->sortElement = (Z_SortElement *)
1835         nmem_malloc (stream, sizeof(*sks->sortElement));
1836     sks->sortElement->which = Z_SortElement_generic;
1837     sk = sks->sortElement->u.generic = (Z_SortKey *)
1838         nmem_malloc (stream, sizeof(*sk));
1839     sk->which = Z_SortKey_sortAttributes;
1840     sk->u.sortAttributes = (Z_SortAttributes *)
1841         nmem_malloc (stream, sizeof(*sk->u.sortAttributes));
1842
1843     sk->u.sortAttributes->id = oid;
1844     sk->u.sortAttributes->list = (Z_AttributeList *)
1845         nmem_malloc (stream, sizeof(*sk->u.sortAttributes->list));
1846     sk->u.sortAttributes->list->num_attributes = 1;
1847     sk->u.sortAttributes->list->attributes = (Z_AttributeElement **)
1848         nmem_malloc (stream, sizeof(*sk->u.sortAttributes->list->attributes));
1849     ae = *sk->u.sortAttributes->list->attributes = (Z_AttributeElement *)
1850         nmem_malloc (stream, sizeof(**sk->u.sortAttributes->list->attributes));
1851     ae->attributeSet = 0;
1852     ae->attributeType = (int *)
1853         nmem_malloc (stream, sizeof(*ae->attributeType));
1854     *ae->attributeType = 1;
1855     ae->which = Z_AttributeValue_numeric;
1856     ae->value.numeric = (int *)
1857         nmem_malloc (stream, sizeof(*ae->value.numeric));
1858     *ae->value.numeric = use_value;
1859
1860     sks->sortRelation = (int *)
1861         nmem_malloc (stream, sizeof(*sks->sortRelation));
1862     if (sort_relation_value == 1)
1863         *sks->sortRelation = Z_SortKeySpec_ascending;
1864     else if (sort_relation_value == 2)
1865         *sks->sortRelation = Z_SortKeySpec_descending;
1866     else 
1867         *sks->sortRelation = Z_SortKeySpec_ascending;
1868
1869     sks->caseSensitivity = (int *)
1870         nmem_malloc (stream, sizeof(*sks->caseSensitivity));
1871     *sks->caseSensitivity = 0;
1872
1873     sks->which = Z_SortKeySpec_null;
1874     sks->u.null = odr_nullval ();
1875     sort_sequence->specs[i] = sks;
1876
1877     parms.rset_term = rset_term_create (termz, -1, rank_type,
1878                                         zapt->term->which);
1879     return rset_create (rset_kind_null, &parms);
1880 }
1881
1882
1883 static int parse_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1884                        oid_value attributeSet,
1885                        struct xpath_location_step *xpath, int max, NMEM mem)
1886 {
1887     oid_value curAttributeSet = attributeSet;
1888     AttrType use;
1889     const char *use_string = 0;
1890     
1891     attr_init (&use, zapt, 1);
1892     attr_find_ex (&use, &curAttributeSet, &use_string);
1893
1894     if (!use_string || *use_string != '/')
1895         return -1;
1896
1897     return zebra_parse_xpath_str(use_string, xpath, max, mem);
1898 }
1899  
1900                
1901
1902 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
1903                         int reg_type, const char *term, int use,
1904                         oid_value curAttributeSet)
1905 {
1906     RSET rset;
1907     struct grep_info grep_info;
1908     char term_dict[2048];
1909     char ord_buf[32];
1910     int prefix_len = 0;
1911     int ord = zebraExplain_lookupSU (zh->reg->zei, curAttributeSet, use);
1912     int ord_len, i, r, max_pos;
1913     int term_type = Z_Term_characterString;
1914     const char *flags = "void";
1915
1916     if (grep_info_prepare (zh, 0 /* zapt */, &grep_info, '0', stream))
1917     {
1918         rset_null_parms parms;
1919         
1920         parms.rset_term = rset_term_create (term, strlen(term),
1921                                             flags, term_type);
1922         parms.rset_term->nn = 0;
1923         return rset_create (rset_kind_null, &parms);
1924     }
1925
1926     if (ord < 0)
1927     {
1928         rset_null_parms parms;
1929         
1930         parms.rset_term = rset_term_create (term, strlen(term),
1931                                             flags, term_type);
1932         parms.rset_term->nn = 0;
1933         return rset_create (rset_kind_null, &parms);
1934     }
1935     if (prefix_len)
1936         term_dict[prefix_len++] = '|';
1937     else
1938         term_dict[prefix_len++] = '(';
1939     
1940     ord_len = key_SU_encode (ord, ord_buf);
1941     for (i = 0; i<ord_len; i++)
1942     {
1943         term_dict[prefix_len++] = 1;
1944         term_dict[prefix_len++] = ord_buf[i];
1945     }
1946     term_dict[prefix_len++] = ')';
1947     term_dict[prefix_len++] = 1;
1948     term_dict[prefix_len++] = reg_type;
1949     
1950     strcpy (term_dict+prefix_len, term);
1951     
1952     grep_info.isam_p_indx = 0;
1953     r = dict_lookup_grep (zh->reg->dict, term_dict, 0,
1954                           &grep_info, &max_pos, 0, grep_handle);
1955     yaz_log (LOG_LOG, "%s %d positions", term, grep_info.isam_p_indx);
1956     rset = rset_trunc (zh, grep_info.isam_p_buf,
1957                        grep_info.isam_p_indx, term, strlen(term),
1958                        flags, 1, term_type);
1959     grep_info_delete (&grep_info);
1960     return rset;
1961 }
1962
1963 static RSET rpn_search_xpath (ZebraHandle zh,
1964                               oid_value attributeSet,
1965                               int num_bases, char **basenames,
1966                               NMEM stream, const char *rank_type, RSET rset,
1967                               int xpath_len, struct xpath_location_step *xpath)
1968 {
1969     oid_value curAttributeSet = attributeSet;
1970     int base_no;
1971     int i;
1972
1973     if (xpath_len < 0)
1974         return rset;
1975
1976     yaz_log (LOG_DEBUG, "len=%d", xpath_len);
1977     for (i = 0; i<xpath_len; i++)
1978     {
1979         yaz_log (LOG_DEBUG, "XPATH %d %s", i, xpath[i].part);
1980     }
1981
1982     curAttributeSet = VAL_IDXPATH;
1983
1984     /*
1985       //a    ->    a/.*
1986       //a/b  ->    b/a/.*
1987       /a     ->    a/
1988       /a/b   ->    b/a/
1989
1990       /      ->    none
1991
1992    a[@attr=value]/b[@other=othervalue]
1993
1994  /e/@a val      range(e/,range(@a,freetext(w,1015,val),@a),e/)
1995  /a/b val       range(b/a/,freetext(w,1016,val),b/a/)
1996  /a/b/@c val    range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
1997  /a/b[@c=y] val range(b/a/,freetext(w,1016,val),b/a/,@c=y)
1998  /a[@c=y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c=y)
1999  /a[@c=x]/b[@c=y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c=y),a/,@c=x)
2000       
2001     */
2002
2003     dict_grep_cmap (zh->reg->dict, 0, 0);
2004
2005     for (base_no = 0; base_no < num_bases; base_no++)
2006     {
2007         int level = xpath_len;
2008         int first_path = 1;
2009         
2010         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2011         {
2012             zh->errCode = 109; /* Database unavailable */
2013             zh->errString = basenames[base_no];
2014             return rset;
2015         }
2016         while (--level >= 0)
2017         {
2018             char xpath_rev[128];
2019             int i, len;
2020             rset_between_parms parms;
2021             RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
2022
2023             *xpath_rev = 0;
2024             len = 0;
2025             for (i = level; i >= 1; --i)
2026             {
2027                 const char *cp = xpath[i].part;
2028                 if (*cp)
2029                 {
2030                     for (;*cp; cp++)
2031                         if (*cp == '*')
2032                         {
2033                             memcpy (xpath_rev + len, "[^/]*", 5);
2034                             len += 5;
2035                         }
2036                         else if (*cp == ' ')
2037                         {
2038
2039                             xpath_rev[len++] = 1;
2040                             xpath_rev[len++] = ' ';
2041                         }
2042
2043                         else
2044                             xpath_rev[len++] = *cp;
2045                     xpath_rev[len++] = '/';
2046                 }
2047                 else if (i == 1)  /* // case */
2048                 {
2049                     xpath_rev[len++] = '.';
2050                     xpath_rev[len++] = '*';
2051                 }
2052             }
2053             xpath_rev[len] = 0;
2054
2055             if (xpath[level].predicate &&
2056                 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2057                 xpath[level].predicate->u.relation.name[0])
2058             {
2059                 WRBUF wbuf = wrbuf_alloc();
2060                 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2061                 if (xpath[level].predicate->u.relation.value)
2062                 {
2063                     const char *cp = xpath[level].predicate->u.relation.value;
2064                     wrbuf_putc(wbuf, '=');
2065                     
2066                     while (*cp)
2067                     {
2068                         if (strchr(REGEX_CHARS, *cp))
2069                             wrbuf_putc(wbuf, '\\');
2070                         wrbuf_putc(wbuf, *cp);
2071                         cp++;
2072                     }
2073                 }
2074                 wrbuf_puts(wbuf, "");
2075                 rset_attr = xpath_trunc (
2076                     zh, stream, '0', wrbuf_buf(wbuf), 3, curAttributeSet);
2077                 wrbuf_free(wbuf, 1);
2078             } 
2079             else 
2080             {
2081                 if (!first_path)
2082                     continue;
2083             }
2084             yaz_log (LOG_DEBUG, "xpath_rev (%d) = %s", level, xpath_rev);
2085             if (strlen(xpath_rev))
2086             {
2087                 rset_start_tag = xpath_trunc(zh, stream, 
2088                                          '0', xpath_rev, 1, curAttributeSet);
2089             
2090                 rset_end_tag = xpath_trunc(zh, stream,
2091                                        '0', xpath_rev, 2, curAttributeSet);
2092
2093                 parms.key_size = sizeof(struct it_key);
2094                 parms.cmp = key_compare_it;
2095                 parms.rset_l = rset_start_tag;
2096                 parms.rset_m = rset;
2097                 parms.rset_r = rset_end_tag;
2098                 parms.rset_attr = rset_attr;
2099                 parms.printer = key_print_it;
2100                 rset = rset_create (rset_kind_between, &parms);
2101             }
2102             first_path = 0;
2103         }
2104     }
2105
2106     return rset;
2107 }
2108
2109
2110
2111 static RSET rpn_search_APT (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2112                             oid_value attributeSet, NMEM stream,
2113                             Z_SortKeySpecList *sort_sequence,
2114                             int num_bases, char **basenames)
2115 {
2116     unsigned reg_id;
2117     char *search_type = NULL;
2118     char rank_type[128];
2119     int complete_flag;
2120     int sort_flag;
2121     char termz[IT_MAX_WORD+1];
2122     RSET rset = 0;
2123     int xpath_len;
2124     int xpath_use = 0;
2125     struct xpath_location_step xpath[10];
2126
2127     zebra_maps_attr (zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2128                      rank_type, &complete_flag, &sort_flag);
2129     
2130     yaz_log(LOG_DEBUG, "reg_id=%c", reg_id);
2131     yaz_log(LOG_DEBUG, "complete_flag=%d", complete_flag);
2132     yaz_log(LOG_DEBUG, "search_type=%s", search_type);
2133     yaz_log(LOG_DEBUG, "rank_type=%s", rank_type);
2134
2135     if (zapt_term_to_utf8(zh, zapt, termz))
2136         return 0;
2137
2138     if (sort_flag)
2139         return rpn_sort_spec (zh, zapt, attributeSet, stream, sort_sequence,
2140                               rank_type);
2141     xpath_len = parse_xpath(zh, zapt, attributeSet, xpath, 10, stream);
2142     if (xpath_len >= 0)
2143     {
2144         xpath_use = 1016;
2145         if (xpath[xpath_len-1].part[0] == '@')
2146             xpath_use = 1015;
2147     }
2148
2149     if (!strcmp (search_type, "phrase"))
2150     {
2151         rset = rpn_search_APT_phrase (zh, zapt, termz, attributeSet, stream,
2152                                       reg_id, complete_flag, rank_type,
2153                                       xpath_use,
2154                                       num_bases, basenames);
2155     }
2156     else if (!strcmp (search_type, "and-list"))
2157     {
2158         rset = rpn_search_APT_and_list (zh, zapt, termz, attributeSet, stream,
2159                                         reg_id, complete_flag, rank_type,
2160                                         xpath_use,
2161                                         num_bases, basenames);
2162     }
2163     else if (!strcmp (search_type, "or-list"))
2164     {
2165         rset = rpn_search_APT_or_list (zh, zapt, termz, attributeSet, stream,
2166                                        reg_id, complete_flag, rank_type,
2167                                        xpath_use,
2168                                        num_bases, basenames);
2169     }
2170     else if (!strcmp (search_type, "local"))
2171     {
2172         rset = rpn_search_APT_local (zh, zapt, termz, attributeSet, stream,
2173                                      rank_type);
2174     }
2175     else if (!strcmp (search_type, "numeric"))
2176     {
2177         rset = rpn_search_APT_numeric (zh, zapt, termz, attributeSet, stream,
2178                                        reg_id, complete_flag, rank_type,
2179                                        xpath_use,
2180                                        num_bases, basenames);
2181     }
2182     else if (!strcmp (search_type, "always"))
2183     {
2184         rset = 0;
2185     }
2186     else
2187         zh->errCode = 118;
2188     return rpn_search_xpath (zh, attributeSet, num_bases, basenames,
2189                              stream, rank_type, rset, xpath_len, xpath);
2190 }
2191
2192 static RSET rpn_search_structure (ZebraHandle zh, Z_RPNStructure *zs,
2193                                   oid_value attributeSet, NMEM stream,
2194                                   Z_SortKeySpecList *sort_sequence,
2195                                   int num_bases, char **basenames)
2196 {
2197     RSET r = NULL;
2198     if (zs->which == Z_RPNStructure_complex)
2199     {
2200         Z_Operator *zop = zs->u.complex->roperator;
2201         rset_bool_parms bool_parms;
2202
2203         bool_parms.rset_l = rpn_search_structure (zh, zs->u.complex->s1,
2204                                                   attributeSet, stream,
2205                                                   sort_sequence,
2206                                                   num_bases, basenames);
2207         if (bool_parms.rset_l == NULL)
2208             return NULL;
2209         bool_parms.rset_r = rpn_search_structure (zh, zs->u.complex->s2,
2210                                                   attributeSet, stream,
2211                                                   sort_sequence,
2212                                                   num_bases, basenames);
2213         if (bool_parms.rset_r == NULL)
2214         {
2215             rset_delete (bool_parms.rset_l);
2216             return NULL;
2217         }
2218         bool_parms.key_size = sizeof(struct it_key);
2219         bool_parms.cmp = key_compare_it;
2220         bool_parms.log_item = key_logdump_txt;
2221
2222         switch (zop->which)
2223         {
2224         case Z_Operator_and:
2225             r = rset_create (rset_kind_and, &bool_parms);
2226             break;
2227         case Z_Operator_or:
2228             r = rset_create (rset_kind_or, &bool_parms);
2229             break;
2230         case Z_Operator_and_not:
2231             r = rset_create (rset_kind_not, &bool_parms);
2232             break;
2233         case Z_Operator_prox:
2234             if (zop->u.prox->which != Z_ProximityOperator_known)
2235             {
2236                 zh->errCode = 132;
2237                 return NULL;
2238             }
2239             if (*zop->u.prox->u.known != Z_ProxUnit_word)
2240             {
2241                 char *val = (char *) nmem_malloc (stream, 16);
2242                 zh->errCode = 132;
2243                 zh->errString = val;
2244                 sprintf (val, "%d", *zop->u.prox->u.known);
2245                 return NULL;
2246             }
2247             else
2248             {
2249                 /* new / old prox */
2250                 rset_prox_parms parms;
2251                 RSET twosets[2];
2252                 
2253                 twosets[0] = bool_parms.rset_l;
2254                 twosets[1] = bool_parms.rset_r;
2255                 parms.rset = twosets;
2256                 parms.rset_no = 2;
2257                 parms.ordered = *zop->u.prox->ordered;
2258                 parms.exclusion = (!zop->u.prox->exclusion ? 0 :
2259                                    *zop->u.prox->exclusion);
2260                 parms.relation = *zop->u.prox->relationType;
2261                 parms.distance = *zop->u.prox->distance;
2262                 parms.key_size = sizeof(struct it_key);
2263                 parms.cmp = key_compare_it;
2264                 parms.getseq = key_get_seq;
2265                 parms.log_item = key_logdump_txt;
2266                 r = rset_create(rset_kind_prox, &parms);
2267             }
2268             break;
2269         default:
2270             zh->errCode = 110;
2271             return NULL;
2272         }
2273     }
2274     else if (zs->which == Z_RPNStructure_simple)
2275     {
2276         if (zs->u.simple->which == Z_Operand_APT)
2277         {
2278             yaz_log(LOG_DEBUG, "rpn_search_APT");
2279             r = rpn_search_APT (zh, zs->u.simple->u.attributesPlusTerm,
2280                                 attributeSet, stream, sort_sequence,
2281                                 num_bases, basenames);
2282         }
2283         else if (zs->u.simple->which == Z_Operand_resultSetId)
2284         {
2285             yaz_log(LOG_DEBUG, "rpn_search_ref");
2286             r = resultSetRef (zh, zs->u.simple->u.resultSetId);
2287             if (!r)
2288             {
2289                 r = rset_create (rset_kind_null, NULL);
2290                 zh->errCode = 30;
2291                 zh->errString =
2292                     nmem_strdup (stream, zs->u.simple->u.resultSetId);
2293                 return 0;
2294             }
2295             else
2296                 rset_dup(r);
2297         }
2298         else
2299         {
2300             zh->errCode = 3;
2301             return 0;
2302         }
2303     }
2304     else
2305     {
2306         zh->errCode = 3;
2307         return 0;
2308     }
2309     return r;
2310 }
2311
2312
2313 RSET rpn_search (ZebraHandle zh, NMEM nmem,
2314                  Z_RPNQuery *rpn, int num_bases, char **basenames, 
2315                  const char *setname,
2316                  ZebraSet sset)
2317 {
2318     RSET rset;
2319     oident *attrset;
2320     oid_value attributeSet;
2321     Z_SortKeySpecList *sort_sequence;
2322     int sort_status, i;
2323
2324     zh->errCode = 0;
2325     zh->errString = NULL;
2326     zh->hits = 0;
2327
2328     sort_sequence = (Z_SortKeySpecList *)
2329         nmem_malloc (nmem, sizeof(*sort_sequence));
2330     sort_sequence->num_specs = 10;
2331     sort_sequence->specs = (Z_SortKeySpec **)
2332         nmem_malloc (nmem, sort_sequence->num_specs *
2333                      sizeof(*sort_sequence->specs));
2334     for (i = 0; i<sort_sequence->num_specs; i++)
2335         sort_sequence->specs[i] = 0;
2336     
2337     attrset = oid_getentbyoid (rpn->attributeSetId);
2338     attributeSet = attrset->value;
2339     rset = rpn_search_structure (zh, rpn->RPNStructure, attributeSet,
2340                                  nmem, sort_sequence, num_bases, basenames);
2341     if (!rset)
2342         return 0;
2343
2344     if (zh->errCode)
2345         yaz_log(LOG_DEBUG, "search error: %d", zh->errCode);
2346     
2347     for (i = 0; sort_sequence->specs[i]; i++)
2348         ;
2349     sort_sequence->num_specs = i;
2350     if (!i)
2351         resultSetRank (zh, sset, rset);
2352     else
2353     {
2354         yaz_log(LOG_DEBUG, "resultSetSortSingle in rpn_search");
2355         resultSetSortSingle (zh, nmem, sset, rset,
2356                              sort_sequence, &sort_status);
2357         if (zh->errCode)
2358         {
2359             yaz_log(LOG_DEBUG, "resultSetSortSingle status = %d", zh->errCode);
2360         }
2361     }
2362     return rset;
2363 }
2364
2365 struct scan_info_entry {
2366     char *term;
2367     ISAMS_P isam_p;
2368 };
2369
2370 struct scan_info {
2371     struct scan_info_entry *list;
2372     ODR odr;
2373     int before, after;
2374     char prefix[20];
2375 };
2376
2377 static int scan_handle (char *name, const char *info, int pos, void *client)
2378 {
2379     int len_prefix, idx;
2380     struct scan_info *scan_info = (struct scan_info *) client;
2381
2382     len_prefix = strlen(scan_info->prefix);
2383     if (memcmp (name, scan_info->prefix, len_prefix))
2384         return 1;
2385     if (pos > 0)        idx = scan_info->after - pos + scan_info->before;
2386     else
2387         idx = - pos - 1;
2388     scan_info->list[idx].term = (char *)
2389         odr_malloc (scan_info->odr, strlen(name + len_prefix)+1);
2390     strcpy (scan_info->list[idx].term, name + len_prefix);
2391     assert (*info == sizeof(ISAMS_P));
2392     memcpy (&scan_info->list[idx].isam_p, info+1, sizeof(ISAMS_P));
2393     return 0;
2394 }
2395
2396 static void scan_term_untrans (ZebraHandle zh, NMEM stream, int reg_type,
2397                                char **dst, const char *src)
2398 {
2399     char term_src[IT_MAX_WORD];
2400     char term_dst[IT_MAX_WORD];
2401     
2402     term_untrans (zh, reg_type, term_src, src);
2403
2404     if (zh->iconv_from_utf8 != 0)
2405     {
2406         int len;
2407         char *inbuf = term_src;
2408         size_t inleft = strlen(term_src);
2409         char *outbuf = term_dst;
2410         size_t outleft = sizeof(term_dst)-1;
2411         size_t ret;
2412         
2413         ret = yaz_iconv (zh->iconv_from_utf8, &inbuf, &inleft,
2414                          &outbuf, &outleft);
2415         if (ret == (size_t)(-1))
2416             len = 0;
2417         else
2418             len = outbuf - term_dst;
2419         *dst = nmem_malloc (stream, len + 1);
2420         if (len > 0)
2421             memcpy (*dst, term_dst, len);
2422         (*dst)[len] = '\0';
2423     }
2424     else
2425         *dst = nmem_strdup (stream, term_src);
2426 }
2427
2428 static void count_set (RSET r, int *count)
2429 {
2430     int psysno = 0;
2431     int kno = 0;
2432     struct it_key key;
2433     RSFD rfd;
2434     int term_index;
2435
2436     yaz_log(LOG_DEBUG, "count_set");
2437
2438     *count = 0;
2439     rfd = rset_open (r, RSETF_READ);
2440     while (rset_read (r, rfd, &key, &term_index))
2441     {
2442         if (key.sysno != psysno)
2443         {
2444             psysno = key.sysno;
2445             (*count)++;
2446         }
2447         kno++;
2448     }
2449     rset_close (r, rfd);
2450     yaz_log(LOG_DEBUG, "%d keys, %d records", kno, *count);
2451 }
2452
2453 void rpn_scan (ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
2454                oid_value attributeset,
2455                int num_bases, char **basenames,
2456                int *position, int *num_entries, ZebraScanEntry **list,
2457                int *is_partial, RSET limit_set, int return_zero)
2458 {
2459     int i;
2460     int pos = *position;
2461     int num = *num_entries;
2462     int before;
2463     int after;
2464     int base_no;
2465     char termz[IT_MAX_WORD+20];
2466     AttrType use;
2467     int use_value;
2468     const char *use_string = 0;
2469     struct scan_info *scan_info_array;
2470     ZebraScanEntry *glist;
2471     int ords[32], ord_no = 0;
2472     int ptr[32];
2473
2474     int bases_ok = 0;     /* no of databases with OK attribute */
2475     int errCode = 0;      /* err code (if any is not OK) */
2476     char *errString = 0;  /* addinfo */
2477
2478     unsigned reg_id;
2479     char *search_type = NULL;
2480     char rank_type[128];
2481     int complete_flag;
2482     int sort_flag;
2483
2484     *list = 0;
2485
2486     if (attributeset == VAL_NONE)
2487         attributeset = VAL_BIB1;
2488
2489     if (!limit_set)
2490     {
2491         AttrType termset;
2492         int termset_value_numeric;
2493         const char *termset_value_string;
2494         attr_init (&termset, zapt, 8);
2495         termset_value_numeric =
2496             attr_find_ex (&termset, NULL, &termset_value_string);
2497         if (termset_value_numeric != -1)
2498         {
2499             char resname[32];
2500             const char *termset_name = 0;
2501             
2502             if (termset_value_numeric != -2)
2503             {
2504                 
2505                 sprintf (resname, "%d", termset_value_numeric);
2506                 termset_name = resname;
2507             }
2508             else
2509                 termset_name = termset_value_string;
2510             
2511             limit_set = resultSetRef (zh, termset_name);
2512         }
2513     }
2514         
2515     yaz_log (LOG_DEBUG, "position = %d, num = %d set=%d",
2516              pos, num, attributeset);
2517         
2518     attr_init (&use, zapt, 1);
2519     use_value = attr_find_ex (&use, &attributeset, &use_string);
2520
2521     if (zebra_maps_attr (zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2522                          rank_type, &complete_flag, &sort_flag))
2523     {
2524         *num_entries = 0;
2525         zh->errCode = 113;
2526         return ;
2527     }
2528     yaz_log (LOG_DEBUG, "use_value = %d", use_value);
2529
2530     if (use_value == -1)
2531         use_value = 1016;
2532     for (base_no = 0; base_no < num_bases && ord_no < 32; base_no++)
2533     {
2534         int r;
2535         attent attp;
2536         data1_local_attribute *local_attr;
2537
2538         if ((r=att_getentbyatt (zh, &attp, attributeset, use_value,
2539                                 use_string)))
2540         {
2541             yaz_log(LOG_DEBUG, "att_getentbyatt fail. set=%d use=%d",
2542                   attributeset, use_value);
2543             if (r == -1)
2544             {
2545                 char val_str[32];
2546                 sprintf (val_str, "%d", use_value);
2547                 errCode = 114;
2548                 errString = odr_strdup (stream, val_str);
2549             }   
2550             else
2551                 errCode = 121;
2552             continue;
2553         }
2554         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2555         {
2556             zh->errString = basenames[base_no];
2557             zh->errCode = 109; /* Database unavailable */
2558             *num_entries = 0;
2559             return;
2560         }
2561         bases_ok++;
2562         for (local_attr = attp.local_attributes; local_attr && ord_no < 32;
2563              local_attr = local_attr->next)
2564         {
2565             int ord;
2566
2567             ord = zebraExplain_lookupSU (zh->reg->zei, attp.attset_ordinal,
2568                                          local_attr->local);
2569             if (ord > 0)
2570                 ords[ord_no++] = ord;
2571         }
2572     }
2573     if (!bases_ok && errCode)
2574     {
2575         zh->errCode = errCode;
2576         zh->errString = errString;
2577         *num_entries = 0;
2578     }
2579     if (ord_no == 0)
2580     {
2581         char val_str[32];
2582         sprintf (val_str, "%d", use_value);
2583         zh->errCode = 114;
2584         zh->errString = odr_strdup (stream, val_str);
2585
2586         *num_entries = 0;
2587         return;
2588     }
2589     /* prepare dictionary scanning */
2590     before = pos-1;
2591     after = 1+num-pos;
2592     scan_info_array = (struct scan_info *)
2593         odr_malloc (stream, ord_no * sizeof(*scan_info_array));
2594     for (i = 0; i < ord_no; i++)
2595     {
2596         int j, prefix_len = 0;
2597         int before_tmp = before, after_tmp = after;
2598         struct scan_info *scan_info = scan_info_array + i;
2599         struct rpn_char_map_info rcmi;
2600
2601         rpn_char_map_prepare (zh->reg, reg_id, &rcmi);
2602
2603         scan_info->before = before;
2604         scan_info->after = after;
2605         scan_info->odr = stream;
2606
2607         scan_info->list = (struct scan_info_entry *)
2608             odr_malloc (stream, (before+after) * sizeof(*scan_info->list));
2609         for (j = 0; j<before+after; j++)
2610             scan_info->list[j].term = NULL;
2611
2612         prefix_len += key_SU_encode (ords[i], termz + prefix_len);
2613         termz[prefix_len++] = reg_id;
2614         termz[prefix_len] = 0;
2615         strcpy (scan_info->prefix, termz);
2616
2617         if (trans_scan_term (zh, zapt, termz+prefix_len, reg_id))
2618             return ;
2619                     
2620         dict_scan (zh->reg->dict, termz, &before_tmp, &after_tmp,
2621                    scan_info, scan_handle);
2622     }
2623     glist = (ZebraScanEntry *)
2624         odr_malloc (stream, (before+after)*sizeof(*glist));
2625
2626     /* consider terms after main term */
2627     for (i = 0; i < ord_no; i++)
2628         ptr[i] = before;
2629     
2630     *is_partial = 0;
2631     for (i = 0; i<after; i++)
2632     {
2633         int j, j0 = -1;
2634         const char *mterm = NULL;
2635         const char *tst;
2636         RSET rset;
2637         
2638         for (j = 0; j < ord_no; j++)
2639         {
2640             if (ptr[j] < before+after &&
2641                 (tst=scan_info_array[j].list[ptr[j]].term) &&
2642                 (!mterm || strcmp (tst, mterm) < 0))
2643             {
2644                 j0 = j;
2645                 mterm = tst;
2646             }
2647         }
2648         if (j0 == -1)
2649             break;
2650         scan_term_untrans (zh, stream->mem, reg_id,
2651                            &glist[i+before].term, mterm);
2652         rset = rset_trunc (zh, &scan_info_array[j0].list[ptr[j0]].isam_p, 1,
2653                            glist[i+before].term, strlen(glist[i+before].term),
2654                            NULL, 0, zapt->term->which);
2655
2656         ptr[j0]++;
2657         for (j = j0+1; j<ord_no; j++)
2658         {
2659             if (ptr[j] < before+after &&
2660                 (tst=scan_info_array[j].list[ptr[j]].term) &&
2661                 !strcmp (tst, mterm))
2662             {
2663                 rset_bool_parms bool_parms;
2664                 RSET rset2;
2665
2666                 rset2 =
2667                    rset_trunc (zh, &scan_info_array[j].list[ptr[j]].isam_p, 1,
2668                                glist[i+before].term,
2669                                strlen(glist[i+before].term), NULL, 0,
2670                                zapt->term->which);
2671
2672                 bool_parms.key_size = sizeof(struct it_key);
2673                 bool_parms.cmp = key_compare_it;
2674                 bool_parms.log_item = key_logdump_txt;
2675                 bool_parms.rset_l = rset;
2676                 bool_parms.rset_r = rset2;
2677               
2678                 rset = rset_create (rset_kind_or, &bool_parms);
2679
2680                 ptr[j]++;
2681             }
2682         }
2683         if (limit_set)
2684         {
2685             rset_bool_parms bool_parms;
2686
2687             bool_parms.key_size = sizeof(struct it_key);
2688             bool_parms.cmp = key_compare_it;
2689             bool_parms.log_item = key_logdump_txt;
2690             bool_parms.rset_l = rset;
2691             bool_parms.rset_r = rset_dup(limit_set);
2692
2693             rset = rset_create (rset_kind_and, &bool_parms);
2694         }
2695         count_set (rset, &glist[i+before].occurrences);
2696         rset_delete (rset);
2697     }
2698     if (i < after)
2699     {
2700         *num_entries -= (after-i);
2701         *is_partial = 1;
2702     }
2703
2704     /* consider terms before main term */
2705     for (i = 0; i<ord_no; i++)
2706         ptr[i] = 0;
2707
2708     for (i = 0; i<before; i++)
2709     {
2710         int j, j0 = -1;
2711         const char *mterm = NULL;
2712         const char *tst;
2713         RSET rset;
2714         
2715         for (j = 0; j <ord_no; j++)
2716         {
2717             if (ptr[j] < before &&
2718                 (tst=scan_info_array[j].list[before-1-ptr[j]].term) &&
2719                 (!mterm || strcmp (tst, mterm) > 0))
2720             {
2721                 j0 = j;
2722                 mterm = tst;
2723             }
2724         }
2725         if (j0 == -1)
2726             break;
2727
2728         scan_term_untrans (zh, stream->mem, reg_id,
2729                            &glist[before-1-i].term, mterm);
2730
2731         rset = rset_trunc
2732                (zh, &scan_info_array[j0].list[before-1-ptr[j0]].isam_p, 1,
2733                 glist[before-1-i].term, strlen(glist[before-1-i].term),
2734                 NULL, 0, zapt->term->which);
2735
2736         ptr[j0]++;
2737
2738         for (j = j0+1; j<ord_no; j++)
2739         {
2740             if (ptr[j] < before &&
2741                 (tst=scan_info_array[j].list[before-1-ptr[j]].term) &&
2742                 !strcmp (tst, mterm))
2743             {
2744                 rset_bool_parms bool_parms;
2745                 RSET rset2;
2746
2747                 rset2 = rset_trunc (zh,
2748                          &scan_info_array[j].list[before-1-ptr[j]].isam_p, 1,
2749                                     glist[before-1-i].term,
2750                                     strlen(glist[before-1-i].term), NULL, 0,
2751                                     zapt->term->which);
2752
2753                 bool_parms.key_size = sizeof(struct it_key);
2754                 bool_parms.cmp = key_compare_it;
2755                 bool_parms.log_item = key_logdump_txt;
2756                 bool_parms.rset_l = rset;
2757                 bool_parms.rset_r = rset2;
2758               
2759                 rset = rset_create (rset_kind_or, &bool_parms);
2760
2761                 ptr[j]++;
2762             }
2763         }
2764         if (limit_set)
2765         {
2766             rset_bool_parms bool_parms;
2767
2768             bool_parms.key_size = sizeof(struct it_key);
2769             bool_parms.cmp = key_compare_it;
2770             bool_parms.log_item = key_logdump_txt;
2771             bool_parms.rset_l = rset;
2772             bool_parms.rset_r = rset_dup(limit_set);
2773
2774             rset = rset_create (rset_kind_and, &bool_parms);
2775         }
2776         count_set (rset, &glist[before-1-i].occurrences);
2777         rset_delete (rset);
2778     }
2779     i = before-i;
2780     if (i)
2781     {
2782         *is_partial = 1;
2783         *position -= i;
2784         *num_entries -= i;
2785     }
2786     *list = glist + i;               /* list is set to first 'real' entry */
2787     
2788     yaz_log(LOG_DEBUG, "position = %d, num_entries = %d",
2789           *position, *num_entries);
2790     if (zh->errCode)
2791         yaz_log(LOG_DEBUG, "scan error: %d", zh->errCode);
2792 }
2793