For truncations being limited (abort at truncmax terms), zebrasrv returns
[idzebra-moved-to-github.git] / index / rpnsearch.c
1 /* $Id: rpnsearch.c,v 1.7 2007-01-16 15:31:23 adam Exp $
2    Copyright (C) 1995-2007
3    Index Data ApS
4
5 This file is part of the Zebra server.
6
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
20
21 */
22
23 #include <stdio.h>
24 #include <assert.h>
25 #ifdef WIN32
26 #include <io.h>
27 #endif
28 #if HAVE_UNISTD_H
29 #include <unistd.h>
30 #endif
31 #include <ctype.h>
32
33 #include <yaz/diagbib1.h>
34 #include "index.h"
35 #include <zebra_xpath.h>
36 #include <attrfind.h>
37 #include <charmap.h>
38 #include <rset.h>
39
40 static int log_level_set = 0;
41 static int log_level_rpn = 0;
42
43 #define TERMSET_DISABLE 1
44
45 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
46 {
47     struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
48     const char **out = zebra_maps_input(p->zm, p->reg_type, from, len, 0);
49 #if 0
50     if (out && *out)
51     {
52         const char *outp = *out;
53         yaz_log(YLOG_LOG, "---");
54         while (*outp)
55         {
56             yaz_log(YLOG_LOG, "%02X", *outp);
57             outp++;
58         }
59     }
60 #endif
61     return out;
62 }
63
64 void rpn_char_map_prepare(struct zebra_register *reg, int reg_type,
65                           struct rpn_char_map_info *map_info)
66 {
67     map_info->zm = reg->zebra_maps;
68     map_info->reg_type = reg_type;
69     dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
70 }
71
72 #define TERM_COUNT        
73        
74 struct grep_info {        
75 #ifdef TERM_COUNT        
76     int *term_no;        
77 #endif        
78     ISAM_P *isam_p_buf;
79     int isam_p_size;        
80     int isam_p_indx;
81     int trunc_max;
82     ZebraHandle zh;
83     int reg_type;
84     ZebraSet termset;
85 };        
86
87 static int add_isam_p(const char *name, const char *info,
88                       struct grep_info *p)
89 {
90     if (!log_level_set)
91     {
92         log_level_rpn = yaz_log_module_level("rpn");
93         log_level_set = 1;
94     }
95     /* we may have to stop this madness.. NOTE: -1 so that if
96        truncmax == trunxlimit we do *not* generate result sets */
97     if (p->isam_p_indx >= p->trunc_max - 1)
98         return 1;
99
100     if (p->isam_p_indx == p->isam_p_size)
101     {
102         ISAM_P *new_isam_p_buf;
103 #ifdef TERM_COUNT        
104         int *new_term_no;        
105 #endif
106         p->isam_p_size = 2*p->isam_p_size + 100;
107         new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
108                                             p->isam_p_size);
109         if (p->isam_p_buf)
110         {
111             memcpy(new_isam_p_buf, p->isam_p_buf,
112                     p->isam_p_indx * sizeof(*p->isam_p_buf));
113             xfree(p->isam_p_buf);
114         }
115         p->isam_p_buf = new_isam_p_buf;
116
117 #ifdef TERM_COUNT
118         new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
119         if (p->term_no)
120         {
121             memcpy(new_term_no, p->isam_p_buf,
122                     p->isam_p_indx * sizeof(*p->term_no));
123             xfree(p->term_no);
124         }
125         p->term_no = new_term_no;
126 #endif
127     }
128     assert(*info == sizeof(*p->isam_p_buf));
129     memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
130
131     if (p->termset)
132     {
133         const char *db;
134         char term_tmp[IT_MAX_WORD];
135         int ord = 0;
136         const char *index_name;
137         int len = key_SU_decode (&ord, (const unsigned char *) name);
138         
139         zebra_term_untrans  (p->zh, p->reg_type, term_tmp, name+len);
140         yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
141         zebraExplain_lookup_ord(p->zh->reg->zei,
142                                 ord, 0 /* index_type */, &db, &index_name);
143         yaz_log(log_level_rpn, "grep:  db=%s index=%s", db, index_name);
144         
145         resultSetAddTerm(p->zh, p->termset, name[len], db,
146                          index_name, term_tmp);
147     }
148     (p->isam_p_indx)++;
149     return 0;
150 }
151
/* Untyped adapter around add_isam_p(): p is the struct grep_info collector. */
static int grep_handle(char *name, const char *info, void *p)
{
    struct grep_info *collector = (struct grep_info *) p;

    return add_isam_p(name, info, collector);
}
156
157 static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src,
158                     const char *ct1, const char *ct2, int first)
159 {
160     const char *s1, *s0 = *src;
161     const char **map;
162
163     /* skip white space */
164     while (*s0)
165     {
166         if (ct1 && strchr(ct1, *s0))
167             break;
168         if (ct2 && strchr(ct2, *s0))
169             break;
170         s1 = s0;
171         map = zebra_maps_input(zebra_maps, reg_type, &s1, strlen(s1), first);
172         if (**map != *CHR_SPACE)
173             break;
174         s0 = s1;
175     }
176     *src = s0;
177     return *s0;
178 }
179
180
/*
 * esc_str: render in_size bytes of in_buf as "HH:c  " groups (hex value,
 * then the character itself, or '?' when it is outside 32..126).
 * Output stops early, with ".." appended, once more than out_size-20
 * characters have been produced.  out_size must exceed 20.
 */
static void esc_str(char *out_buf, size_t out_size,
                    const char *in_buf, int in_size)
{
    size_t used = 0;
    const char *in = in_buf;
    const char *in_end = in_buf + in_size;

    assert(out_buf);
    assert(in_buf);
    assert(out_size > 20);
    out_buf[0] = '\0';
    for (; in < in_end; in++)
    {
        int c = *in & 0xff;
        int pc = (c >= 32 && c <= 126) ? c : '?';

        used += sprintf(out_buf + used, "%02X:%c  ", c, pc);
        if (used > out_size-20)
        {
            strcpy(out_buf + used, "..");
            break;
        }
    }
}
206
207 #define REGEX_CHARS " []()|.*+?!"
208
209 /* term_100: handle term, where trunc = none(no operators at all) */
210 static int term_100(ZebraMaps zebra_maps, int reg_type,
211                     const char **src, char *dst, int space_split,
212                     char *dst_term)
213 {
214     const char *s0;
215     const char **map;
216     int i = 0;
217     int j = 0;
218
219     const char *space_start = 0;
220     const char *space_end = 0;
221
222     if (!term_pre(zebra_maps, reg_type, src, NULL, NULL, !space_split))
223         return 0;
224     s0 = *src;
225     while (*s0)
226     {
227         const char *s1 = s0;
228         int q_map_match = 0;
229         map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
230                                 &q_map_match);
231         if (space_split)
232         {
233             if (**map == *CHR_SPACE)
234                 break;
235         }
236         else  /* complete subfield only. */
237         {
238             if (**map == *CHR_SPACE)
239             {   /* save space mapping for later  .. */
240                 space_start = s1;
241                 space_end = s0;
242                 continue;
243             }
244             else if (space_start)
245             {   /* reload last space */
246                 while (space_start < space_end)
247                 {
248                     if (strchr(REGEX_CHARS, *space_start))
249                         dst[i++] = '\\';
250                     dst_term[j++] = *space_start;
251                     dst[i++] = *space_start++;
252                 }
253                 /* and reset */
254                 space_start = space_end = 0;
255             }
256         }
257         /* add non-space char */
258         memcpy(dst_term+j, s1, s0 - s1);
259         j += (s0 - s1);
260         if (!q_map_match)
261         {
262             while (s1 < s0)
263             {
264                 if (strchr(REGEX_CHARS, *s1))
265                     dst[i++] = '\\';
266                 dst[i++] = *s1++;
267             }
268         }
269         else
270         {
271             char tmpbuf[80];
272             esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
273             
274             strcpy(dst + i, map[0]);
275             i += strlen(map[0]);
276         }
277     }
278     dst[i] = '\0';
279     dst_term[j] = '\0';
280     *src = s0;
281     return i;
282 }
283
284 /* term_101: handle term, where trunc = Process # */
285 static int term_101(ZebraMaps zebra_maps, int reg_type,
286                     const char **src, char *dst, int space_split,
287                     char *dst_term)
288 {
289     const char *s0;
290     const char **map;
291     int i = 0;
292     int j = 0;
293
294     if (!term_pre(zebra_maps, reg_type, src, "#", "#", !space_split))
295         return 0;
296     s0 = *src;
297     while (*s0)
298     {
299         if (*s0 == '#')
300         {
301             dst[i++] = '.';
302             dst[i++] = '*';
303             dst_term[j++] = *s0++;
304         }
305         else
306         {
307             const char *s1 = s0;
308             int q_map_match = 0;
309             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
310                                     &q_map_match);
311             if (space_split && **map == *CHR_SPACE)
312                 break;
313
314             /* add non-space char */
315             memcpy(dst_term+j, s1, s0 - s1);
316             j += (s0 - s1);
317             if (!q_map_match)
318             {
319                 while (s1 < s0)
320                 {
321                     if (strchr(REGEX_CHARS, *s1))
322                         dst[i++] = '\\';
323                     dst[i++] = *s1++;
324                 }
325             }
326             else
327             {
328                 char tmpbuf[80];
329                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
330                 
331                 strcpy(dst + i, map[0]);
332                 i += strlen(map[0]);
333             }
334         }
335     }
336     dst[i] = '\0';
337     dst_term[j++] = '\0';
338     *src = s0;
339     return i;
340 }
341
342 /* term_103: handle term, where trunc = re-2 (regular expressions) */
343 static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src,
344                     char *dst, int *errors, int space_split,
345                     char *dst_term)
346 {
347     int i = 0;
348     int j = 0;
349     const char *s0;
350     const char **map;
351
352     if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split))
353         return 0;
354     s0 = *src;
355     if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
356         isdigit(((const unsigned char *)s0)[1]))
357     {
358         *errors = s0[1] - '0';
359         s0 += 3;
360         if (*errors > 3)
361             *errors = 3;
362     }
363     while (*s0)
364     {
365         if (strchr("^\\()[].*+?|-", *s0))
366         {
367             dst_term[j++] = *s0;
368             dst[i++] = *s0++;
369         }
370         else
371         {
372             const char *s1 = s0;
373             int q_map_match = 0;
374             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
375                                     &q_map_match);
376             if (space_split && **map == *CHR_SPACE)
377                 break;
378
379             /* add non-space char */
380             memcpy(dst_term+j, s1, s0 - s1);
381             j += (s0 - s1);
382             if (!q_map_match)
383             {
384                 while (s1 < s0)
385                 {
386                     if (strchr(REGEX_CHARS, *s1))
387                         dst[i++] = '\\';
388                     dst[i++] = *s1++;
389                 }
390             }
391             else
392             {
393                 char tmpbuf[80];
394                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
395                 
396                 strcpy(dst + i, map[0]);
397                 i += strlen(map[0]);
398             }
399         }
400     }
401     dst[i] = '\0';
402     dst_term[j] = '\0';
403     *src = s0;
404     
405     return i;
406 }
407
408 /* term_103: handle term, where trunc = re-1 (regular expressions) */
409 static int term_102(ZebraMaps zebra_maps, int reg_type, const char **src,
410                     char *dst, int space_split, char *dst_term)
411 {
412     return term_103(zebra_maps, reg_type, src, dst, NULL, space_split,
413                     dst_term);
414 }
415
416
417 /* term_104: handle term, where trunc = Process # and ! */
418 static int term_104(ZebraMaps zebra_maps, int reg_type,
419                     const char **src, char *dst, int space_split,
420                     char *dst_term)
421 {
422     const char *s0;
423     const char **map;
424     int i = 0;
425     int j = 0;
426
427     if (!term_pre(zebra_maps, reg_type, src, "?*#", "?*#", !space_split))
428         return 0;
429     s0 = *src;
430     while (*s0)
431     {
432         if (*s0 == '?')
433         {
434             dst_term[j++] = *s0++;
435             if (*s0 >= '0' && *s0 <= '9')
436             {
437                 int limit = 0;
438                 while (*s0 >= '0' && *s0 <= '9')
439                 {
440                     limit = limit * 10 + (*s0 - '0');
441                     dst_term[j++] = *s0++;
442                 }
443                 if (limit > 20)
444                     limit = 20;
445                 while (--limit >= 0)
446                 {
447                     dst[i++] = '.';
448                     dst[i++] = '?';
449                 }
450             }
451             else
452             {
453                 dst[i++] = '.';
454                 dst[i++] = '*';
455             }
456         }
457         else if (*s0 == '*')
458         {
459             dst[i++] = '.';
460             dst[i++] = '*';
461             dst_term[j++] = *s0++;
462         }
463         else if (*s0 == '#')
464         {
465             dst[i++] = '.';
466             dst_term[j++] = *s0++;
467         }
468         else
469         {
470             const char *s1 = s0;
471             int q_map_match = 0;
472             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
473                                     &q_map_match);
474             if (space_split && **map == *CHR_SPACE)
475                 break;
476
477             /* add non-space char */
478             memcpy(dst_term+j, s1, s0 - s1);
479             j += (s0 - s1);
480             if (!q_map_match)
481             {
482                 while (s1 < s0)
483                 {
484                     if (strchr(REGEX_CHARS, *s1))
485                         dst[i++] = '\\';
486                     dst[i++] = *s1++;
487                 }
488             }
489             else
490             {
491                 char tmpbuf[80];
492                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
493                 
494                 strcpy(dst + i, map[0]);
495                 i += strlen(map[0]);
496             }
497         }
498     }
499     dst[i] = '\0';
500     dst_term[j++] = '\0';
501     *src = s0;
502     return i;
503 }
504
505 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
506 static int term_105(ZebraMaps zebra_maps, int reg_type,
507                     const char **src, char *dst, int space_split,
508                     char *dst_term, int right_truncate)
509 {
510     const char *s0;
511     const char **map;
512     int i = 0;
513     int j = 0;
514
515     if (!term_pre(zebra_maps, reg_type, src, "*!", "*!", !space_split))
516         return 0;
517     s0 = *src;
518     while (*s0)
519     {
520         if (*s0 == '*')
521         {
522             dst[i++] = '.';
523             dst[i++] = '*';
524             dst_term[j++] = *s0++;
525         }
526         else if (*s0 == '!')
527         {
528             dst[i++] = '.';
529             dst_term[j++] = *s0++;
530         }
531         else
532         {
533             const char *s1 = s0;
534             int q_map_match = 0;
535             map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
536                                     &q_map_match);
537             if (space_split && **map == *CHR_SPACE)
538                 break;
539
540             /* add non-space char */
541             memcpy(dst_term+j, s1, s0 - s1);
542             j += (s0 - s1);
543             if (!q_map_match)
544             {
545                 while (s1 < s0)
546                 {
547                     if (strchr(REGEX_CHARS, *s1))
548                         dst[i++] = '\\';
549                     dst[i++] = *s1++;
550                 }
551             }
552             else
553             {
554                 char tmpbuf[80];
555                 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
556                 
557                 strcpy(dst + i, map[0]);
558                 i += strlen(map[0]);
559             }
560         }
561     }
562     if (right_truncate)
563     {
564         dst[i++] = '.';
565         dst[i++] = '*';
566     }
567     dst[i] = '\0';
568     
569     dst_term[j++] = '\0';
570     *src = s0;
571     return i;
572 }
573
574
575 /* gen_regular_rel - generate regular expression from relation
576  *  val:     border value (inclusive)
577  *  islt:    1 if <=; 0 if >=.
578  */
579 static void gen_regular_rel(char *dst, int val, int islt)
580 {
581     int dst_p;
582     int w, d, i;
583     int pos = 0;
584     char numstr[20];
585
586     yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
587     if (val >= 0)
588     {
589         if (islt)
590             strcpy(dst, "(-[0-9]+|(");
591         else
592             strcpy(dst, "((");
593     } 
594     else
595     {
596         if (!islt)
597         {
598             strcpy(dst, "([0-9]+|-(");
599             dst_p = strlen(dst);
600             islt = 1;
601         }
602         else
603         {
604             strcpy(dst, "(-(");
605             islt = 0;
606         }
607         val = -val;
608     }
609     dst_p = strlen(dst);
610     sprintf(numstr, "%d", val);
611     for (w = strlen(numstr); --w >= 0; pos++)
612     {
613         d = numstr[w];
614         if (pos > 0)
615         {
616             if (islt)
617             {
618                 if (d == '0')
619                     continue;
620                 d--;
621             } 
622             else
623             {
624                 if (d == '9')
625                     continue;
626                 d++;
627             }
628         }
629         
630         strcpy(dst + dst_p, numstr);
631         dst_p = strlen(dst) - pos - 1;
632
633         if (islt)
634         {
635             if (d != '0')
636             {
637                 dst[dst_p++] = '[';
638                 dst[dst_p++] = '0';
639                 dst[dst_p++] = '-';
640                 dst[dst_p++] = d;
641                 dst[dst_p++] = ']';
642             }
643             else
644                 dst[dst_p++] = d;
645         }
646         else
647         {
648             if (d != '9')
649             { 
650                 dst[dst_p++] = '[';
651                 dst[dst_p++] = d;
652                 dst[dst_p++] = '-';
653                 dst[dst_p++] = '9';
654                 dst[dst_p++] = ']';
655             }
656             else
657                 dst[dst_p++] = d;
658         }
659         for (i = 0; i<pos; i++)
660         {
661             dst[dst_p++] = '[';
662             dst[dst_p++] = '0';
663             dst[dst_p++] = '-';
664             dst[dst_p++] = '9';
665             dst[dst_p++] = ']';
666         }
667         dst[dst_p++] = '|';
668     }
669     dst[dst_p] = '\0';
670     if (islt)
671     {
672         /* match everything less than 10^(pos-1) */
673         strcat(dst, "0*");
674         for (i = 1; i<pos; i++)
675             strcat(dst, "[0-9]?");
676     }
677     else
678     {
679         /* match everything greater than 10^pos */
680         for (i = 0; i <= pos; i++)
681             strcat(dst, "[0-9]");
682         strcat(dst, "[0-9]*");
683     }
684     strcat(dst, "))");
685 }
686
/* Copy one logical character from src[*indx] to **term_p: a backslash is
 * copied together with the byte that follows it.  Both the output pointer
 * and the index are advanced past what was copied. */
void string_rel_add_char(char **term_p, const char *src, int *indx)
{
    char *out = *term_p;
    int pos = *indx;

    if (src[pos] == '\\')
        *out++ = src[pos++];
    *out++ = src[pos++];

    *term_p = out;
    *indx = pos;
}
693
694 /*
695  *   >  abc     ([b-].*|a[c-].*|ab[d-].*|abc.+)
696  *              ([^-a].*|a[^-b].*ab[^-c].*|abc.+)
697  *   >= abc     ([b-].*|a[c-].*|ab[c-].*)
698  *              ([^-a].*|a[^-b].*|ab[c-].*)
699  *   <  abc     ([-0].*|a[-a].*|ab[-b].*)
700  *              ([^a-].*|a[^b-].*|ab[^c-].*)
701  *   <= abc     ([-0].*|a[-a].*|ab[-b].*|abc)
702  *              ([^a-].*|a[^b-].*|ab[^c-].*|abc)
703  */
704 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
705                            const char **term_sub, char *term_dict,
706                            oid_value attributeSet,
707                            int reg_type, int space_split, char *term_dst,
708                            int *error_code)
709 {
710     AttrType relation;
711     int relation_value;
712     int i;
713     char *term_tmp = term_dict + strlen(term_dict);
714     char term_component[2*IT_MAX_WORD+20];
715
716     attr_init_APT(&relation, zapt, 2);
717     relation_value = attr_find(&relation, NULL);
718
719     *error_code = 0;
720     yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
721     switch (relation_value)
722     {
723     case 1:
724         if (!term_100(zh->reg->zebra_maps, reg_type,
725                       term_sub, term_component,
726                       space_split, term_dst))
727             return 0;
728         yaz_log(log_level_rpn, "Relation <");
729         
730         *term_tmp++ = '(';
731         for (i = 0; term_component[i]; )
732         {
733             int j = 0;
734
735             if (i)
736                 *term_tmp++ = '|';
737             while (j < i)
738                 string_rel_add_char(&term_tmp, term_component, &j);
739
740             *term_tmp++ = '[';
741
742             *term_tmp++ = '^';
743
744             *term_tmp++ = 1;
745             *term_tmp++ = FIRST_IN_FIELD_CHAR;
746
747             string_rel_add_char(&term_tmp, term_component, &i);
748             *term_tmp++ = '-';
749
750             *term_tmp++ = ']';
751             *term_tmp++ = '.';
752             *term_tmp++ = '*';
753
754             if ((term_tmp - term_dict) > IT_MAX_WORD)
755                 break;
756         }
757         *term_tmp++ = ')';
758         *term_tmp = '\0';
759         yaz_log(YLOG_LOG, "term_dict=%s", term_dict);
760         break;
761     case 2:
762         if (!term_100(zh->reg->zebra_maps, reg_type,
763                       term_sub, term_component,
764                       space_split, term_dst))
765             return 0;
766         yaz_log(log_level_rpn, "Relation <=");
767
768         *term_tmp++ = '(';
769         for (i = 0; term_component[i]; )
770         {
771             int j = 0;
772
773             while (j < i)
774                 string_rel_add_char(&term_tmp, term_component, &j);
775             *term_tmp++ = '[';
776
777             *term_tmp++ = '^';
778
779             *term_tmp++ = 1;
780             *term_tmp++ = FIRST_IN_FIELD_CHAR;
781
782             string_rel_add_char(&term_tmp, term_component, &i);
783             *term_tmp++ = '-';
784
785             *term_tmp++ = ']';
786             *term_tmp++ = '.';
787             *term_tmp++ = '*';
788
789             *term_tmp++ = '|';
790
791             if ((term_tmp - term_dict) > IT_MAX_WORD)
792                 break;
793         }
794         for (i = 0; term_component[i]; )
795             string_rel_add_char(&term_tmp, term_component, &i);
796         *term_tmp++ = ')';
797         *term_tmp = '\0';
798         break;
799     case 5:
800         if (!term_100 (zh->reg->zebra_maps, reg_type,
801                        term_sub, term_component, space_split, term_dst))
802             return 0;
803         yaz_log(log_level_rpn, "Relation >");
804
805         *term_tmp++ = '(';
806         for (i = 0; term_component[i];)
807         {
808             int j = 0;
809
810             while (j < i)
811                 string_rel_add_char(&term_tmp, term_component, &j);
812             *term_tmp++ = '[';
813             
814             *term_tmp++ = '^';
815             *term_tmp++ = '-';
816             string_rel_add_char(&term_tmp, term_component, &i);
817
818             *term_tmp++ = ']';
819             *term_tmp++ = '.';
820             *term_tmp++ = '*';
821
822             *term_tmp++ = '|';
823
824             if ((term_tmp - term_dict) > IT_MAX_WORD)
825                 break;
826         }
827         for (i = 0; term_component[i];)
828             string_rel_add_char(&term_tmp, term_component, &i);
829         *term_tmp++ = '.';
830         *term_tmp++ = '+';
831         *term_tmp++ = ')';
832         *term_tmp = '\0';
833         break;
834     case 4:
835         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
836                       term_component, space_split, term_dst))
837             return 0;
838         yaz_log(log_level_rpn, "Relation >=");
839
840         *term_tmp++ = '(';
841         for (i = 0; term_component[i];)
842         {
843             int j = 0;
844
845             if (i)
846                 *term_tmp++ = '|';
847             while (j < i)
848                 string_rel_add_char(&term_tmp, term_component, &j);
849             *term_tmp++ = '[';
850
851             if (term_component[i+1])
852             {
853                 *term_tmp++ = '^';
854                 *term_tmp++ = '-';
855                 string_rel_add_char(&term_tmp, term_component, &i);
856             }
857             else
858             {
859                 string_rel_add_char(&term_tmp, term_component, &i);
860                 *term_tmp++ = '-';
861             }
862             *term_tmp++ = ']';
863             *term_tmp++ = '.';
864             *term_tmp++ = '*';
865
866             if ((term_tmp - term_dict) > IT_MAX_WORD)
867                 break;
868         }
869         *term_tmp++ = ')';
870         *term_tmp = '\0';
871         break;
872     case 3:
873     case 102:
874     case -1:
875         if (!**term_sub)
876             return 1;
877         yaz_log(log_level_rpn, "Relation =");
878         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
879                       term_component, space_split, term_dst))
880             return 0;
881         strcat(term_tmp, "(");
882         strcat(term_tmp, term_component);
883         strcat(term_tmp, ")");
884         break;
885     case 103:
886         yaz_log(log_level_rpn, "Relation always matches");
887         /* skip to end of term (we don't care what it is) */
888         while (**term_sub != '\0')
889             (*term_sub)++;
890         break;
891     default:
892         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
893         return 0;
894     }
895     return 1;
896 }
897
898 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
899                              const char **term_sub, 
900                              oid_value attributeSet, NMEM stream,
901                              struct grep_info *grep_info,
902                              int reg_type, int complete_flag,
903                              int num_bases, char **basenames,
904                              char *term_dst,
905                              const char *xpath_use,
906                              struct ord_list **ol);
907
908 static ZEBRA_RES term_limits_APT(ZebraHandle zh,
909                                  Z_AttributesPlusTerm *zapt,
910                                  zint *hits_limit_value,
911                                  const char **term_ref_id_str,
912                                  NMEM nmem)
913 {
914     AttrType term_ref_id_attr;
915     AttrType hits_limit_attr;
916     int term_ref_id_int;
917  
918     attr_init_APT(&hits_limit_attr, zapt, 11);
919     *hits_limit_value  = attr_find(&hits_limit_attr, NULL);
920
921     attr_init_APT(&term_ref_id_attr, zapt, 10);
922     term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
923     if (term_ref_id_int >= 0)
924     {
925         char *res = nmem_malloc(nmem, 20);
926         sprintf(res, "%d", term_ref_id_int);
927         *term_ref_id_str = res;
928     }
929
930     /* no limit given ? */
931     if (*hits_limit_value == -1)
932     {
933         if (*term_ref_id_str)
934         {
935             /* use global if term_ref is present */
936             *hits_limit_value = zh->approx_limit;
937         }
938         else
939         {
940             /* no counting if term_ref is not present */
941             *hits_limit_value = 0;
942         }
943     }
944     else if (*hits_limit_value == 0)
945     {
946         /* 0 is the same as global limit */
947         *hits_limit_value = zh->approx_limit;
948     }
949     yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
950             *term_ref_id_str ? *term_ref_id_str : "none",
951             *hits_limit_value);
952     return ZEBRA_OK;
953 }
954
955 static ZEBRA_RES term_trunc(ZebraHandle zh,
956                             Z_AttributesPlusTerm *zapt,
957                             const char **term_sub, 
958                             oid_value attributeSet, NMEM stream,
959                             struct grep_info *grep_info,
960                             int reg_type, int complete_flag,
961                             int num_bases, char **basenames,
962                             char *term_dst,
963                             const char *rank_type, 
964                             const char *xpath_use,
965                             NMEM rset_nmem,
966                             RSET *rset,
967                             struct rset_key_control *kc)
968 {
969     ZEBRA_RES res;
970     struct ord_list *ol;
971     zint hits_limit_value;
972     const char *term_ref_id_str = 0;
973     *rset = 0;
974
975     term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
976     grep_info->isam_p_indx = 0;
977     res = string_term(zh, zapt, term_sub, attributeSet, stream, grep_info,
978                       reg_type, complete_flag, num_bases, basenames,
979                       term_dst, xpath_use, &ol);
980     if (res != ZEBRA_OK)
981         return res;
982     if (!*term_sub)  /* no more terms ? */
983         return res;
984     yaz_log(log_level_rpn, "term: %s", term_dst);
985     *rset = rset_trunc(zh, grep_info->isam_p_buf,
986                        grep_info->isam_p_indx, term_dst,
987                        strlen(term_dst), rank_type, 1 /* preserve pos */,
988                        zapt->term->which, rset_nmem,
989                        kc, kc->scope, ol, reg_type, hits_limit_value,
990                        term_ref_id_str);
991     if (!*rset)
992         return ZEBRA_FAIL;
993     return ZEBRA_OK;
994 }
995
996 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
997                              const char **term_sub, 
998                              oid_value attributeSet, NMEM stream,
999                              struct grep_info *grep_info,
1000                              int reg_type, int complete_flag,
1001                              int num_bases, char **basenames,
1002                              char *term_dst,
1003                              const char *xpath_use,
1004                              struct ord_list **ol)
1005 {
1006     char term_dict[2*IT_MAX_WORD+4000];
1007     int j, r, base_no;
1008     AttrType truncation;
1009     int truncation_value;
1010     const char *termp;
1011     struct rpn_char_map_info rcmi;
1012     int space_split = complete_flag ? 0 : 1;
1013
1014     int bases_ok = 0;     /* no of databases with OK attribute */
1015
1016     *ol = ord_list_create(stream);
1017
1018     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1019     attr_init_APT(&truncation, zapt, 5);
1020     truncation_value = attr_find(&truncation, NULL);
1021     yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1022
1023     for (base_no = 0; base_no < num_bases; base_no++)
1024     {
1025         int ord = -1;
1026         int regex_range = 0;
1027         int max_pos, prefix_len = 0;
1028         int relation_error;
1029         char ord_buf[32];
1030         int ord_len, i;
1031
1032         termp = *term_sub; /* start of term for each database */
1033
1034         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1035         {
1036             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1037                            basenames[base_no]);
1038             return ZEBRA_FAIL;
1039         }
1040         
1041         if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use,
1042                               attributeSet, &ord) != ZEBRA_OK)
1043             continue;
1044
1045         bases_ok++;
1046
1047         *ol = ord_list_append(stream, *ol, ord);
1048         ord_len = key_SU_encode (ord, ord_buf);
1049         
1050         term_dict[prefix_len++] = '(';
1051         for (i = 0; i<ord_len; i++)
1052         {
1053             term_dict[prefix_len++] = 1;  /* our internal regexp escape char */
1054             term_dict[prefix_len++] = ord_buf[i];
1055         }
1056         term_dict[prefix_len++] = ')';
1057         term_dict[prefix_len] = '\0';
1058         j = prefix_len;
1059         switch (truncation_value)
1060         {
1061         case -1:         /* not specified */
1062         case 100:        /* do not truncate */
1063             if (!string_relation(zh, zapt, &termp, term_dict,
1064                                  attributeSet,
1065                                  reg_type, space_split, term_dst,
1066                                  &relation_error))
1067             {
1068                 if (relation_error)
1069                 {
1070                     zebra_setError(zh, relation_error, 0);
1071                     return ZEBRA_FAIL;
1072                 }
1073                 *term_sub = 0;
1074                 return ZEBRA_OK;
1075             }
1076             break;
1077         case 1:          /* right truncation */
1078             term_dict[j++] = '(';
1079             if (!term_100(zh->reg->zebra_maps, reg_type,
1080                           &termp, term_dict + j, space_split, term_dst))
1081             {
1082                 *term_sub = 0;
1083                 return ZEBRA_OK;
1084             }
1085             strcat(term_dict, ".*)");
1086             break;
1087         case 2:          /* keft truncation */
1088             term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1089             if (!term_100(zh->reg->zebra_maps, reg_type,
1090                           &termp, term_dict + j, space_split, term_dst))
1091             {
1092                 *term_sub = 0;
1093                 return ZEBRA_OK;
1094             }
1095             strcat(term_dict, ")");
1096             break;
1097         case 3:          /* left&right truncation */
1098             term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1099             if (!term_100(zh->reg->zebra_maps, reg_type,
1100                           &termp, term_dict + j, space_split, term_dst))
1101             {
1102                 *term_sub = 0;
1103                 return ZEBRA_OK;
1104             }
1105             strcat(term_dict, ".*)");
1106             break;
1107         case 101:        /* process # in term */
1108             term_dict[j++] = '(';
1109             if (!term_101(zh->reg->zebra_maps, reg_type,
1110                           &termp, term_dict + j, space_split, term_dst))
1111             {
1112                 *term_sub = 0;
1113                 return ZEBRA_OK;
1114             }
1115             strcat(term_dict, ")");
1116             break;
1117         case 102:        /* Regexp-1 */
1118             term_dict[j++] = '(';
1119             if (!term_102(zh->reg->zebra_maps, reg_type,
1120                           &termp, term_dict + j, space_split, term_dst))
1121             {
1122                 *term_sub = 0;
1123                 return ZEBRA_OK;
1124             }
1125             strcat(term_dict, ")");
1126             break;
1127         case 103:       /* Regexp-2 */
1128             regex_range = 1;
1129             term_dict[j++] = '(';
1130             if (!term_103(zh->reg->zebra_maps, reg_type,
1131                           &termp, term_dict + j, &regex_range,
1132                           space_split, term_dst))
1133             {
1134                 *term_sub = 0;
1135                 return ZEBRA_OK;
1136             }
1137             strcat(term_dict, ")");
1138             break;
1139         case 104:        /* process # and ! in term */
1140             term_dict[j++] = '(';
1141             if (!term_104(zh->reg->zebra_maps, reg_type,
1142                           &termp, term_dict + j, space_split, term_dst))
1143             {
1144                 *term_sub = 0;
1145                 return ZEBRA_OK;
1146             }
1147             strcat(term_dict, ")");
1148             break;
1149         case 105:        /* process * and ! in term */
1150             term_dict[j++] = '(';
1151             if (!term_105(zh->reg->zebra_maps, reg_type,
1152                           &termp, term_dict + j, space_split, term_dst, 1))
1153             {
1154                 *term_sub = 0;
1155                 return ZEBRA_OK;
1156             }
1157             strcat(term_dict, ")");
1158             break;
1159         case 106:        /* process * and ! in term */
1160             term_dict[j++] = '(';
1161             if (!term_105(zh->reg->zebra_maps, reg_type,
1162                           &termp, term_dict + j, space_split, term_dst, 0))
1163             {
1164                 *term_sub = 0;
1165                 return ZEBRA_OK;
1166             }
1167             strcat(term_dict, ")");
1168             break;
1169         default:
1170             zebra_setError_zint(zh,
1171                                 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1172                                 truncation_value);
1173             return ZEBRA_FAIL;
1174         }
1175         if (1)
1176         {
1177             char buf[80];
1178             const char *input = term_dict + prefix_len;
1179             esc_str(buf, sizeof(buf), input, strlen(input));
1180         }
1181         yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_dict+prefix_len);
1182         r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
1183                              grep_info, &max_pos, 
1184                              ord_len /* number of "exact" chars */,
1185                              grep_handle);
1186         if (r == 1)
1187             zebra_set_partial_result(zh);
1188         else if (r)
1189             yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1190     }
1191     if (!bases_ok)
1192         return ZEBRA_FAIL;
1193     *term_sub = termp;
1194     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1195     return ZEBRA_OK;
1196 }
1197
1198
1199
1200 static void grep_info_delete(struct grep_info *grep_info)
1201 {
1202 #ifdef TERM_COUNT
1203     xfree(grep_info->term_no);
1204 #endif
1205     xfree(grep_info->isam_p_buf);
1206 }
1207
1208 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1209                                    Z_AttributesPlusTerm *zapt,
1210                                    struct grep_info *grep_info,
1211                                    int reg_type)
1212 {
1213     AttrType termset;
1214     int termset_value_numeric;
1215     const char *termset_value_string;
1216
1217 #ifdef TERM_COUNT
1218     grep_info->term_no = 0;
1219 #endif
1220     grep_info->trunc_max = atoi(res_get_def(zh->res, "truncmax", "10000"));
1221     grep_info->isam_p_size = 0;
1222     grep_info->isam_p_buf = NULL;
1223     grep_info->zh = zh;
1224     grep_info->reg_type = reg_type;
1225     grep_info->termset = 0;
1226     if (!zapt)
1227         return ZEBRA_OK;
1228     attr_init_APT(&termset, zapt, 8);
1229     termset_value_numeric =
1230         attr_find_ex(&termset, NULL, &termset_value_string);
1231     if (termset_value_numeric != -1)
1232     {
1233 #if TERMSET_DISABLE
1234         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1235         return ZEBRA_FAIL;
1236 #else
1237         char resname[32];
1238         const char *termset_name = 0;
1239         if (termset_value_numeric != -2)
1240         {
1241     
1242             sprintf(resname, "%d", termset_value_numeric);
1243             termset_name = resname;
1244         }
1245         else
1246             termset_name = termset_value_string;
1247         yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1248         grep_info->termset = resultSetAdd(zh, termset_name, 1);
1249         if (!grep_info->termset)
1250         {
1251             zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1252             return ZEBRA_FAIL;
1253         }
1254 #endif
1255     }
1256     return ZEBRA_OK;
1257 }
1258                                
1259 /**
1260   \brief Create result set(s) for list of terms
1261   \param zh Zebra Handle
1262   \param zapt Attributes Plust Term (RPN leaf)
1263   \param termz term as used in query but converted to UTF-8
1264   \param attributeSet default attribute set
1265   \param stream memory for result
1266   \param reg_type register type ('w', 'p',..)
1267   \param complete_flag whether it's phrases or not
1268   \param rank_type term flags for ranking
1269   \param xpath_use use attribute for X-Path (-1 for no X-path)
1270   \param num_bases number of databases
1271   \param basenames array of databases
1272   \param rset_nmem memory for result sets
1273   \param result_sets output result set for each term in list (output)
1274   \param num_result_sets number of output result sets
1275   \param kc rset key control to be used for created result sets
1276 */
1277 static ZEBRA_RES term_list_trunc(ZebraHandle zh,
1278                                  Z_AttributesPlusTerm *zapt,
1279                                  const char *termz,
1280                                  oid_value attributeSet,
1281                                  NMEM stream,
1282                                  int reg_type, int complete_flag,
1283                                  const char *rank_type,
1284                                  const char *xpath_use,
1285                                  int num_bases, char **basenames, 
1286                                  NMEM rset_nmem,
1287                                  RSET **result_sets, int *num_result_sets,
1288                                  struct rset_key_control *kc)
1289 {
1290     char term_dst[IT_MAX_WORD+1];
1291     struct grep_info grep_info;
1292     const char *termp = termz;
1293     int alloc_sets = 0;
1294
1295     *num_result_sets = 0;
1296     *term_dst = 0;
1297     if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1298         return ZEBRA_FAIL;
1299     while(1)
1300     { 
1301         ZEBRA_RES res;
1302
1303         if (alloc_sets == *num_result_sets)
1304         {
1305             int add = 10;
1306             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1307                                               sizeof(*rnew));
1308             if (alloc_sets)
1309                 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1310             alloc_sets = alloc_sets + add;
1311             *result_sets = rnew;
1312         }
1313         res = term_trunc(zh, zapt, &termp, attributeSet,
1314                          stream, &grep_info,
1315                          reg_type, complete_flag,
1316                          num_bases, basenames,
1317                          term_dst, rank_type,
1318                          xpath_use, rset_nmem,
1319                          &(*result_sets)[*num_result_sets],
1320                          kc);
1321         if (res != ZEBRA_OK)
1322         {
1323             int i;
1324             for (i = 0; i < *num_result_sets; i++)
1325                 rset_delete((*result_sets)[i]);
1326             grep_info_delete (&grep_info);
1327             return res;
1328         }
1329         if ((*result_sets)[*num_result_sets] == 0)
1330             break;
1331         (*num_result_sets)++;
1332
1333         if (!*termp)
1334             break;
1335     }
1336     grep_info_delete(&grep_info);
1337     return ZEBRA_OK;
1338 }
1339
1340 static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh,
1341                                          Z_AttributesPlusTerm *zapt,
1342                                          oid_value attributeSet,
1343                                          int reg_type,
1344                                          int num_bases, char **basenames,
1345                                          NMEM rset_nmem,
1346                                          RSET *rset,
1347                                          struct rset_key_control *kc)
1348 {
1349     RSET *f_set;
1350     int base_no;
1351     int position_value;
1352     int num_sets = 0;
1353     AttrType position;
1354
1355     attr_init_APT(&position, zapt, 3);
1356     position_value = attr_find(&position, NULL);
1357     switch(position_value)
1358     {
1359     case 3:
1360     case -1:
1361         return ZEBRA_OK;
1362     case 1:
1363     case 2:
1364         break;
1365     default:
1366         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1367                             position_value);
1368         return ZEBRA_FAIL;
1369     }
1370
1371     if (!zebra_maps_is_first_in_field(zh->reg->zebra_maps, reg_type))
1372     {
1373         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1374                             position_value);
1375         return ZEBRA_FAIL;
1376     }
1377
1378     if (!zh->reg->isamb && !zh->reg->isamc)
1379     {
1380         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1381                             position_value);
1382         return ZEBRA_FAIL;
1383     }
1384     f_set = xmalloc(sizeof(RSET) * num_bases);
1385     for (base_no = 0; base_no < num_bases; base_no++)
1386     {
1387         int ord = -1;
1388         char ord_buf[32];
1389         char term_dict[100];
1390         int ord_len;
1391         char *val;
1392         ISAM_P isam_p;
1393
1394         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1395         {
1396             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1397                            basenames[base_no]);
1398             return ZEBRA_FAIL;
1399         }
1400         
1401         if (zebra_apt_get_ord(zh, zapt, reg_type, 0,
1402                               attributeSet, &ord) != ZEBRA_OK)
1403             continue;
1404
1405         ord_len = key_SU_encode (ord, ord_buf);
1406         memcpy(term_dict, ord_buf, ord_len);
1407         strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1408         val = dict_lookup(zh->reg->dict, term_dict);
1409         if (!val)
1410             continue;
1411         assert(*val == sizeof(ISAM_P));
1412         memcpy(&isam_p, val+1, sizeof(isam_p));
1413         
1414
1415         if (zh->reg->isamb)
1416             f_set[num_sets++] = rsisamb_create(rset_nmem, kc, kc->scope,
1417                                                zh->reg->isamb, isam_p, 0);
1418         else if (zh->reg->isamc)
1419             f_set[num_sets++] = rsisamc_create(rset_nmem, kc, kc->scope,
1420                                                zh->reg->isamc, isam_p, 0);
1421     }
1422     if (num_sets)
1423     {
1424         *rset = rset_create_or(rset_nmem, kc, kc->scope,
1425                                0 /* termid */, num_sets, f_set);
1426     }
1427     xfree(f_set);
1428     return ZEBRA_OK;
1429 }
1430                                          
1431 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1432                                        Z_AttributesPlusTerm *zapt,
1433                                        const char *termz_org,
1434                                        oid_value attributeSet,
1435                                        NMEM stream,
1436                                        int reg_type, int complete_flag,
1437                                        const char *rank_type,
1438                                        const char *xpath_use,
1439                                        int num_bases, char **basenames, 
1440                                        NMEM rset_nmem,
1441                                        RSET *rset,
1442                                        struct rset_key_control *kc)
1443 {
1444     RSET *result_sets = 0;
1445     int num_result_sets = 0;
1446     ZEBRA_RES res =
1447         term_list_trunc(zh, zapt, termz_org, attributeSet,
1448                         stream, reg_type, complete_flag,
1449                         rank_type, xpath_use,
1450                         num_bases, basenames,
1451                         rset_nmem,
1452                         &result_sets, &num_result_sets, kc);
1453
1454     if (res != ZEBRA_OK)
1455         return res;
1456
1457     if (num_result_sets > 0)
1458     {
1459         RSET first_set = 0;
1460         res = rpn_search_APT_position(zh, zapt, attributeSet, 
1461                                       reg_type,
1462                                       num_bases, basenames,
1463                                       rset_nmem, &first_set,
1464                                       kc);
1465         if (res != ZEBRA_OK)
1466             return res;
1467         if (first_set)
1468         {
1469             RSET *nsets = nmem_malloc(stream,
1470                                       sizeof(RSET) * (num_result_sets+1));
1471             nsets[0] = first_set;
1472             memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1473             result_sets = nsets;
1474             num_result_sets++;
1475         }
1476     }
1477     if (num_result_sets == 0)
1478         *rset = rset_create_null(rset_nmem, kc, 0); 
1479     else if (num_result_sets == 1)
1480         *rset = result_sets[0];
1481     else
1482         *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1483                                  num_result_sets, result_sets,
1484                                  1 /* ordered */, 0 /* exclusion */,
1485                                  3 /* relation */, 1 /* distance */);
1486     if (!*rset)
1487         return ZEBRA_FAIL;
1488     return ZEBRA_OK;
1489 }
1490
1491 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1492                                         Z_AttributesPlusTerm *zapt,
1493                                         const char *termz_org,
1494                                         oid_value attributeSet,
1495                                         NMEM stream,
1496                                         int reg_type, int complete_flag,
1497                                         const char *rank_type,
1498                                         const char *xpath_use,
1499                                         int num_bases, char **basenames,
1500                                         NMEM rset_nmem,
1501                                         RSET *rset,
1502                                         struct rset_key_control *kc)
1503 {
1504     RSET *result_sets = 0;
1505     int num_result_sets = 0;
1506     int i;
1507     ZEBRA_RES res =
1508         term_list_trunc(zh, zapt, termz_org, attributeSet,
1509                         stream, reg_type, complete_flag,
1510                         rank_type, xpath_use,
1511                         num_bases, basenames,
1512                         rset_nmem,
1513                         &result_sets, &num_result_sets, kc);
1514     if (res != ZEBRA_OK)
1515         return res;
1516
1517     for (i = 0; i<num_result_sets; i++)
1518     {
1519         RSET first_set = 0;
1520         res = rpn_search_APT_position(zh, zapt, attributeSet, 
1521                                       reg_type,
1522                                       num_bases, basenames,
1523                                       rset_nmem, &first_set,
1524                                       kc);
1525         if (res != ZEBRA_OK)
1526         {
1527             for (i = 0; i<num_result_sets; i++)
1528                 rset_delete(result_sets[i]);
1529             return res;
1530         }
1531
1532         if (first_set)
1533         {
1534             RSET tmp_set[2];
1535
1536             tmp_set[0] = first_set;
1537             tmp_set[1] = result_sets[i];
1538             
1539             result_sets[i] = rset_create_prox(
1540                 rset_nmem, kc, kc->scope,
1541                 2, tmp_set,
1542                 1 /* ordered */, 0 /* exclusion */,
1543                 3 /* relation */, 1 /* distance */);
1544         }
1545     }
1546     if (num_result_sets == 0)
1547         *rset = rset_create_null(rset_nmem, kc, 0); 
1548     else if (num_result_sets == 1)
1549         *rset = result_sets[0];
1550     else
1551         *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1552                                num_result_sets, result_sets);
1553     if (!*rset)
1554         return ZEBRA_FAIL;
1555     return ZEBRA_OK;
1556 }
1557
1558 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1559                                          Z_AttributesPlusTerm *zapt,
1560                                          const char *termz_org,
1561                                          oid_value attributeSet,
1562                                          NMEM stream,
1563                                          int reg_type, int complete_flag,
1564                                          const char *rank_type, 
1565                                          const char *xpath_use,
1566                                          int num_bases, char **basenames,
1567                                          NMEM rset_nmem,
1568                                          RSET *rset,
1569                                          struct rset_key_control *kc)
1570 {
1571     RSET *result_sets = 0;
1572     int num_result_sets = 0;
1573     int i;
1574     ZEBRA_RES res =
1575         term_list_trunc(zh, zapt, termz_org, attributeSet,
1576                         stream, reg_type, complete_flag,
1577                         rank_type, xpath_use,
1578                         num_bases, basenames,
1579                         rset_nmem,
1580                         &result_sets, &num_result_sets,
1581                         kc);
1582     if (res != ZEBRA_OK)
1583         return res;
1584     for (i = 0; i<num_result_sets; i++)
1585     {
1586         RSET first_set = 0;
1587         res = rpn_search_APT_position(zh, zapt, attributeSet, 
1588                                       reg_type,
1589                                       num_bases, basenames,
1590                                       rset_nmem, &first_set,
1591                                       kc);
1592         if (res != ZEBRA_OK)
1593         {
1594             for (i = 0; i<num_result_sets; i++)
1595                 rset_delete(result_sets[i]);
1596             return res;
1597         }
1598
1599         if (first_set)
1600         {
1601             RSET tmp_set[2];
1602
1603             tmp_set[0] = first_set;
1604             tmp_set[1] = result_sets[i];
1605             
1606             result_sets[i] = rset_create_prox(
1607                 rset_nmem, kc, kc->scope,
1608                 2, tmp_set,
1609                 1 /* ordered */, 0 /* exclusion */,
1610                 3 /* relation */, 1 /* distance */);
1611         }
1612     }
1613
1614
1615     if (num_result_sets == 0)
1616         *rset = rset_create_null(rset_nmem, kc, 0); 
1617     else if (num_result_sets == 1)
1618         *rset = result_sets[0];
1619     else
1620         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1621                                 num_result_sets, result_sets);
1622     if (!*rset)
1623         return ZEBRA_FAIL;
1624     return ZEBRA_OK;
1625 }
1626
1627 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1628                             const char **term_sub,
1629                             char *term_dict,
1630                             oid_value attributeSet,
1631                             struct grep_info *grep_info,
1632                             int *max_pos,
1633                             int reg_type,
1634                             char *term_dst,
1635                             int *error_code)
1636 {
1637     AttrType relation;
1638     int relation_value;
1639     int term_value;
1640     int r;
1641     char *term_tmp = term_dict + strlen(term_dict);
1642
1643     *error_code = 0;
1644     attr_init_APT(&relation, zapt, 2);
1645     relation_value = attr_find(&relation, NULL);
1646
1647     yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1648
1649     switch (relation_value)
1650     {
1651     case 1:
1652         yaz_log(log_level_rpn, "Relation <");
1653         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1654                       term_dst))
1655             return 0;
1656         term_value = atoi (term_tmp);
1657         gen_regular_rel(term_tmp, term_value-1, 1);
1658         break;
1659     case 2:
1660         yaz_log(log_level_rpn, "Relation <=");
1661         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1662                       term_dst))
1663             return 0;
1664         term_value = atoi (term_tmp);
1665         gen_regular_rel(term_tmp, term_value, 1);
1666         break;
1667     case 4:
1668         yaz_log(log_level_rpn, "Relation >=");
1669         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1670                       term_dst))
1671             return 0;
1672         term_value = atoi (term_tmp);
1673         gen_regular_rel(term_tmp, term_value, 0);
1674         break;
1675     case 5:
1676         yaz_log(log_level_rpn, "Relation >");
1677         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1678                       term_dst))
1679             return 0;
1680         term_value = atoi (term_tmp);
1681         gen_regular_rel(term_tmp, term_value+1, 0);
1682         break;
1683     case -1:
1684     case 3:
1685         yaz_log(log_level_rpn, "Relation =");
1686         if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1687                       term_dst))
1688             return 0;
1689         term_value = atoi (term_tmp);
1690         sprintf(term_tmp, "(0*%d)", term_value);
1691         break;
1692     case 103:
1693         /* term_tmp untouched.. */
1694         while (**term_sub != '\0')
1695             (*term_sub)++;
1696         break;
1697     default:
1698         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1699         return 0;
1700     }
1701     yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_tmp);
1702     r = dict_lookup_grep(zh->reg->dict, term_dict, 0, grep_info, max_pos,
1703                           0, grep_handle);
1704
1705     if (r == 1)
1706         zebra_set_partial_result(zh);
1707     else if (r)
1708         yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1709     yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1710     return 1;
1711 }
1712
1713 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1714                               const char **term_sub, 
1715                               oid_value attributeSet, NMEM stream,
1716                               struct grep_info *grep_info,
1717                               int reg_type, int complete_flag,
1718                               int num_bases, char **basenames,
1719                               char *term_dst, 
1720                               const char *xpath_use,
1721                               struct ord_list **ol)
1722 {
1723     char term_dict[2*IT_MAX_WORD+2];
1724     int base_no;
1725     const char *termp;
1726     struct rpn_char_map_info rcmi;
1727
1728     int bases_ok = 0;     /* no of databases with OK attribute */
1729
1730     *ol = ord_list_create(stream);
1731
1732     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1733
1734     for (base_no = 0; base_no < num_bases; base_no++)
1735     {
1736         int max_pos, prefix_len = 0;
1737         int relation_error = 0;
1738         int ord, ord_len, i;
1739         char ord_buf[32];
1740
1741         termp = *term_sub;
1742
1743         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1744         {
1745             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1746                            basenames[base_no]);
1747             return ZEBRA_FAIL;
1748         }
1749
1750         if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use,
1751                               attributeSet, &ord) != ZEBRA_OK)
1752             continue;
1753         bases_ok++;
1754
1755         *ol = ord_list_append(stream, *ol, ord);
1756
1757         ord_len = key_SU_encode (ord, ord_buf);
1758
1759         term_dict[prefix_len++] = '(';
1760         for (i = 0; i < ord_len; i++)
1761         {
1762             term_dict[prefix_len++] = 1;
1763             term_dict[prefix_len++] = ord_buf[i];
1764         }
1765         term_dict[prefix_len++] = ')';
1766         term_dict[prefix_len] = '\0';
1767
1768         if (!numeric_relation(zh, zapt, &termp, term_dict,
1769                               attributeSet, grep_info, &max_pos, reg_type,
1770                               term_dst, &relation_error))
1771         {
1772             if (relation_error)
1773             {
1774                 zebra_setError(zh, relation_error, 0);
1775                 return ZEBRA_FAIL;
1776             }
1777             *term_sub = 0;
1778             return ZEBRA_OK;
1779         }
1780     }
1781     if (!bases_ok)
1782         return ZEBRA_FAIL;
1783     *term_sub = termp;
1784     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1785     return ZEBRA_OK;
1786 }
1787
1788                                  
1789 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1790                                         Z_AttributesPlusTerm *zapt,
1791                                         const char *termz,
1792                                         oid_value attributeSet,
1793                                         NMEM stream,
1794                                         int reg_type, int complete_flag,
1795                                         const char *rank_type, 
1796                                         const char *xpath_use,
1797                                         int num_bases, char **basenames,
1798                                         NMEM rset_nmem,
1799                                         RSET *rset,
1800                                         struct rset_key_control *kc)
1801 {
1802     char term_dst[IT_MAX_WORD+1];
1803     const char *termp = termz;
1804     RSET *result_sets = 0;
1805     int num_result_sets = 0;
1806     ZEBRA_RES res;
1807     struct grep_info grep_info;
1808     int alloc_sets = 0;
1809     zint hits_limit_value;
1810     const char *term_ref_id_str = 0;
1811
1812     term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
1813
1814     yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1815     if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1816         return ZEBRA_FAIL;
1817     while (1)
1818     { 
1819         struct ord_list *ol;
1820         if (alloc_sets == num_result_sets)
1821         {
1822             int add = 10;
1823             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1824                                               sizeof(*rnew));
1825             if (alloc_sets)
1826                 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1827             alloc_sets = alloc_sets + add;
1828             result_sets = rnew;
1829         }
1830         yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1831         grep_info.isam_p_indx = 0;
1832         res = numeric_term(zh, zapt, &termp, attributeSet, stream, &grep_info,
1833                            reg_type, complete_flag, num_bases, basenames,
1834                            term_dst, xpath_use, &ol);
1835         if (res == ZEBRA_FAIL || termp == 0)
1836             break;
1837         yaz_log(YLOG_DEBUG, "term: %s", term_dst);
1838         result_sets[num_result_sets] =
1839             rset_trunc(zh, grep_info.isam_p_buf,
1840                        grep_info.isam_p_indx, term_dst,
1841                        strlen(term_dst), rank_type,
1842                        0 /* preserve position */,
1843                        zapt->term->which, rset_nmem, 
1844                        kc, kc->scope, ol, reg_type,
1845                        hits_limit_value,
1846                        term_ref_id_str);
1847         if (!result_sets[num_result_sets])
1848             break;
1849         num_result_sets++;
1850         if (!*termp)
1851             break;
1852     }
1853     grep_info_delete(&grep_info);
1854
1855     if (res != ZEBRA_OK)
1856         return res;
1857     if (num_result_sets == 0)
1858         *rset = rset_create_null(rset_nmem, kc, 0);
1859     else if (num_result_sets == 1)
1860         *rset = result_sets[0];
1861     else
1862         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1863                                 num_result_sets, result_sets);
1864     if (!*rset)
1865         return ZEBRA_FAIL;
1866     return ZEBRA_OK;
1867 }
1868
1869 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1870                                       Z_AttributesPlusTerm *zapt,
1871                                       const char *termz,
1872                                       oid_value attributeSet,
1873                                       NMEM stream,
1874                                       const char *rank_type, NMEM rset_nmem,
1875                                       RSET *rset,
1876                                       struct rset_key_control *kc)
1877 {
1878     RSFD rsfd;
1879     struct it_key key;
1880     int sys;
1881     *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1882                              res_get (zh->res, "setTmpDir"),0 );
1883     rsfd = rset_open(*rset, RSETF_WRITE);
1884     
1885     sys = atoi(termz);
1886     if (sys <= 0)
1887         sys = 1;
1888     key.mem[0] = sys;
1889     key.mem[1] = 1;
1890     key.len = 2;
1891     rset_write (rsfd, &key);
1892     rset_close (rsfd);
1893     return ZEBRA_OK;
1894 }
1895
1896 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1897                                oid_value attributeSet, NMEM stream,
1898                                Z_SortKeySpecList *sort_sequence,
1899                                const char *rank_type,
1900                                NMEM rset_nmem,
1901                                RSET *rset,
1902                                struct rset_key_control *kc)
1903 {
1904     int i;
1905     int sort_relation_value;
1906     AttrType sort_relation_type;
1907     Z_SortKeySpec *sks;
1908     Z_SortKey *sk;
1909     int oid[OID_SIZE];
1910     oident oe;
1911     char termz[20];
1912     
1913     attr_init_APT(&sort_relation_type, zapt, 7);
1914     sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
1915
1916     if (!sort_sequence->specs)
1917     {
1918         sort_sequence->num_specs = 10;
1919         sort_sequence->specs = (Z_SortKeySpec **)
1920             nmem_malloc(stream, sort_sequence->num_specs *
1921                          sizeof(*sort_sequence->specs));
1922         for (i = 0; i<sort_sequence->num_specs; i++)
1923             sort_sequence->specs[i] = 0;
1924     }
1925     if (zapt->term->which != Z_Term_general)
1926         i = 0;
1927     else
1928         i = atoi_n ((char *) zapt->term->u.general->buf,
1929                     zapt->term->u.general->len);
1930     if (i >= sort_sequence->num_specs)
1931         i = 0;
1932     sprintf(termz, "%d", i);
1933
1934     oe.proto = PROTO_Z3950;
1935     oe.oclass = CLASS_ATTSET;
1936     oe.value = attributeSet;
1937     if (!oid_ent_to_oid (&oe, oid))
1938         return ZEBRA_FAIL;
1939
1940     sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1941     sks->sortElement = (Z_SortElement *)
1942         nmem_malloc(stream, sizeof(*sks->sortElement));
1943     sks->sortElement->which = Z_SortElement_generic;
1944     sk = sks->sortElement->u.generic = (Z_SortKey *)
1945         nmem_malloc(stream, sizeof(*sk));
1946     sk->which = Z_SortKey_sortAttributes;
1947     sk->u.sortAttributes = (Z_SortAttributes *)
1948         nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
1949
1950     sk->u.sortAttributes->id = oid;
1951     sk->u.sortAttributes->list = zapt->attributes;
1952
1953     sks->sortRelation = (int *)
1954         nmem_malloc(stream, sizeof(*sks->sortRelation));
1955     if (sort_relation_value == 1)
1956         *sks->sortRelation = Z_SortKeySpec_ascending;
1957     else if (sort_relation_value == 2)
1958         *sks->sortRelation = Z_SortKeySpec_descending;
1959     else 
1960         *sks->sortRelation = Z_SortKeySpec_ascending;
1961
1962     sks->caseSensitivity = (int *)
1963         nmem_malloc(stream, sizeof(*sks->caseSensitivity));
1964     *sks->caseSensitivity = 0;
1965
1966     sks->which = Z_SortKeySpec_null;
1967     sks->u.null = odr_nullval ();
1968     sort_sequence->specs[i] = sks;
1969     *rset = rset_create_null(rset_nmem, kc, 0);
1970     return ZEBRA_OK;
1971 }
1972
1973
1974 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1975                            oid_value attributeSet,
1976                            struct xpath_location_step *xpath, int max,
1977                            NMEM mem)
1978 {
1979     oid_value curAttributeSet = attributeSet;
1980     AttrType use;
1981     const char *use_string = 0;
1982     
1983     attr_init_APT(&use, zapt, 1);
1984     attr_find_ex(&use, &curAttributeSet, &use_string);
1985
1986     if (!use_string || *use_string != '/')
1987         return -1;
1988
1989     return zebra_parse_xpath_str(use_string, xpath, max, mem);
1990 }
1991  
1992                
1993
1994 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
1995                         int reg_type, const char *term, 
1996                         const char *xpath_use,
1997                         NMEM rset_nmem,
1998                         struct rset_key_control *kc)
1999 {
2000     RSET rset;
2001     struct grep_info grep_info;
2002     char term_dict[2048];
2003     char ord_buf[32];
2004     int prefix_len = 0;
2005     int ord = zebraExplain_lookup_attr_str(zh->reg->zei, 
2006                                            zinfo_index_category_index,
2007                                            reg_type,
2008                                            xpath_use);
2009     int ord_len, i, r, max_pos;
2010     int term_type = Z_Term_characterString;
2011     const char *flags = "void";
2012
2013     if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, '0') == ZEBRA_FAIL)
2014         return rset_create_null(rset_nmem, kc, 0);
2015     
2016     if (ord < 0)
2017         return rset_create_null(rset_nmem, kc, 0);
2018     if (prefix_len)
2019         term_dict[prefix_len++] = '|';
2020     else
2021         term_dict[prefix_len++] = '(';
2022     
2023     ord_len = key_SU_encode (ord, ord_buf);
2024     for (i = 0; i<ord_len; i++)
2025     {
2026         term_dict[prefix_len++] = 1;
2027         term_dict[prefix_len++] = ord_buf[i];
2028     }
2029     term_dict[prefix_len++] = ')';
2030     strcpy(term_dict+prefix_len, term);
2031     
2032     grep_info.isam_p_indx = 0;
2033     r = dict_lookup_grep(zh->reg->dict, term_dict, 0,
2034                           &grep_info, &max_pos, 0, grep_handle);
2035     yaz_log(YLOG_DEBUG, "%s %d positions", term,
2036              grep_info.isam_p_indx);
2037     rset = rset_trunc(zh, grep_info.isam_p_buf,
2038                       grep_info.isam_p_indx, term, strlen(term),
2039                       flags, 1, term_type,rset_nmem,
2040                       kc, kc->scope, 0, reg_type, 0 /* hits_limit */,
2041                       0 /* term_ref_id_str */);
2042     grep_info_delete(&grep_info);
2043     return rset;
2044 }
2045
/*
 * Restricts a result set to the element context given by an X-Path.
 *
 * rset is the incoming "term" set, or NULL for an always-matches search;
 * in the latter case the start-tag set found for the path becomes the
 * result.  xpath / xpath_len are the parsed location steps; a negative
 * xpath_len means that no X-Path is in use and rset is passed through.
 * The resulting set is stored in *rset_out.
 *
 * Returns ZEBRA_OK, or ZEBRA_FAIL (error set on zh, *rset_out set to the
 * current set) when one of the databases cannot be selected.
 */
static
ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
                           int num_bases, char **basenames,
                           NMEM stream, const char *rank_type, RSET rset,
                           int xpath_len, struct xpath_location_step *xpath,
                           NMEM rset_nmem,
                           RSET *rset_out,
                           struct rset_key_control *kc)
{
    int base_no;
    int i;
    int always_matches = rset ? 0 : 1;

    if (xpath_len < 0)
    {
        *rset_out = rset;
        return ZEBRA_OK;
    }

    yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
    for (i = 0; i<xpath_len; i++)
    {
        yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);

    }

    /*
      //a    ->    a/.*
      //a/b  ->    b/a/.*
      /a     ->    a/
      /a/b   ->    b/a/

      /      ->    none

   a[@attr = value]/b[@other = othervalue]

 /e/@a val      range(e/,range(@a,freetext(w,1015,val),@a),e/)
 /a/b val       range(b/a/,freetext(w,1016,val),b/a/)
 /a/b/@c val    range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
 /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
 /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
 /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
      
    */

    dict_grep_cmap (zh->reg->dict, 0, 0);

    for (base_no = 0; base_no < num_bases; base_no++)
    {
        int level = xpath_len;
        int first_path = 1;
        
        if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
        {
            zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
                           basenames[base_no]);
            *rset_out = rset;
            return ZEBRA_FAIL;
        }
        /* walk the steps from the last one towards the first one */
        while (--level >= 0)
        {
            WRBUF xpath_rev = wrbuf_alloc();
            int i;
            RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;

            /* build the path for this level in reverse order
               (steps level .. 1), each non-empty step followed by '/' */
            for (i = level; i >= 1; --i)
            {
                const char *cp = xpath[i].part;
                if (*cp)
                {
                    for (; *cp; cp++)
                    {
                        /* '*' in a step matches anything but '/';
                           a blank is written as byte 1 followed by ' ' */
                        if (*cp == '*')
                            wrbuf_puts(xpath_rev, "[^/]*");
                        else if (*cp == ' ')
                            wrbuf_puts(xpath_rev, "\001 ");
                        else
                            wrbuf_putc(xpath_rev, *cp);

                        /* wrbuf_putc does not null-terminate , but
                           wrbuf_puts below ensures it does.. so xpath_rev
                           is OK iff length is > 0 */
                    }
                    wrbuf_puts(xpath_rev, "/");
                }
                else if (i == 1)  /* // case */
                    wrbuf_puts(xpath_rev, ".*");
            }
            if (xpath[level].predicate &&
                xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
                xpath[level].predicate->u.relation.name[0])
            {
                /* attribute predicate: look up "name" or "name=value";
                   the first character of the name is skipped and regex
                   characters in the value are escaped with a backslash */
                WRBUF wbuf = wrbuf_alloc();
                wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
                if (xpath[level].predicate->u.relation.value)
                {
                    const char *cp = xpath[level].predicate->u.relation.value;
                    wrbuf_putc(wbuf, '=');
                    
                    while (*cp)
                    {
                        if (strchr(REGEX_CHARS, *cp))
                            wrbuf_putc(wbuf, '\\');
                        wrbuf_putc(wbuf, *cp);
                        cp++;
                    }
                }
                /* empty puts: see the note above on null-termination */
                wrbuf_puts(wbuf, "");
                rset_attr = xpath_trunc(
                    zh, stream, '0', wrbuf_buf(wbuf), ZEBRA_XPATH_ATTR_NAME, 
                    rset_nmem, kc);
                wrbuf_free(wbuf, 1);
            } 
            else 
            {
                /* without a predicate only the first level handled for
                   this database (the last step) gets a range of its own */
                if (!first_path)
                {
                    wrbuf_free(xpath_rev, 1);
                    continue;
                }
            }
            yaz_log(log_level_rpn, "xpath_rev (%d) = %.*s", level, 
                    wrbuf_len(xpath_rev), wrbuf_buf(xpath_rev));
            if (wrbuf_len(xpath_rev))
            {
                rset_start_tag = xpath_trunc(zh, stream, '0', 
                                             wrbuf_buf(xpath_rev),
                                             ZEBRA_XPATH_ELM_BEGIN, 
                                             rset_nmem, kc);
                if (always_matches)
                    rset = rset_start_tag;
                else
                {
                    rset_end_tag = xpath_trunc(zh, stream, '0', 
                                               wrbuf_buf(xpath_rev),
                                               ZEBRA_XPATH_ELM_END, 
                                               rset_nmem, kc);
                    
                    /* keep what lies between start and end tag,
                       optionally qualified by the attribute set */
                    rset = rset_create_between(rset_nmem, kc, kc->scope,
                                               rset_start_tag, rset,
                                               rset_end_tag, rset_attr);
                }
            }
            wrbuf_free(xpath_rev, 1);
            first_path = 0;
        }
    }
    *rset_out = rset;
    return ZEBRA_OK;
}
2196
2197 #define MAX_XPATH_STEPS 10
2198
2199 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2200                                 oid_value attributeSet, NMEM stream,
2201                                 Z_SortKeySpecList *sort_sequence,
2202                                 int num_bases, char **basenames, 
2203                                 NMEM rset_nmem,
2204                                 RSET *rset,
2205                                 struct rset_key_control *kc)
2206 {
2207     ZEBRA_RES res = ZEBRA_OK;
2208     unsigned reg_id;
2209     char *search_type = NULL;
2210     char rank_type[128];
2211     int complete_flag;
2212     int sort_flag;
2213     char termz[IT_MAX_WORD+1];
2214     int xpath_len;
2215     const char *xpath_use = 0;
2216     struct xpath_location_step xpath[MAX_XPATH_STEPS];
2217
2218     if (!log_level_set)
2219     {
2220         log_level_rpn = yaz_log_module_level("rpn");
2221         log_level_set = 1;
2222     }
2223     zebra_maps_attr(zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2224                     rank_type, &complete_flag, &sort_flag);
2225     
2226     yaz_log(YLOG_DEBUG, "reg_id=%c", reg_id);
2227     yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2228     yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2229     yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2230
2231     if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2232         return ZEBRA_FAIL;
2233
2234     if (sort_flag)
2235         return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2236                              rank_type, rset_nmem, rset, kc);
2237     /* consider if an X-Path query is used */
2238     xpath_len = rpn_check_xpath(zh, zapt, attributeSet, 
2239                                 xpath, MAX_XPATH_STEPS, stream);
2240     if (xpath_len >= 0)
2241     {
2242         if (xpath[xpath_len-1].part[0] == '@') 
2243             xpath_use = ZEBRA_XPATH_ATTR_CDATA;  /* last step is attribute  */
2244         else
2245             xpath_use = ZEBRA_XPATH_CDATA;  /* searching for cdata */        
2246
2247         if (1)
2248         {
2249             AttrType relation;
2250             int relation_value;
2251
2252             attr_init_APT(&relation, zapt, 2);
2253             relation_value = attr_find(&relation, NULL);
2254
2255             if (relation_value == 103) /* alwaysmatches */
2256             {
2257                 *rset = 0; /* signal no "term" set */
2258                 return rpn_search_xpath(zh, num_bases, basenames,
2259                                         stream, rank_type, *rset, 
2260                                         xpath_len, xpath, rset_nmem, rset, kc);
2261             }
2262         }
2263     }
2264
2265     /* search using one of the various search type strategies
2266        termz is our UTF-8 search term
2267        attributeSet is top-level default attribute set 
2268        stream is ODR for search
2269        reg_id is the register type
2270        complete_flag is 1 for complete subfield, 0 for incomplete
2271        xpath_use is use-attribute to be used for X-Path search, 0 for none
2272     */
2273     if (!strcmp(search_type, "phrase"))
2274     {
2275         res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2276                                     reg_id, complete_flag, rank_type,
2277                                     xpath_use,
2278                                     num_bases, basenames, rset_nmem,
2279                                     rset, kc);
2280     }
2281     else if (!strcmp(search_type, "and-list"))
2282     {
2283         res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2284                                       reg_id, complete_flag, rank_type,
2285                                       xpath_use,
2286                                       num_bases, basenames, rset_nmem,
2287                                       rset, kc);
2288     }
2289     else if (!strcmp(search_type, "or-list"))
2290     {
2291         res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2292                                      reg_id, complete_flag, rank_type,
2293                                      xpath_use,
2294                                      num_bases, basenames, rset_nmem,
2295                                      rset, kc);
2296     }
2297     else if (!strcmp(search_type, "local"))
2298     {
2299         res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2300                                    rank_type, rset_nmem, rset, kc);
2301     }
2302     else if (!strcmp(search_type, "numeric"))
2303     {
2304         res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2305                                      reg_id, complete_flag, rank_type,
2306                                      xpath_use,
2307                                      num_bases, basenames, rset_nmem,
2308                                      rset, kc);
2309     }
2310     else
2311     {
2312         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2313         res = ZEBRA_FAIL;
2314     }
2315     if (res != ZEBRA_OK)
2316         return res;
2317     if (!*rset)
2318         return ZEBRA_FAIL;
2319     return rpn_search_xpath(zh, num_bases, basenames,
2320                             stream, rank_type, *rset, 
2321                             xpath_len, xpath, rset_nmem, rset, kc);
2322 }
2323
/* forward declaration: rpn_search_top() below calls rpn_search_structure()
   before its definition, and the function also calls itself */
static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
                                      oid_value attributeSet, 
                                      NMEM stream, NMEM rset_nmem,
                                      Z_SortKeySpecList *sort_sequence,
                                      int num_bases, char **basenames,
                                      RSET **result_sets, int *num_result_sets,
                                      Z_Operator *parent_op,
                                      struct rset_key_control *kc);
2332
2333 ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs,
2334                                    zint *approx_limit)
2335 {
2336     ZEBRA_RES res = ZEBRA_OK;
2337     if (zs->which == Z_RPNStructure_complex)
2338     {
2339         if (res == ZEBRA_OK)
2340             res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
2341                                            approx_limit);
2342         if (res == ZEBRA_OK)
2343             res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
2344                                            approx_limit);
2345     }
2346     else if (zs->which == Z_RPNStructure_simple)
2347     {
2348         if (zs->u.simple->which == Z_Operand_APT)
2349         {
2350             Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
2351             AttrType global_hits_limit_attr;
2352             int l;
2353             
2354             attr_init_APT(&global_hits_limit_attr, zapt, 12);
2355             
2356             l = attr_find(&global_hits_limit_attr, NULL);
2357             if (l != -1)
2358                 *approx_limit = l;
2359         }
2360     }
2361     return res;
2362 }
2363
2364 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2365                          oid_value attributeSet, 
2366                          NMEM stream, NMEM rset_nmem,
2367                          Z_SortKeySpecList *sort_sequence,
2368                          int num_bases, char **basenames,
2369                          RSET *result_set)
2370 {
2371     RSET *result_sets = 0;
2372     int num_result_sets = 0;
2373     ZEBRA_RES res;
2374     struct rset_key_control *kc = zebra_key_control_create(zh);
2375
2376     res = rpn_search_structure(zh, zs, attributeSet,
2377                                stream, rset_nmem,
2378                                sort_sequence, 
2379                                num_bases, basenames,
2380                                &result_sets, &num_result_sets,
2381                                0 /* no parent op */,
2382                                kc);
2383     if (res != ZEBRA_OK)
2384     {
2385         int i;
2386         for (i = 0; i<num_result_sets; i++)
2387             rset_delete(result_sets[i]);
2388         *result_set = 0;
2389     }
2390     else
2391     {
2392         assert(num_result_sets == 1);
2393         assert(result_sets);
2394         assert(*result_sets);
2395         *result_set = *result_sets;
2396     }
2397     (*kc->dec)(kc);
2398     return res;
2399 }
2400
2401 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2402                                oid_value attributeSet, 
2403                                NMEM stream, NMEM rset_nmem,
2404                                Z_SortKeySpecList *sort_sequence,
2405                                int num_bases, char **basenames,
2406                                RSET **result_sets, int *num_result_sets,
2407                                Z_Operator *parent_op,
2408                                struct rset_key_control *kc)
2409 {
2410     *num_result_sets = 0;
2411     if (zs->which == Z_RPNStructure_complex)
2412     {
2413         ZEBRA_RES res;
2414         Z_Operator *zop = zs->u.complex->roperator;
2415         RSET *result_sets_l = 0;
2416         int num_result_sets_l = 0;
2417         RSET *result_sets_r = 0;
2418         int num_result_sets_r = 0;
2419
2420         res = rpn_search_structure(zh, zs->u.complex->s1,
2421                                    attributeSet, stream, rset_nmem,
2422                                    sort_sequence,
2423                                    num_bases, basenames,
2424                                    &result_sets_l, &num_result_sets_l,
2425                                    zop, kc);
2426         if (res != ZEBRA_OK)
2427         {
2428             int i;
2429             for (i = 0; i<num_result_sets_l; i++)
2430                 rset_delete(result_sets_l[i]);
2431             return res;
2432         }
2433         res = rpn_search_structure(zh, zs->u.complex->s2,
2434                                    attributeSet, stream, rset_nmem,
2435                                    sort_sequence,
2436                                    num_bases, basenames,
2437                                    &result_sets_r, &num_result_sets_r,
2438                                    zop, kc);
2439         if (res != ZEBRA_OK)
2440         {
2441             int i;
2442             for (i = 0; i<num_result_sets_l; i++)
2443                 rset_delete(result_sets_l[i]);
2444             for (i = 0; i<num_result_sets_r; i++)
2445                 rset_delete(result_sets_r[i]);
2446             return res;
2447         }
2448
2449         /* make a new list of result for all children */
2450         *num_result_sets = num_result_sets_l + num_result_sets_r;
2451         *result_sets = nmem_malloc(stream, *num_result_sets * 
2452                                    sizeof(**result_sets));
2453         memcpy(*result_sets, result_sets_l, 
2454                num_result_sets_l * sizeof(**result_sets));
2455         memcpy(*result_sets + num_result_sets_l, result_sets_r, 
2456                num_result_sets_r * sizeof(**result_sets));
2457
2458         if (!parent_op || parent_op->which != zop->which
2459             || (zop->which != Z_Operator_and &&
2460                 zop->which != Z_Operator_or))
2461         {
2462             /* parent node different from this one (or non-present) */
2463             /* we must combine result sets now */
2464             RSET rset;
2465             switch (zop->which)
2466             {
2467             case Z_Operator_and:
2468                 rset = rset_create_and(rset_nmem, kc,
2469                                        kc->scope,
2470                                        *num_result_sets, *result_sets);
2471                 break;
2472             case Z_Operator_or:
2473                 rset = rset_create_or(rset_nmem, kc,
2474                                       kc->scope, 0, /* termid */
2475                                       *num_result_sets, *result_sets);
2476                 break;
2477             case Z_Operator_and_not:
2478                 rset = rset_create_not(rset_nmem, kc,
2479                                        kc->scope,
2480                                        (*result_sets)[0],
2481                                        (*result_sets)[1]);
2482                 break;
2483             case Z_Operator_prox:
2484                 if (zop->u.prox->which != Z_ProximityOperator_known)
2485                 {
2486                     zebra_setError(zh, 
2487                                    YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2488                                    0);
2489                     return ZEBRA_FAIL;
2490                 }
2491                 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2492                 {
2493                     zebra_setError_zint(zh,
2494                                         YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2495                                         *zop->u.prox->u.known);
2496                     return ZEBRA_FAIL;
2497                 }
2498                 else
2499                 {
2500                     rset = rset_create_prox(rset_nmem, kc,
2501                                             kc->scope,
2502                                             *num_result_sets, *result_sets, 
2503                                             *zop->u.prox->ordered,
2504                                             (!zop->u.prox->exclusion ? 
2505                                              0 : *zop->u.prox->exclusion),
2506                                             *zop->u.prox->relationType,
2507                                             *zop->u.prox->distance );
2508                 }
2509                 break;
2510             default:
2511                 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
2512                 return ZEBRA_FAIL;
2513             }
2514             *num_result_sets = 1;
2515             *result_sets = nmem_malloc(stream, *num_result_sets * 
2516                                        sizeof(**result_sets));
2517             (*result_sets)[0] = rset;
2518         }
2519     }
2520     else if (zs->which == Z_RPNStructure_simple)
2521     {
2522         RSET rset;
2523         ZEBRA_RES res;
2524
2525         if (zs->u.simple->which == Z_Operand_APT)
2526         {
2527             yaz_log(YLOG_DEBUG, "rpn_search_APT");
2528             res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2529                                  attributeSet, stream, sort_sequence,
2530                                  num_bases, basenames, rset_nmem, &rset,
2531                                  kc);
2532             if (res != ZEBRA_OK)
2533                 return res;
2534         }
2535         else if (zs->u.simple->which == Z_Operand_resultSetId)
2536         {
2537             yaz_log(YLOG_DEBUG, "rpn_search_ref");
2538             rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2539             if (!rset)
2540             {
2541                 zebra_setError(zh, 
2542                                YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2543                                zs->u.simple->u.resultSetId);
2544                 return ZEBRA_FAIL;
2545             }
2546             rset_dup(rset);
2547         }
2548         else
2549         {
2550             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2551             return ZEBRA_FAIL;
2552         }
2553         *num_result_sets = 1;
2554         *result_sets = nmem_malloc(stream, *num_result_sets * 
2555                                    sizeof(**result_sets));
2556         (*result_sets)[0] = rset;
2557     }
2558     else
2559     {
2560         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2561         return ZEBRA_FAIL;
2562     }
2563     return ZEBRA_OK;
2564 }
2565
2566
2567
2568 /*
2569  * Local variables:
2570  * c-basic-offset: 4
2571  * indent-tabs-mode: nil
2572  * End:
2573  * vim: shiftwidth=4 tabstop=8 expandtab
2574  */
2575