Version 2.0.19. Use YAZ' wrbuf_write_escaped.
[idzebra-moved-to-github.git] / index / rpnsearch.c
1 /* $Id: rpnsearch.c,v 1.23 2007-11-15 08:53:25 adam Exp $
2    Copyright (C) 1995-2007
3    Index Data ApS
4
5 This file is part of the Zebra server.
6
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
20
21 */
22
23 #include <stdio.h>
24 #include <assert.h>
25 #ifdef WIN32
26 #include <io.h>
27 #endif
28 #if HAVE_UNISTD_H
29 #include <unistd.h>
30 #endif
31 #include <ctype.h>
32
33 #include <yaz/diagbib1.h>
34 #include "index.h"
35 #include <zebra_xpath.h>
36 #include <attrfind.h>
37 #include <charmap.h>
38 #include <rset.h>
39
40 static int log_level_set = 0;
41 static int log_level_rpn = 0;
42
43 #define TERMSET_DISABLE 1
44
45 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
46 {
47     struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
48     const char **out = zebra_maps_input(p->zm, from, len, 0);
49 #if 0
50     if (out && *out)
51     {
52         const char *outp = *out;
53         yaz_log(YLOG_LOG, "---");
54         while (*outp)
55         {
56             yaz_log(YLOG_LOG, "%02X", *outp);
57             outp++;
58         }
59     }
60 #endif
61     return out;
62 }
63
64 void rpn_char_map_prepare(struct zebra_register *reg, zebra_map_t zm,
65                           struct rpn_char_map_info *map_info)
66 {
67     map_info->zm = zm;
68     if (zebra_maps_is_icu(zm))
69         dict_grep_cmap(reg->dict, 0, 0);
70     else
71         dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
72 }
73
74 #define TERM_COUNT        
75        
76 struct grep_info {        
77 #ifdef TERM_COUNT        
78     int *term_no;        
79 #endif        
80     ISAM_P *isam_p_buf;
81     int isam_p_size;        
82     int isam_p_indx;
83     int trunc_max;
84     ZebraHandle zh;
85     const char *index_type;
86     ZebraSet termset;
87 };        
88
89 static int add_isam_p(const char *name, const char *info,
90                       struct grep_info *p)
91 {
92     if (!log_level_set)
93     {
94         log_level_rpn = yaz_log_module_level("rpn");
95         log_level_set = 1;
96     }
97     /* we may have to stop this madness.. NOTE: -1 so that if
98        truncmax == trunxlimit we do *not* generate result sets */
99     if (p->isam_p_indx >= p->trunc_max - 1)
100         return 1;
101
102     if (p->isam_p_indx == p->isam_p_size)
103     {
104         ISAM_P *new_isam_p_buf;
105 #ifdef TERM_COUNT        
106         int *new_term_no;        
107 #endif
108         p->isam_p_size = 2*p->isam_p_size + 100;
109         new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
110                                             p->isam_p_size);
111         if (p->isam_p_buf)
112         {
113             memcpy(new_isam_p_buf, p->isam_p_buf,
114                     p->isam_p_indx * sizeof(*p->isam_p_buf));
115             xfree(p->isam_p_buf);
116         }
117         p->isam_p_buf = new_isam_p_buf;
118
119 #ifdef TERM_COUNT
120         new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
121         if (p->term_no)
122         {
123             memcpy(new_term_no, p->isam_p_buf,
124                     p->isam_p_indx * sizeof(*p->term_no));
125             xfree(p->term_no);
126         }
127         p->term_no = new_term_no;
128 #endif
129     }
130     assert(*info == sizeof(*p->isam_p_buf));
131     memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
132
133     if (p->termset)
134     {
135         const char *db;
136         char term_tmp[IT_MAX_WORD];
137         int ord = 0;
138         const char *index_name;
139         int len = key_SU_decode(&ord, (const unsigned char *) name);
140         
141         zebra_term_untrans (p->zh, p->index_type, term_tmp, name+len);
142         yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
143         zebraExplain_lookup_ord(p->zh->reg->zei,
144                                 ord, 0 /* index_type */, &db, &index_name);
145         yaz_log(log_level_rpn, "grep:  db=%s index=%s", db, index_name);
146         
147         resultSetAddTerm(p->zh, p->termset, name[len], db,
148                          index_name, term_tmp);
149     }
150     (p->isam_p_indx)++;
151     return 0;
152 }
153
/* Grep callback adapter: p is the struct grep_info collecting the hits;
   the result of add_isam_p is passed straight back. */
static int grep_handle(char *name, const char *info, void *p)
{
    struct grep_info *collector = (struct grep_info *) p;

    return add_isam_p(name, info, collector);
}
158
159 static int term_pre(zebra_map_t zm, const char **src,
160                     const char *ct1, const char *ct2, int first)
161 {
162     const char *s1, *s0 = *src;
163     const char **map;
164
165     /* skip white space */
166     while (*s0)
167     {
168         if (ct1 && strchr(ct1, *s0))
169             break;
170         if (ct2 && strchr(ct2, *s0))
171             break;
172         s1 = s0;
173         map = zebra_maps_input(zm, &s1, strlen(s1), first);
174         if (**map != *CHR_SPACE)
175             break;
176         s0 = s1;
177     }
178     *src = s0;
179     return *s0;
180 }
181
182
/*
 * esc_str - render a byte buffer as text for debugging.
 * Each input byte becomes "HH:c  " (two hex digits, a colon, the character
 * itself or '?' when it is outside 32..126, and two spaces).  Output stops
 * with a trailing ".." once fewer than 20 bytes of out_buf remain.
 *  out_buf/out_size:  destination; out_size must exceed 20
 *  in_buf/in_size:    bytes to render
 */
static void esc_str(char *out_buf, size_t out_size,
                    const char *in_buf, int in_size)
{
    size_t used = 0;   /* length of the string built so far */
    int idx;

    assert(out_buf);
    assert(in_buf);
    assert(out_size > 20);
    out_buf[0] = '\0';
    for (idx = 0; idx < in_size; idx++)
    {
        int byte = in_buf[idx] & 0xff;
        int shown = (byte >= 32 && byte <= 126) ? byte : '?';

        used += (size_t) sprintf(out_buf + used, "%02X:%c  ", byte, shown);
        if (used > out_size - 20)
        {
            strcpy(out_buf + used, "..");
            break;
        }
    }
}
208
209 #define REGEX_CHARS " []()|.*+?!"
210
211 static void add_non_space(const char *start, const char *end,
212                           WRBUF term_dict,
213                           char *dst_term, int *dst_ptr,
214                           const char **map, int q_map_match)
215 {
216     size_t sz = end - start;
217     memcpy(dst_term + *dst_ptr, start, sz);
218     (*dst_ptr) += sz;
219     if (!q_map_match)
220     {
221         while (start < end)
222         {
223             if (strchr(REGEX_CHARS, *start))
224                 wrbuf_putc(term_dict, '\\');
225             wrbuf_putc(term_dict, *start);
226             start++;
227         }
228     }
229     else
230     {
231         char tmpbuf[80];
232         esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
233         
234         wrbuf_puts(term_dict, map[0]);
235     }
236 }
237
238
239 static int term_100_icu(zebra_map_t zm,
240                         const char **src, WRBUF term_dict, int space_split,
241                         char *dst_term)
242 {
243     int no = 0;
244     const char *res_buf = 0;
245     size_t res_len = 0;
246     int r = zebra_map_tokenize(zm, *src, strlen(*src),
247                                &res_buf, &res_len);
248
249     yaz_log(YLOG_LOG, "term_100_icu r=%d", r);
250     if (r)
251         strcat(dst_term, *src);
252     *src += strlen(*src);
253     while (r)
254     {
255         int i;
256         no++;
257         for (i = 0; i < res_len; i++)
258         {
259             if (strchr(REGEX_CHARS, res_buf[i]))
260                 wrbuf_putc(term_dict, '\\');
261             if (res_buf[i] < 32)
262                 wrbuf_putc(term_dict, 1);
263             wrbuf_putc(term_dict, res_buf[i]);
264         }
265         r = zebra_map_tokenize(zm, 0, 0, &res_buf, &res_len);
266     }
267     return no;
268 }
269
270 /* term_100: handle term, where trunc = none(no operators at all) */
271 static int term_100(zebra_map_t zm,
272                     const char **src, WRBUF term_dict, int space_split,
273                     char *dst_term)
274 {
275     const char *s0;
276     const char **map;
277     int i = 0;
278     int j = 0;
279
280     const char *space_start = 0;
281     const char *space_end = 0;
282
283     if (zebra_maps_is_icu(zm))
284         return term_100_icu(zm, src, term_dict, space_split, dst_term);
285
286     if (!term_pre(zm, src, NULL, NULL, !space_split))
287         return 0;
288     s0 = *src;
289     while (*s0)
290     {
291         const char *s1 = s0;
292         int q_map_match = 0;
293         map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
294         if (space_split)
295         {
296             if (**map == *CHR_SPACE)
297                 break;
298         }
299         else  /* complete subfield only. */
300         {
301             if (**map == *CHR_SPACE)
302             {   /* save space mapping for later  .. */
303                 space_start = s1;
304                 space_end = s0;
305                 continue;
306             }
307             else if (space_start)
308             {   /* reload last space */
309                 while (space_start < space_end)
310                 {
311                     if (strchr(REGEX_CHARS, *space_start))
312                         wrbuf_putc(term_dict, '\\');
313                     dst_term[j++] = *space_start;
314                     wrbuf_putc(term_dict, *space_start);
315                     space_start++;
316                                
317                 }
318                 /* and reset */
319                 space_start = space_end = 0;
320             }
321         }
322         i++;
323
324         add_non_space(s1, s0, term_dict, dst_term, &j,
325                       map, q_map_match);
326     }
327     dst_term[j] = '\0';
328     *src = s0;
329     return i;
330 }
331
332 /* term_101: handle term, where trunc = Process # */
333 static int term_101(zebra_map_t zm,
334                     const char **src, WRBUF term_dict, int space_split,
335                     char *dst_term)
336 {
337     const char *s0;
338     const char **map;
339     int i = 0;
340     int j = 0;
341
342     if (!term_pre(zm, src, "#", "#", !space_split))
343         return 0;
344     s0 = *src;
345     while (*s0)
346     {
347         if (*s0 == '#')
348         {
349             i++;
350             wrbuf_puts(term_dict, ".*");
351             dst_term[j++] = *s0++;
352         }
353         else
354         {
355             const char *s1 = s0;
356             int q_map_match = 0;
357             map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
358             if (space_split && **map == *CHR_SPACE)
359                 break;
360
361             i++;
362             add_non_space(s1, s0, term_dict, dst_term, &j,
363                           map, q_map_match);
364         }
365     }
366     dst_term[j++] = '\0';
367     *src = s0;
368     return i;
369 }
370
371 /* term_103: handle term, where trunc = re-2 (regular expressions) */
372 static int term_103(zebra_map_t zm, const char **src,
373                     WRBUF term_dict, int *errors, int space_split,
374                     char *dst_term)
375 {
376     int i = 0;
377     int j = 0;
378     const char *s0;
379     const char **map;
380
381     if (!term_pre(zm, src, "^\\()[].*+?|", "(", !space_split))
382         return 0;
383     s0 = *src;
384     if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
385         isdigit(((const unsigned char *)s0)[1]))
386     {
387         *errors = s0[1] - '0';
388         s0 += 3;
389         if (*errors > 3)
390             *errors = 3;
391     }
392     while (*s0)
393     {
394         if (strchr("^\\()[].*+?|-", *s0))
395         {
396             dst_term[j++] = *s0;
397             wrbuf_putc(term_dict, *s0);
398             s0++;
399             i++;
400         }
401         else
402         {
403             const char *s1 = s0;
404             int q_map_match = 0;
405             map = zebra_maps_search(zm, &s0, strlen(s0),  &q_map_match);
406             if (space_split && **map == *CHR_SPACE)
407                 break;
408
409             i++;
410             add_non_space(s1, s0, term_dict, dst_term, &j,
411                           map, q_map_match);
412         }
413     }
414     dst_term[j] = '\0';
415     *src = s0;
416     
417     return i;
418 }
419
420 /* term_103: handle term, where trunc = re-1 (regular expressions) */
421 static int term_102(zebra_map_t zm, const char **src,
422                     WRBUF term_dict, int space_split, char *dst_term)
423 {
424     return term_103(zm, src, term_dict, NULL, space_split, dst_term);
425 }
426
427
428 /* term_104: handle term, process # and ! */
429 static int term_104(zebra_map_t zm, const char **src, 
430                     WRBUF term_dict, int space_split, char *dst_term)
431 {
432     const char *s0;
433     const char **map;
434     int i = 0;
435     int j = 0;
436
437     if (!term_pre(zm, src, "?*#", "?*#", !space_split))
438         return 0;
439     s0 = *src;
440     while (*s0)
441     {
442         if (*s0 == '?')
443         {
444             i++;
445             dst_term[j++] = *s0++;
446             if (*s0 >= '0' && *s0 <= '9')
447             {
448                 int limit = 0;
449                 while (*s0 >= '0' && *s0 <= '9')
450                 {
451                     limit = limit * 10 + (*s0 - '0');
452                     dst_term[j++] = *s0++;
453                 }
454                 if (limit > 20)
455                     limit = 20;
456                 while (--limit >= 0)
457                 {
458                     wrbuf_puts(term_dict, ".?");
459                 }
460             }
461             else
462             {
463                 wrbuf_puts(term_dict, ".*");
464             }
465         }
466         else if (*s0 == '*')
467         {
468             i++;
469             wrbuf_puts(term_dict, ".*");
470             dst_term[j++] = *s0++;
471         }
472         else if (*s0 == '#')
473         {
474             i++;
475             wrbuf_puts(term_dict, ".");
476             dst_term[j++] = *s0++;
477         }
478         else
479         {
480             const char *s1 = s0;
481             int q_map_match = 0;
482             map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
483             if (space_split && **map == *CHR_SPACE)
484                 break;
485
486             i++;
487             add_non_space(s1, s0, term_dict, dst_term, &j,
488                           map, q_map_match);
489         }
490     }
491     dst_term[j++] = '\0';
492     *src = s0;
493     return i;
494 }
495
496 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
497 static int term_105(zebra_map_t zm, const char **src, 
498                     WRBUF term_dict, int space_split,
499                     char *dst_term, int right_truncate)
500 {
501     const char *s0;
502     const char **map;
503     int i = 0;
504     int j = 0;
505
506     if (!term_pre(zm, src, "*!", "*!", !space_split))
507         return 0;
508     s0 = *src;
509     while (*s0)
510     {
511         if (*s0 == '*')
512         {
513             i++;
514             wrbuf_puts(term_dict, ".*");
515             dst_term[j++] = *s0++;
516         }
517         else if (*s0 == '!')
518         {
519             i++;
520             wrbuf_putc(term_dict, '.');
521             dst_term[j++] = *s0++;
522         }
523         else
524         {
525             const char *s1 = s0;
526             int q_map_match = 0;
527             map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
528             if (space_split && **map == *CHR_SPACE)
529                 break;
530
531             i++;
532             add_non_space(s1, s0, term_dict, dst_term, &j,
533                           map, q_map_match);
534         }
535     }
536     if (right_truncate)
537         wrbuf_puts(term_dict, ".*");
538     dst_term[j++] = '\0';
539     *src = s0;
540     return i;
541 }
542
543
544 /* gen_regular_rel - generate regular expression from relation
545  *  val:     border value (inclusive)
546  *  islt:    1 if <=; 0 if >=.
547  */
548 static void gen_regular_rel(WRBUF term_dict, int val, int islt)
549 {
550     char dst_buf[20*5*20]; /* assuming enough for expansion */
551     char *dst = dst_buf;
552     int dst_p;
553     int w, d, i;
554     int pos = 0;
555     char numstr[20];
556
557     yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
558     if (val >= 0)
559     {
560         if (islt)
561             strcpy(dst, "(-[0-9]+|(");
562         else
563             strcpy(dst, "((");
564     } 
565     else
566     {
567         if (!islt)
568         {
569             strcpy(dst, "([0-9]+|-(");
570             islt = 1;
571         }
572         else
573         {
574             strcpy(dst, "(-(");
575             islt = 0;
576         }
577         val = -val;
578     }
579     dst_p = strlen(dst);
580     sprintf(numstr, "%d", val);
581     for (w = strlen(numstr); --w >= 0; pos++)
582     {
583         d = numstr[w];
584         if (pos > 0)
585         {
586             if (islt)
587             {
588                 if (d == '0')
589                     continue;
590                 d--;
591             } 
592             else
593             {
594                 if (d == '9')
595                     continue;
596                 d++;
597             }
598         }
599         
600         strcpy(dst + dst_p, numstr);
601         dst_p = strlen(dst) - pos - 1;
602
603         if (islt)
604         {
605             if (d != '0')
606             {
607                 dst[dst_p++] = '[';
608                 dst[dst_p++] = '0';
609                 dst[dst_p++] = '-';
610                 dst[dst_p++] = d;
611                 dst[dst_p++] = ']';
612             }
613             else
614                 dst[dst_p++] = d;
615         }
616         else
617         {
618             if (d != '9')
619             { 
620                 dst[dst_p++] = '[';
621                 dst[dst_p++] = d;
622                 dst[dst_p++] = '-';
623                 dst[dst_p++] = '9';
624                 dst[dst_p++] = ']';
625             }
626             else
627                 dst[dst_p++] = d;
628         }
629         for (i = 0; i<pos; i++)
630         {
631             dst[dst_p++] = '[';
632             dst[dst_p++] = '0';
633             dst[dst_p++] = '-';
634             dst[dst_p++] = '9';
635             dst[dst_p++] = ']';
636         }
637         dst[dst_p++] = '|';
638     }
639     dst[dst_p] = '\0';
640     if (islt)
641     {
642         /* match everything less than 10^(pos-1) */
643         strcat(dst, "0*");
644         for (i = 1; i<pos; i++)
645             strcat(dst, "[0-9]?");
646     }
647     else
648     {
649         /* match everything greater than 10^pos */
650         for (i = 0; i <= pos; i++)
651             strcat(dst, "[0-9]");
652         strcat(dst, "[0-9]*");
653     }
654     strcat(dst, "))");
655     wrbuf_puts(term_dict, dst);
656 }
657
658 void string_rel_add_char(WRBUF term_p, WRBUF wsrc, int *indx)
659 {
660     const char *src = wrbuf_cstr(wsrc);
661     if (src[*indx] == '\\')
662     {
663         wrbuf_putc(term_p, src[*indx]);
664         (*indx)++;
665     }
666     wrbuf_putc(term_p, src[*indx]);
667     (*indx)++;
668 }
669
670 /*
671  *   >  abc     ([b-].*|a[c-].*|ab[d-].*|abc.+)
672  *              ([^-a].*|a[^-b].*ab[^-c].*|abc.+)
673  *   >= abc     ([b-].*|a[c-].*|ab[c-].*)
674  *              ([^-a].*|a[^-b].*|ab[c-].*)
675  *   <  abc     ([-0].*|a[-a].*|ab[-b].*)
676  *              ([^a-].*|a[^b-].*|ab[^c-].*)
677  *   <= abc     ([-0].*|a[-a].*|ab[-b].*|abc)
678  *              ([^a-].*|a[^b-].*|ab[^c-].*|abc)
679  */
680 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
681                            const char **term_sub, WRBUF term_dict,
682                            const Odr_oid *attributeSet,
683                            zebra_map_t zm, int space_split, char *term_dst,
684                            int *error_code)
685 {
686     AttrType relation;
687     int relation_value;
688     int i;
689     WRBUF term_component = wrbuf_alloc();
690
691     attr_init_APT(&relation, zapt, 2);
692     relation_value = attr_find(&relation, NULL);
693
694     *error_code = 0;
695     yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
696     switch (relation_value)
697     {
698     case 1:
699         if (!term_100(zm, term_sub, term_component, space_split, term_dst))
700         {
701             wrbuf_destroy(term_component);
702             return 0;
703         }
704         yaz_log(log_level_rpn, "Relation <");
705         
706         wrbuf_putc(term_dict, '(');
707         for (i = 0; i < wrbuf_len(term_component); )
708         {
709             int j = 0;
710             
711             if (i)
712                 wrbuf_putc(term_dict, '|');
713             while (j < i)
714                 string_rel_add_char(term_dict, term_component, &j);
715
716             wrbuf_putc(term_dict, '[');
717
718             wrbuf_putc(term_dict, '^');
719             
720             wrbuf_putc(term_dict, 1);
721             wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
722             
723             string_rel_add_char(term_dict, term_component, &i);
724             wrbuf_putc(term_dict, '-');
725             
726             wrbuf_putc(term_dict, ']');
727             wrbuf_putc(term_dict, '.');
728             wrbuf_putc(term_dict, '*');
729         }
730         wrbuf_putc(term_dict, ')');
731         break;
732     case 2:
733         if (!term_100(zm, term_sub, term_component, space_split, term_dst))
734         {
735             wrbuf_destroy(term_component);
736             return 0;
737         }
738         yaz_log(log_level_rpn, "Relation <=");
739
740         wrbuf_putc(term_dict, '(');
741         for (i = 0; i < wrbuf_len(term_component); )
742         {
743             int j = 0;
744
745             while (j < i)
746                 string_rel_add_char(term_dict, term_component, &j);
747             wrbuf_putc(term_dict, '[');
748
749             wrbuf_putc(term_dict, '^');
750
751             wrbuf_putc(term_dict, 1);
752             wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
753
754             string_rel_add_char(term_dict, term_component, &i);
755             wrbuf_putc(term_dict, '-');
756
757             wrbuf_putc(term_dict, ']');
758             wrbuf_putc(term_dict, '.');
759             wrbuf_putc(term_dict, '*');
760
761             wrbuf_putc(term_dict, '|');
762         }
763         for (i = 0; i < wrbuf_len(term_component); )
764             string_rel_add_char(term_dict, term_component, &i);
765         wrbuf_putc(term_dict, ')');
766         break;
767     case 5:
768         if (!term_100(zm, term_sub, term_component, space_split, term_dst))
769         {
770             wrbuf_destroy(term_component);
771             return 0;
772         }
773         yaz_log(log_level_rpn, "Relation >");
774
775         wrbuf_putc(term_dict, '(');
776         for (i = 0; i < wrbuf_len(term_component); )
777         {
778             int j = 0;
779
780             while (j < i)
781                 string_rel_add_char(term_dict, term_component, &j);
782             wrbuf_putc(term_dict, '[');
783             
784             wrbuf_putc(term_dict, '^');
785             wrbuf_putc(term_dict, '-');
786             string_rel_add_char(term_dict, term_component, &i);
787
788             wrbuf_putc(term_dict, ']');
789             wrbuf_putc(term_dict, '.');
790             wrbuf_putc(term_dict, '*');
791
792             wrbuf_putc(term_dict, '|');
793         }
794         for (i = 0; i < wrbuf_len(term_component); )
795             string_rel_add_char(term_dict, term_component, &i);
796         wrbuf_putc(term_dict, '.');
797         wrbuf_putc(term_dict, '+');
798         wrbuf_putc(term_dict, ')');
799         break;
800     case 4:
801         if (!term_100(zm, term_sub, term_component, space_split, term_dst))
802         {
803             wrbuf_destroy(term_component);
804             return 0;
805         }
806         yaz_log(log_level_rpn, "Relation >=");
807
808         wrbuf_putc(term_dict, '(');
809         for (i = 0; i < wrbuf_len(term_component); )
810         {
811             int j = 0;
812
813             if (i)
814                 wrbuf_putc(term_dict, '|');
815             while (j < i)
816                 string_rel_add_char(term_dict, term_component, &j);
817             wrbuf_putc(term_dict, '[');
818
819             if (i < wrbuf_len(term_component)-1)
820             {
821                 wrbuf_putc(term_dict, '^');
822                 wrbuf_putc(term_dict, '-');
823                 string_rel_add_char(term_dict, term_component, &i);
824             }
825             else
826             {
827                 string_rel_add_char(term_dict, term_component, &i);
828                 wrbuf_putc(term_dict, '-');
829             }
830             wrbuf_putc(term_dict, ']');
831             wrbuf_putc(term_dict, '.');
832             wrbuf_putc(term_dict, '*');
833         }
834         wrbuf_putc(term_dict, ')');
835         break;
836     case 3:
837     case 102:
838     case -1:
839         if (!**term_sub)
840             return 1;
841         yaz_log(log_level_rpn, "Relation =");
842         if (!term_100(zm, term_sub, term_component, space_split, term_dst))
843         {
844             wrbuf_destroy(term_component);
845             return 0;
846         }
847         wrbuf_puts(term_dict, "(");
848         wrbuf_puts(term_dict, wrbuf_cstr(term_component));
849         wrbuf_puts(term_dict, ")");
850         break;
851     case 103:
852         yaz_log(log_level_rpn, "Relation always matches");
853         /* skip to end of term (we don't care what it is) */
854         while (**term_sub != '\0')
855             (*term_sub)++;
856         break;
857     default:
858         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
859         wrbuf_destroy(term_component);
860         return 0;
861     }
862     wrbuf_destroy(term_component);
863     return 1;
864 }
865
866 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
867                              const char **term_sub, 
868                              WRBUF term_dict,
869                              const Odr_oid *attributeSet, NMEM stream,
870                              struct grep_info *grep_info,
871                              const char *index_type, int complete_flag,
872                              char *term_dst,
873                              const char *xpath_use,
874                              struct ord_list **ol);
875
876 ZEBRA_RES zebra_term_limits_APT(ZebraHandle zh,
877                                 Z_AttributesPlusTerm *zapt,
878                                 zint *hits_limit_value,
879                                 const char **term_ref_id_str,
880                                 NMEM nmem)
881 {
882     AttrType term_ref_id_attr;
883     AttrType hits_limit_attr;
884     int term_ref_id_int;
885  
886     attr_init_APT(&hits_limit_attr, zapt, 11);
887     *hits_limit_value  = attr_find(&hits_limit_attr, NULL);
888
889     attr_init_APT(&term_ref_id_attr, zapt, 10);
890     term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
891     if (term_ref_id_int >= 0)
892     {
893         char *res = nmem_malloc(nmem, 20);
894         sprintf(res, "%d", term_ref_id_int);
895         *term_ref_id_str = res;
896     }
897
898     /* no limit given ? */
899     if (*hits_limit_value == -1)
900     {
901         if (*term_ref_id_str)
902         {
903             /* use global if term_ref is present */
904             *hits_limit_value = zh->approx_limit;
905         }
906         else
907         {
908             /* no counting if term_ref is not present */
909             *hits_limit_value = 0;
910         }
911     }
912     else if (*hits_limit_value == 0)
913     {
914         /* 0 is the same as global limit */
915         *hits_limit_value = zh->approx_limit;
916     }
917     yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
918             *term_ref_id_str ? *term_ref_id_str : "none",
919             *hits_limit_value);
920     return ZEBRA_OK;
921 }
922
923 static ZEBRA_RES term_trunc(ZebraHandle zh,
924                             Z_AttributesPlusTerm *zapt,
925                             const char **term_sub, 
926                             const Odr_oid *attributeSet, NMEM stream,
927                             struct grep_info *grep_info,
928                             const char *index_type, int complete_flag,
929                             char *term_dst,
930                             const char *rank_type, 
931                             const char *xpath_use,
932                             NMEM rset_nmem,
933                             RSET *rset,
934                             struct rset_key_control *kc)
935 {
936     ZEBRA_RES res;
937     struct ord_list *ol;
938     zint hits_limit_value;
939     const char *term_ref_id_str = 0;
940     WRBUF term_dict = wrbuf_alloc();
941
942     *rset = 0;
943     zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
944                           stream);
945     grep_info->isam_p_indx = 0;
946     res = string_term(zh, zapt, term_sub, term_dict,
947                       attributeSet, stream, grep_info,
948                       index_type, complete_flag,
949                       term_dst, xpath_use, &ol);
950     wrbuf_destroy(term_dict);
951     if (res != ZEBRA_OK)
952         return res;
953     if (!*term_sub)  /* no more terms ? */
954         return res;
955     yaz_log(log_level_rpn, "term: %s", term_dst);
956     *rset = rset_trunc(zh, grep_info->isam_p_buf,
957                        grep_info->isam_p_indx, term_dst,
958                        strlen(term_dst), rank_type, 1 /* preserve pos */,
959                        zapt->term->which, rset_nmem,
960                        kc, kc->scope, ol, index_type, hits_limit_value,
961                        term_ref_id_str);
962     if (!*rset)
963         return ZEBRA_FAIL;
964     return ZEBRA_OK;
965 }
966
967 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
968                              const char **term_sub, 
969                              WRBUF term_dict,
970                              const Odr_oid *attributeSet, NMEM stream,
971                              struct grep_info *grep_info,
972                              const char *index_type, int complete_flag,
973                              char *term_dst,
974                              const char *xpath_use,
975                              struct ord_list **ol)
976 {
977     int r;
978     AttrType truncation;
979     int truncation_value;
980     const char *termp;
981     struct rpn_char_map_info rcmi;
982
983     int space_split = complete_flag ? 0 : 1;
984     int ord = -1;
985     int regex_range = 0;
986     int max_pos, prefix_len = 0;
987     int relation_error;
988     char ord_buf[32];
989     int ord_len, i;
990     zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
991
992     *ol = ord_list_create(stream);
993
994     rpn_char_map_prepare(zh->reg, zm, &rcmi);
995     attr_init_APT(&truncation, zapt, 5);
996     truncation_value = attr_find(&truncation, NULL);
997     yaz_log(log_level_rpn, "truncation value %d", truncation_value);
998
999     termp = *term_sub; /* start of term for each database */
1000     
1001     if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1002                           attributeSet, &ord) != ZEBRA_OK)
1003     {
1004         *term_sub = 0;
1005         return ZEBRA_FAIL;
1006     }
1007     
1008     wrbuf_rewind(term_dict); /* new dictionary regexp term */
1009     
1010     *ol = ord_list_append(stream, *ol, ord);
1011     ord_len = key_SU_encode(ord, ord_buf);
1012     
1013     wrbuf_putc(term_dict, '(');
1014     
1015     for (i = 0; i<ord_len; i++)
1016     {
1017         wrbuf_putc(term_dict, 1);  /* our internal regexp escape char */
1018         wrbuf_putc(term_dict, ord_buf[i]);
1019     }
1020     wrbuf_putc(term_dict, ')');
1021     
1022     prefix_len = wrbuf_len(term_dict);
1023     
1024     switch (truncation_value)
1025     {
1026     case -1:         /* not specified */
1027     case 100:        /* do not truncate */
1028         if (!string_relation(zh, zapt, &termp, term_dict,
1029                              attributeSet,
1030                              zm, space_split, term_dst,
1031                              &relation_error))
1032         {
1033             if (relation_error)
1034             {
1035                 zebra_setError(zh, relation_error, 0);
1036                 return ZEBRA_FAIL;
1037             }
1038             *term_sub = 0;
1039             return ZEBRA_OK;
1040         }
1041         break;
1042     case 1:          /* right truncation */
1043         wrbuf_putc(term_dict, '(');
1044         if (!term_100(zm, &termp, term_dict, space_split, term_dst))
1045         {
1046             *term_sub = 0;
1047             return ZEBRA_OK;
1048         }
1049         wrbuf_puts(term_dict, ".*)");
1050         break;
1051     case 2:          /* keft truncation */
1052         wrbuf_puts(term_dict, "(.*");
1053         if (!term_100(zm, &termp, term_dict, space_split, term_dst))
1054         {
1055             *term_sub = 0;
1056             return ZEBRA_OK;
1057         }
1058         wrbuf_putc(term_dict, ')');
1059         break;
1060     case 3:          /* left&right truncation */
1061         wrbuf_puts(term_dict, "(.*");
1062         if (!term_100(zm, &termp, term_dict, space_split, term_dst))
1063         {
1064             *term_sub = 0;
1065             return ZEBRA_OK;
1066         }
1067         wrbuf_puts(term_dict, ".*)");
1068         break;
1069     case 101:        /* process # in term */
1070         wrbuf_putc(term_dict, '(');
1071         if (!term_101(zm, &termp, term_dict, space_split, term_dst))
1072         {
1073             *term_sub = 0;
1074             return ZEBRA_OK;
1075         }
1076         wrbuf_puts(term_dict, ")");
1077         break;
1078     case 102:        /* Regexp-1 */
1079         wrbuf_putc(term_dict, '(');
1080         if (!term_102(zm, &termp, term_dict, space_split, term_dst))
1081         {
1082             *term_sub = 0;
1083             return ZEBRA_OK;
1084         }
1085         wrbuf_putc(term_dict, ')');
1086         break;
1087     case 103:       /* Regexp-2 */
1088         regex_range = 1;
1089         wrbuf_putc(term_dict, '(');
1090         if (!term_103(zm, &termp, term_dict, &regex_range,
1091                       space_split, term_dst))
1092         {
1093             *term_sub = 0;
1094             return ZEBRA_OK;
1095         }
1096         wrbuf_putc(term_dict, ')');
1097         break;
1098     case 104:        /* process # and ! in term */
1099         wrbuf_putc(term_dict, '(');
1100         if (!term_104(zm, &termp, term_dict, space_split, term_dst))
1101         {
1102             *term_sub = 0;
1103             return ZEBRA_OK;
1104         }
1105         wrbuf_putc(term_dict, ')');
1106         break;
1107     case 105:        /* process * and ! in term */
1108         wrbuf_putc(term_dict, '(');
1109         if (!term_105(zm, &termp, term_dict, space_split, term_dst, 1))
1110         {
1111             *term_sub = 0;
1112             return ZEBRA_OK;
1113         }
1114         wrbuf_putc(term_dict, ')');
1115         break;
1116     case 106:        /* process * and ! in term */
1117         wrbuf_putc(term_dict, '(');
1118         if (!term_105(zm, &termp, term_dict, space_split, term_dst, 0))
1119         {
1120             *term_sub = 0;
1121             return ZEBRA_OK;
1122         }
1123         wrbuf_putc(term_dict, ')');
1124         break;
1125     default:
1126         zebra_setError_zint(zh,
1127                             YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1128                             truncation_value);
1129         return ZEBRA_FAIL;
1130     }
1131     if (1)
1132     {
1133         char buf[1000];
1134         const char *input = wrbuf_cstr(term_dict) + prefix_len;
1135         esc_str(buf, sizeof(buf), input, strlen(input));
1136     }
1137     {
1138         WRBUF pr_wr = wrbuf_alloc();
1139
1140         wrbuf_write_escaped(pr_wr, wrbuf_buf(term_dict), wrbuf_len(term_dict));
1141         yaz_log(YLOG_LOG, "dict_lookup_grep: %s", wrbuf_cstr(pr_wr));
1142         wrbuf_destroy(pr_wr);
1143     }
1144     r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), regex_range,
1145                          grep_info, &max_pos, 
1146                          ord_len /* number of "exact" chars */,
1147                          grep_handle);
1148     if (r == 1)
1149         zebra_set_partial_result(zh);
1150     else if (r)
1151         yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1152     *term_sub = termp;
1153     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1154     return ZEBRA_OK;
1155 }
1156
1157
1158
1159 static void grep_info_delete(struct grep_info *grep_info)
1160 {
1161 #ifdef TERM_COUNT
1162     xfree(grep_info->term_no);
1163 #endif
1164     xfree(grep_info->isam_p_buf);
1165 }
1166
1167 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1168                                    Z_AttributesPlusTerm *zapt,
1169                                    struct grep_info *grep_info,
1170                                    const char *index_type)
1171 {
1172 #ifdef TERM_COUNT
1173     grep_info->term_no = 0;
1174 #endif
1175     grep_info->trunc_max = atoi(res_get_def(zh->res, "truncmax", "10000"));
1176     grep_info->isam_p_size = 0;
1177     grep_info->isam_p_buf = NULL;
1178     grep_info->zh = zh;
1179     grep_info->index_type = index_type;
1180     grep_info->termset = 0;
1181     if (zapt)
1182     {
1183         AttrType truncmax;
1184         int truncmax_value;
1185
1186         attr_init_APT(&truncmax, zapt, 13);
1187         truncmax_value = attr_find(&truncmax, NULL);
1188         if (truncmax_value != -1)
1189             grep_info->trunc_max = truncmax_value;
1190     }
1191     if (zapt)
1192     {
1193         AttrType termset;
1194         int termset_value_numeric;
1195         const char *termset_value_string;
1196
1197         attr_init_APT(&termset, zapt, 8);
1198         termset_value_numeric =
1199             attr_find_ex(&termset, NULL, &termset_value_string);
1200         if (termset_value_numeric != -1)
1201         {
1202 #if TERMSET_DISABLE
1203             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1204             return ZEBRA_FAIL;
1205 #else
1206             char resname[32];
1207             const char *termset_name = 0;
1208             if (termset_value_numeric != -2)
1209             {
1210                 
1211                 sprintf(resname, "%d", termset_value_numeric);
1212                 termset_name = resname;
1213             }
1214             else
1215             termset_name = termset_value_string;
1216             yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1217             grep_info->termset = resultSetAdd(zh, termset_name, 1);
1218             if (!grep_info->termset)
1219             {
1220                 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1221                 return ZEBRA_FAIL;
1222             }
1223 #endif
1224         }
1225     }
1226     return ZEBRA_OK;
1227 }
1228                                
1229 /**
1230   \brief Create result set(s) for list of terms
1231   \param zh Zebra Handle
1232   \param zapt Attributes Plust Term (RPN leaf)
1233   \param termz term as used in query but converted to UTF-8
1234   \param attributeSet default attribute set
1235   \param stream memory for result
1236   \param index_type register type ("w", "p",..)
1237   \param complete_flag whether it's phrases or not
1238   \param rank_type term flags for ranking
1239   \param xpath_use use attribute for X-Path (-1 for no X-path)
1240   \param rset_nmem memory for result sets
1241   \param result_sets output result set for each term in list (output)
1242   \param num_result_sets number of output result sets
1243   \param kc rset key control to be used for created result sets
1244 */
1245 static ZEBRA_RES term_list_trunc(ZebraHandle zh,
1246                                  Z_AttributesPlusTerm *zapt,
1247                                  const char *termz,
1248                                  const Odr_oid *attributeSet,
1249                                  NMEM stream,
1250                                  const char *index_type, int complete_flag,
1251                                  const char *rank_type,
1252                                  const char *xpath_use,
1253                                  NMEM rset_nmem,
1254                                  RSET **result_sets, int *num_result_sets,
1255                                  struct rset_key_control *kc)
1256 {
1257     char term_dst[IT_MAX_WORD+1];
1258     struct grep_info grep_info;
1259     const char *termp = termz;
1260     int alloc_sets = 0;
1261
1262     *num_result_sets = 0;
1263     *term_dst = 0;
1264     if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1265         return ZEBRA_FAIL;
1266     while(1)
1267     { 
1268         ZEBRA_RES res;
1269
1270         if (alloc_sets == *num_result_sets)
1271         {
1272             int add = 10;
1273             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1274                                               sizeof(*rnew));
1275             if (alloc_sets)
1276                 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1277             alloc_sets = alloc_sets + add;
1278             *result_sets = rnew;
1279         }
1280         res = term_trunc(zh, zapt, &termp, attributeSet,
1281                          stream, &grep_info,
1282                          index_type, complete_flag,
1283                          term_dst, rank_type,
1284                          xpath_use, rset_nmem,
1285                          &(*result_sets)[*num_result_sets],
1286                          kc);
1287         if (res != ZEBRA_OK)
1288         {
1289             int i;
1290             for (i = 0; i < *num_result_sets; i++)
1291                 rset_delete((*result_sets)[i]);
1292             grep_info_delete(&grep_info);
1293             return res;
1294         }
1295         if ((*result_sets)[*num_result_sets] == 0)
1296             break;
1297         (*num_result_sets)++;
1298
1299         if (!*termp)
1300             break;
1301     }
1302     grep_info_delete(&grep_info);
1303     return ZEBRA_OK;
1304 }
1305
1306 static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh,
1307                                          Z_AttributesPlusTerm *zapt,
1308                                          const Odr_oid *attributeSet,
1309                                          const char *index_type,
1310                                          NMEM rset_nmem,
1311                                          RSET *rset,
1312                                          struct rset_key_control *kc)
1313 {
1314     int position_value;
1315     AttrType position;
1316     int ord = -1;
1317     char ord_buf[32];
1318     char term_dict[100];
1319     int ord_len;
1320     char *val;
1321     ISAM_P isam_p;
1322     zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1323     
1324     attr_init_APT(&position, zapt, 3);
1325     position_value = attr_find(&position, NULL);
1326     switch(position_value)
1327     {
1328     case 3:
1329     case -1:
1330         return ZEBRA_OK;
1331     case 1:
1332     case 2:
1333         break;
1334     default:
1335         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1336                             position_value);
1337         return ZEBRA_FAIL;
1338     }
1339
1340
1341     if (!zebra_maps_is_first_in_field(zm))
1342     {
1343         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1344                             position_value);
1345         return ZEBRA_FAIL;
1346     }
1347
1348     if (!zh->reg->isamb && !zh->reg->isamc)
1349     {
1350         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1351                             position_value);
1352         return ZEBRA_FAIL;
1353     }
1354
1355     if (zebra_apt_get_ord(zh, zapt, index_type, 0,
1356                           attributeSet, &ord) != ZEBRA_OK)
1357     {
1358         return ZEBRA_FAIL;
1359     }
1360     ord_len = key_SU_encode(ord, ord_buf);
1361     memcpy(term_dict, ord_buf, ord_len);
1362     strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1363     val = dict_lookup(zh->reg->dict, term_dict);
1364     if (val)
1365     {
1366         assert(*val == sizeof(ISAM_P));
1367         memcpy(&isam_p, val+1, sizeof(isam_p));
1368         
1369         if (zh->reg->isamb)
1370             *rset = rsisamb_create(rset_nmem, kc, kc->scope,
1371                                    zh->reg->isamb, isam_p, 0);
1372         else if (zh->reg->isamc)
1373             *rset = rsisamc_create(rset_nmem, kc, kc->scope,
1374                                    zh->reg->isamc, isam_p, 0);
1375     }
1376     return ZEBRA_OK;
1377 }
1378                                          
1379 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1380                                        Z_AttributesPlusTerm *zapt,
1381                                        const char *termz_org,
1382                                        const Odr_oid *attributeSet,
1383                                        NMEM stream,
1384                                        const char *index_type, int complete_flag,
1385                                        const char *rank_type,
1386                                        const char *xpath_use,
1387                                        NMEM rset_nmem,
1388                                        RSET *rset,
1389                                        struct rset_key_control *kc)
1390 {
1391     RSET *result_sets = 0;
1392     int num_result_sets = 0;
1393     ZEBRA_RES res =
1394         term_list_trunc(zh, zapt, termz_org, attributeSet,
1395                         stream, index_type, complete_flag,
1396                         rank_type, xpath_use,
1397                         rset_nmem,
1398                         &result_sets, &num_result_sets, kc);
1399
1400     if (res != ZEBRA_OK)
1401         return res;
1402
1403     if (num_result_sets > 0)
1404     {
1405         RSET first_set = 0;
1406         res = rpn_search_APT_position(zh, zapt, attributeSet, 
1407                                       index_type,
1408                                       rset_nmem, &first_set,
1409                                       kc);
1410         if (res != ZEBRA_OK)
1411             return res;
1412         if (first_set)
1413         {
1414             RSET *nsets = nmem_malloc(stream,
1415                                       sizeof(RSET) * (num_result_sets+1));
1416             nsets[0] = first_set;
1417             memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1418             result_sets = nsets;
1419             num_result_sets++;
1420         }
1421     }
1422     if (num_result_sets == 0)
1423         *rset = rset_create_null(rset_nmem, kc, 0); 
1424     else if (num_result_sets == 1)
1425         *rset = result_sets[0];
1426     else
1427         *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1428                                  num_result_sets, result_sets,
1429                                  1 /* ordered */, 0 /* exclusion */,
1430                                  3 /* relation */, 1 /* distance */);
1431     if (!*rset)
1432         return ZEBRA_FAIL;
1433     return ZEBRA_OK;
1434 }
1435
1436 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1437                                         Z_AttributesPlusTerm *zapt,
1438                                         const char *termz_org,
1439                                         const Odr_oid *attributeSet,
1440                                         NMEM stream,
1441                                         const char *index_type, 
1442                                         int complete_flag,
1443                                         const char *rank_type,
1444                                         const char *xpath_use,
1445                                         NMEM rset_nmem,
1446                                         RSET *rset,
1447                                         struct rset_key_control *kc)
1448 {
1449     RSET *result_sets = 0;
1450     int num_result_sets = 0;
1451     int i;
1452     ZEBRA_RES res =
1453         term_list_trunc(zh, zapt, termz_org, attributeSet,
1454                         stream, index_type, complete_flag,
1455                         rank_type, xpath_use,
1456                         rset_nmem,
1457                         &result_sets, &num_result_sets, kc);
1458     if (res != ZEBRA_OK)
1459         return res;
1460
1461     for (i = 0; i<num_result_sets; i++)
1462     {
1463         RSET first_set = 0;
1464         res = rpn_search_APT_position(zh, zapt, attributeSet, 
1465                                       index_type,
1466                                       rset_nmem, &first_set,
1467                                       kc);
1468         if (res != ZEBRA_OK)
1469         {
1470             for (i = 0; i<num_result_sets; i++)
1471                 rset_delete(result_sets[i]);
1472             return res;
1473         }
1474
1475         if (first_set)
1476         {
1477             RSET tmp_set[2];
1478
1479             tmp_set[0] = first_set;
1480             tmp_set[1] = result_sets[i];
1481             
1482             result_sets[i] = rset_create_prox(
1483                 rset_nmem, kc, kc->scope,
1484                 2, tmp_set,
1485                 1 /* ordered */, 0 /* exclusion */,
1486                 3 /* relation */, 1 /* distance */);
1487         }
1488     }
1489     if (num_result_sets == 0)
1490         *rset = rset_create_null(rset_nmem, kc, 0); 
1491     else if (num_result_sets == 1)
1492         *rset = result_sets[0];
1493     else
1494         *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1495                                num_result_sets, result_sets);
1496     if (!*rset)
1497         return ZEBRA_FAIL;
1498     return ZEBRA_OK;
1499 }
1500
1501 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1502                                          Z_AttributesPlusTerm *zapt,
1503                                          const char *termz_org,
1504                                          const Odr_oid *attributeSet,
1505                                          NMEM stream,
1506                                          const char *index_type, 
1507                                          int complete_flag,
1508                                          const char *rank_type, 
1509                                          const char *xpath_use,
1510                                          NMEM rset_nmem,
1511                                          RSET *rset,
1512                                          struct rset_key_control *kc)
1513 {
1514     RSET *result_sets = 0;
1515     int num_result_sets = 0;
1516     int i;
1517     ZEBRA_RES res =
1518         term_list_trunc(zh, zapt, termz_org, attributeSet,
1519                         stream, index_type, complete_flag,
1520                         rank_type, xpath_use,
1521                         rset_nmem,
1522                         &result_sets, &num_result_sets,
1523                         kc);
1524     if (res != ZEBRA_OK)
1525         return res;
1526     for (i = 0; i<num_result_sets; i++)
1527     {
1528         RSET first_set = 0;
1529         res = rpn_search_APT_position(zh, zapt, attributeSet, 
1530                                       index_type,
1531                                       rset_nmem, &first_set,
1532                                       kc);
1533         if (res != ZEBRA_OK)
1534         {
1535             for (i = 0; i<num_result_sets; i++)
1536                 rset_delete(result_sets[i]);
1537             return res;
1538         }
1539
1540         if (first_set)
1541         {
1542             RSET tmp_set[2];
1543
1544             tmp_set[0] = first_set;
1545             tmp_set[1] = result_sets[i];
1546             
1547             result_sets[i] = rset_create_prox(
1548                 rset_nmem, kc, kc->scope,
1549                 2, tmp_set,
1550                 1 /* ordered */, 0 /* exclusion */,
1551                 3 /* relation */, 1 /* distance */);
1552         }
1553     }
1554
1555
1556     if (num_result_sets == 0)
1557         *rset = rset_create_null(rset_nmem, kc, 0); 
1558     else if (num_result_sets == 1)
1559         *rset = result_sets[0];
1560     else
1561         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1562                                num_result_sets, result_sets);
1563     if (!*rset)
1564         return ZEBRA_FAIL;
1565     return ZEBRA_OK;
1566 }
1567
1568 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1569                             const char **term_sub,
1570                             WRBUF term_dict,
1571                             const Odr_oid *attributeSet,
1572                             struct grep_info *grep_info,
1573                             int *max_pos,
1574                             zebra_map_t zm,
1575                             char *term_dst,
1576                             int *error_code)
1577 {
1578     AttrType relation;
1579     int relation_value;
1580     int term_value;
1581     int r;
1582     WRBUF term_num = wrbuf_alloc();
1583
1584     *error_code = 0;
1585     attr_init_APT(&relation, zapt, 2);
1586     relation_value = attr_find(&relation, NULL);
1587
1588     yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1589
1590     switch (relation_value)
1591     {
1592     case 1:
1593         yaz_log(log_level_rpn, "Relation <");
1594         if (!term_100(zm, term_sub, term_num, 1, term_dst))
1595         { 
1596             wrbuf_destroy(term_num);
1597             return 0;
1598         }
1599         term_value = atoi(wrbuf_cstr(term_num));
1600         gen_regular_rel(term_dict, term_value-1, 1);
1601         break;
1602     case 2:
1603         yaz_log(log_level_rpn, "Relation <=");
1604         if (!term_100(zm, term_sub, term_num, 1, term_dst))
1605         {
1606             wrbuf_destroy(term_num);
1607             return 0;
1608         }
1609         term_value = atoi(wrbuf_cstr(term_num));
1610         gen_regular_rel(term_dict, term_value, 1);
1611         break;
1612     case 4:
1613         yaz_log(log_level_rpn, "Relation >=");
1614         if (!term_100(zm, term_sub, term_num, 1, term_dst))
1615         {
1616             wrbuf_destroy(term_num);
1617             return 0;
1618         }
1619         term_value = atoi(wrbuf_cstr(term_num));
1620         gen_regular_rel(term_dict, term_value, 0);
1621         break;
1622     case 5:
1623         yaz_log(log_level_rpn, "Relation >");
1624         if (!term_100(zm, term_sub, term_num, 1, term_dst))
1625         {
1626             wrbuf_destroy(term_num);
1627             return 0;
1628         }
1629         term_value = atoi(wrbuf_cstr(term_num));
1630         gen_regular_rel(term_dict, term_value+1, 0);
1631         break;
1632     case -1:
1633     case 3:
1634         yaz_log(log_level_rpn, "Relation =");
1635         if (!term_100(zm, term_sub, term_num, 1, term_dst))
1636         {
1637             wrbuf_destroy(term_num);
1638             return 0; 
1639         }
1640         term_value = atoi(wrbuf_cstr(term_num));
1641         wrbuf_printf(term_dict, "(0*%d)", term_value);
1642         break;
1643     case 103:
1644         /* term_tmp untouched.. */
1645         while (**term_sub != '\0')
1646             (*term_sub)++;
1647         break;
1648     default:
1649         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1650         wrbuf_destroy(term_num); 
1651         return 0;
1652     }
1653     r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 
1654                          0, grep_info, max_pos, 0, grep_handle);
1655
1656     if (r == 1)
1657         zebra_set_partial_result(zh);
1658     else if (r)
1659         yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1660     yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1661     wrbuf_destroy(term_num);
1662     return 1;
1663 }
1664
1665 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1666                               const char **term_sub, 
1667                               WRBUF term_dict,
1668                               const Odr_oid *attributeSet, NMEM stream,
1669                               struct grep_info *grep_info,
1670                               const char *index_type, int complete_flag,
1671                               char *term_dst, 
1672                               const char *xpath_use,
1673                               struct ord_list **ol)
1674 {
1675     const char *termp;
1676     struct rpn_char_map_info rcmi;
1677     int max_pos;
1678     int relation_error = 0;
1679     int ord, ord_len, i;
1680     char ord_buf[32];
1681     zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1682     
1683     *ol = ord_list_create(stream);
1684
1685     rpn_char_map_prepare(zh->reg, zm, &rcmi);
1686
1687     termp = *term_sub;
1688     
1689     if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1690                           attributeSet, &ord) != ZEBRA_OK)
1691     {
1692         return ZEBRA_FAIL;
1693     }
1694     
1695     wrbuf_rewind(term_dict);
1696     
1697     *ol = ord_list_append(stream, *ol, ord);
1698     
1699     ord_len = key_SU_encode(ord, ord_buf);
1700     
1701     wrbuf_putc(term_dict, '(');
1702     for (i = 0; i < ord_len; i++)
1703     {
1704         wrbuf_putc(term_dict, 1);
1705         wrbuf_putc(term_dict, ord_buf[i]);
1706     }
1707     wrbuf_putc(term_dict, ')');
1708     
1709     if (!numeric_relation(zh, zapt, &termp, term_dict,
1710                           attributeSet, grep_info, &max_pos, zm,
1711                           term_dst, &relation_error))
1712     {
1713         if (relation_error)
1714         {
1715             zebra_setError(zh, relation_error, 0);
1716             return ZEBRA_FAIL;
1717         }
1718         *term_sub = 0;
1719         return ZEBRA_OK;
1720     }
1721     *term_sub = termp;
1722     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1723     return ZEBRA_OK;
1724 }
1725
1726                                  
1727 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1728                                         Z_AttributesPlusTerm *zapt,
1729                                         const char *termz,
1730                                         const Odr_oid *attributeSet,
1731                                         NMEM stream,
1732                                         const char *index_type, 
1733                                         int complete_flag,
1734                                         const char *rank_type, 
1735                                         const char *xpath_use,
1736                                         NMEM rset_nmem,
1737                                         RSET *rset,
1738                                         struct rset_key_control *kc)
1739 {
1740     char term_dst[IT_MAX_WORD+1];
1741     const char *termp = termz;
1742     RSET *result_sets = 0;
1743     int num_result_sets = 0;
1744     ZEBRA_RES res;
1745     struct grep_info grep_info;
1746     int alloc_sets = 0;
1747     zint hits_limit_value;
1748     const char *term_ref_id_str = 0;
1749
1750     zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
1751                           stream);
1752
1753     yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1754     if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1755         return ZEBRA_FAIL;
1756     while (1)
1757     { 
1758         struct ord_list *ol;
1759         WRBUF term_dict = wrbuf_alloc();
1760         if (alloc_sets == num_result_sets)
1761         {
1762             int add = 10;
1763             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) * 
1764                                               sizeof(*rnew));
1765             if (alloc_sets)
1766                 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1767             alloc_sets = alloc_sets + add;
1768             result_sets = rnew;
1769         }
1770         yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1771         grep_info.isam_p_indx = 0;
1772         res = numeric_term(zh, zapt, &termp, term_dict,
1773                            attributeSet, stream, &grep_info,
1774                            index_type, complete_flag,
1775                            term_dst, xpath_use, &ol);
1776         wrbuf_destroy(term_dict);
1777         if (res == ZEBRA_FAIL || termp == 0)
1778             break;
1779         yaz_log(YLOG_DEBUG, "term: %s", term_dst);
1780         result_sets[num_result_sets] =
1781             rset_trunc(zh, grep_info.isam_p_buf,
1782                        grep_info.isam_p_indx, term_dst,
1783                        strlen(term_dst), rank_type,
1784                        0 /* preserve position */,
1785                        zapt->term->which, rset_nmem, 
1786                        kc, kc->scope, ol, index_type,
1787                        hits_limit_value,
1788                        term_ref_id_str);
1789         if (!result_sets[num_result_sets])
1790             break;
1791         num_result_sets++;
1792         if (!*termp)
1793             break;
1794     }
1795     grep_info_delete(&grep_info);
1796
1797     if (res != ZEBRA_OK)
1798         return res;
1799     if (num_result_sets == 0)
1800         *rset = rset_create_null(rset_nmem, kc, 0);
1801     else if (num_result_sets == 1)
1802         *rset = result_sets[0];
1803     else
1804         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1805                                 num_result_sets, result_sets);
1806     if (!*rset)
1807         return ZEBRA_FAIL;
1808     return ZEBRA_OK;
1809 }
1810
1811 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1812                                       Z_AttributesPlusTerm *zapt,
1813                                       const char *termz,
1814                                       const Odr_oid *attributeSet,
1815                                       NMEM stream,
1816                                       const char *rank_type, NMEM rset_nmem,
1817                                       RSET *rset,
1818                                       struct rset_key_control *kc)
1819 {
1820     Record rec;
1821     zint sysno = atozint(termz);
1822     
1823     if (sysno <= 0)
1824         sysno = 0;
1825     rec = rec_get(zh->reg->records, sysno);
1826     if (!rec)
1827         sysno = 0;
1828
1829     rec_free(&rec);
1830
1831     if (sysno <= 0)
1832     {
1833         *rset = rset_create_null(rset_nmem, kc, 0);
1834     }
1835     else
1836     {
1837         RSFD rsfd;
1838         struct it_key key;
1839         *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1840                                  res_get(zh->res, "setTmpDir"), 0);
1841         rsfd = rset_open(*rset, RSETF_WRITE);
1842         
1843         key.mem[0] = sysno;
1844         key.mem[1] = 1;
1845         key.len = 2;
1846         rset_write(rsfd, &key);
1847         rset_close(rsfd);
1848     }
1849     return ZEBRA_OK;
1850 }
1851
1852 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1853                                const Odr_oid *attributeSet, NMEM stream,
1854                                Z_SortKeySpecList *sort_sequence,
1855                                const char *rank_type,
1856                                NMEM rset_nmem,
1857                                RSET *rset,
1858                                struct rset_key_control *kc)
1859 {
1860     int i;
1861     int sort_relation_value;
1862     AttrType sort_relation_type;
1863     Z_SortKeySpec *sks;
1864     Z_SortKey *sk;
1865     char termz[20];
1866     
1867     attr_init_APT(&sort_relation_type, zapt, 7);
1868     sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
1869
1870     if (!sort_sequence->specs)
1871     {
1872         sort_sequence->num_specs = 10;
1873         sort_sequence->specs = (Z_SortKeySpec **)
1874             nmem_malloc(stream, sort_sequence->num_specs *
1875                          sizeof(*sort_sequence->specs));
1876         for (i = 0; i<sort_sequence->num_specs; i++)
1877             sort_sequence->specs[i] = 0;
1878     }
1879     if (zapt->term->which != Z_Term_general)
1880         i = 0;
1881     else
1882         i = atoi_n((char *) zapt->term->u.general->buf,
1883                     zapt->term->u.general->len);
1884     if (i >= sort_sequence->num_specs)
1885         i = 0;
1886     sprintf(termz, "%d", i);
1887
1888     sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1889     sks->sortElement = (Z_SortElement *)
1890         nmem_malloc(stream, sizeof(*sks->sortElement));
1891     sks->sortElement->which = Z_SortElement_generic;
1892     sk = sks->sortElement->u.generic = (Z_SortKey *)
1893         nmem_malloc(stream, sizeof(*sk));
1894     sk->which = Z_SortKey_sortAttributes;
1895     sk->u.sortAttributes = (Z_SortAttributes *)
1896         nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
1897
1898     sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet);
1899     sk->u.sortAttributes->list = zapt->attributes;
1900
1901     sks->sortRelation = (int *)
1902         nmem_malloc(stream, sizeof(*sks->sortRelation));
1903     if (sort_relation_value == 1)
1904         *sks->sortRelation = Z_SortKeySpec_ascending;
1905     else if (sort_relation_value == 2)
1906         *sks->sortRelation = Z_SortKeySpec_descending;
1907     else 
1908         *sks->sortRelation = Z_SortKeySpec_ascending;
1909
1910     sks->caseSensitivity = (int *)
1911         nmem_malloc(stream, sizeof(*sks->caseSensitivity));
1912     *sks->caseSensitivity = 0;
1913
1914     sks->which = Z_SortKeySpec_null;
1915     sks->u.null = odr_nullval ();
1916     sort_sequence->specs[i] = sks;
1917     *rset = rset_create_null(rset_nmem, kc, 0);
1918     return ZEBRA_OK;
1919 }
1920
1921
1922 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1923                            const Odr_oid *attributeSet,
1924                            struct xpath_location_step *xpath, int max,
1925                            NMEM mem)
1926 {
1927     const Odr_oid *curAttributeSet = attributeSet;
1928     AttrType use;
1929     const char *use_string = 0;
1930     
1931     attr_init_APT(&use, zapt, 1);
1932     attr_find_ex(&use, &curAttributeSet, &use_string);
1933
1934     if (!use_string || *use_string != '/')
1935         return -1;
1936
1937     return zebra_parse_xpath_str(use_string, xpath, max, mem);
1938 }
1939  
1940                
1941
1942 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
1943                         const char *index_type, const char *term, 
1944                         const char *xpath_use,
1945                         NMEM rset_nmem,
1946                         struct rset_key_control *kc)
1947 {
1948     struct grep_info grep_info;
1949     int ord = zebraExplain_lookup_attr_str(zh->reg->zei, 
1950                                            zinfo_index_category_index,
1951                                            index_type, xpath_use);
1952     if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, "0") == ZEBRA_FAIL)
1953         return rset_create_null(rset_nmem, kc, 0);
1954     
1955     if (ord < 0)
1956         return rset_create_null(rset_nmem, kc, 0);
1957     else
1958     {
1959         int i, r, max_pos;
1960         char ord_buf[32];
1961         RSET rset;
1962         WRBUF term_dict = wrbuf_alloc();
1963         int ord_len = key_SU_encode(ord, ord_buf);
1964         int term_type = Z_Term_characterString;
1965         const char *flags = "void";
1966
1967         wrbuf_putc(term_dict, '(');
1968         for (i = 0; i<ord_len; i++)
1969         {
1970             wrbuf_putc(term_dict, 1);
1971             wrbuf_putc(term_dict, ord_buf[i]);
1972         }
1973         wrbuf_putc(term_dict, ')');
1974         wrbuf_puts(term_dict, term);
1975         
1976         grep_info.isam_p_indx = 0;
1977         r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 0,
1978                              &grep_info, &max_pos, 0, grep_handle);
1979         yaz_log(YLOG_DEBUG, "%s %d positions", term,
1980                 grep_info.isam_p_indx);
1981         rset = rset_trunc(zh, grep_info.isam_p_buf,
1982                           grep_info.isam_p_indx, term, strlen(term),
1983                           flags, 1, term_type, rset_nmem,
1984                           kc, kc->scope, 0, index_type, 0 /* hits_limit */,
1985                           0 /* term_ref_id_str */);
1986         grep_info_delete(&grep_info);
1987         wrbuf_destroy(term_dict);
1988         return rset;
1989     }
1990 }
1991
1992 static
1993 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
1994                            NMEM stream, const char *rank_type, RSET rset,
1995                            int xpath_len, struct xpath_location_step *xpath,
1996                            NMEM rset_nmem,
1997                            RSET *rset_out,
1998                            struct rset_key_control *kc)
1999 {
2000     int i;
2001     int always_matches = rset ? 0 : 1;
2002
2003     if (xpath_len < 0)
2004     {
2005         *rset_out = rset;
2006         return ZEBRA_OK;
2007     }
2008
2009     yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2010     for (i = 0; i<xpath_len; i++)
2011     {
2012         yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2013
2014     }
2015
2016     /*
2017       //a    ->    a/.*
2018       //a/b  ->    b/a/.*
2019       /a     ->    a/
2020       /a/b   ->    b/a/
2021
2022       /      ->    none
2023
2024    a[@attr = value]/b[@other = othervalue]
2025
2026  /e/@a val      range(e/,range(@a,freetext(w,1015,val),@a),e/)
2027  /a/b val       range(b/a/,freetext(w,1016,val),b/a/)
2028  /a/b/@c val    range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2029  /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2030  /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2031  /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
2032       
2033     */
2034
2035     dict_grep_cmap(zh->reg->dict, 0, 0);
2036     
2037     {
2038         int level = xpath_len;
2039         int first_path = 1;
2040         
2041         while (--level >= 0)
2042         {
2043             WRBUF xpath_rev = wrbuf_alloc();
2044             int i;
2045             RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
2046
2047             for (i = level; i >= 1; --i)
2048             {
2049                 const char *cp = xpath[i].part;
2050                 if (*cp)
2051                 {
2052                     for (; *cp; cp++)
2053                     {
2054                         if (*cp == '*')
2055                             wrbuf_puts(xpath_rev, "[^/]*");
2056                         else if (*cp == ' ')
2057                             wrbuf_puts(xpath_rev, "\001 ");
2058                         else
2059                             wrbuf_putc(xpath_rev, *cp);
2060
2061                         /* wrbuf_putc does not null-terminate , but
2062                            wrbuf_puts below ensures it does.. so xpath_rev
2063                            is OK iff length is > 0 */
2064                     }
2065                     wrbuf_puts(xpath_rev, "/");
2066                 }
2067                 else if (i == 1)  /* // case */
2068                     wrbuf_puts(xpath_rev, ".*");
2069             }
2070             if (xpath[level].predicate &&
2071                 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2072                 xpath[level].predicate->u.relation.name[0])
2073             {
2074                 WRBUF wbuf = wrbuf_alloc();
2075                 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2076                 if (xpath[level].predicate->u.relation.value)
2077                 {
2078                     const char *cp = xpath[level].predicate->u.relation.value;
2079                     wrbuf_putc(wbuf, '=');
2080                     
2081                     while (*cp)
2082                     {
2083                         if (strchr(REGEX_CHARS, *cp))
2084                             wrbuf_putc(wbuf, '\\');
2085                         wrbuf_putc(wbuf, *cp);
2086                         cp++;
2087                     }
2088                 }
2089                 rset_attr = xpath_trunc(
2090                     zh, stream, "0", wrbuf_cstr(wbuf), 
2091                     ZEBRA_XPATH_ATTR_NAME, 
2092                     rset_nmem, kc);
2093                 wrbuf_destroy(wbuf);
2094             } 
2095             else 
2096             {
2097                 if (!first_path)
2098                 {
2099                     wrbuf_destroy(xpath_rev);
2100                     continue;
2101                 }
2102             }
2103             yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level, 
2104                     wrbuf_cstr(xpath_rev));
2105             if (wrbuf_len(xpath_rev))
2106             {
2107                 rset_start_tag = xpath_trunc(zh, stream, "0", 
2108                                              wrbuf_cstr(xpath_rev),
2109                                              ZEBRA_XPATH_ELM_BEGIN, 
2110                                              rset_nmem, kc);
2111                 if (always_matches)
2112                     rset = rset_start_tag;
2113                 else
2114                 {
2115                     rset_end_tag = xpath_trunc(zh, stream, "0", 
2116                                                wrbuf_cstr(xpath_rev),
2117                                                ZEBRA_XPATH_ELM_END, 
2118                                                rset_nmem, kc);
2119                     
2120                     rset = rset_create_between(rset_nmem, kc, kc->scope,
2121                                                rset_start_tag, rset,
2122                                                rset_end_tag, rset_attr);
2123                 }
2124             }
2125             wrbuf_destroy(xpath_rev);
2126             first_path = 0;
2127         }
2128     }
2129     *rset_out = rset;
2130     return ZEBRA_OK;
2131 }
2132
2133 #define MAX_XPATH_STEPS 10
2134
2135 static ZEBRA_RES rpn_search_database(ZebraHandle zh, 
2136                                      Z_AttributesPlusTerm *zapt,
2137                                      const Odr_oid *attributeSet, NMEM stream,
2138                                      Z_SortKeySpecList *sort_sequence,
2139                                      NMEM rset_nmem,
2140                                      RSET *rset,
2141                                      struct rset_key_control *kc);
2142
2143 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2144                                 const Odr_oid *attributeSet, NMEM stream,
2145                                 Z_SortKeySpecList *sort_sequence,
2146                                 int num_bases, char **basenames, 
2147                                 NMEM rset_nmem,
2148                                 RSET *rset,
2149                                 struct rset_key_control *kc)
2150 {
2151     RSET *rsets = nmem_malloc(stream, num_bases * sizeof(*rsets));
2152     ZEBRA_RES res = ZEBRA_OK;
2153     int i;
2154     for (i = 0; i < num_bases; i++)
2155     {
2156
2157         if (zebraExplain_curDatabase(zh->reg->zei, basenames[i]))
2158         {
2159             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2160                            basenames[i]);
2161             res = ZEBRA_FAIL;
2162             break;
2163         }
2164         res = rpn_search_database(zh, zapt, attributeSet, stream,
2165                                   sort_sequence,
2166                                   rset_nmem, rsets+i, kc);
2167         if (res != ZEBRA_OK)
2168             break;
2169     }
2170     if (res != ZEBRA_OK)
2171     {   /* must clean up the already created sets */
2172         while (--i >= 0)
2173             rset_delete(rsets[i]);
2174         *rset = 0;
2175     }
2176     else 
2177     {
2178         if (num_bases == 1)
2179             *rset = rsets[0];
2180         else if (num_bases == 0)
2181             *rset = rset_create_null(rset_nmem, kc, 0); 
2182         else
2183             *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* TERMID */,
2184                                    num_bases, rsets);
2185     }
2186     return res;
2187 }
2188
2189 static ZEBRA_RES rpn_search_database(ZebraHandle zh, 
2190                                      Z_AttributesPlusTerm *zapt,
2191                                      const Odr_oid *attributeSet, NMEM stream,
2192                                      Z_SortKeySpecList *sort_sequence,
2193                                      NMEM rset_nmem,
2194                                      RSET *rset,
2195                                      struct rset_key_control *kc)
2196 {
2197     ZEBRA_RES res = ZEBRA_OK;
2198     const char *index_type;
2199     char *search_type = NULL;
2200     char rank_type[128];
2201     int complete_flag;
2202     int sort_flag;
2203     char termz[IT_MAX_WORD+1];
2204     int xpath_len;
2205     const char *xpath_use = 0;
2206     struct xpath_location_step xpath[MAX_XPATH_STEPS];
2207
2208     if (!log_level_set)
2209     {
2210         log_level_rpn = yaz_log_module_level("rpn");
2211         log_level_set = 1;
2212     }
2213     zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type,
2214                     rank_type, &complete_flag, &sort_flag);
2215     
2216     yaz_log(YLOG_DEBUG, "index_type=%s", index_type);
2217     yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2218     yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2219     yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2220
2221     if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2222         return ZEBRA_FAIL;
2223
2224     if (sort_flag)
2225         return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2226                              rank_type, rset_nmem, rset, kc);
2227     /* consider if an X-Path query is used */
2228     xpath_len = rpn_check_xpath(zh, zapt, attributeSet, 
2229                                 xpath, MAX_XPATH_STEPS, stream);
2230     if (xpath_len >= 0)
2231     {
2232         if (xpath[xpath_len-1].part[0] == '@') 
2233             xpath_use = ZEBRA_XPATH_ATTR_CDATA;  /* last step is attribute  */
2234         else
2235             xpath_use = ZEBRA_XPATH_CDATA;  /* searching for cdata */        
2236
2237         if (1)
2238         {
2239             AttrType relation;
2240             int relation_value;
2241
2242             attr_init_APT(&relation, zapt, 2);
2243             relation_value = attr_find(&relation, NULL);
2244
2245             if (relation_value == 103) /* alwaysmatches */
2246             {
2247                 *rset = 0; /* signal no "term" set */
2248                 return rpn_search_xpath(zh, stream, rank_type, *rset, 
2249                                         xpath_len, xpath, rset_nmem, rset, kc);
2250             }
2251         }
2252     }
2253
2254     /* search using one of the various search type strategies
2255        termz is our UTF-8 search term
2256        attributeSet is top-level default attribute set 
2257        stream is ODR for search
2258        reg_id is the register type
2259        complete_flag is 1 for complete subfield, 0 for incomplete
2260        xpath_use is use-attribute to be used for X-Path search, 0 for none
2261     */
2262     if (!strcmp(search_type, "phrase"))
2263     {
2264         res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2265                                     index_type, complete_flag, rank_type,
2266                                     xpath_use,
2267                                     rset_nmem,
2268                                     rset, kc);
2269     }
2270     else if (!strcmp(search_type, "and-list"))
2271     {
2272         res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2273                                       index_type, complete_flag, rank_type,
2274                                       xpath_use,
2275                                       rset_nmem,
2276                                       rset, kc);
2277     }
2278     else if (!strcmp(search_type, "or-list"))
2279     {
2280         res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2281                                      index_type, complete_flag, rank_type,
2282                                      xpath_use,
2283                                      rset_nmem,
2284                                      rset, kc);
2285     }
2286     else if (!strcmp(search_type, "local"))
2287     {
2288         res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2289                                    rank_type, rset_nmem, rset, kc);
2290     }
2291     else if (!strcmp(search_type, "numeric"))
2292     {
2293         res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2294                                      index_type, complete_flag, rank_type,
2295                                      xpath_use,
2296                                      rset_nmem,
2297                                      rset, kc);
2298     }
2299     else
2300     {
2301         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2302         res = ZEBRA_FAIL;
2303     }
2304     if (res != ZEBRA_OK)
2305         return res;
2306     if (!*rset)
2307         return ZEBRA_FAIL;
2308     return rpn_search_xpath(zh, stream, rank_type, *rset, 
2309                             xpath_len, xpath, rset_nmem, rset, kc);
2310 }
2311
2312 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2313                                       const Odr_oid *attributeSet, 
2314                                       NMEM stream, NMEM rset_nmem,
2315                                       Z_SortKeySpecList *sort_sequence,
2316                                       int num_bases, char **basenames,
2317                                       RSET **result_sets, int *num_result_sets,
2318                                       Z_Operator *parent_op,
2319                                       struct rset_key_control *kc);
2320
2321 ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs,
2322                                    zint *approx_limit)
2323 {
2324     ZEBRA_RES res = ZEBRA_OK;
2325     if (zs->which == Z_RPNStructure_complex)
2326     {
2327         if (res == ZEBRA_OK)
2328             res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
2329                                            approx_limit);
2330         if (res == ZEBRA_OK)
2331             res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
2332                                            approx_limit);
2333     }
2334     else if (zs->which == Z_RPNStructure_simple)
2335     {
2336         if (zs->u.simple->which == Z_Operand_APT)
2337         {
2338             Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
2339             AttrType global_hits_limit_attr;
2340             int l;
2341             
2342             attr_init_APT(&global_hits_limit_attr, zapt, 12);
2343             
2344             l = attr_find(&global_hits_limit_attr, NULL);
2345             if (l != -1)
2346                 *approx_limit = l;
2347         }
2348     }
2349     return res;
2350 }
2351
2352 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2353                          const Odr_oid *attributeSet, 
2354                          NMEM stream, NMEM rset_nmem,
2355                          Z_SortKeySpecList *sort_sequence,
2356                          int num_bases, char **basenames,
2357                          RSET *result_set)
2358 {
2359     RSET *result_sets = 0;
2360     int num_result_sets = 0;
2361     ZEBRA_RES res;
2362     struct rset_key_control *kc = zebra_key_control_create(zh);
2363
2364     res = rpn_search_structure(zh, zs, attributeSet,
2365                                stream, rset_nmem,
2366                                sort_sequence, 
2367                                num_bases, basenames,
2368                                &result_sets, &num_result_sets,
2369                                0 /* no parent op */,
2370                                kc);
2371     if (res != ZEBRA_OK)
2372     {
2373         int i;
2374         for (i = 0; i<num_result_sets; i++)
2375             rset_delete(result_sets[i]);
2376         *result_set = 0;
2377     }
2378     else
2379     {
2380         assert(num_result_sets == 1);
2381         assert(result_sets);
2382         assert(*result_sets);
2383         *result_set = *result_sets;
2384     }
2385     (*kc->dec)(kc);
2386     return res;
2387 }
2388
2389 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2390                                const Odr_oid *attributeSet, 
2391                                NMEM stream, NMEM rset_nmem,
2392                                Z_SortKeySpecList *sort_sequence,
2393                                int num_bases, char **basenames,
2394                                RSET **result_sets, int *num_result_sets,
2395                                Z_Operator *parent_op,
2396                                struct rset_key_control *kc)
2397 {
2398     *num_result_sets = 0;
2399     if (zs->which == Z_RPNStructure_complex)
2400     {
2401         ZEBRA_RES res;
2402         Z_Operator *zop = zs->u.complex->roperator;
2403         RSET *result_sets_l = 0;
2404         int num_result_sets_l = 0;
2405         RSET *result_sets_r = 0;
2406         int num_result_sets_r = 0;
2407
2408         res = rpn_search_structure(zh, zs->u.complex->s1,
2409                                    attributeSet, stream, rset_nmem,
2410                                    sort_sequence,
2411                                    num_bases, basenames,
2412                                    &result_sets_l, &num_result_sets_l,
2413                                    zop, kc);
2414         if (res != ZEBRA_OK)
2415         {
2416             int i;
2417             for (i = 0; i<num_result_sets_l; i++)
2418                 rset_delete(result_sets_l[i]);
2419             return res;
2420         }
2421         res = rpn_search_structure(zh, zs->u.complex->s2,
2422                                    attributeSet, stream, rset_nmem,
2423                                    sort_sequence,
2424                                    num_bases, basenames,
2425                                    &result_sets_r, &num_result_sets_r,
2426                                    zop, kc);
2427         if (res != ZEBRA_OK)
2428         {
2429             int i;
2430             for (i = 0; i<num_result_sets_l; i++)
2431                 rset_delete(result_sets_l[i]);
2432             for (i = 0; i<num_result_sets_r; i++)
2433                 rset_delete(result_sets_r[i]);
2434             return res;
2435         }
2436
2437         /* make a new list of result for all children */
2438         *num_result_sets = num_result_sets_l + num_result_sets_r;
2439         *result_sets = nmem_malloc(stream, *num_result_sets * 
2440                                    sizeof(**result_sets));
2441         memcpy(*result_sets, result_sets_l, 
2442                num_result_sets_l * sizeof(**result_sets));
2443         memcpy(*result_sets + num_result_sets_l, result_sets_r, 
2444                num_result_sets_r * sizeof(**result_sets));
2445
2446         if (!parent_op || parent_op->which != zop->which
2447             || (zop->which != Z_Operator_and &&
2448                 zop->which != Z_Operator_or))
2449         {
2450             /* parent node different from this one (or non-present) */
2451             /* we must combine result sets now */
2452             RSET rset;
2453             switch (zop->which)
2454             {
2455             case Z_Operator_and:
2456                 rset = rset_create_and(rset_nmem, kc,
2457                                        kc->scope,
2458                                        *num_result_sets, *result_sets);
2459                 break;
2460             case Z_Operator_or:
2461                 rset = rset_create_or(rset_nmem, kc,
2462                                       kc->scope, 0, /* termid */
2463                                       *num_result_sets, *result_sets);
2464                 break;
2465             case Z_Operator_and_not:
2466                 rset = rset_create_not(rset_nmem, kc,
2467                                        kc->scope,
2468                                        (*result_sets)[0],
2469                                        (*result_sets)[1]);
2470                 break;
2471             case Z_Operator_prox:
2472                 if (zop->u.prox->which != Z_ProximityOperator_known)
2473                 {
2474                     zebra_setError(zh, 
2475                                    YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2476                                    0);
2477                     return ZEBRA_FAIL;
2478                 }
2479                 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2480                 {
2481                     zebra_setError_zint(zh,
2482                                         YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2483                                         *zop->u.prox->u.known);
2484                     return ZEBRA_FAIL;
2485                 }
2486                 else
2487                 {
2488                     rset = rset_create_prox(rset_nmem, kc,
2489                                             kc->scope,
2490                                             *num_result_sets, *result_sets, 
2491                                             *zop->u.prox->ordered,
2492                                             (!zop->u.prox->exclusion ? 
2493                                              0 : *zop->u.prox->exclusion),
2494                                             *zop->u.prox->relationType,
2495                                             *zop->u.prox->distance );
2496                 }
2497                 break;
2498             default:
2499                 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
2500                 return ZEBRA_FAIL;
2501             }
2502             *num_result_sets = 1;
2503             *result_sets = nmem_malloc(stream, *num_result_sets * 
2504                                        sizeof(**result_sets));
2505             (*result_sets)[0] = rset;
2506         }
2507     }
2508     else if (zs->which == Z_RPNStructure_simple)
2509     {
2510         RSET rset;
2511         ZEBRA_RES res;
2512
2513         if (zs->u.simple->which == Z_Operand_APT)
2514         {
2515             yaz_log(YLOG_DEBUG, "rpn_search_APT");
2516             res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2517                                  attributeSet, stream, sort_sequence,
2518                                  num_bases, basenames, rset_nmem, &rset,
2519                                  kc);
2520             if (res != ZEBRA_OK)
2521                 return res;
2522         }
2523         else if (zs->u.simple->which == Z_Operand_resultSetId)
2524         {
2525             yaz_log(YLOG_DEBUG, "rpn_search_ref");
2526             rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2527             if (!rset)
2528             {
2529                 zebra_setError(zh, 
2530                                YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2531                                zs->u.simple->u.resultSetId);
2532                 return ZEBRA_FAIL;
2533             }
2534             rset_dup(rset);
2535         }
2536         else
2537         {
2538             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2539             return ZEBRA_FAIL;
2540         }
2541         *num_result_sets = 1;
2542         *result_sets = nmem_malloc(stream, *num_result_sets * 
2543                                    sizeof(**result_sets));
2544         (*result_sets)[0] = rset;
2545     }
2546     else
2547     {
2548         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2549         return ZEBRA_FAIL;
2550     }
2551     return ZEBRA_OK;
2552 }
2553
2554
2555
2556 /*
2557  * Local variables:
2558  * c-basic-offset: 4
2559  * indent-tabs-mode: nil
2560  * End:
2561  * vim: shiftwidth=4 tabstop=8 expandtab
2562  */
2563