Do not build for Ubuntu raring, quantal (obsolete)
[idzebra-moved-to-github.git] / index / rpnsearch.c
1 /* This file is part of the Zebra server.
2    Copyright (C) Index Data
3
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17
18 */
19
20 #if HAVE_CONFIG_H
21 #include <config.h>
22 #endif
23 #include <stdio.h>
24 #include <assert.h>
25 #ifdef WIN32
26 #include <io.h>
27 #endif
28 #if HAVE_UNISTD_H
29 #include <unistd.h>
30 #endif
31 #include <ctype.h>
32
33 #include <yaz/diagbib1.h>
34 #include "index.h"
35 #include <zebra_xpath.h>
36 #include <attrfind.h>
37 #include <charmap.h>
38 #include <rset.h>
39
/* Lazily initialised log level for the "rpn" log module: add_isam_p
   fetches log_level_rpn on first use and then sets log_level_set to 1. */
static int log_level_set = 0;
static int log_level_rpn = 0;

/* NOTE(review): not referenced in the visible part of this file;
   presumably it disables term set handling - confirm at its use site. */
#define TERMSET_DISABLE 1
44
45 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
46 {
47     struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
48     const char **out = zebra_maps_input(p->zm, from, len, 0);
49 #if 0
50     if (out && *out)
51     {
52         const char *outp = *out;
53         yaz_log(YLOG_LOG, "---");
54         while (*outp)
55         {
56             yaz_log(YLOG_LOG, "%02X", *outp);
57             outp++;
58         }
59     }
60 #endif
61     return out;
62 }
63
64 void rpn_char_map_prepare(struct zebra_register *reg, zebra_map_t zm,
65                           struct rpn_char_map_info *map_info)
66 {
67     map_info->zm = zm;
68     if (zebra_maps_is_icu(zm))
69         dict_grep_cmap(reg->dict, 0, 0);
70     else
71         dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
72 }
73
/* When defined, grep_info carries a term_no array that is grown in
   step with isam_p_buf (see add_isam_p). */
#define TERM_COUNT

/* Collector filled by add_isam_p while the dictionary is searched. */
struct grep_info {
#ifdef TERM_COUNT
    int *term_no;           /* grown in parallel with isam_p_buf */
#endif
    ISAM_P *isam_p_buf;     /* collected ISAM positions */
    int isam_p_size;        /* allocated number of entries */
    int isam_p_indx;        /* number of entries in use */
    int trunc_max;          /* add_isam_p refuses entries at trunc_max - 1 */
    ZebraHandle zh;
    const char *index_type; /* passed to zebra_term_untrans */
    ZebraSet termset;       /* if set, every hit is recorded with
                               resultSetAddTerm */
};
88
89 static int add_isam_p(const char *name, const char *info,
90                       struct grep_info *p)
91 {
92     if (!log_level_set)
93     {
94         log_level_rpn = yaz_log_module_level("rpn");
95         log_level_set = 1;
96     }
97     /* we may have to stop this madness.. NOTE: -1 so that if
98        truncmax == trunxlimit we do *not* generate result sets */
99     if (p->isam_p_indx >= p->trunc_max - 1)
100         return 1;
101
102     if (p->isam_p_indx == p->isam_p_size)
103     {
104         ISAM_P *new_isam_p_buf;
105 #ifdef TERM_COUNT
106         int *new_term_no;
107 #endif
108         p->isam_p_size = 2*p->isam_p_size + 100;
109         new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
110                                             p->isam_p_size);
111         if (p->isam_p_buf)
112         {
113             memcpy(new_isam_p_buf, p->isam_p_buf,
114                    p->isam_p_indx * sizeof(*p->isam_p_buf));
115             xfree(p->isam_p_buf);
116         }
117         p->isam_p_buf = new_isam_p_buf;
118
119 #ifdef TERM_COUNT
120         new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
121         if (p->term_no)
122         {
123             memcpy(new_term_no, p->isam_p_buf,
124                    p->isam_p_indx * sizeof(*p->term_no));
125             xfree(p->term_no);
126         }
127         p->term_no = new_term_no;
128 #endif
129     }
130     assert(*info == sizeof(*p->isam_p_buf));
131     memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
132
133     if (p->termset)
134     {
135         const char *db;
136         char term_tmp[IT_MAX_WORD];
137         int ord = 0;
138         const char *index_name;
139         int len = key_SU_decode(&ord, (const unsigned char *) name);
140
141         zebra_term_untrans (p->zh, p->index_type, term_tmp, name+len);
142         yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
143         zebraExplain_lookup_ord(p->zh->reg->zei,
144                                 ord, 0 /* index_type */, &db, &index_name);
145         yaz_log(log_level_rpn, "grep:  db=%s index=%s", db, index_name);
146
147         resultSetAddTerm(p->zh, p->termset, name[len], db,
148                          index_name, term_tmp);
149     }
150     (p->isam_p_indx)++;
151     return 0;
152 }
153
/* Callback adapter: p is the struct grep_info collector, passed as an
   untyped pointer; the hit is handed on to add_isam_p. */
static int grep_handle(char *name, const char *info, void *p)
{
    struct grep_info *collector = (struct grep_info *) p;

    return add_isam_p(name, info, collector);
}
158
159 static int term_pre(zebra_map_t zm, const char **src,
160                     const char *ct1, int first)
161 {
162     const char *s1, *s0 = *src;
163     const char **map;
164
165     /* skip white space */
166     while (*s0)
167     {
168         if (ct1 && strchr(ct1, *s0))
169             break;
170         s1 = s0;
171         map = zebra_maps_input(zm, &s1, strlen(s1), first);
172         if (**map != *CHR_SPACE)
173             break;
174         s0 = s1;
175     }
176     *src = s0;
177     return *s0;
178 }
179
180
/* esc_str: write a printable dump of in_buf to out_buf.
 * Each input byte becomes "HH:c  " (two hex digits, a colon, the
 * character itself or '?' when it is outside 32..126, two spaces).
 * Dumping stops with a trailing ".." once the output is longer than
 * out_size - 20, so out_size must be greater than 20.
 */
static void esc_str(char *out_buf, size_t out_size,
                    const char *in_buf, int in_size)
{
    size_t used = 0;   /* current length of out_buf */
    int k;

    assert(out_buf);
    assert(in_buf);
    assert(out_size > 20);
    out_buf[0] = '\0';
    for (k = 0; k < in_size; k++)
    {
        int c = in_buf[k] & 0xff;
        int pc = (c >= 32 && c <= 126) ? c : '?';

        used += (size_t) sprintf(out_buf + used, "%02X:%c  ", c, pc);
        if (used > out_size-20)
        {
            strcat(out_buf, "..");
            break;
        }
    }
}
206
207 #define REGEX_CHARS " ^[]()|.*+?!\"$\\"
208
209 static void add_non_space(const char *start, const char *end,
210                           WRBUF term_dict,
211                           WRBUF display_term,
212                           const char **map, int q_map_match)
213 {
214     size_t sz = end - start;
215
216     wrbuf_write(display_term, start, sz);
217     if (!q_map_match)
218     {
219         while (start < end)
220         {
221             if (strchr(REGEX_CHARS, *start))
222                 wrbuf_putc(term_dict, '\\');
223             wrbuf_putc(term_dict, *start);
224             start++;
225         }
226     }
227     else
228     {
229         char tmpbuf[80];
230         esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
231
232         wrbuf_puts(term_dict, map[0]);
233     }
234 }
235
236
/* ICU sort keys seem to be of the form
     basechars \x01 accents \x01 length
   For now we just right-truncate to the basechars part. This
   may give false hits because the accents are not used.

   Walks backwards from offset i over two \x01-terminated sections and
   returns the offset reached (0 if fewer than two \x01 bytes exist).
*/
static size_t icu_basechars(const char *buf, size_t i)
{
    int section;

    /* first pass skips the length, second pass skips the accents */
    for (section = 0; section < 2; section++)
    {
        while (i > 0)
        {
            i--;
            if (buf[i] == '\x01')
                break;
        }
    }
    return i; /* only basechars left */
}
250
251 static int term_102_icu(zebra_map_t zm,
252                         const char **src, WRBUF term_dict, int space_split,
253                         WRBUF display_term)
254 {
255     int no_terms = 0;
256     const char *s0 = *src, *s1;
257     while (*s0 == ' ')
258         s0++;
259     s1 = s0;
260     for (;;)
261     {
262         if (*s1 == ' ' && space_split)
263             break;
264         else if (*s1 && !strchr(REGEX_CHARS "-", *s1))
265             s1++;
266         else
267         {
268             /* EOF or regex reserved char */
269             if (s0 != s1)
270             {
271                 const char *res_buf = 0;
272                 size_t res_len = 0;
273                 const char *display_buf;
274                 size_t display_len;
275
276                 zebra_map_tokenize_start(zm, s0, s1 - s0);
277
278                 if (zebra_map_tokenize_next(zm, &res_buf, &res_len,
279                                             &display_buf, &display_len))
280                 {
281                     size_t i;
282                     res_len = icu_basechars(res_buf, res_len);
283                     for (i = 0; i < res_len; i++)
284                     {
285                         if (strchr(REGEX_CHARS "\\", res_buf[i]))
286                             wrbuf_putc(term_dict, '\\');
287                         if (res_buf[i] < 32)
288                             wrbuf_putc(term_dict, '\x01');
289
290                         wrbuf_putc(term_dict, res_buf[i]);
291                     }
292                     wrbuf_write(display_term, display_buf, display_len);
293
294                     no_terms++;
295                 }
296             }
297             if (*s1 == '\0')
298                 break;
299
300             wrbuf_putc(term_dict, *s1);
301             wrbuf_putc(display_term, *s1);
302
303             s1++;
304             s0 = s1;
305         }
306     }
307     if (no_terms)
308         wrbuf_puts(term_dict, "\x01\x01.*");
309     *src = s1;
310     return no_terms;
311 }
312
313 static int term_100_icu(zebra_map_t zm,
314                         const char **src, WRBUF term_dict, int space_split,
315                         WRBUF display_term,
316                         int mode)
317 {
318     size_t i;
319     const char *res_buf = 0;
320     size_t res_len = 0;
321     const char *display_buf;
322     size_t display_len;
323     const char *s0 = *src, *s1;
324
325     while (*s0 == ' ')
326         s0++;
327
328     if (*s0 == '\0')
329         return 0;
330
331     if (space_split)
332     {
333         s1 = s0;
334         while (*s1 && *s1 != ' ')
335             s1++;
336     }
337     else
338         s1 = s0 + strlen(s0);
339
340     *src = s1;
341
342     zebra_map_tokenize_start(zm, s0, s1 - s0);
343
344     if (!zebra_map_tokenize_next(zm, &res_buf, &res_len,
345                                  &display_buf, &display_len))
346     {
347         return 0;
348     }
349     wrbuf_write(display_term, display_buf, display_len);
350     if (mode)
351     {
352         res_len = icu_basechars(res_buf, res_len);
353     }
354     if (mode & 2)
355         wrbuf_puts(term_dict, ".*");
356     for (i = 0; i < res_len; i++)
357     {
358         if (strchr(REGEX_CHARS "\\", res_buf[i]))
359             wrbuf_putc(term_dict, '\\');
360         if (res_buf[i] < 32)
361             wrbuf_putc(term_dict, '\x01');
362
363         wrbuf_putc(term_dict, res_buf[i]);
364     }
365     if (mode & 1)
366         wrbuf_puts(term_dict, ".*");
367     else if (mode)
368         wrbuf_puts(term_dict, "\x01\x01.*");
369     return 1;
370 }
371
372 /* term_100: handle term, where trunc = none(no operators at all) */
373 static int term_100(zebra_map_t zm,
374                     const char **src, WRBUF term_dict, int space_split,
375                     WRBUF display_term)
376 {
377     const char *s0;
378     const char **map;
379     int i = 0;
380
381     const char *space_start = 0;
382     const char *space_end = 0;
383
384     if (!term_pre(zm, src, 0, !space_split))
385         return 0;
386     s0 = *src;
387     while (*s0)
388     {
389         const char *s1 = s0;
390         int q_map_match = 0;
391         map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
392         if (space_split)
393         {
394             if (**map == *CHR_SPACE)
395                 break;
396         }
397         else  /* complete subfield only. */
398         {
399             if (**map == *CHR_SPACE)
400             {   /* save space mapping for later  .. */
401                 space_start = s1;
402                 space_end = s0;
403                 continue;
404             }
405             else if (space_start)
406             {   /* reload last space */
407                 while (space_start < space_end)
408                 {
409                     if (strchr(REGEX_CHARS, *space_start))
410                         wrbuf_putc(term_dict, '\\');
411                     wrbuf_putc(display_term, *space_start);
412                     wrbuf_putc(term_dict, *space_start);
413                     space_start++;
414
415                 }
416                 /* and reset */
417                 space_start = space_end = 0;
418             }
419         }
420         i++;
421
422         add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
423     }
424     *src = s0;
425     return i;
426 }
427
428 /* term_101: handle term, where trunc = Process # */
429 static int term_101(zebra_map_t zm,
430                     const char **src, WRBUF term_dict, int space_split,
431                     WRBUF display_term)
432 {
433     const char *s0;
434     const char **map;
435     int i = 0;
436
437     if (!term_pre(zm, src, "#", !space_split))
438         return 0;
439     s0 = *src;
440     while (*s0)
441     {
442         if (*s0 == '#')
443         {
444             i++;
445             wrbuf_puts(term_dict, ".*");
446             wrbuf_putc(display_term, *s0);
447             s0++;
448         }
449         else
450         {
451             const char *s1 = s0;
452             int q_map_match = 0;
453             map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
454             if (space_split && **map == *CHR_SPACE)
455                 break;
456
457             i++;
458             add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
459         }
460     }
461     *src = s0;
462     return i;
463 }
464
465 /* term_103: handle term, where trunc = re-2 (regular expressions) */
466 static int term_103(zebra_map_t zm, const char **src,
467                     WRBUF term_dict, int *errors, int space_split,
468                     WRBUF display_term)
469 {
470     int i = 0;
471     const char *s0;
472     const char **map;
473
474     if (!term_pre(zm, src, "^\\()[].*+?|", !space_split))
475         return 0;
476     s0 = *src;
477     if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
478         isdigit(((const unsigned char *)s0)[1]))
479     {
480         *errors = s0[1] - '0';
481         s0 += 3;
482         if (*errors > 3)
483             *errors = 3;
484     }
485     while (*s0)
486     {
487         if (strchr("^\\()[].*+?|-", *s0))
488         {
489             wrbuf_putc(display_term, *s0);
490             wrbuf_putc(term_dict, *s0);
491             s0++;
492             i++;
493         }
494         else
495         {
496             const char *s1 = s0;
497             int q_map_match = 0;
498             map = zebra_maps_search(zm, &s0, strlen(s0),  &q_map_match);
499             if (space_split && **map == *CHR_SPACE)
500                 break;
501
502             i++;
503             add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
504         }
505     }
506     *src = s0;
507
508     return i;
509 }
510
511 /* term_103: handle term, where trunc = re-1 (regular expressions) */
512 static int term_102(zebra_map_t zm, const char **src,
513                     WRBUF term_dict, int space_split, WRBUF display_term)
514 {
515     return term_103(zm, src, term_dict, NULL, space_split, display_term);
516 }
517
518
519 /* term_104: handle term, process ?n * # */
520 static int term_104(zebra_map_t zm, const char **src,
521                     WRBUF term_dict, int space_split, WRBUF display_term)
522 {
523     const char *s0;
524     const char **map;
525     int i = 0;
526
527     if (!term_pre(zm, src, "?*#", !space_split))
528         return 0;
529     s0 = *src;
530     while (*s0)
531     {
532         if (*s0 == '?')
533         {
534             i++;
535             wrbuf_putc(display_term, *s0);
536             s0++;
537             if (*s0 >= '0' && *s0 <= '9')
538             {
539                 int limit = 0;
540                 while (*s0 >= '0' && *s0 <= '9')
541                 {
542                     limit = limit * 10 + (*s0 - '0');
543                     wrbuf_putc(display_term, *s0);
544                     s0++;
545                 }
546                 if (limit > 20)
547                     limit = 20;
548                 while (--limit >= 0)
549                 {
550                     wrbuf_puts(term_dict, ".?");
551                 }
552             }
553             else
554             {
555                 wrbuf_puts(term_dict, ".*");
556             }
557         }
558         else if (*s0 == '*')
559         {
560             i++;
561             wrbuf_puts(term_dict, ".*");
562             wrbuf_putc(display_term, *s0);
563             s0++;
564         }
565         else if (*s0 == '#')
566         {
567             i++;
568             wrbuf_puts(term_dict, ".");
569             wrbuf_putc(display_term, *s0);
570             s0++;
571         }
572         else
573         {
574             const char *s1 = s0;
575             int q_map_match = 0;
576             map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
577             if (space_split && **map == *CHR_SPACE)
578                 break;
579
580             i++;
581             add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
582         }
583     }
584     *src = s0;
585     return i;
586 }
587
588 /* term_105/106: handle term, process * ! and possibly right_truncate */
589 static int term_105(zebra_map_t zm, const char **src,
590                     WRBUF term_dict, int space_split,
591                     WRBUF display_term, int right_truncate)
592 {
593     const char *s0;
594     const char **map;
595     int i = 0;
596
597     if (!term_pre(zm, src, "\\*!", !space_split))
598         return 0;
599     s0 = *src;
600     while (*s0)
601     {
602         if (*s0 == '*')
603         {
604             i++;
605             wrbuf_puts(term_dict, ".*");
606             wrbuf_putc(display_term, *s0);
607             s0++;
608         }
609         else if (*s0 == '!')
610         {
611             i++;
612             wrbuf_putc(term_dict, '.');
613             wrbuf_putc(display_term, *s0);
614             s0++;
615         }
616         else if (*s0 == '\\')
617         {
618             i++;
619             wrbuf_puts(term_dict, "\\\\");
620             wrbuf_putc(display_term, *s0);
621             s0++;
622         }
623         else
624         {
625             const char *s1 = s0;
626             int q_map_match = 0;
627             map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
628             if (space_split && **map == *CHR_SPACE)
629                 break;
630
631             i++;
632             add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
633         }
634     }
635     if (right_truncate)
636         wrbuf_puts(term_dict, ".*");
637     *src = s0;
638     return i;
639 }
640
641
642 /* gen_regular_rel - generate regular expression from relation
643  *  val:     border value (inclusive)
644  *  islt:    1 if <=; 0 if >=.
645  */
646 static void gen_regular_rel(WRBUF term_dict, int val, int islt)
647 {
648     char dst_buf[20*5*20]; /* assuming enough for expansion */
649     char *dst = dst_buf;
650     int dst_p;
651     int w, d, i;
652     int pos = 0;
653     char numstr[20];
654
655     yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
656     if (val >= 0)
657     {
658         if (islt)
659             strcpy(dst, "(-[0-9]+|(");
660         else
661             strcpy(dst, "((");
662     }
663     else
664     {
665         if (!islt)
666         {
667             strcpy(dst, "([0-9]+|-(");
668             islt = 1;
669         }
670         else
671         {
672             strcpy(dst, "(-(");
673             islt = 0;
674         }
675         val = -val;
676     }
677     dst_p = strlen(dst);
678     sprintf(numstr, "%d", val);
679     for (w = strlen(numstr); --w >= 0; pos++)
680     {
681         d = numstr[w];
682         if (pos > 0)
683         {
684             if (islt)
685             {
686                 if (d == '0')
687                     continue;
688                 d--;
689             }
690             else
691             {
692                 if (d == '9')
693                     continue;
694                 d++;
695             }
696         }
697
698         strcpy(dst + dst_p, numstr);
699         dst_p = strlen(dst) - pos - 1;
700
701         if (islt)
702         {
703             if (d != '0')
704             {
705                 dst[dst_p++] = '[';
706                 dst[dst_p++] = '0';
707                 dst[dst_p++] = '-';
708                 dst[dst_p++] = d;
709                 dst[dst_p++] = ']';
710             }
711             else
712                 dst[dst_p++] = d;
713         }
714         else
715         {
716             if (d != '9')
717             {
718                 dst[dst_p++] = '[';
719                 dst[dst_p++] = d;
720                 dst[dst_p++] = '-';
721                 dst[dst_p++] = '9';
722                 dst[dst_p++] = ']';
723             }
724             else
725                 dst[dst_p++] = d;
726         }
727         for (i = 0; i < pos; i++)
728         {
729             dst[dst_p++] = '[';
730             dst[dst_p++] = '0';
731             dst[dst_p++] = '-';
732             dst[dst_p++] = '9';
733             dst[dst_p++] = ']';
734         }
735         dst[dst_p++] = '|';
736     }
737     dst[dst_p] = '\0';
738     if (islt)
739     {
740         /* match everything less than 10^(pos-1) */
741         strcat(dst, "0*");
742         for (i = 1; i < pos; i++)
743             strcat(dst, "[0-9]?");
744     }
745     else
746     {
747         /* match everything greater than 10^pos */
748         for (i = 0; i <= pos; i++)
749             strcat(dst, "[0-9]");
750         strcat(dst, "[0-9]*");
751     }
752     strcat(dst, "))");
753     wrbuf_puts(term_dict, dst);
754 }
755
756 void string_rel_add_char(WRBUF term_p, WRBUF wsrc, int *indx)
757 {
758     const char *src = wrbuf_cstr(wsrc);
759     if (src[*indx] == '\\')
760     {
761         wrbuf_putc(term_p, src[*indx]);
762         (*indx)++;
763     }
764     wrbuf_putc(term_p, src[*indx]);
765     (*indx)++;
766 }
767
768 /*
769  *   >  abc     ([b-].*|a[c-].*|ab[d-].*|abc.+)
770  *              ([^-a].*|a[^-b].*ab[^-c].*|abc.+)
771  *   >= abc     ([b-].*|a[c-].*|ab[c-].*)
772  *              ([^-a].*|a[^-b].*|ab[c-].*)
773  *   <  abc     ([-0].*|a[-a].*|ab[-b].*)
774  *              ([^a-].*|a[^b-].*|ab[^c-].*)
775  *   <= abc     ([-0].*|a[-a].*|ab[-b].*|abc)
776  *              ([^a-].*|a[^b-].*|ab[^c-].*|abc)
777  */
778 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
779                            const char **term_sub, WRBUF term_dict,
780                            const Odr_oid *attributeSet,
781                            zebra_map_t zm, int space_split,
782                            WRBUF display_term,
783                            int *error_code)
784 {
785     AttrType relation;
786     int relation_value;
787     int i;
788     WRBUF term_component = wrbuf_alloc();
789
790     attr_init_APT(&relation, zapt, 2);
791     relation_value = attr_find(&relation, NULL);
792
793     *error_code = 0;
794     yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
795     switch (relation_value)
796     {
797     case 1:
798         if (!term_100(zm, term_sub, term_component, space_split, display_term))
799         {
800             wrbuf_destroy(term_component);
801             return 0;
802         }
803         yaz_log(log_level_rpn, "Relation <");
804
805         wrbuf_putc(term_dict, '(');
806         for (i = 0; i < wrbuf_len(term_component); )
807         {
808             int j = 0;
809
810             if (i)
811                 wrbuf_putc(term_dict, '|');
812             while (j < i)
813                 string_rel_add_char(term_dict, term_component, &j);
814
815             wrbuf_putc(term_dict, '[');
816
817             wrbuf_putc(term_dict, '^');
818
819             wrbuf_putc(term_dict, 1);
820             wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
821
822             string_rel_add_char(term_dict, term_component, &i);
823             wrbuf_putc(term_dict, '-');
824
825             wrbuf_putc(term_dict, ']');
826             wrbuf_putc(term_dict, '.');
827             wrbuf_putc(term_dict, '*');
828         }
829         wrbuf_putc(term_dict, ')');
830         break;
831     case 2:
832         if (!term_100(zm, term_sub, term_component, space_split, display_term))
833         {
834             wrbuf_destroy(term_component);
835             return 0;
836         }
837         yaz_log(log_level_rpn, "Relation <=");
838
839         wrbuf_putc(term_dict, '(');
840         for (i = 0; i < wrbuf_len(term_component); )
841         {
842             int j = 0;
843
844             while (j < i)
845                 string_rel_add_char(term_dict, term_component, &j);
846             wrbuf_putc(term_dict, '[');
847
848             wrbuf_putc(term_dict, '^');
849
850             wrbuf_putc(term_dict, 1);
851             wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
852
853             string_rel_add_char(term_dict, term_component, &i);
854             wrbuf_putc(term_dict, '-');
855
856             wrbuf_putc(term_dict, ']');
857             wrbuf_putc(term_dict, '.');
858             wrbuf_putc(term_dict, '*');
859
860             wrbuf_putc(term_dict, '|');
861         }
862         for (i = 0; i < wrbuf_len(term_component); )
863             string_rel_add_char(term_dict, term_component, &i);
864         wrbuf_putc(term_dict, ')');
865         break;
866     case 5:
867         if (!term_100(zm, term_sub, term_component, space_split, display_term))
868         {
869             wrbuf_destroy(term_component);
870             return 0;
871         }
872         yaz_log(log_level_rpn, "Relation >");
873
874         wrbuf_putc(term_dict, '(');
875         for (i = 0; i < wrbuf_len(term_component); )
876         {
877             int j = 0;
878
879             while (j < i)
880                 string_rel_add_char(term_dict, term_component, &j);
881             wrbuf_putc(term_dict, '[');
882
883             wrbuf_putc(term_dict, '^');
884             wrbuf_putc(term_dict, '-');
885             string_rel_add_char(term_dict, term_component, &i);
886
887             wrbuf_putc(term_dict, ']');
888             wrbuf_putc(term_dict, '.');
889             wrbuf_putc(term_dict, '*');
890
891             wrbuf_putc(term_dict, '|');
892         }
893         for (i = 0; i < wrbuf_len(term_component); )
894             string_rel_add_char(term_dict, term_component, &i);
895         wrbuf_putc(term_dict, '.');
896         wrbuf_putc(term_dict, '+');
897         wrbuf_putc(term_dict, ')');
898         break;
899     case 4:
900         if (!term_100(zm, term_sub, term_component, space_split, display_term))
901         {
902             wrbuf_destroy(term_component);
903             return 0;
904         }
905         yaz_log(log_level_rpn, "Relation >=");
906
907         wrbuf_putc(term_dict, '(');
908         for (i = 0; i < wrbuf_len(term_component); )
909         {
910             int j = 0;
911
912             if (i)
913                 wrbuf_putc(term_dict, '|');
914             while (j < i)
915                 string_rel_add_char(term_dict, term_component, &j);
916             wrbuf_putc(term_dict, '[');
917
918             if (i < wrbuf_len(term_component)-1)
919             {
920                 wrbuf_putc(term_dict, '^');
921                 wrbuf_putc(term_dict, '-');
922                 string_rel_add_char(term_dict, term_component, &i);
923             }
924             else
925             {
926                 string_rel_add_char(term_dict, term_component, &i);
927                 wrbuf_putc(term_dict, '-');
928             }
929             wrbuf_putc(term_dict, ']');
930             wrbuf_putc(term_dict, '.');
931             wrbuf_putc(term_dict, '*');
932         }
933         wrbuf_putc(term_dict, ')');
934         break;
935     case 3:
936     case 102:
937     case -1:
938         if (!**term_sub)
939             return 1;
940         yaz_log(log_level_rpn, "Relation =");
941         if (!term_100(zm, term_sub, term_component, space_split, display_term))
942         {
943             wrbuf_destroy(term_component);
944             return 0;
945         }
946         wrbuf_puts(term_dict, "(");
947         wrbuf_puts(term_dict, wrbuf_cstr(term_component));
948         wrbuf_puts(term_dict, ")");
949         break;
950     case 103:
951         yaz_log(log_level_rpn, "Relation always matches");
952         /* skip to end of term (we don't care what it is) */
953         while (**term_sub != '\0')
954             (*term_sub)++;
955         break;
956     default:
957         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
958         wrbuf_destroy(term_component);
959         return 0;
960     }
961     wrbuf_destroy(term_component);
962     return 1;
963 }
964
965 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
966                              const char **term_sub,
967                              WRBUF term_dict,
968                              const Odr_oid *attributeSet, NMEM stream,
969                              struct grep_info *grep_info,
970                              const char *index_type, int complete_flag,
971                              WRBUF display_term,
972                              const char *xpath_use,
973                              struct ord_list **ol,
974                              zebra_map_t zm);
975
976 ZEBRA_RES zebra_term_limits_APT(ZebraHandle zh,
977                                 Z_AttributesPlusTerm *zapt,
978                                 zint *hits_limit_value,
979                                 const char **term_ref_id_str,
980                                 NMEM nmem)
981 {
982     AttrType term_ref_id_attr;
983     AttrType hits_limit_attr;
984     int term_ref_id_int;
985     zint hits_limit_from_attr;
986
987     attr_init_APT(&hits_limit_attr, zapt, 11);
988     hits_limit_from_attr  = attr_find(&hits_limit_attr, NULL);
989
990     attr_init_APT(&term_ref_id_attr, zapt, 10);
991     term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
992     if (term_ref_id_int >= 0)
993     {
994         char *res = nmem_malloc(nmem, 20);
995         sprintf(res, "%d", term_ref_id_int);
996         *term_ref_id_str = res;
997     }
998     if (hits_limit_from_attr != -1)
999         *hits_limit_value = hits_limit_from_attr;
1000
1001     yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
1002             *term_ref_id_str ? *term_ref_id_str : "none",
1003             *hits_limit_value);
1004     return ZEBRA_OK;
1005 }
1006
1007 /** \brief search for term (which may be truncated)
1008  */
1009 static ZEBRA_RES search_term(ZebraHandle zh,
1010                              Z_AttributesPlusTerm *zapt,
1011                              const char **term_sub,
1012                              const Odr_oid *attributeSet,
1013                              zint hits_limit, NMEM stream,
1014                              struct grep_info *grep_info,
1015                              const char *index_type, int complete_flag,
1016                              const char *rank_type,
1017                              const char *xpath_use,
1018                              NMEM rset_nmem,
1019                              RSET *rset,
1020                              struct rset_key_control *kc,
1021                              zebra_map_t zm)
1022 {
1023     ZEBRA_RES res;
1024     struct ord_list *ol;
1025     zint hits_limit_value = hits_limit;
1026     const char *term_ref_id_str = 0;
1027     WRBUF term_dict = wrbuf_alloc();
1028     WRBUF display_term = wrbuf_alloc();
1029     *rset = 0;
1030     zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
1031                           stream);
1032     grep_info->isam_p_indx = 0;
1033     res = string_term(zh, zapt, term_sub, term_dict,
1034                       attributeSet, stream, grep_info,
1035                       index_type, complete_flag,
1036                       display_term, xpath_use, &ol, zm);
1037     wrbuf_destroy(term_dict);
1038     if (res == ZEBRA_OK && *term_sub)
1039     {
1040         yaz_log(log_level_rpn, "term: %s", wrbuf_cstr(display_term));
1041         *rset = rset_trunc(zh, grep_info->isam_p_buf,
1042                            grep_info->isam_p_indx, wrbuf_buf(display_term),
1043                            wrbuf_len(display_term), rank_type,
1044                            1 /* preserve pos */,
1045                            zapt->term->which, rset_nmem,
1046                            kc, kc->scope, ol, index_type, hits_limit_value,
1047                            term_ref_id_str);
1048         if (!*rset)
1049             res = ZEBRA_FAIL;
1050     }
1051     wrbuf_destroy(display_term);
1052     return res;
1053 }
1054
1055 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1056                              const char **term_sub,
1057                              WRBUF term_dict,
1058                              const Odr_oid *attributeSet, NMEM stream,
1059                              struct grep_info *grep_info,
1060                              const char *index_type, int complete_flag,
1061                              WRBUF display_term,
1062                              const char *xpath_use,
1063                              struct ord_list **ol,
1064                              zebra_map_t zm)
1065 {
1066     int r;
1067     AttrType truncation;
1068     int truncation_value;
1069     const char *termp;
1070     struct rpn_char_map_info rcmi;
1071
1072     int space_split = complete_flag ? 0 : 1;
1073     int ord = -1;
1074     int regex_range = 0;
1075     int max_pos, prefix_len = 0;
1076     int relation_error;
1077     char ord_buf[32];
1078     int ord_len, i;
1079
1080     *ol = ord_list_create(stream);
1081
1082     rpn_char_map_prepare(zh->reg, zm, &rcmi);
1083     attr_init_APT(&truncation, zapt, 5);
1084     truncation_value = attr_find(&truncation, NULL);
1085     yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1086
1087     termp = *term_sub; /* start of term for each database */
1088
1089     if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1090                           attributeSet, &ord) != ZEBRA_OK)
1091     {
1092         *term_sub = 0;
1093         return ZEBRA_FAIL;
1094     }
1095
1096     wrbuf_rewind(term_dict); /* new dictionary regexp term */
1097
1098     *ol = ord_list_append(stream, *ol, ord);
1099     ord_len = key_SU_encode(ord, ord_buf);
1100
1101     wrbuf_putc(term_dict, '(');
1102
1103     for (i = 0; i < ord_len; i++)
1104     {
1105         wrbuf_putc(term_dict, 1);  /* our internal regexp escape char */
1106         wrbuf_putc(term_dict, ord_buf[i]);
1107     }
1108     wrbuf_putc(term_dict, ')');
1109
1110     prefix_len = wrbuf_len(term_dict);
1111
1112     if (zebra_maps_is_icu(zm))
1113     {
1114         int relation_value;
1115         AttrType relation;
1116
1117         attr_init_APT(&relation, zapt, 2);
1118         relation_value = attr_find(&relation, NULL);
1119         if (relation_value == 103) /* always matches */
1120             termp += strlen(termp); /* move to end of term */
1121         else if (relation_value == 3 || relation_value == 102 || relation_value == -1)
1122         {
1123             /* ICU case */
1124             switch (truncation_value)
1125             {
1126             case -1:         /* not specified */
1127             case 100:        /* do not truncate */
1128                 if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 0))
1129                 {
1130                     *term_sub = 0;
1131                     return ZEBRA_OK;
1132                 }
1133                 break;
1134             case 102:
1135                 if (!term_102_icu(zm, &termp, term_dict, space_split, display_term))
1136                 {
1137                     *term_sub = 0;
1138                     return ZEBRA_OK;
1139                 }
1140                 break;
1141             case 1:          /* right truncation */
1142                 if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 1))
1143                 {
1144                     *term_sub = 0;
1145                     return ZEBRA_OK;
1146                 }
1147                 break;
1148             case 2:
1149                 if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 2))
1150                 {
1151                     *term_sub = 0;
1152                     return ZEBRA_OK;
1153                 }
1154                 break;
1155             case 3:
1156                 if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 3))
1157                 {
1158                     *term_sub = 0;
1159                     return ZEBRA_OK;
1160                 }
1161                 break;
1162             default:
1163                 zebra_setError_zint(zh,
1164                                     YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1165                                     truncation_value);
1166                 return ZEBRA_FAIL;
1167             }
1168         }
1169         else
1170         {
1171             zebra_setError_zint(zh,
1172                                 YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE,
1173                                 relation_value);
1174             return ZEBRA_FAIL;
1175         }
1176     }
1177     else
1178     {
1179         /* non-ICU case. using string.chr and friends */
1180         switch (truncation_value)
1181         {
1182         case -1:         /* not specified */
1183         case 100:        /* do not truncate */
1184             if (!string_relation(zh, zapt, &termp, term_dict,
1185                                  attributeSet,
1186                                  zm, space_split, display_term,
1187                                  &relation_error))
1188             {
1189                 if (relation_error)
1190                 {
1191                     zebra_setError(zh, relation_error, 0);
1192                     return ZEBRA_FAIL;
1193                 }
1194                 *term_sub = 0;
1195                 return ZEBRA_OK;
1196             }
1197             break;
1198         case 1:          /* right truncation */
1199             wrbuf_putc(term_dict, '(');
1200             if (!term_100(zm, &termp, term_dict, space_split, display_term))
1201             {
1202                 *term_sub = 0;
1203                 return ZEBRA_OK;
1204             }
1205             wrbuf_puts(term_dict, ".*)");
1206             break;
1207         case 2:          /* left truncation */
1208             wrbuf_puts(term_dict, "(.*");
1209             if (!term_100(zm, &termp, term_dict, space_split, display_term))
1210             {
1211                 *term_sub = 0;
1212                 return ZEBRA_OK;
1213             }
1214             wrbuf_putc(term_dict, ')');
1215             break;
1216         case 3:          /* left&right truncation */
1217             wrbuf_puts(term_dict, "(.*");
1218             if (!term_100(zm, &termp, term_dict, space_split, display_term))
1219             {
1220                 *term_sub = 0;
1221                 return ZEBRA_OK;
1222             }
1223             wrbuf_puts(term_dict, ".*)");
1224             break;
1225         case 101:        /* process # in term */
1226             wrbuf_putc(term_dict, '(');
1227             if (!term_101(zm, &termp, term_dict, space_split, display_term))
1228             {
1229                 *term_sub = 0;
1230                 return ZEBRA_OK;
1231             }
1232             wrbuf_puts(term_dict, ")");
1233             break;
1234         case 102:        /* Regexp-1 */
1235             wrbuf_putc(term_dict, '(');
1236             if (!term_102(zm, &termp, term_dict, space_split, display_term))
1237             {
1238                 *term_sub = 0;
1239                 return ZEBRA_OK;
1240             }
1241             wrbuf_putc(term_dict, ')');
1242             break;
1243         case 103:       /* Regexp-2 */
1244             regex_range = 1;
1245             wrbuf_putc(term_dict, '(');
1246             if (!term_103(zm, &termp, term_dict, &regex_range,
1247                           space_split, display_term))
1248             {
1249                 *term_sub = 0;
1250                 return ZEBRA_OK;
1251             }
1252             wrbuf_putc(term_dict, ')');
1253             break;
1254         case 104:        /* process ?n * # term */
1255             wrbuf_putc(term_dict, '(');
1256             if (!term_104(zm, &termp, term_dict, space_split, display_term))
1257             {
1258                 *term_sub = 0;
1259                 return ZEBRA_OK;
1260             }
1261             wrbuf_putc(term_dict, ')');
1262             break;
1263         case 105:        /* process * ! in term and right truncate */
1264             wrbuf_putc(term_dict, '(');
1265             if (!term_105(zm, &termp, term_dict, space_split, display_term, 1))
1266             {
1267                 *term_sub = 0;
1268                 return ZEBRA_OK;
1269             }
1270             wrbuf_putc(term_dict, ')');
1271             break;
1272         case 106:        /* process * ! in term */
1273             wrbuf_putc(term_dict, '(');
1274             if (!term_105(zm, &termp, term_dict, space_split, display_term, 0))
1275             {
1276                 *term_sub = 0;
1277                 return ZEBRA_OK;
1278             }
1279             wrbuf_putc(term_dict, ')');
1280             break;
1281         default:
1282             zebra_setError_zint(zh,
1283                                 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1284                                 truncation_value);
1285             return ZEBRA_FAIL;
1286         }
1287     }
1288     if (1)
1289     {
1290         char buf[1000];
1291         const char *input = wrbuf_cstr(term_dict) + prefix_len;
1292         esc_str(buf, sizeof(buf), input, strlen(input));
1293     }
1294     {
1295         WRBUF pr_wr = wrbuf_alloc();
1296
1297         wrbuf_write_escaped(pr_wr, wrbuf_buf(term_dict), wrbuf_len(term_dict));
1298         yaz_log(YLOG_LOG, "dict_lookup_grep: %s", wrbuf_cstr(pr_wr));
1299         wrbuf_destroy(pr_wr);
1300     }
1301     r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), regex_range,
1302                          grep_info, &max_pos,
1303                          ord_len /* number of "exact" chars */,
1304                          grep_handle);
1305     if (r == 1)
1306         zebra_set_partial_result(zh);
1307     else if (r)
1308         yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1309     *term_sub = termp;
1310     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1311     return ZEBRA_OK;
1312 }
1313
1314
1315
1316 static void grep_info_delete(struct grep_info *grep_info)
1317 {
1318 #ifdef TERM_COUNT
1319     xfree(grep_info->term_no);
1320 #endif
1321     xfree(grep_info->isam_p_buf);
1322 }
1323
1324 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1325                                    Z_AttributesPlusTerm *zapt,
1326                                    struct grep_info *grep_info,
1327                                    const char *index_type)
1328 {
1329 #ifdef TERM_COUNT
1330     grep_info->term_no = 0;
1331 #endif
1332     grep_info->trunc_max = atoi(res_get_def(zh->res, "truncmax", "10000"));
1333     grep_info->isam_p_size = 0;
1334     grep_info->isam_p_buf = NULL;
1335     grep_info->zh = zh;
1336     grep_info->index_type = index_type;
1337     grep_info->termset = 0;
1338     if (zapt)
1339     {
1340         AttrType truncmax;
1341         int truncmax_value;
1342
1343         attr_init_APT(&truncmax, zapt, 13);
1344         truncmax_value = attr_find(&truncmax, NULL);
1345         if (truncmax_value != -1)
1346             grep_info->trunc_max = truncmax_value;
1347     }
1348     if (zapt)
1349     {
1350         AttrType termset;
1351         int termset_value_numeric;
1352         const char *termset_value_string;
1353
1354         attr_init_APT(&termset, zapt, 8);
1355         termset_value_numeric =
1356             attr_find_ex(&termset, NULL, &termset_value_string);
1357         if (termset_value_numeric != -1)
1358         {
1359 #if TERMSET_DISABLE
1360             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1361             return ZEBRA_FAIL;
1362 #else
1363             char resname[32];
1364             const char *termset_name = 0;
1365             if (termset_value_numeric != -2)
1366             {
1367
1368                 sprintf(resname, "%d", termset_value_numeric);
1369                 termset_name = resname;
1370             }
1371             else
1372                 termset_name = termset_value_string;
1373             yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1374             grep_info->termset = resultSetAdd(zh, termset_name, 1);
1375             if (!grep_info->termset)
1376             {
1377                 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1378                 return ZEBRA_FAIL;
1379             }
1380 #endif
1381         }
1382     }
1383     return ZEBRA_OK;
1384 }
1385
1386 static ZEBRA_RES search_terms_chrmap(ZebraHandle zh,
1387                                      Z_AttributesPlusTerm *zapt,
1388                                      const char *termz,
1389                                      const Odr_oid *attributeSet,
1390                                      zint hits_limit,
1391                                      NMEM stream,
1392                                      const char *index_type, int complete_flag,
1393                                      const char *rank_type,
1394                                      const char *xpath_use,
1395                                      NMEM rset_nmem,
1396                                      RSET **result_sets, int *num_result_sets,
1397                                      struct rset_key_control *kc,
1398                                      zebra_map_t zm)
1399 {
1400     struct grep_info grep_info;
1401     const char *termp = termz;
1402     int alloc_sets = 0;
1403
1404     *num_result_sets = 0;
1405     if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1406         return ZEBRA_FAIL;
1407     while (1)
1408     {
1409         ZEBRA_RES res;
1410
1411         if (alloc_sets == *num_result_sets)
1412         {
1413             int add = 10;
1414             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1415                                               sizeof(*rnew));
1416             if (alloc_sets)
1417                 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1418             alloc_sets = alloc_sets + add;
1419             *result_sets = rnew;
1420         }
1421         res = search_term(zh, zapt, &termp, attributeSet, hits_limit,
1422                           stream, &grep_info,
1423                           index_type, complete_flag,
1424                           rank_type,
1425                           xpath_use, rset_nmem,
1426                           &(*result_sets)[*num_result_sets],
1427                           kc, zm);
1428         if (res != ZEBRA_OK)
1429         {
1430             int i;
1431             for (i = 0; i < *num_result_sets; i++)
1432                 rset_delete((*result_sets)[i]);
1433             grep_info_delete(&grep_info);
1434             return res;
1435         }
1436         if ((*result_sets)[*num_result_sets] == 0)
1437             break;
1438         (*num_result_sets)++;
1439
1440         if (!*termp)
1441             break;
1442     }
1443     grep_info_delete(&grep_info);
1444     return ZEBRA_OK;
1445 }
1446
1447 /**
1448    \brief Create result set(s) for list of terms
1449    \param zh Zebra Handle
1450    \param zapt Attributes Plust Term (RPN leaf)
1451    \param termz term as used in query but converted to UTF-8
1452    \param attributeSet default attribute set
1453    \param stream memory for result
1454    \param index_type register type ("w", "p",..)
1455    \param complete_flag whether it's phrases or not
1456    \param rank_type term flags for ranking
1457    \param xpath_use use attribute for X-Path (-1 for no X-path)
1458    \param rset_nmem memory for result sets
1459    \param result_sets output result set for each term in list (output)
1460    \param num_result_sets number of output result sets
1461    \param kc rset key control to be used for created result sets
1462 */
1463 static ZEBRA_RES search_terms_list(ZebraHandle zh,
1464                                    Z_AttributesPlusTerm *zapt,
1465                                    const char *termz,
1466                                    const Odr_oid *attributeSet,
1467                                    zint hits_limit,
1468                                    NMEM stream,
1469                                    const char *index_type, int complete_flag,
1470                                    const char *rank_type,
1471                                    const char *xpath_use,
1472                                    NMEM rset_nmem,
1473                                    RSET **result_sets, int *num_result_sets,
1474                                    struct rset_key_control *kc)
1475 {
1476     zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1477     return search_terms_chrmap(zh, zapt, termz, attributeSet, hits_limit,
1478                                stream, index_type, complete_flag,
1479                                rank_type, xpath_use,
1480                                rset_nmem, result_sets, num_result_sets,
1481                                kc, zm);
1482 }
1483
1484
1485 /** \brief limit a search by position - returns result set
1486  */
1487 static ZEBRA_RES search_position(ZebraHandle zh,
1488                                  Z_AttributesPlusTerm *zapt,
1489                                  const Odr_oid *attributeSet,
1490                                  const char *index_type,
1491                                  NMEM rset_nmem,
1492                                  RSET *rset,
1493                                  struct rset_key_control *kc)
1494 {
1495     int position_value;
1496     AttrType position;
1497     int ord = -1;
1498     char ord_buf[32];
1499     char term_dict[100];
1500     int ord_len;
1501     char *val;
1502     ISAM_P isam_p;
1503     zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1504
1505     attr_init_APT(&position, zapt, 3);
1506     position_value = attr_find(&position, NULL);
1507     switch(position_value)
1508     {
1509     case 3:
1510     case -1:
1511         return ZEBRA_OK;
1512     case 1:
1513     case 2:
1514         break;
1515     default:
1516         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1517                             position_value);
1518         return ZEBRA_FAIL;
1519     }
1520
1521
1522     if (!zebra_maps_is_first_in_field(zm))
1523     {
1524         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1525                             position_value);
1526         return ZEBRA_FAIL;
1527     }
1528
1529     if (zebra_apt_get_ord(zh, zapt, index_type, 0,
1530                           attributeSet, &ord) != ZEBRA_OK)
1531     {
1532         return ZEBRA_FAIL;
1533     }
1534     ord_len = key_SU_encode(ord, ord_buf);
1535     memcpy(term_dict, ord_buf, ord_len);
1536     strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1537     val = dict_lookup(zh->reg->dict, term_dict);
1538     if (val)
1539     {
1540         assert(*val == sizeof(ISAM_P));
1541         memcpy(&isam_p, val+1, sizeof(isam_p));
1542
1543         *rset = zebra_create_rset_isam(zh, rset_nmem, kc, kc->scope,
1544                                        isam_p, 0);
1545     }
1546     return ZEBRA_OK;
1547 }
1548
1549 /** \brief returns result set for phrase search
1550  */
1551 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1552                                        Z_AttributesPlusTerm *zapt,
1553                                        const char *termz_org,
1554                                        const Odr_oid *attributeSet,
1555                                        zint hits_limit,
1556                                        NMEM stream,
1557                                        const char *index_type,
1558                                        int complete_flag,
1559                                        const char *rank_type,
1560                                        const char *xpath_use,
1561                                        NMEM rset_nmem,
1562                                        RSET *rset,
1563                                        struct rset_key_control *kc)
1564 {
1565     RSET *result_sets = 0;
1566     int num_result_sets = 0;
1567     ZEBRA_RES res =
1568         search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1569                           stream, index_type, complete_flag,
1570                           rank_type, xpath_use,
1571                           rset_nmem,
1572                           &result_sets, &num_result_sets, kc);
1573
1574     if (res != ZEBRA_OK)
1575         return res;
1576
1577     if (num_result_sets > 0)
1578     {
1579         RSET first_set = 0;
1580         res = search_position(zh, zapt, attributeSet,
1581                               index_type,
1582                               rset_nmem, &first_set,
1583                               kc);
1584         if (res != ZEBRA_OK)
1585         {
1586             int i;
1587             for (i = 0; i < num_result_sets; i++)
1588                 rset_delete(result_sets[i]);
1589             return res;
1590         }
1591         if (first_set)
1592         {
1593             RSET *nsets = nmem_malloc(stream,
1594                                       sizeof(RSET) * (num_result_sets+1));
1595             nsets[0] = first_set;
1596             memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1597             result_sets = nsets;
1598             num_result_sets++;
1599         }
1600     }
1601     if (num_result_sets == 0)
1602         *rset = rset_create_null(rset_nmem, kc, 0);
1603     else if (num_result_sets == 1)
1604         *rset = result_sets[0];
1605     else
1606         *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1607                                  num_result_sets, result_sets,
1608                                  1 /* ordered */, 0 /* exclusion */,
1609                                  3 /* relation */, 1 /* distance */);
1610     if (!*rset)
1611         return ZEBRA_FAIL;
1612     return ZEBRA_OK;
1613 }
1614
1615 /** \brief returns result set for or-list search
1616  */
1617 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1618                                         Z_AttributesPlusTerm *zapt,
1619                                         const char *termz_org,
1620                                         const Odr_oid *attributeSet,
1621                                         zint hits_limit,
1622                                         NMEM stream,
1623                                         const char *index_type,
1624                                         int complete_flag,
1625                                         const char *rank_type,
1626                                         const char *xpath_use,
1627                                         NMEM rset_nmem,
1628                                         RSET *rset,
1629                                         struct rset_key_control *kc)
1630 {
1631     RSET *result_sets = 0;
1632     int num_result_sets = 0;
1633     int i;
1634     ZEBRA_RES res =
1635         search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1636                           stream, index_type, complete_flag,
1637                           rank_type, xpath_use,
1638                           rset_nmem,
1639                           &result_sets, &num_result_sets, kc);
1640     if (res != ZEBRA_OK)
1641         return res;
1642
1643     for (i = 0; i < num_result_sets; i++)
1644     {
1645         RSET first_set = 0;
1646         res = search_position(zh, zapt, attributeSet,
1647                               index_type,
1648                               rset_nmem, &first_set,
1649                               kc);
1650         if (res != ZEBRA_OK)
1651         {
1652             for (i = 0; i < num_result_sets; i++)
1653                 rset_delete(result_sets[i]);
1654             return res;
1655         }
1656
1657         if (first_set)
1658         {
1659             RSET tmp_set[2];
1660
1661             tmp_set[0] = first_set;
1662             tmp_set[1] = result_sets[i];
1663
1664             result_sets[i] = rset_create_prox(
1665                 rset_nmem, kc, kc->scope,
1666                 2, tmp_set,
1667                 1 /* ordered */, 0 /* exclusion */,
1668                 3 /* relation */, 1 /* distance */);
1669         }
1670     }
1671     if (num_result_sets == 0)
1672         *rset = rset_create_null(rset_nmem, kc, 0);
1673     else if (num_result_sets == 1)
1674         *rset = result_sets[0];
1675     else
1676         *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1677                                num_result_sets, result_sets);
1678     if (!*rset)
1679         return ZEBRA_FAIL;
1680     return ZEBRA_OK;
1681 }
1682
1683 /** \brief returns result set for and-list search
1684  */
1685 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1686                                          Z_AttributesPlusTerm *zapt,
1687                                          const char *termz_org,
1688                                          const Odr_oid *attributeSet,
1689                                          zint hits_limit,
1690                                          NMEM stream,
1691                                          const char *index_type,
1692                                          int complete_flag,
1693                                          const char *rank_type,
1694                                          const char *xpath_use,
1695                                          NMEM rset_nmem,
1696                                          RSET *rset,
1697                                          struct rset_key_control *kc)
1698 {
1699     RSET *result_sets = 0;
1700     int num_result_sets = 0;
1701     int i;
1702     ZEBRA_RES res =
1703         search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1704                           stream, index_type, complete_flag,
1705                           rank_type, xpath_use,
1706                           rset_nmem,
1707                           &result_sets, &num_result_sets,
1708                           kc);
1709     if (res != ZEBRA_OK)
1710         return res;
1711     for (i = 0; i < num_result_sets; i++)
1712     {
1713         RSET first_set = 0;
1714         res = search_position(zh, zapt, attributeSet,
1715                               index_type,
1716                               rset_nmem, &first_set,
1717                               kc);
1718         if (res != ZEBRA_OK)
1719         {
1720             for (i = 0; i < num_result_sets; i++)
1721                 rset_delete(result_sets[i]);
1722             return res;
1723         }
1724
1725         if (first_set)
1726         {
1727             RSET tmp_set[2];
1728
1729             tmp_set[0] = first_set;
1730             tmp_set[1] = result_sets[i];
1731
1732             result_sets[i] = rset_create_prox(
1733                 rset_nmem, kc, kc->scope,
1734                 2, tmp_set,
1735                 1 /* ordered */, 0 /* exclusion */,
1736                 3 /* relation */, 1 /* distance */);
1737         }
1738     }
1739
1740
1741     if (num_result_sets == 0)
1742         *rset = rset_create_null(rset_nmem, kc, 0);
1743     else if (num_result_sets == 1)
1744         *rset = result_sets[0];
1745     else
1746         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1747                                 num_result_sets, result_sets);
1748     if (!*rset)
1749         return ZEBRA_FAIL;
1750     return ZEBRA_OK;
1751 }
1752
1753 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1754                             const char **term_sub,
1755                             WRBUF term_dict,
1756                             const Odr_oid *attributeSet,
1757                             struct grep_info *grep_info,
1758                             int *max_pos,
1759                             zebra_map_t zm,
1760                             WRBUF display_term,
1761                             int *error_code)
1762 {
1763     AttrType relation;
1764     int relation_value;
1765     int term_value;
1766     int r;
1767     WRBUF term_num = wrbuf_alloc();
1768
1769     *error_code = 0;
1770     attr_init_APT(&relation, zapt, 2);
1771     relation_value = attr_find(&relation, NULL);
1772
1773     yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1774
1775     switch (relation_value)
1776     {
1777     case 1:
1778         yaz_log(log_level_rpn, "Relation <");
1779         if (!term_100(zm, term_sub, term_num, 1, display_term))
1780         {
1781             wrbuf_destroy(term_num);
1782             return 0;
1783         }
1784         term_value = atoi(wrbuf_cstr(term_num));
1785         gen_regular_rel(term_dict, term_value-1, 1);
1786         break;
1787     case 2:
1788         yaz_log(log_level_rpn, "Relation <=");
1789         if (!term_100(zm, term_sub, term_num, 1, display_term))
1790         {
1791             wrbuf_destroy(term_num);
1792             return 0;
1793         }
1794         term_value = atoi(wrbuf_cstr(term_num));
1795         gen_regular_rel(term_dict, term_value, 1);
1796         break;
1797     case 4:
1798         yaz_log(log_level_rpn, "Relation >=");
1799         if (!term_100(zm, term_sub, term_num, 1, display_term))
1800         {
1801             wrbuf_destroy(term_num);
1802             return 0;
1803         }
1804         term_value = atoi(wrbuf_cstr(term_num));
1805         gen_regular_rel(term_dict, term_value, 0);
1806         break;
1807     case 5:
1808         yaz_log(log_level_rpn, "Relation >");
1809         if (!term_100(zm, term_sub, term_num, 1, display_term))
1810         {
1811             wrbuf_destroy(term_num);
1812             return 0;
1813         }
1814         term_value = atoi(wrbuf_cstr(term_num));
1815         gen_regular_rel(term_dict, term_value+1, 0);
1816         break;
1817     case -1:
1818     case 3:
1819         yaz_log(log_level_rpn, "Relation =");
1820         if (!term_100(zm, term_sub, term_num, 1, display_term))
1821         {
1822             wrbuf_destroy(term_num);
1823             return 0;
1824         }
1825         term_value = atoi(wrbuf_cstr(term_num));
1826         wrbuf_printf(term_dict, "(0*%d)", term_value);
1827         break;
1828     case 103:
1829         /* term_tmp untouched.. */
1830         while (**term_sub != '\0')
1831             (*term_sub)++;
1832         break;
1833     default:
1834         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1835         wrbuf_destroy(term_num);
1836         return 0;
1837     }
1838     r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict),
1839                          0, grep_info, max_pos, 0, grep_handle);
1840
1841     if (r == 1)
1842         zebra_set_partial_result(zh);
1843     else if (r)
1844         yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1845     yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1846     wrbuf_destroy(term_num);
1847     return 1;
1848 }
1849
1850 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1851                               const char **term_sub,
1852                               WRBUF term_dict,
1853                               const Odr_oid *attributeSet, NMEM stream,
1854                               struct grep_info *grep_info,
1855                               const char *index_type, int complete_flag,
1856                               WRBUF display_term,
1857                               const char *xpath_use,
1858                               struct ord_list **ol)
1859 {
1860     const char *termp;
1861     struct rpn_char_map_info rcmi;
1862     int max_pos;
1863     int relation_error = 0;
1864     int ord, ord_len, i;
1865     char ord_buf[32];
1866     zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1867
1868     *ol = ord_list_create(stream);
1869
1870     rpn_char_map_prepare(zh->reg, zm, &rcmi);
1871
1872     termp = *term_sub;
1873
1874     if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1875                           attributeSet, &ord) != ZEBRA_OK)
1876     {
1877         return ZEBRA_FAIL;
1878     }
1879
1880     wrbuf_rewind(term_dict);
1881
1882     *ol = ord_list_append(stream, *ol, ord);
1883
1884     ord_len = key_SU_encode(ord, ord_buf);
1885
1886     wrbuf_putc(term_dict, '(');
1887     for (i = 0; i < ord_len; i++)
1888     {
1889         wrbuf_putc(term_dict, 1);
1890         wrbuf_putc(term_dict, ord_buf[i]);
1891     }
1892     wrbuf_putc(term_dict, ')');
1893
1894     if (!numeric_relation(zh, zapt, &termp, term_dict,
1895                           attributeSet, grep_info, &max_pos, zm,
1896                           display_term, &relation_error))
1897     {
1898         if (relation_error)
1899         {
1900             zebra_setError(zh, relation_error, 0);
1901             return ZEBRA_FAIL;
1902         }
1903         *term_sub = 0;
1904         return ZEBRA_OK;
1905     }
1906     *term_sub = termp;
1907     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1908     return ZEBRA_OK;
1909 }
1910
1911
1912 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1913                                         Z_AttributesPlusTerm *zapt,
1914                                         const char *termz,
1915                                         const Odr_oid *attributeSet,
1916                                         zint hits_limit,
1917                                         NMEM stream,
1918                                         const char *index_type,
1919                                         int complete_flag,
1920                                         const char *rank_type,
1921                                         const char *xpath_use,
1922                                         NMEM rset_nmem,
1923                                         RSET *rset,
1924                                         struct rset_key_control *kc)
1925 {
1926     const char *termp = termz;
1927     RSET *result_sets = 0;
1928     int num_result_sets = 0;
1929     ZEBRA_RES res;
1930     struct grep_info grep_info;
1931     int alloc_sets = 0;
1932     zint hits_limit_value = hits_limit;
1933     const char *term_ref_id_str = 0;
1934
1935     zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
1936                           stream);
1937
1938     yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1939     if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1940         return ZEBRA_FAIL;
1941     while (1)
1942     {
1943         struct ord_list *ol;
1944         WRBUF term_dict = wrbuf_alloc();
1945         WRBUF display_term = wrbuf_alloc();
1946         if (alloc_sets == num_result_sets)
1947         {
1948             int add = 10;
1949             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1950                                               sizeof(*rnew));
1951             if (alloc_sets)
1952                 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1953             alloc_sets = alloc_sets + add;
1954             result_sets = rnew;
1955         }
1956         yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1957         grep_info.isam_p_indx = 0;
1958         res = numeric_term(zh, zapt, &termp, term_dict,
1959                            attributeSet, stream, &grep_info,
1960                            index_type, complete_flag,
1961                            display_term, xpath_use, &ol);
1962         wrbuf_destroy(term_dict);
1963         if (res == ZEBRA_FAIL || termp == 0)
1964         {
1965             wrbuf_destroy(display_term);
1966             break;
1967         }
1968         yaz_log(YLOG_DEBUG, "term: %s", wrbuf_cstr(display_term));
1969         result_sets[num_result_sets] =
1970             rset_trunc(zh, grep_info.isam_p_buf,
1971                        grep_info.isam_p_indx, wrbuf_buf(display_term),
1972                        wrbuf_len(display_term), rank_type,
1973                        0 /* preserve position */,
1974                        zapt->term->which, rset_nmem,
1975                        kc, kc->scope, ol, index_type,
1976                        hits_limit_value,
1977                        term_ref_id_str);
1978         wrbuf_destroy(display_term);
1979         if (!result_sets[num_result_sets])
1980             break;
1981         num_result_sets++;
1982         if (!*termp)
1983             break;
1984     }
1985     grep_info_delete(&grep_info);
1986
1987     if (res != ZEBRA_OK)
1988         return res;
1989     if (num_result_sets == 0)
1990         *rset = rset_create_null(rset_nmem, kc, 0);
1991     else if (num_result_sets == 1)
1992         *rset = result_sets[0];
1993     else
1994         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1995                                 num_result_sets, result_sets);
1996     if (!*rset)
1997         return ZEBRA_FAIL;
1998     return ZEBRA_OK;
1999 }
2000
2001 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
2002                                       Z_AttributesPlusTerm *zapt,
2003                                       const char *termz,
2004                                       const Odr_oid *attributeSet,
2005                                       NMEM stream,
2006                                       const char *rank_type, NMEM rset_nmem,
2007                                       RSET *rset,
2008                                       struct rset_key_control *kc)
2009 {
2010     Record rec;
2011     zint sysno = atozint(termz);
2012
2013     if (sysno <= 0)
2014         sysno = 0;
2015     rec = rec_get(zh->reg->records, sysno);
2016     if (!rec)
2017         sysno = 0;
2018
2019     rec_free(&rec);
2020
2021     if (sysno <= 0)
2022     {
2023         *rset = rset_create_null(rset_nmem, kc, 0);
2024     }
2025     else
2026     {
2027         RSFD rsfd;
2028         struct it_key key;
2029         *rset = rset_create_temp(rset_nmem, kc, kc->scope,
2030                                  res_get(zh->res, "setTmpDir"), 0);
2031         rsfd = rset_open(*rset, RSETF_WRITE);
2032
2033         key.mem[0] = sysno;
2034         key.mem[1] = 1;
2035         key.len = 2;
2036         rset_write(rsfd, &key);
2037         rset_close(rsfd);
2038     }
2039     return ZEBRA_OK;
2040 }
2041
2042 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2043                                const Odr_oid *attributeSet, NMEM stream,
2044                                Z_SortKeySpecList *sort_sequence,
2045                                const char *rank_type,
2046                                NMEM rset_nmem,
2047                                RSET *rset,
2048                                struct rset_key_control *kc)
2049 {
2050     int i;
2051     int sort_relation_value;
2052     AttrType sort_relation_type;
2053     Z_SortKeySpec *sks;
2054     Z_SortKey *sk;
2055     char termz[20];
2056
2057     attr_init_APT(&sort_relation_type, zapt, 7);
2058     sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
2059
2060     if (!sort_sequence->specs)
2061     {
2062         sort_sequence->num_specs = 10;
2063         sort_sequence->specs = (Z_SortKeySpec **)
2064             nmem_malloc(stream, sort_sequence->num_specs *
2065                         sizeof(*sort_sequence->specs));
2066         for (i = 0; i < sort_sequence->num_specs; i++)
2067             sort_sequence->specs[i] = 0;
2068     }
2069     if (zapt->term->which != Z_Term_general)
2070         i = 0;
2071     else
2072         i = atoi_n((char *) zapt->term->u.general->buf,
2073                    zapt->term->u.general->len);
2074     if (i >= sort_sequence->num_specs)
2075         i = 0;
2076     sprintf(termz, "%d", i);
2077
2078     sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
2079     sks->sortElement = (Z_SortElement *)
2080         nmem_malloc(stream, sizeof(*sks->sortElement));
2081     sks->sortElement->which = Z_SortElement_generic;
2082     sk = sks->sortElement->u.generic = (Z_SortKey *)
2083         nmem_malloc(stream, sizeof(*sk));
2084     sk->which = Z_SortKey_sortAttributes;
2085     sk->u.sortAttributes = (Z_SortAttributes *)
2086         nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
2087
2088     sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet);
2089     sk->u.sortAttributes->list = zapt->attributes;
2090
2091     sks->sortRelation = (Odr_int *)
2092         nmem_malloc(stream, sizeof(*sks->sortRelation));
2093     if (sort_relation_value == 1)
2094         *sks->sortRelation = Z_SortKeySpec_ascending;
2095     else if (sort_relation_value == 2)
2096         *sks->sortRelation = Z_SortKeySpec_descending;
2097     else
2098         *sks->sortRelation = Z_SortKeySpec_ascending;
2099
2100     sks->caseSensitivity = (Odr_int *)
2101         nmem_malloc(stream, sizeof(*sks->caseSensitivity));
2102     *sks->caseSensitivity = 0;
2103
2104     sks->which = Z_SortKeySpec_null;
2105     sks->u.null = odr_nullval ();
2106     sort_sequence->specs[i] = sks;
2107     *rset = rset_create_null(rset_nmem, kc, 0);
2108     return ZEBRA_OK;
2109 }
2110
2111
2112 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2113                            const Odr_oid *attributeSet,
2114                            struct xpath_location_step *xpath, int max,
2115                            NMEM mem)
2116 {
2117     const Odr_oid *curAttributeSet = attributeSet;
2118     AttrType use;
2119     const char *use_string = 0;
2120
2121     attr_init_APT(&use, zapt, 1);
2122     attr_find_ex(&use, &curAttributeSet, &use_string);
2123
2124     if (!use_string || *use_string != '/')
2125         return -1;
2126
2127     return zebra_parse_xpath_str(use_string, xpath, max, mem);
2128 }
2129
2130
2131
2132 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
2133                         const char *index_type, const char *term,
2134                         const char *xpath_use,
2135                         NMEM rset_nmem,
2136                         struct rset_key_control *kc)
2137 {
2138     struct grep_info grep_info;
2139     int ord = zebraExplain_lookup_attr_str(zh->reg->zei,
2140                                            zinfo_index_category_index,
2141                                            index_type, xpath_use);
2142     if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, "0") == ZEBRA_FAIL)
2143         return rset_create_null(rset_nmem, kc, 0);
2144
2145     if (ord < 0)
2146         return rset_create_null(rset_nmem, kc, 0);
2147     else
2148     {
2149         int i, max_pos;
2150         char ord_buf[32];
2151         RSET rset;
2152         WRBUF term_dict = wrbuf_alloc();
2153         int ord_len = key_SU_encode(ord, ord_buf);
2154         int term_type = Z_Term_characterString;
2155         const char *flags = "void";
2156
2157         wrbuf_putc(term_dict, '(');
2158         for (i = 0; i < ord_len; i++)
2159         {
2160             wrbuf_putc(term_dict, 1);
2161             wrbuf_putc(term_dict, ord_buf[i]);
2162         }
2163         wrbuf_putc(term_dict, ')');
2164         wrbuf_puts(term_dict, term);
2165
2166         grep_info.isam_p_indx = 0;
2167         dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 0,
2168                          &grep_info, &max_pos, 0, grep_handle);
2169         yaz_log(YLOG_DEBUG, "%s %d positions", term,
2170                 grep_info.isam_p_indx);
2171         rset = rset_trunc(zh, grep_info.isam_p_buf,
2172                           grep_info.isam_p_indx, term, strlen(term),
2173                           flags, 1, term_type, rset_nmem,
2174                           kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2175                           0 /* term_ref_id_str */);
2176         grep_info_delete(&grep_info);
2177         wrbuf_destroy(term_dict);
2178         return rset;
2179     }
2180 }
2181
2182 static
2183 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2184                            NMEM stream, const char *rank_type, RSET rset,
2185                            int xpath_len, struct xpath_location_step *xpath,
2186                            NMEM rset_nmem,
2187                            RSET *rset_out,
2188                            struct rset_key_control *kc)
2189 {
2190     int i;
2191     int always_matches = rset ? 0 : 1;
2192
2193     if (xpath_len < 0)
2194     {
2195         *rset_out = rset;
2196         return ZEBRA_OK;
2197     }
2198
2199     yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2200     for (i = 0; i < xpath_len; i++)
2201     {
2202         yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2203
2204     }
2205
2206     /*
2207     //a    ->    a/.*
2208     //a/b  ->    b/a/.*
2209     /a     ->    a/
2210     /a/b   ->    b/a/
2211
2212     /      ->    none
2213
2214     a[@attr = value]/b[@other = othervalue]
2215
2216     /e/@a val      range(e/,range(@a,freetext(w,1015,val),@a),e/)
2217     /a/b val       range(b/a/,freetext(w,1016,val),b/a/)
2218     /a/b/@c val    range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2219     /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2220     /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2221     /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
2222
2223     */
2224
2225     dict_grep_cmap(zh->reg->dict, 0, 0);
2226
2227     {
2228         int level = xpath_len;
2229         int first_path = 1;
2230
2231         while (--level >= 0)
2232         {
2233             WRBUF xpath_rev = wrbuf_alloc();
2234             int i;
2235             RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
2236
2237             for (i = level; i >= 1; --i)
2238             {
2239                 const char *cp = xpath[i].part;
2240                 if (*cp)
2241                 {
2242                     for (; *cp; cp++)
2243                     {
2244                         if (*cp == '*')
2245                             wrbuf_puts(xpath_rev, "[^/]*");
2246                         else if (*cp == ' ')
2247                             wrbuf_puts(xpath_rev, "\001 ");
2248                         else
2249                             wrbuf_putc(xpath_rev, *cp);
2250
2251                         /* wrbuf_putc does not null-terminate , but
2252                            wrbuf_puts below ensures it does.. so xpath_rev
2253                            is OK iff length is > 0 */
2254                     }
2255                     wrbuf_puts(xpath_rev, "/");
2256                 }
2257                 else if (i == 1)  /* // case */
2258                     wrbuf_puts(xpath_rev, ".*");
2259             }
2260             if (xpath[level].predicate &&
2261                 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2262                 xpath[level].predicate->u.relation.name[0])
2263             {
2264                 WRBUF wbuf = wrbuf_alloc();
2265                 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2266                 if (xpath[level].predicate->u.relation.value)
2267                 {
2268                     const char *cp = xpath[level].predicate->u.relation.value;
2269                     wrbuf_putc(wbuf, '=');
2270
2271                     while (*cp)
2272                     {
2273                         if (strchr(REGEX_CHARS, *cp))
2274                             wrbuf_putc(wbuf, '\\');
2275                         wrbuf_putc(wbuf, *cp);
2276                         cp++;
2277                     }
2278                 }
2279                 rset_attr = xpath_trunc(
2280                     zh, stream, "0", wrbuf_cstr(wbuf),
2281                     ZEBRA_XPATH_ATTR_NAME,
2282                     rset_nmem, kc);
2283                 wrbuf_destroy(wbuf);
2284             }
2285             else
2286             {
2287                 if (!first_path)
2288                 {
2289                     wrbuf_destroy(xpath_rev);
2290                     continue;
2291                 }
2292             }
2293             yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level,
2294                     wrbuf_cstr(xpath_rev));
2295             if (wrbuf_len(xpath_rev))
2296             {
2297                 rset_start_tag = xpath_trunc(zh, stream, "0",
2298                                              wrbuf_cstr(xpath_rev),
2299                                              ZEBRA_XPATH_ELM_BEGIN,
2300                                              rset_nmem, kc);
2301                 if (always_matches)
2302                     rset = rset_start_tag;
2303                 else
2304                 {
2305                     rset_end_tag = xpath_trunc(zh, stream, "0",
2306                                                wrbuf_cstr(xpath_rev),
2307                                                ZEBRA_XPATH_ELM_END,
2308                                                rset_nmem, kc);
2309
2310                     rset = rset_create_between(rset_nmem, kc, kc->scope,
2311                                                rset_start_tag, rset,
2312                                                rset_end_tag, rset_attr);
2313                 }
2314             }
2315             wrbuf_destroy(xpath_rev);
2316             first_path = 0;
2317         }
2318     }
2319     *rset_out = rset;
2320     return ZEBRA_OK;
2321 }
2322
2323 #define MAX_XPATH_STEPS 10
2324
2325 static ZEBRA_RES rpn_search_database(ZebraHandle zh,
2326                                      Z_AttributesPlusTerm *zapt,
2327                                      const Odr_oid *attributeSet,
2328                                      zint hits_limit, NMEM stream,
2329                                      Z_SortKeySpecList *sort_sequence,
2330                                      NMEM rset_nmem,
2331                                      RSET *rset,
2332                                      struct rset_key_control *kc);
2333
2334 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2335                                 const Odr_oid *attributeSet,
2336                                 zint hits_limit, NMEM stream,
2337                                 Z_SortKeySpecList *sort_sequence,
2338                                 int num_bases, const char **basenames,
2339                                 NMEM rset_nmem,
2340                                 RSET *rset,
2341                                 struct rset_key_control *kc)
2342 {
2343     RSET *rsets = nmem_malloc(stream, num_bases * sizeof(*rsets));
2344     ZEBRA_RES res = ZEBRA_OK;
2345     int i;
2346     for (i = 0; i < num_bases; i++)
2347     {
2348
2349         if (zebraExplain_curDatabase(zh->reg->zei, basenames[i]))
2350         {
2351             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2352                            basenames[i]);
2353             res = ZEBRA_FAIL;
2354             break;
2355         }
2356         res = rpn_search_database(zh, zapt, attributeSet, hits_limit, stream,
2357                                   sort_sequence,
2358                                   rset_nmem, rsets+i, kc);
2359         if (res != ZEBRA_OK)
2360             break;
2361     }
2362     if (res != ZEBRA_OK)
2363     {   /* must clean up the already created sets */
2364         while (--i >= 0)
2365             rset_delete(rsets[i]);
2366         *rset = 0;
2367     }
2368     else
2369     {
2370         if (num_bases == 1)
2371             *rset = rsets[0];
2372         else if (num_bases == 0)
2373             *rset = rset_create_null(rset_nmem, kc, 0);
2374         else
2375             *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* TERMID */,
2376                                    num_bases, rsets);
2377     }
2378     return res;
2379 }
2380
2381 static ZEBRA_RES rpn_search_database(ZebraHandle zh,
2382                                      Z_AttributesPlusTerm *zapt,
2383                                      const Odr_oid *attributeSet,
2384                                      zint hits_limit, NMEM stream,
2385                                      Z_SortKeySpecList *sort_sequence,
2386                                      NMEM rset_nmem,
2387                                      RSET *rset,
2388                                      struct rset_key_control *kc)
2389 {
2390     ZEBRA_RES res = ZEBRA_OK;
2391     const char *index_type;
2392     char *search_type = NULL;
2393     char rank_type[128];
2394     int complete_flag;
2395     int sort_flag;
2396     char termz[IT_MAX_WORD+1];
2397     int xpath_len;
2398     const char *xpath_use = 0;
2399     struct xpath_location_step xpath[MAX_XPATH_STEPS];
2400
2401     if (!log_level_set)
2402     {
2403         log_level_rpn = yaz_log_module_level("rpn");
2404         log_level_set = 1;
2405     }
2406     zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type,
2407                     rank_type, &complete_flag, &sort_flag);
2408
2409     yaz_log(YLOG_DEBUG, "index_type=%s", index_type);
2410     yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2411     yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2412     yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2413
2414     if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2415         return ZEBRA_FAIL;
2416
2417     if (sort_flag)
2418         return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2419                              rank_type, rset_nmem, rset, kc);
2420     /* consider if an X-Path query is used */
2421     xpath_len = rpn_check_xpath(zh, zapt, attributeSet,
2422                                 xpath, MAX_XPATH_STEPS, stream);
2423     if (xpath_len >= 0)
2424     {
2425         if (xpath[xpath_len-1].part[0] == '@')
2426             xpath_use = ZEBRA_XPATH_ATTR_CDATA;  /* last step is attribute  */
2427         else
2428             xpath_use = ZEBRA_XPATH_CDATA;  /* searching for cdata */
2429
2430         if (1)
2431         {
2432             AttrType relation;
2433             int relation_value;
2434
2435             attr_init_APT(&relation, zapt, 2);
2436             relation_value = attr_find(&relation, NULL);
2437
2438             if (relation_value == 103) /* alwaysmatches */
2439             {
2440                 *rset = 0; /* signal no "term" set */
2441                 return rpn_search_xpath(zh, stream, rank_type, *rset,
2442                                         xpath_len, xpath, rset_nmem, rset, kc);
2443             }
2444         }
2445     }
2446
2447     /* search using one of the various search type strategies
2448        termz is our UTF-8 search term
2449        attributeSet is top-level default attribute set
2450        stream is ODR for search
2451        reg_id is the register type
2452        complete_flag is 1 for complete subfield, 0 for incomplete
2453        xpath_use is use-attribute to be used for X-Path search, 0 for none
2454     */
2455     if (!strcmp(search_type, "phrase"))
2456     {
2457         res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, hits_limit,
2458                                     stream,
2459                                     index_type, complete_flag, rank_type,
2460                                     xpath_use,
2461                                     rset_nmem,
2462                                     rset, kc);
2463     }
2464     else if (!strcmp(search_type, "and-list"))
2465     {
2466         res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, hits_limit,
2467                                       stream,
2468                                       index_type, complete_flag, rank_type,
2469                                       xpath_use,
2470                                       rset_nmem,
2471                                       rset, kc);
2472     }
2473     else if (!strcmp(search_type, "or-list"))
2474     {
2475         res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, hits_limit,
2476                                      stream,
2477                                      index_type, complete_flag, rank_type,
2478                                      xpath_use,
2479                                      rset_nmem,
2480                                      rset, kc);
2481     }
2482     else if (!strcmp(search_type, "local"))
2483     {
2484         res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2485                                    rank_type, rset_nmem, rset, kc);
2486     }
2487     else if (!strcmp(search_type, "numeric"))
2488     {
2489         res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, hits_limit,
2490                                      stream,
2491                                      index_type, complete_flag, rank_type,
2492                                      xpath_use,
2493                                      rset_nmem,
2494                                      rset, kc);
2495     }
2496     else
2497     {
2498         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2499         res = ZEBRA_FAIL;
2500     }
2501     if (res != ZEBRA_OK)
2502         return res;
2503     if (!*rset)
2504         return ZEBRA_FAIL;
2505     return rpn_search_xpath(zh, stream, rank_type, *rset,
2506                             xpath_len, xpath, rset_nmem, rset, kc);
2507 }
2508
2509 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2510                                       const Odr_oid *attributeSet,
2511                                       zint hits_limit,
2512                                       NMEM stream, NMEM rset_nmem,
2513                                       Z_SortKeySpecList *sort_sequence,
2514                                       int num_bases, const char **basenames,
2515                                       RSET **result_sets, int *num_result_sets,
2516                                       Z_Operator *parent_op,
2517                                       struct rset_key_control *kc);
2518
2519 ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs,
2520                                    zint *approx_limit)
2521 {
2522     ZEBRA_RES res = ZEBRA_OK;
2523     if (zs->which == Z_RPNStructure_complex)
2524     {
2525         if (res == ZEBRA_OK)
2526             res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
2527                                            approx_limit);
2528         if (res == ZEBRA_OK)
2529             res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
2530                                            approx_limit);
2531     }
2532     else if (zs->which == Z_RPNStructure_simple)
2533     {
2534         if (zs->u.simple->which == Z_Operand_APT)
2535         {
2536             Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
2537             AttrType global_hits_limit_attr;
2538             int l;
2539
2540             attr_init_APT(&global_hits_limit_attr, zapt, 12);
2541
2542             l = attr_find(&global_hits_limit_attr, NULL);
2543             if (l != -1)
2544                 *approx_limit = l;
2545         }
2546     }
2547     return res;
2548 }
2549
2550 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2551                          const Odr_oid *attributeSet,
2552                          zint hits_limit,
2553                          NMEM stream, NMEM rset_nmem,
2554                          Z_SortKeySpecList *sort_sequence,
2555                          int num_bases, const char **basenames,
2556                          RSET *result_set)
2557 {
2558     RSET *result_sets = 0;
2559     int num_result_sets = 0;
2560     ZEBRA_RES res;
2561     struct rset_key_control *kc = zebra_key_control_create(zh);
2562
2563     res = rpn_search_structure(zh, zs, attributeSet, hits_limit,
2564                                stream, rset_nmem,
2565                                sort_sequence,
2566                                num_bases, basenames,
2567                                &result_sets, &num_result_sets,
2568                                0 /* no parent op */,
2569                                kc);
2570     if (res != ZEBRA_OK)
2571     {
2572         int i;
2573         for (i = 0; i < num_result_sets; i++)
2574             rset_delete(result_sets[i]);
2575         *result_set = 0;
2576     }
2577     else
2578     {
2579         assert(num_result_sets == 1);
2580         assert(result_sets);
2581         assert(*result_sets);
2582         *result_set = *result_sets;
2583     }
2584     (*kc->dec)(kc);
2585     return res;
2586 }
2587
2588 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2589                                const Odr_oid *attributeSet, zint hits_limit,
2590                                NMEM stream, NMEM rset_nmem,
2591                                Z_SortKeySpecList *sort_sequence,
2592                                int num_bases, const char **basenames,
2593                                RSET **result_sets, int *num_result_sets,
2594                                Z_Operator *parent_op,
2595                                struct rset_key_control *kc)
2596 {
2597     *num_result_sets = 0;
2598     if (zs->which == Z_RPNStructure_complex)
2599     {
2600         ZEBRA_RES res;
2601         Z_Operator *zop = zs->u.complex->roperator;
2602         RSET *result_sets_l = 0;
2603         int num_result_sets_l = 0;
2604         RSET *result_sets_r = 0;
2605         int num_result_sets_r = 0;
2606
2607         res = rpn_search_structure(zh, zs->u.complex->s1,
2608                                    attributeSet, hits_limit, stream, rset_nmem,
2609                                    sort_sequence,
2610                                    num_bases, basenames,
2611                                    &result_sets_l, &num_result_sets_l,
2612                                    zop, kc);
2613         if (res != ZEBRA_OK)
2614         {
2615             int i;
2616             for (i = 0; i < num_result_sets_l; i++)
2617                 rset_delete(result_sets_l[i]);
2618             return res;
2619         }
2620         res = rpn_search_structure(zh, zs->u.complex->s2,
2621                                    attributeSet, hits_limit, stream, rset_nmem,
2622                                    sort_sequence,
2623                                    num_bases, basenames,
2624                                    &result_sets_r, &num_result_sets_r,
2625                                    zop, kc);
2626         if (res != ZEBRA_OK)
2627         {
2628             int i;
2629             for (i = 0; i < num_result_sets_l; i++)
2630                 rset_delete(result_sets_l[i]);
2631             for (i = 0; i < num_result_sets_r; i++)
2632                 rset_delete(result_sets_r[i]);
2633             return res;
2634         }
2635
2636         /* make a new list of result for all children */
2637         *num_result_sets = num_result_sets_l + num_result_sets_r;
2638         *result_sets = nmem_malloc(stream, *num_result_sets *
2639                                    sizeof(**result_sets));
2640         memcpy(*result_sets, result_sets_l,
2641                num_result_sets_l * sizeof(**result_sets));
2642         memcpy(*result_sets + num_result_sets_l, result_sets_r,
2643                num_result_sets_r * sizeof(**result_sets));
2644
2645         if (!parent_op || parent_op->which != zop->which
2646             || (zop->which != Z_Operator_and &&
2647                 zop->which != Z_Operator_or))
2648         {
2649             /* parent node different from this one (or non-present) */
2650             /* we must combine result sets now */
2651             RSET rset;
2652             switch (zop->which)
2653             {
2654             case Z_Operator_and:
2655                 rset = rset_create_and(rset_nmem, kc,
2656                                        kc->scope,
2657                                        *num_result_sets, *result_sets);
2658                 break;
2659             case Z_Operator_or:
2660                 rset = rset_create_or(rset_nmem, kc,
2661                                       kc->scope, 0, /* termid */
2662                                       *num_result_sets, *result_sets);
2663                 break;
2664             case Z_Operator_and_not:
2665                 rset = rset_create_not(rset_nmem, kc,
2666                                        kc->scope,
2667                                        (*result_sets)[0],
2668                                        (*result_sets)[1]);
2669                 break;
2670             case Z_Operator_prox:
2671                 if (zop->u.prox->which != Z_ProximityOperator_known)
2672                 {
2673                     zebra_setError(zh,
2674                                    YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2675                                    0);
2676                     return ZEBRA_FAIL;
2677                 }
2678                 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2679                 {
2680                     zebra_setError_zint(zh,
2681                                         YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2682                                         *zop->u.prox->u.known);
2683                     return ZEBRA_FAIL;
2684                 }
2685                 else
2686                 {
2687                     rset = rset_create_prox(rset_nmem, kc,
2688                                             kc->scope,
2689                                             *num_result_sets, *result_sets,
2690                                             *zop->u.prox->ordered,
2691                                             (!zop->u.prox->exclusion ?
2692                                              0 : *zop->u.prox->exclusion),
2693                                             *zop->u.prox->relationType,
2694                                             *zop->u.prox->distance );
2695                 }
2696                 break;
2697             default:
2698                 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
2699                 return ZEBRA_FAIL;
2700             }
2701             *num_result_sets = 1;
2702             *result_sets = nmem_malloc(stream, *num_result_sets *
2703                                        sizeof(**result_sets));
2704             (*result_sets)[0] = rset;
2705         }
2706     }
2707     else if (zs->which == Z_RPNStructure_simple)
2708     {
2709         RSET rset;
2710         ZEBRA_RES res;
2711
2712         if (zs->u.simple->which == Z_Operand_APT)
2713         {
2714             yaz_log(YLOG_DEBUG, "rpn_search_APT");
2715             res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2716                                  attributeSet, hits_limit,
2717                                  stream, sort_sequence,
2718                                  num_bases, basenames, rset_nmem, &rset,
2719                                  kc);
2720             if (res != ZEBRA_OK)
2721                 return res;
2722         }
2723         else if (zs->u.simple->which == Z_Operand_resultSetId)
2724         {
2725             yaz_log(YLOG_DEBUG, "rpn_search_ref");
2726             rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2727             if (!rset)
2728             {
2729                 zebra_setError(zh,
2730                                YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2731                                zs->u.simple->u.resultSetId);
2732                 return ZEBRA_FAIL;
2733             }
2734             rset_dup(rset);
2735         }
2736         else
2737         {
2738             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2739             return ZEBRA_FAIL;
2740         }
2741         *num_result_sets = 1;
2742         *result_sets = nmem_malloc(stream, *num_result_sets *
2743                                    sizeof(**result_sets));
2744         (*result_sets)[0] = rset;
2745     }
2746     else
2747     {
2748         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2749         return ZEBRA_FAIL;
2750     }
2751     return ZEBRA_OK;
2752 }
2753
2754
2755
2756 /*
2757  * Local variables:
2758  * c-basic-offset: 4
2759  * c-file-style: "Stroustrup"
2760  * indent-tabs-mode: nil
2761  * End:
2762  * vim: shiftwidth=4 tabstop=8 expandtab
2763  */
2764