Fix documentation of chr's equivalent directive ZEB-672
[idzebra-moved-to-github.git] / index / rpnsearch.c
1 /* This file is part of the Zebra server.
2    Copyright (C) Index Data
3
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17
18 */
19
20 #if HAVE_CONFIG_H
21 #include <config.h>
22 #endif
23 #include <stdio.h>
24 #include <assert.h>
25 #ifdef WIN32
26 #include <io.h>
27 #endif
28 #if HAVE_UNISTD_H
29 #include <unistd.h>
30 #endif
31 #include <ctype.h>
32
33 #include <yaz/diagbib1.h>
34 #include "index.h"
35 #include <zebra_xpath.h>
36 #include <attrfind.h>
37 #include <charmap.h>
38 #include <rset.h>
39
/* Lazily initialised logging state: log_level_set becomes 1 once
   log_level_rpn has been fetched with yaz_log_module_level("rpn")
   (done on the first call of add_isam_p). */
static int log_level_set = 0;
static int log_level_rpn = 0;

/* NOTE(review): not referenced in the visible part of this file;
   presumably switches off term-set handling further down -- confirm. */
#define TERMSET_DISABLE 1
44
45 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
46 {
47     struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
48     const char **out = zebra_maps_input(p->zm, from, len, 0);
49 #if 0
50     if (out && *out)
51     {
52         const char *outp = *out;
53         yaz_log(YLOG_LOG, "---");
54         while (*outp)
55         {
56             yaz_log(YLOG_LOG, "%02X", *outp);
57             outp++;
58         }
59     }
60 #endif
61     return out;
62 }
63
64 void rpn_char_map_prepare(struct zebra_register *reg, zebra_map_t zm,
65                           struct rpn_char_map_info *map_info)
66 {
67     map_info->zm = zm;
68     if (zebra_maps_is_icu(zm))
69         dict_grep_cmap(reg->dict, 0, 0);
70     else
71         dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
72 }
73
/* When defined, struct grep_info carries a term_no array that is
   grown in step with isam_p_buf. */
#define TERM_COUNT

/* Accumulator filled by add_isam_p() while grepping the dictionary. */
struct grep_info {
#ifdef TERM_COUNT
    int *term_no;           /* grown together with isam_p_buf */
#endif
    ISAM_P *isam_p_buf;     /* collected ISAM positions */
    int isam_p_size;        /* allocated entries in isam_p_buf */
    int isam_p_indx;        /* entries of isam_p_buf in use */
    int trunc_max;          /* add_isam_p stops at trunc_max - 1 entries */
    ZebraHandle zh;         /* handle used for term untranslation/explain */
    const char *index_type; /* passed to zebra_term_untrans() */
    ZebraSet termset;       /* if non-NULL, each hit is added as a term */
};
88
89 static int add_isam_p(const char *name, const char *info,
90                       struct grep_info *p)
91 {
92     if (!log_level_set)
93     {
94         log_level_rpn = yaz_log_module_level("rpn");
95         log_level_set = 1;
96     }
97     /* we may have to stop this madness.. NOTE: -1 so that if
98        truncmax == trunxlimit we do *not* generate result sets */
99     if (p->isam_p_indx >= p->trunc_max - 1)
100         return 1;
101
102     if (p->isam_p_indx == p->isam_p_size)
103     {
104         ISAM_P *new_isam_p_buf;
105 #ifdef TERM_COUNT
106         int *new_term_no;
107 #endif
108         p->isam_p_size = 2*p->isam_p_size + 100;
109         new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
110                                             p->isam_p_size);
111         if (p->isam_p_buf)
112         {
113             memcpy(new_isam_p_buf, p->isam_p_buf,
114                    p->isam_p_indx * sizeof(*p->isam_p_buf));
115             xfree(p->isam_p_buf);
116         }
117         p->isam_p_buf = new_isam_p_buf;
118
119 #ifdef TERM_COUNT
120         new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
121         if (p->term_no)
122         {
123             memcpy(new_term_no, p->isam_p_buf,
124                    p->isam_p_indx * sizeof(*p->term_no));
125             xfree(p->term_no);
126         }
127         p->term_no = new_term_no;
128 #endif
129     }
130     assert(*info == sizeof(*p->isam_p_buf));
131     memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
132
133     if (p->termset)
134     {
135         const char *db;
136         char term_tmp[IT_MAX_WORD];
137         int ord = 0;
138         const char *index_name;
139         int len = key_SU_decode(&ord, (const unsigned char *) name);
140
141         zebra_term_untrans (p->zh, p->index_type, term_tmp, name+len);
142         yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
143         zebraExplain_lookup_ord(p->zh->reg->zei,
144                                 ord, 0 /* index_type */, &db, &index_name);
145         yaz_log(log_level_rpn, "grep:  db=%s index=%s", db, index_name);
146
147         resultSetAddTerm(p->zh, p->termset, name[len], db,
148                          index_name, term_tmp);
149     }
150     (p->isam_p_indx)++;
151     return 0;
152 }
153
/* Dictionary-grep callback: p is the struct grep_info accumulator;
   the work is done by add_isam_p(). */
static int grep_handle(char *name, const char *info, void *p)
{
    struct grep_info *accum = p;

    return add_isam_p(name, info, accum);
}
158
159 static int term_pre(zebra_map_t zm, const char **src,
160                     const char *ct1, int first)
161 {
162     const char *s1, *s0 = *src;
163     const char **map;
164
165     /* skip white space */
166     while (*s0)
167     {
168         if (ct1 && strchr(ct1, *s0))
169             break;
170         s1 = s0;
171         map = zebra_maps_input(zm, &s1, strlen(s1), first);
172         if (**map != *CHR_SPACE)
173             break;
174         s0 = s1;
175     }
176     *src = s0;
177     return *s0;
178 }
179
180
/* esc_str: render in_buf (in_size bytes) as a debug string in out_buf.
   Each byte becomes "HH:c  " (hex value, then the character itself,
   or '?' when it is outside 32..126).  Once the output is within 20
   bytes of out_size, ".." is appended and the rest is dropped.
   out_size must be greater than 20. */
static void esc_str(char *out_buf, size_t out_size,
                    const char *in_buf, int in_size)
{
    size_t used = 0;
    int k;

    assert(out_buf);
    assert(in_buf);
    assert(out_size > 20);
    out_buf[0] = '\0';
    for (k = 0; k < in_size; k++)
    {
        const int byte = in_buf[k] & 0xff;
        const int shown = (byte >= 32 && byte <= 126) ? byte : '?';

        used += (size_t) sprintf(out_buf + used, "%02X:%c  ", byte, shown);
        if (used > out_size-20)
        {
            strcpy(out_buf + used, "..");
            break;
        }
    }
}
206
207 #define REGEX_CHARS " ^[]()|.*+?!\"$\\"
208
209 static void add_non_space(const char *start, const char *end,
210                           WRBUF term_dict,
211                           WRBUF display_term,
212                           const char **map, int q_map_match)
213 {
214     size_t sz = end - start;
215
216     wrbuf_write(display_term, start, sz);
217     if (!q_map_match)
218     {
219         while (start < end)
220         {
221             if (strchr(REGEX_CHARS, *start))
222                 wrbuf_putc(term_dict, '\\');
223             wrbuf_putc(term_dict, *start);
224             start++;
225         }
226     }
227     else
228     {
229         char tmpbuf[80];
230         esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
231
232         wrbuf_puts(term_dict, map[0]);
233     }
234 }
235
236
/* ICU sort keys seem to be of the form
   basechars \x01 accents \x01 length
   For now we'll just right truncate from basechars . This
   may give false hits due to accents not being used.

   Walks backwards from offset i past two \x01 separators (or to the
   start of buf, whichever comes first) and returns the resulting
   offset, i.e. the length of the basechars part.
*/
static size_t icu_basechars(const char *buf, size_t i)
{
    int section;

    /* first pass drops the length part, second pass the accents */
    for (section = 0; section < 2; section++)
    {
        while (i > 0)
        {
            i--;
            if (buf[i] == '\x01')
                break;
        }
    }
    return i; /* only basechars left */
}
250
251 static int term_102_icu(zebra_map_t zm,
252                         const char **src, WRBUF term_dict, int space_split,
253                         WRBUF display_term)
254 {
255     int no_terms = 0;
256     const char *s0 = *src, *s1;
257     while (*s0 == ' ')
258         s0++;
259     s1 = s0;
260     for (;;)
261     {
262         if (*s1 == ' ' && space_split)
263             break;
264         else if (*s1 && !strchr(REGEX_CHARS "-", *s1))
265             s1++;
266         else
267         {
268             /* EOF or regex reserved char */
269             if (s0 != s1)
270             {
271                 const char *res_buf = 0;
272                 size_t res_len = 0;
273                 const char *display_buf;
274                 size_t display_len;
275
276                 zebra_map_tokenize_start(zm, s0, s1 - s0);
277
278                 if (zebra_map_tokenize_next(zm, &res_buf, &res_len,
279                                             &display_buf, &display_len))
280                 {
281                     size_t i;
282                     res_len = icu_basechars(res_buf, res_len);
283                     for (i = 0; i < res_len; i++)
284                     {
285                         if (strchr(REGEX_CHARS "\\", res_buf[i]))
286                             wrbuf_putc(term_dict, '\\');
287                         if (res_buf[i] < 32)
288                             wrbuf_putc(term_dict, '\x01');
289
290                         wrbuf_putc(term_dict, res_buf[i]);
291                     }
292                     wrbuf_write(display_term, display_buf, display_len);
293
294                     no_terms++;
295                 }
296             }
297             if (*s1 == '\0')
298                 break;
299
300             wrbuf_putc(term_dict, *s1);
301             wrbuf_putc(display_term, *s1);
302
303             s1++;
304             s0 = s1;
305         }
306     }
307     if (no_terms)
308         wrbuf_puts(term_dict, "\x01\x01.*");
309     *src = s1;
310     return no_terms;
311 }
312
313 static int term_100_icu(zebra_map_t zm,
314                         const char **src, WRBUF term_dict,
315                         WRBUF display_term,
316                         int mode,
317                         size_t token_number)
318 {
319     size_t i;
320     const char *res_buf = 0;
321     size_t res_len = 0;
322     const char *display_buf;
323     size_t display_len;
324
325     zebra_map_tokenize_start(zm, *src, strlen(*src));
326     for (i = 0; i <= token_number; i++)
327     {
328         if (!zebra_map_tokenize_next(zm, &res_buf, &res_len,
329                                      &display_buf, &display_len))
330             return 0;
331     }
332     wrbuf_write(display_term, display_buf, display_len);
333     if (mode)
334     {
335         res_len = icu_basechars(res_buf, res_len);
336     }
337     if (mode & 2)
338         wrbuf_puts(term_dict, ".*");
339     for (i = 0; i < res_len; i++)
340     {
341         if (strchr(REGEX_CHARS "\\", res_buf[i]))
342             wrbuf_putc(term_dict, '\\');
343         if (res_buf[i] < 32)
344             wrbuf_putc(term_dict, '\x01');
345
346         wrbuf_putc(term_dict, res_buf[i]);
347     }
348     if (mode & 1)
349         wrbuf_puts(term_dict, ".*");
350     else if (mode)
351         wrbuf_puts(term_dict, "\x01\x01.*");
352     return 1;
353 }
354
355 /* term_100: handle term, where trunc = none(no operators at all) */
356 static int term_100(zebra_map_t zm,
357                     const char **src, WRBUF term_dict, int space_split,
358                     WRBUF display_term)
359 {
360     const char *s0;
361     const char **map;
362     int i = 0;
363
364     const char *space_start = 0;
365     const char *space_end = 0;
366
367     if (!term_pre(zm, src, 0, !space_split))
368         return 0;
369     s0 = *src;
370     while (*s0)
371     {
372         const char *s1 = s0;
373         int q_map_match = 0;
374         map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
375         if (space_split)
376         {
377             if (**map == *CHR_SPACE)
378                 break;
379         }
380         else  /* complete subfield only. */
381         {
382             if (**map == *CHR_SPACE)
383             {   /* save space mapping for later  .. */
384                 space_start = s1;
385                 space_end = s0;
386                 continue;
387             }
388             else if (space_start)
389             {   /* reload last space */
390                 while (space_start < space_end)
391                 {
392                     if (strchr(REGEX_CHARS, *space_start))
393                         wrbuf_putc(term_dict, '\\');
394                     wrbuf_putc(display_term, *space_start);
395                     wrbuf_putc(term_dict, *space_start);
396                     space_start++;
397
398                 }
399                 /* and reset */
400                 space_start = space_end = 0;
401             }
402         }
403         i++;
404
405         add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
406     }
407     *src = s0;
408     return i;
409 }
410
411 /* term_101: handle term, where trunc = Process # */
412 static int term_101(zebra_map_t zm,
413                     const char **src, WRBUF term_dict, int space_split,
414                     WRBUF display_term)
415 {
416     const char *s0;
417     const char **map;
418     int i = 0;
419
420     if (!term_pre(zm, src, "#", !space_split))
421         return 0;
422     s0 = *src;
423     while (*s0)
424     {
425         if (*s0 == '#')
426         {
427             i++;
428             wrbuf_puts(term_dict, ".*");
429             wrbuf_putc(display_term, *s0);
430             s0++;
431         }
432         else
433         {
434             const char *s1 = s0;
435             int q_map_match = 0;
436             map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
437             if (space_split && **map == *CHR_SPACE)
438                 break;
439
440             i++;
441             add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
442         }
443     }
444     *src = s0;
445     return i;
446 }
447
448 /* term_103: handle term, where trunc = re-2 (regular expressions) */
449 static int term_103(zebra_map_t zm, const char **src,
450                     WRBUF term_dict, int *errors, int space_split,
451                     WRBUF display_term)
452 {
453     int i = 0;
454     const char *s0;
455     const char **map;
456
457     if (!term_pre(zm, src, "^\\()[].*+?|", !space_split))
458         return 0;
459     s0 = *src;
460     if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
461         isdigit(((const unsigned char *)s0)[1]))
462     {
463         *errors = s0[1] - '0';
464         s0 += 3;
465         if (*errors > 3)
466             *errors = 3;
467     }
468     while (*s0)
469     {
470         if (strchr("^\\()[].*+?|-", *s0))
471         {
472             wrbuf_putc(display_term, *s0);
473             wrbuf_putc(term_dict, *s0);
474             s0++;
475             i++;
476         }
477         else
478         {
479             const char *s1 = s0;
480             int q_map_match = 0;
481             map = zebra_maps_search(zm, &s0, strlen(s0),  &q_map_match);
482             if (space_split && **map == *CHR_SPACE)
483                 break;
484
485             i++;
486             add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
487         }
488     }
489     *src = s0;
490
491     return i;
492 }
493
494 /* term_103: handle term, where trunc = re-1 (regular expressions) */
495 static int term_102(zebra_map_t zm, const char **src,
496                     WRBUF term_dict, int space_split, WRBUF display_term)
497 {
498     return term_103(zm, src, term_dict, NULL, space_split, display_term);
499 }
500
501
502 /* term_104: handle term, process ?n * # */
503 static int term_104(zebra_map_t zm, const char **src,
504                     WRBUF term_dict, int space_split, WRBUF display_term)
505 {
506     const char *s0;
507     const char **map;
508     int i = 0;
509
510     if (!term_pre(zm, src, "?*#", !space_split))
511         return 0;
512     s0 = *src;
513     while (*s0)
514     {
515         if (*s0 == '?')
516         {
517             i++;
518             wrbuf_putc(display_term, *s0);
519             s0++;
520             if (*s0 >= '0' && *s0 <= '9')
521             {
522                 int limit = 0;
523                 while (*s0 >= '0' && *s0 <= '9')
524                 {
525                     limit = limit * 10 + (*s0 - '0');
526                     wrbuf_putc(display_term, *s0);
527                     s0++;
528                 }
529                 if (limit > 20)
530                     limit = 20;
531                 while (--limit >= 0)
532                 {
533                     wrbuf_puts(term_dict, ".?");
534                 }
535             }
536             else
537             {
538                 wrbuf_puts(term_dict, ".*");
539             }
540         }
541         else if (*s0 == '*')
542         {
543             i++;
544             wrbuf_puts(term_dict, ".*");
545             wrbuf_putc(display_term, *s0);
546             s0++;
547         }
548         else if (*s0 == '#')
549         {
550             i++;
551             wrbuf_puts(term_dict, ".");
552             wrbuf_putc(display_term, *s0);
553             s0++;
554         }
555         else
556         {
557             const char *s1 = s0;
558             int q_map_match = 0;
559             map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
560             if (space_split && **map == *CHR_SPACE)
561                 break;
562
563             i++;
564             add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
565         }
566     }
567     *src = s0;
568     return i;
569 }
570
571 /* term_105/106: handle term, process * ! and possibly right_truncate */
572 static int term_105(zebra_map_t zm, const char **src,
573                     WRBUF term_dict, int space_split,
574                     WRBUF display_term, int right_truncate)
575 {
576     const char *s0;
577     const char **map;
578     int i = 0;
579
580     if (!term_pre(zm, src, "\\*!", !space_split))
581         return 0;
582     s0 = *src;
583     while (*s0)
584     {
585         if (*s0 == '*')
586         {
587             i++;
588             wrbuf_puts(term_dict, ".*");
589             wrbuf_putc(display_term, *s0);
590             s0++;
591         }
592         else if (*s0 == '!')
593         {
594             i++;
595             wrbuf_putc(term_dict, '.');
596             wrbuf_putc(display_term, *s0);
597             s0++;
598         }
599         else if (*s0 == '\\')
600         {
601             i++;
602             wrbuf_puts(term_dict, "\\\\");
603             wrbuf_putc(display_term, *s0);
604             s0++;
605         }
606         else
607         {
608             const char *s1 = s0;
609             int q_map_match = 0;
610             map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
611             if (space_split && **map == *CHR_SPACE)
612                 break;
613
614             i++;
615             add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
616         }
617     }
618     if (right_truncate)
619         wrbuf_puts(term_dict, ".*");
620     *src = s0;
621     return i;
622 }
623
624
625 /* gen_regular_rel - generate regular expression from relation
626  *  val:     border value (inclusive)
627  *  islt:    1 if <=; 0 if >=.
628  */
629 static void gen_regular_rel(WRBUF term_dict, int val, int islt)
630 {
631     char dst_buf[20*5*20]; /* assuming enough for expansion */
632     char *dst = dst_buf;
633     int dst_p;
634     int w, d, i;
635     int pos = 0;
636     char numstr[20];
637
638     yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
639     if (val >= 0)
640     {
641         if (islt)
642             strcpy(dst, "(-[0-9]+|(");
643         else
644             strcpy(dst, "((");
645     }
646     else
647     {
648         if (!islt)
649         {
650             strcpy(dst, "([0-9]+|-(");
651             islt = 1;
652         }
653         else
654         {
655             strcpy(dst, "(-(");
656             islt = 0;
657         }
658         val = -val;
659     }
660     dst_p = strlen(dst);
661     sprintf(numstr, "%d", val);
662     for (w = strlen(numstr); --w >= 0; pos++)
663     {
664         d = numstr[w];
665         if (pos > 0)
666         {
667             if (islt)
668             {
669                 if (d == '0')
670                     continue;
671                 d--;
672             }
673             else
674             {
675                 if (d == '9')
676                     continue;
677                 d++;
678             }
679         }
680
681         strcpy(dst + dst_p, numstr);
682         dst_p = strlen(dst) - pos - 1;
683
684         if (islt)
685         {
686             if (d != '0')
687             {
688                 dst[dst_p++] = '[';
689                 dst[dst_p++] = '0';
690                 dst[dst_p++] = '-';
691                 dst[dst_p++] = d;
692                 dst[dst_p++] = ']';
693             }
694             else
695                 dst[dst_p++] = d;
696         }
697         else
698         {
699             if (d != '9')
700             {
701                 dst[dst_p++] = '[';
702                 dst[dst_p++] = d;
703                 dst[dst_p++] = '-';
704                 dst[dst_p++] = '9';
705                 dst[dst_p++] = ']';
706             }
707             else
708                 dst[dst_p++] = d;
709         }
710         for (i = 0; i < pos; i++)
711         {
712             dst[dst_p++] = '[';
713             dst[dst_p++] = '0';
714             dst[dst_p++] = '-';
715             dst[dst_p++] = '9';
716             dst[dst_p++] = ']';
717         }
718         dst[dst_p++] = '|';
719     }
720     dst[dst_p] = '\0';
721     if (islt)
722     {
723         /* match everything less than 10^(pos-1) */
724         strcat(dst, "0*");
725         for (i = 1; i < pos; i++)
726             strcat(dst, "[0-9]?");
727     }
728     else
729     {
730         /* match everything greater than 10^pos */
731         for (i = 0; i <= pos; i++)
732             strcat(dst, "[0-9]");
733         strcat(dst, "[0-9]*");
734     }
735     strcat(dst, "))");
736     wrbuf_puts(term_dict, dst);
737 }
738
739 void string_rel_add_char(WRBUF term_p, WRBUF wsrc, int *indx)
740 {
741     const char *src = wrbuf_cstr(wsrc);
742     if (src[*indx] == '\\')
743     {
744         wrbuf_putc(term_p, src[*indx]);
745         (*indx)++;
746     }
747     wrbuf_putc(term_p, src[*indx]);
748     (*indx)++;
749 }
750
751 /*
752  *   >  abc     ([b-].*|a[c-].*|ab[d-].*|abc.+)
753  *              ([^-a].*|a[^-b].*ab[^-c].*|abc.+)
754  *   >= abc     ([b-].*|a[c-].*|ab[c-].*)
755  *              ([^-a].*|a[^-b].*|ab[c-].*)
756  *   <  abc     ([-0].*|a[-a].*|ab[-b].*)
757  *              ([^a-].*|a[^b-].*|ab[^c-].*)
758  *   <= abc     ([-0].*|a[-a].*|ab[-b].*|abc)
759  *              ([^a-].*|a[^b-].*|ab[^c-].*|abc)
760  */
761 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
762                            const char **term_sub, WRBUF term_dict,
763                            const Odr_oid *attributeSet,
764                            zebra_map_t zm, int space_split,
765                            WRBUF display_term,
766                            int *error_code)
767 {
768     AttrType relation;
769     int relation_value;
770     int i;
771     WRBUF term_component = wrbuf_alloc();
772
773     attr_init_APT(&relation, zapt, 2);
774     relation_value = attr_find(&relation, NULL);
775
776     *error_code = 0;
777     yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
778     switch (relation_value)
779     {
780     case 1:
781         if (!term_100(zm, term_sub, term_component, space_split, display_term))
782         {
783             wrbuf_destroy(term_component);
784             return 0;
785         }
786         yaz_log(log_level_rpn, "Relation <");
787
788         wrbuf_putc(term_dict, '(');
789         for (i = 0; i < wrbuf_len(term_component); )
790         {
791             int j = 0;
792
793             if (i)
794                 wrbuf_putc(term_dict, '|');
795             while (j < i)
796                 string_rel_add_char(term_dict, term_component, &j);
797
798             wrbuf_putc(term_dict, '[');
799
800             wrbuf_putc(term_dict, '^');
801
802             wrbuf_putc(term_dict, 1);
803             wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
804
805             string_rel_add_char(term_dict, term_component, &i);
806             wrbuf_putc(term_dict, '-');
807
808             wrbuf_putc(term_dict, ']');
809             wrbuf_putc(term_dict, '.');
810             wrbuf_putc(term_dict, '*');
811         }
812         wrbuf_putc(term_dict, ')');
813         break;
814     case 2:
815         if (!term_100(zm, term_sub, term_component, space_split, display_term))
816         {
817             wrbuf_destroy(term_component);
818             return 0;
819         }
820         yaz_log(log_level_rpn, "Relation <=");
821
822         wrbuf_putc(term_dict, '(');
823         for (i = 0; i < wrbuf_len(term_component); )
824         {
825             int j = 0;
826
827             while (j < i)
828                 string_rel_add_char(term_dict, term_component, &j);
829             wrbuf_putc(term_dict, '[');
830
831             wrbuf_putc(term_dict, '^');
832
833             wrbuf_putc(term_dict, 1);
834             wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
835
836             string_rel_add_char(term_dict, term_component, &i);
837             wrbuf_putc(term_dict, '-');
838
839             wrbuf_putc(term_dict, ']');
840             wrbuf_putc(term_dict, '.');
841             wrbuf_putc(term_dict, '*');
842
843             wrbuf_putc(term_dict, '|');
844         }
845         for (i = 0; i < wrbuf_len(term_component); )
846             string_rel_add_char(term_dict, term_component, &i);
847         wrbuf_putc(term_dict, ')');
848         break;
849     case 5:
850         if (!term_100(zm, term_sub, term_component, space_split, display_term))
851         {
852             wrbuf_destroy(term_component);
853             return 0;
854         }
855         yaz_log(log_level_rpn, "Relation >");
856
857         wrbuf_putc(term_dict, '(');
858         for (i = 0; i < wrbuf_len(term_component); )
859         {
860             int j = 0;
861
862             while (j < i)
863                 string_rel_add_char(term_dict, term_component, &j);
864             wrbuf_putc(term_dict, '[');
865
866             wrbuf_putc(term_dict, '^');
867             wrbuf_putc(term_dict, '-');
868             string_rel_add_char(term_dict, term_component, &i);
869
870             wrbuf_putc(term_dict, ']');
871             wrbuf_putc(term_dict, '.');
872             wrbuf_putc(term_dict, '*');
873
874             wrbuf_putc(term_dict, '|');
875         }
876         for (i = 0; i < wrbuf_len(term_component); )
877             string_rel_add_char(term_dict, term_component, &i);
878         wrbuf_putc(term_dict, '.');
879         wrbuf_putc(term_dict, '+');
880         wrbuf_putc(term_dict, ')');
881         break;
882     case 4:
883         if (!term_100(zm, term_sub, term_component, space_split, display_term))
884         {
885             wrbuf_destroy(term_component);
886             return 0;
887         }
888         yaz_log(log_level_rpn, "Relation >=");
889
890         wrbuf_putc(term_dict, '(');
891         for (i = 0; i < wrbuf_len(term_component); )
892         {
893             int j = 0;
894
895             if (i)
896                 wrbuf_putc(term_dict, '|');
897             while (j < i)
898                 string_rel_add_char(term_dict, term_component, &j);
899             wrbuf_putc(term_dict, '[');
900
901             if (i < wrbuf_len(term_component)-1)
902             {
903                 wrbuf_putc(term_dict, '^');
904                 wrbuf_putc(term_dict, '-');
905                 string_rel_add_char(term_dict, term_component, &i);
906             }
907             else
908             {
909                 string_rel_add_char(term_dict, term_component, &i);
910                 wrbuf_putc(term_dict, '-');
911             }
912             wrbuf_putc(term_dict, ']');
913             wrbuf_putc(term_dict, '.');
914             wrbuf_putc(term_dict, '*');
915         }
916         wrbuf_putc(term_dict, ')');
917         break;
918     case 3:
919     case 102:
920     case -1:
921         if (!**term_sub)
922             return 1;
923         yaz_log(log_level_rpn, "Relation =");
924         if (!term_100(zm, term_sub, term_component, space_split, display_term))
925         {
926             wrbuf_destroy(term_component);
927             return 0;
928         }
929         wrbuf_puts(term_dict, "(");
930         wrbuf_puts(term_dict, wrbuf_cstr(term_component));
931         wrbuf_puts(term_dict, ")");
932         break;
933     case 103:
934         yaz_log(log_level_rpn, "Relation always matches");
935         /* skip to end of term (we don't care what it is) */
936         while (**term_sub != '\0')
937             (*term_sub)++;
938         break;
939     default:
940         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
941         wrbuf_destroy(term_component);
942         return 0;
943     }
944     wrbuf_destroy(term_component);
945     return 1;
946 }
947
948 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
949                              const char **term_sub,
950                              WRBUF term_dict,
951                              const Odr_oid *attributeSet, NMEM stream,
952                              struct grep_info *grep_info,
953                              const char *index_type, int complete_flag,
954                              WRBUF display_term,
955                              const char *xpath_use,
956                              struct ord_list **ol,
957                              zebra_map_t zm, size_t token_number);
958
959 ZEBRA_RES zebra_term_limits_APT(ZebraHandle zh,
960                                 Z_AttributesPlusTerm *zapt,
961                                 zint *hits_limit_value,
962                                 const char **term_ref_id_str,
963                                 NMEM nmem)
964 {
965     AttrType term_ref_id_attr;
966     AttrType hits_limit_attr;
967     int term_ref_id_int;
968     zint hits_limit_from_attr;
969
970     attr_init_APT(&hits_limit_attr, zapt, 11);
971     hits_limit_from_attr  = attr_find(&hits_limit_attr, NULL);
972
973     attr_init_APT(&term_ref_id_attr, zapt, 10);
974     term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
975     if (term_ref_id_int >= 0)
976     {
977         char *res = nmem_malloc(nmem, 20);
978         sprintf(res, "%d", term_ref_id_int);
979         *term_ref_id_str = res;
980     }
981     if (hits_limit_from_attr != -1)
982         *hits_limit_value = hits_limit_from_attr;
983
984     yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
985             *term_ref_id_str ? *term_ref_id_str : "none",
986             *hits_limit_value);
987     return ZEBRA_OK;
988 }
989
990 /** \brief search for term (which may be truncated)
991  */
992 static ZEBRA_RES search_term(ZebraHandle zh,
993                              Z_AttributesPlusTerm *zapt,
994                              const char **term_sub,
995                              const Odr_oid *attributeSet,
996                              zint hits_limit, NMEM stream,
997                              struct grep_info *grep_info,
998                              const char *index_type, int complete_flag,
999                              const char *rank_type,
1000                              const char *xpath_use,
1001                              NMEM rset_nmem,
1002                              RSET *rset,
1003                              struct rset_key_control *kc,
1004                              zebra_map_t zm,
1005                              size_t token_number)
1006 {
1007     ZEBRA_RES res;
1008     struct ord_list *ol;
1009     zint hits_limit_value = hits_limit;
1010     const char *term_ref_id_str = 0;
1011     WRBUF term_dict = wrbuf_alloc();
1012     WRBUF display_term = wrbuf_alloc();
1013     *rset = 0;
1014     zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
1015                           stream);
1016     grep_info->isam_p_indx = 0;
1017     res = string_term(zh, zapt, term_sub, term_dict,
1018                       attributeSet, stream, grep_info,
1019                       index_type, complete_flag,
1020                       display_term, xpath_use, &ol, zm, token_number);
1021     wrbuf_destroy(term_dict);
1022     if (res == ZEBRA_OK && *term_sub)
1023     {
1024         yaz_log(log_level_rpn, "term: %s", wrbuf_cstr(display_term));
1025         *rset = rset_trunc(zh, grep_info->isam_p_buf,
1026                            grep_info->isam_p_indx, wrbuf_buf(display_term),
1027                            wrbuf_len(display_term), rank_type,
1028                            1 /* preserve pos */,
1029                            zapt->term->which, rset_nmem,
1030                            kc, kc->scope, ol, index_type, hits_limit_value,
1031                            term_ref_id_str);
1032         if (!*rset)
1033             res = ZEBRA_FAIL;
1034     }
1035     wrbuf_destroy(display_term);
1036     return res;
1037 }
1038
1039 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1040                              const char **term_sub,
1041                              WRBUF term_dict,
1042                              const Odr_oid *attributeSet, NMEM stream,
1043                              struct grep_info *grep_info,
1044                              const char *index_type, int complete_flag,
1045                              WRBUF display_term,
1046                              const char *xpath_use,
1047                              struct ord_list **ol,
1048                              zebra_map_t zm, size_t token_number)
1049 {
1050     int r;
1051     AttrType truncation;
1052     int truncation_value;
1053     const char *termp;
1054     struct rpn_char_map_info rcmi;
1055
1056     int space_split = complete_flag ? 0 : 1;
1057     int ord = -1;
1058     int regex_range = 0;
1059     int max_pos, prefix_len = 0;
1060     int relation_error;
1061     char ord_buf[32];
1062     int ord_len, i;
1063
1064     *ol = ord_list_create(stream);
1065
1066     rpn_char_map_prepare(zh->reg, zm, &rcmi);
1067     attr_init_APT(&truncation, zapt, 5);
1068     truncation_value = attr_find(&truncation, NULL);
1069     yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1070
1071     termp = *term_sub; /* start of term for each database */
1072
1073     if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1074                           attributeSet, &ord) != ZEBRA_OK)
1075     {
1076         *term_sub = 0;
1077         return ZEBRA_FAIL;
1078     }
1079
1080     wrbuf_rewind(term_dict); /* new dictionary regexp term */
1081
1082     *ol = ord_list_append(stream, *ol, ord);
1083     ord_len = key_SU_encode(ord, ord_buf);
1084
1085     wrbuf_putc(term_dict, '(');
1086
1087     for (i = 0; i < ord_len; i++)
1088     {
1089         wrbuf_putc(term_dict, 1);  /* our internal regexp escape char */
1090         wrbuf_putc(term_dict, ord_buf[i]);
1091     }
1092     wrbuf_putc(term_dict, ')');
1093
1094     prefix_len = wrbuf_len(term_dict);
1095
1096     if (zebra_maps_is_icu(zm))
1097     {
1098         int relation_value;
1099         AttrType relation;
1100
1101         attr_init_APT(&relation, zapt, 2);
1102         relation_value = attr_find(&relation, NULL);
1103         if (relation_value == 103) /* always matches */
1104             termp += strlen(termp); /* move to end of term */
1105         else if (relation_value == 3 || relation_value == 102 || relation_value == -1)
1106         {
1107             /* ICU case */
1108             switch (truncation_value)
1109             {
1110             case -1:         /* not specified */
1111             case 100:        /* do not truncate */
1112                 if (!term_100_icu(zm, &termp, term_dict, display_term, 0, token_number))
1113                 {
1114                     *term_sub = 0;
1115                     return ZEBRA_OK;
1116                 }
1117                 break;
1118             case 102:
1119                 if (!term_102_icu(zm, &termp, term_dict, space_split, display_term))
1120                 {
1121                     *term_sub = 0;
1122                     return ZEBRA_OK;
1123                 }
1124                 break;
1125             case 1:          /* right truncation */
1126                 if (!term_100_icu(zm, &termp, term_dict, display_term, 1, token_number))
1127                 {
1128                     *term_sub = 0;
1129                     return ZEBRA_OK;
1130                 }
1131                 break;
1132             case 2:
1133                 if (!term_100_icu(zm, &termp, term_dict, display_term, 2, token_number))
1134                 {
1135                     *term_sub = 0;
1136                     return ZEBRA_OK;
1137                 }
1138                 break;
1139             case 3:
1140                 if (!term_100_icu(zm, &termp, term_dict, display_term, 3, token_number))
1141                 {
1142                     *term_sub = 0;
1143                     return ZEBRA_OK;
1144                 }
1145                 break;
1146             default:
1147                 zebra_setError_zint(zh,
1148                                     YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1149                                     truncation_value);
1150                 return ZEBRA_FAIL;
1151             }
1152         }
1153         else
1154         {
1155             zebra_setError_zint(zh,
1156                                 YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE,
1157                                 relation_value);
1158             return ZEBRA_FAIL;
1159         }
1160     }
1161     else
1162     {
1163         /* non-ICU case. using string.chr and friends */
1164         switch (truncation_value)
1165         {
1166         case -1:         /* not specified */
1167         case 100:        /* do not truncate */
1168             if (!string_relation(zh, zapt, &termp, term_dict,
1169                                  attributeSet,
1170                                  zm, space_split, display_term,
1171                                  &relation_error))
1172             {
1173                 if (relation_error)
1174                 {
1175                     zebra_setError(zh, relation_error, 0);
1176                     return ZEBRA_FAIL;
1177                 }
1178                 *term_sub = 0;
1179                 return ZEBRA_OK;
1180             }
1181             break;
1182         case 1:          /* right truncation */
1183             wrbuf_putc(term_dict, '(');
1184             if (!term_100(zm, &termp, term_dict, space_split, display_term))
1185             {
1186                 *term_sub = 0;
1187                 return ZEBRA_OK;
1188             }
1189             wrbuf_puts(term_dict, ".*)");
1190             break;
1191         case 2:          /* left truncation */
1192             wrbuf_puts(term_dict, "(.*");
1193             if (!term_100(zm, &termp, term_dict, space_split, display_term))
1194             {
1195                 *term_sub = 0;
1196                 return ZEBRA_OK;
1197             }
1198             wrbuf_putc(term_dict, ')');
1199             break;
1200         case 3:          /* left&right truncation */
1201             wrbuf_puts(term_dict, "(.*");
1202             if (!term_100(zm, &termp, term_dict, space_split, display_term))
1203             {
1204                 *term_sub = 0;
1205                 return ZEBRA_OK;
1206             }
1207             wrbuf_puts(term_dict, ".*)");
1208             break;
1209         case 101:        /* process # in term */
1210             wrbuf_putc(term_dict, '(');
1211             if (!term_101(zm, &termp, term_dict, space_split, display_term))
1212             {
1213                 *term_sub = 0;
1214                 return ZEBRA_OK;
1215             }
1216             wrbuf_puts(term_dict, ")");
1217             break;
1218         case 102:        /* Regexp-1 */
1219             wrbuf_putc(term_dict, '(');
1220             if (!term_102(zm, &termp, term_dict, space_split, display_term))
1221             {
1222                 *term_sub = 0;
1223                 return ZEBRA_OK;
1224             }
1225             wrbuf_putc(term_dict, ')');
1226             break;
1227         case 103:       /* Regexp-2 */
1228             regex_range = 1;
1229             wrbuf_putc(term_dict, '(');
1230             if (!term_103(zm, &termp, term_dict, &regex_range,
1231                           space_split, display_term))
1232             {
1233                 *term_sub = 0;
1234                 return ZEBRA_OK;
1235             }
1236             wrbuf_putc(term_dict, ')');
1237             break;
1238         case 104:        /* process ?n * # term */
1239             wrbuf_putc(term_dict, '(');
1240             if (!term_104(zm, &termp, term_dict, space_split, display_term))
1241             {
1242                 *term_sub = 0;
1243                 return ZEBRA_OK;
1244             }
1245             wrbuf_putc(term_dict, ')');
1246             break;
1247         case 105:        /* process * ! in term and right truncate */
1248             wrbuf_putc(term_dict, '(');
1249             if (!term_105(zm, &termp, term_dict, space_split, display_term, 1))
1250             {
1251                 *term_sub = 0;
1252                 return ZEBRA_OK;
1253             }
1254             wrbuf_putc(term_dict, ')');
1255             break;
1256         case 106:        /* process * ! in term */
1257             wrbuf_putc(term_dict, '(');
1258             if (!term_105(zm, &termp, term_dict, space_split, display_term, 0))
1259             {
1260                 *term_sub = 0;
1261                 return ZEBRA_OK;
1262             }
1263             wrbuf_putc(term_dict, ')');
1264             break;
1265         default:
1266             zebra_setError_zint(zh,
1267                                 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1268                                 truncation_value);
1269             return ZEBRA_FAIL;
1270         }
1271     }
1272     if (1)
1273     {
1274         char buf[1000];
1275         const char *input = wrbuf_cstr(term_dict) + prefix_len;
1276         esc_str(buf, sizeof(buf), input, strlen(input));
1277     }
1278     {
1279         WRBUF pr_wr = wrbuf_alloc();
1280
1281         wrbuf_write_escaped(pr_wr, wrbuf_buf(term_dict), wrbuf_len(term_dict));
1282         yaz_log(YLOG_LOG, "dict_lookup_grep: %s", wrbuf_cstr(pr_wr));
1283         wrbuf_destroy(pr_wr);
1284     }
1285     r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), regex_range,
1286                          grep_info, &max_pos,
1287                          ord_len /* number of "exact" chars */,
1288                          grep_handle);
1289     if (r == 1)
1290         zebra_set_partial_result(zh);
1291     else if (r)
1292         yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1293     *term_sub = termp;
1294     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1295     return ZEBRA_OK;
1296 }
1297
1298
1299
1300 static void grep_info_delete(struct grep_info *grep_info)
1301 {
1302 #ifdef TERM_COUNT
1303     xfree(grep_info->term_no);
1304 #endif
1305     xfree(grep_info->isam_p_buf);
1306 }
1307
1308 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1309                                    Z_AttributesPlusTerm *zapt,
1310                                    struct grep_info *grep_info,
1311                                    const char *index_type)
1312 {
1313 #ifdef TERM_COUNT
1314     grep_info->term_no = 0;
1315 #endif
1316     grep_info->trunc_max = atoi(res_get_def(zh->res, "truncmax", "10000"));
1317     grep_info->isam_p_size = 0;
1318     grep_info->isam_p_buf = NULL;
1319     grep_info->zh = zh;
1320     grep_info->index_type = index_type;
1321     grep_info->termset = 0;
1322     if (zapt)
1323     {
1324         AttrType truncmax;
1325         int truncmax_value;
1326
1327         attr_init_APT(&truncmax, zapt, 13);
1328         truncmax_value = attr_find(&truncmax, NULL);
1329         if (truncmax_value != -1)
1330             grep_info->trunc_max = truncmax_value;
1331     }
1332     if (zapt)
1333     {
1334         AttrType termset;
1335         int termset_value_numeric;
1336         const char *termset_value_string;
1337
1338         attr_init_APT(&termset, zapt, 8);
1339         termset_value_numeric =
1340             attr_find_ex(&termset, NULL, &termset_value_string);
1341         if (termset_value_numeric != -1)
1342         {
1343 #if TERMSET_DISABLE
1344             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1345             return ZEBRA_FAIL;
1346 #else
1347             char resname[32];
1348             const char *termset_name = 0;
1349             if (termset_value_numeric != -2)
1350             {
1351
1352                 sprintf(resname, "%d", termset_value_numeric);
1353                 termset_name = resname;
1354             }
1355             else
1356                 termset_name = termset_value_string;
1357             yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1358             grep_info->termset = resultSetAdd(zh, termset_name, 1);
1359             if (!grep_info->termset)
1360             {
1361                 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1362                 return ZEBRA_FAIL;
1363             }
1364 #endif
1365         }
1366     }
1367     return ZEBRA_OK;
1368 }
1369
1370 static ZEBRA_RES search_terms_chrmap(ZebraHandle zh,
1371                                      Z_AttributesPlusTerm *zapt,
1372                                      const char *termz,
1373                                      const Odr_oid *attributeSet,
1374                                      zint hits_limit,
1375                                      NMEM stream,
1376                                      const char *index_type, int complete_flag,
1377                                      const char *rank_type,
1378                                      const char *xpath_use,
1379                                      NMEM rset_nmem,
1380                                      RSET **result_sets, int *num_result_sets,
1381                                      struct rset_key_control *kc,
1382                                      zebra_map_t zm)
1383 {
1384     struct grep_info grep_info;
1385     const char *termp = termz;
1386     int alloc_sets = 0;
1387
1388     *num_result_sets = 0;
1389     if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1390         return ZEBRA_FAIL;
1391     while (1)
1392     {
1393         ZEBRA_RES res;
1394
1395         if (alloc_sets == *num_result_sets)
1396         {
1397             int add = 10;
1398             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1399                                               sizeof(*rnew));
1400             if (alloc_sets)
1401                 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1402             alloc_sets = alloc_sets + add;
1403             *result_sets = rnew;
1404         }
1405         res = search_term(zh, zapt, &termp, attributeSet, hits_limit,
1406                           stream, &grep_info,
1407                           index_type, complete_flag,
1408                           rank_type,
1409                           xpath_use, rset_nmem,
1410                           &(*result_sets)[*num_result_sets],
1411                           kc, zm,
1412                           *num_result_sets);
1413         if (res != ZEBRA_OK)
1414         {
1415             int i;
1416             for (i = 0; i < *num_result_sets; i++)
1417                 rset_delete((*result_sets)[i]);
1418             grep_info_delete(&grep_info);
1419             return res;
1420         }
1421         if ((*result_sets)[*num_result_sets] == 0)
1422             break;
1423         (*num_result_sets)++;
1424
1425         if (!*termp)
1426             break;
1427     }
1428     grep_info_delete(&grep_info);
1429     return ZEBRA_OK;
1430 }
1431
1432 /**
1433    \brief Create result set(s) for list of terms
1434    \param zh Zebra Handle
1435    \param zapt Attributes Plust Term (RPN leaf)
1436    \param termz term as used in query but converted to UTF-8
1437    \param attributeSet default attribute set
1438    \param stream memory for result
1439    \param index_type register type ("w", "p",..)
1440    \param complete_flag whether it's phrases or not
1441    \param rank_type term flags for ranking
1442    \param xpath_use use attribute for X-Path (-1 for no X-path)
1443    \param rset_nmem memory for result sets
1444    \param result_sets output result set for each term in list (output)
1445    \param num_result_sets number of output result sets
1446    \param kc rset key control to be used for created result sets
1447 */
1448 static ZEBRA_RES search_terms_list(ZebraHandle zh,
1449                                    Z_AttributesPlusTerm *zapt,
1450                                    const char *termz,
1451                                    const Odr_oid *attributeSet,
1452                                    zint hits_limit,
1453                                    NMEM stream,
1454                                    const char *index_type, int complete_flag,
1455                                    const char *rank_type,
1456                                    const char *xpath_use,
1457                                    NMEM rset_nmem,
1458                                    RSET **result_sets, int *num_result_sets,
1459                                    struct rset_key_control *kc)
1460 {
1461     zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1462     return search_terms_chrmap(zh, zapt, termz, attributeSet, hits_limit,
1463                                stream, index_type, complete_flag,
1464                                rank_type, xpath_use,
1465                                rset_nmem, result_sets, num_result_sets,
1466                                kc, zm);
1467 }
1468
1469
1470 /** \brief limit a search by position - returns result set
1471  */
1472 static ZEBRA_RES search_position(ZebraHandle zh,
1473                                  Z_AttributesPlusTerm *zapt,
1474                                  const Odr_oid *attributeSet,
1475                                  const char *index_type,
1476                                  NMEM rset_nmem,
1477                                  RSET *rset,
1478                                  struct rset_key_control *kc)
1479 {
1480     int position_value;
1481     AttrType position;
1482     int ord = -1;
1483     char ord_buf[32];
1484     char term_dict[100];
1485     int ord_len;
1486     char *val;
1487     ISAM_P isam_p;
1488     zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1489
1490     attr_init_APT(&position, zapt, 3);
1491     position_value = attr_find(&position, NULL);
1492     switch(position_value)
1493     {
1494     case 3:
1495     case -1:
1496         return ZEBRA_OK;
1497     case 1:
1498     case 2:
1499         break;
1500     default:
1501         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1502                             position_value);
1503         return ZEBRA_FAIL;
1504     }
1505
1506
1507     if (!zebra_maps_is_first_in_field(zm))
1508     {
1509         zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1510                             position_value);
1511         return ZEBRA_FAIL;
1512     }
1513
1514     if (zebra_apt_get_ord(zh, zapt, index_type, 0,
1515                           attributeSet, &ord) != ZEBRA_OK)
1516     {
1517         return ZEBRA_FAIL;
1518     }
1519     ord_len = key_SU_encode(ord, ord_buf);
1520     memcpy(term_dict, ord_buf, ord_len);
1521     strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1522     val = dict_lookup(zh->reg->dict, term_dict);
1523     if (val)
1524     {
1525         assert(*val == sizeof(ISAM_P));
1526         memcpy(&isam_p, val+1, sizeof(isam_p));
1527
1528         *rset = zebra_create_rset_isam(zh, rset_nmem, kc, kc->scope,
1529                                        isam_p, 0);
1530     }
1531     return ZEBRA_OK;
1532 }
1533
1534 /** \brief returns result set for phrase search
1535  */
1536 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1537                                        Z_AttributesPlusTerm *zapt,
1538                                        const char *termz_org,
1539                                        const Odr_oid *attributeSet,
1540                                        zint hits_limit,
1541                                        NMEM stream,
1542                                        const char *index_type,
1543                                        int complete_flag,
1544                                        const char *rank_type,
1545                                        const char *xpath_use,
1546                                        NMEM rset_nmem,
1547                                        RSET *rset,
1548                                        struct rset_key_control *kc)
1549 {
1550     RSET *result_sets = 0;
1551     int num_result_sets = 0;
1552     ZEBRA_RES res =
1553         search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1554                           stream, index_type, complete_flag,
1555                           rank_type, xpath_use,
1556                           rset_nmem,
1557                           &result_sets, &num_result_sets, kc);
1558
1559     if (res != ZEBRA_OK)
1560         return res;
1561
1562     if (num_result_sets > 0)
1563     {
1564         RSET first_set = 0;
1565         res = search_position(zh, zapt, attributeSet,
1566                               index_type,
1567                               rset_nmem, &first_set,
1568                               kc);
1569         if (res != ZEBRA_OK)
1570         {
1571             int i;
1572             for (i = 0; i < num_result_sets; i++)
1573                 rset_delete(result_sets[i]);
1574             return res;
1575         }
1576         if (first_set)
1577         {
1578             RSET *nsets = nmem_malloc(stream,
1579                                       sizeof(RSET) * (num_result_sets+1));
1580             nsets[0] = first_set;
1581             memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1582             result_sets = nsets;
1583             num_result_sets++;
1584         }
1585     }
1586     if (num_result_sets == 0)
1587         *rset = rset_create_null(rset_nmem, kc, 0);
1588     else if (num_result_sets == 1)
1589         *rset = result_sets[0];
1590     else
1591         *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1592                                  num_result_sets, result_sets,
1593                                  1 /* ordered */, 0 /* exclusion */,
1594                                  3 /* relation */, 1 /* distance */);
1595     if (!*rset)
1596         return ZEBRA_FAIL;
1597     return ZEBRA_OK;
1598 }
1599
1600 /** \brief returns result set for or-list search
1601  */
1602 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1603                                         Z_AttributesPlusTerm *zapt,
1604                                         const char *termz_org,
1605                                         const Odr_oid *attributeSet,
1606                                         zint hits_limit,
1607                                         NMEM stream,
1608                                         const char *index_type,
1609                                         int complete_flag,
1610                                         const char *rank_type,
1611                                         const char *xpath_use,
1612                                         NMEM rset_nmem,
1613                                         RSET *rset,
1614                                         struct rset_key_control *kc)
1615 {
1616     RSET *result_sets = 0;
1617     int num_result_sets = 0;
1618     int i;
1619     ZEBRA_RES res =
1620         search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1621                           stream, index_type, complete_flag,
1622                           rank_type, xpath_use,
1623                           rset_nmem,
1624                           &result_sets, &num_result_sets, kc);
1625     if (res != ZEBRA_OK)
1626         return res;
1627
1628     for (i = 0; i < num_result_sets; i++)
1629     {
1630         RSET first_set = 0;
1631         res = search_position(zh, zapt, attributeSet,
1632                               index_type,
1633                               rset_nmem, &first_set,
1634                               kc);
1635         if (res != ZEBRA_OK)
1636         {
1637             for (i = 0; i < num_result_sets; i++)
1638                 rset_delete(result_sets[i]);
1639             return res;
1640         }
1641
1642         if (first_set)
1643         {
1644             RSET tmp_set[2];
1645
1646             tmp_set[0] = first_set;
1647             tmp_set[1] = result_sets[i];
1648
1649             result_sets[i] = rset_create_prox(
1650                 rset_nmem, kc, kc->scope,
1651                 2, tmp_set,
1652                 1 /* ordered */, 0 /* exclusion */,
1653                 3 /* relation */, 1 /* distance */);
1654         }
1655     }
1656     if (num_result_sets == 0)
1657         *rset = rset_create_null(rset_nmem, kc, 0);
1658     else if (num_result_sets == 1)
1659         *rset = result_sets[0];
1660     else
1661         *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1662                                num_result_sets, result_sets);
1663     if (!*rset)
1664         return ZEBRA_FAIL;
1665     return ZEBRA_OK;
1666 }
1667
1668 /** \brief returns result set for and-list search
1669  */
1670 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1671                                          Z_AttributesPlusTerm *zapt,
1672                                          const char *termz_org,
1673                                          const Odr_oid *attributeSet,
1674                                          zint hits_limit,
1675                                          NMEM stream,
1676                                          const char *index_type,
1677                                          int complete_flag,
1678                                          const char *rank_type,
1679                                          const char *xpath_use,
1680                                          NMEM rset_nmem,
1681                                          RSET *rset,
1682                                          struct rset_key_control *kc)
1683 {
1684     RSET *result_sets = 0;
1685     int num_result_sets = 0;
1686     int i;
1687     ZEBRA_RES res =
1688         search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1689                           stream, index_type, complete_flag,
1690                           rank_type, xpath_use,
1691                           rset_nmem,
1692                           &result_sets, &num_result_sets,
1693                           kc);
1694     if (res != ZEBRA_OK)
1695         return res;
1696     for (i = 0; i < num_result_sets; i++)
1697     {
1698         RSET first_set = 0;
1699         res = search_position(zh, zapt, attributeSet,
1700                               index_type,
1701                               rset_nmem, &first_set,
1702                               kc);
1703         if (res != ZEBRA_OK)
1704         {
1705             for (i = 0; i < num_result_sets; i++)
1706                 rset_delete(result_sets[i]);
1707             return res;
1708         }
1709
1710         if (first_set)
1711         {
1712             RSET tmp_set[2];
1713
1714             tmp_set[0] = first_set;
1715             tmp_set[1] = result_sets[i];
1716
1717             result_sets[i] = rset_create_prox(
1718                 rset_nmem, kc, kc->scope,
1719                 2, tmp_set,
1720                 1 /* ordered */, 0 /* exclusion */,
1721                 3 /* relation */, 1 /* distance */);
1722         }
1723     }
1724
1725
1726     if (num_result_sets == 0)
1727         *rset = rset_create_null(rset_nmem, kc, 0);
1728     else if (num_result_sets == 1)
1729         *rset = result_sets[0];
1730     else
1731         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1732                                 num_result_sets, result_sets);
1733     if (!*rset)
1734         return ZEBRA_FAIL;
1735     return ZEBRA_OK;
1736 }
1737
1738 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1739                             const char **term_sub,
1740                             WRBUF term_dict,
1741                             const Odr_oid *attributeSet,
1742                             struct grep_info *grep_info,
1743                             int *max_pos,
1744                             zebra_map_t zm,
1745                             WRBUF display_term,
1746                             int *error_code)
1747 {
1748     AttrType relation;
1749     int relation_value;
1750     int term_value;
1751     int r;
1752     WRBUF term_num = wrbuf_alloc();
1753
1754     *error_code = 0;
1755     attr_init_APT(&relation, zapt, 2);
1756     relation_value = attr_find(&relation, NULL);
1757
1758     yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1759
1760     switch (relation_value)
1761     {
1762     case 1:
1763         yaz_log(log_level_rpn, "Relation <");
1764         if (!term_100(zm, term_sub, term_num, 1, display_term))
1765         {
1766             wrbuf_destroy(term_num);
1767             return 0;
1768         }
1769         term_value = atoi(wrbuf_cstr(term_num));
1770         gen_regular_rel(term_dict, term_value-1, 1);
1771         break;
1772     case 2:
1773         yaz_log(log_level_rpn, "Relation <=");
1774         if (!term_100(zm, term_sub, term_num, 1, display_term))
1775         {
1776             wrbuf_destroy(term_num);
1777             return 0;
1778         }
1779         term_value = atoi(wrbuf_cstr(term_num));
1780         gen_regular_rel(term_dict, term_value, 1);
1781         break;
1782     case 4:
1783         yaz_log(log_level_rpn, "Relation >=");
1784         if (!term_100(zm, term_sub, term_num, 1, display_term))
1785         {
1786             wrbuf_destroy(term_num);
1787             return 0;
1788         }
1789         term_value = atoi(wrbuf_cstr(term_num));
1790         gen_regular_rel(term_dict, term_value, 0);
1791         break;
1792     case 5:
1793         yaz_log(log_level_rpn, "Relation >");
1794         if (!term_100(zm, term_sub, term_num, 1, display_term))
1795         {
1796             wrbuf_destroy(term_num);
1797             return 0;
1798         }
1799         term_value = atoi(wrbuf_cstr(term_num));
1800         gen_regular_rel(term_dict, term_value+1, 0);
1801         break;
1802     case -1:
1803     case 102:
1804     case 3:
1805         yaz_log(log_level_rpn, "Relation =");
1806         if (!term_100(zm, term_sub, term_num, 1, display_term))
1807         {
1808             wrbuf_destroy(term_num);
1809             return 0;
1810         }
1811         term_value = atoi(wrbuf_cstr(term_num));
1812         wrbuf_printf(term_dict, "(0*%d)", term_value);
1813         break;
1814     case 103:
1815         /* term_tmp untouched.. */
1816         while (**term_sub != '\0')
1817             (*term_sub)++;
1818         break;
1819     default:
1820         *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1821         wrbuf_destroy(term_num);
1822         return 0;
1823     }
1824     r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict),
1825                          0, grep_info, max_pos, 0, grep_handle);
1826
1827     if (r == 1)
1828         zebra_set_partial_result(zh);
1829     else if (r)
1830         yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1831     yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1832     wrbuf_destroy(term_num);
1833     return 1;
1834 }
1835
1836 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1837                               const char **term_sub,
1838                               WRBUF term_dict,
1839                               const Odr_oid *attributeSet, NMEM stream,
1840                               struct grep_info *grep_info,
1841                               const char *index_type, int complete_flag,
1842                               WRBUF display_term,
1843                               const char *xpath_use,
1844                               struct ord_list **ol)
1845 {
1846     const char *termp;
1847     struct rpn_char_map_info rcmi;
1848     int max_pos;
1849     int relation_error = 0;
1850     int ord, ord_len, i;
1851     char ord_buf[32];
1852     zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1853
1854     *ol = ord_list_create(stream);
1855
1856     rpn_char_map_prepare(zh->reg, zm, &rcmi);
1857
1858     termp = *term_sub;
1859
1860     if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1861                           attributeSet, &ord) != ZEBRA_OK)
1862     {
1863         return ZEBRA_FAIL;
1864     }
1865
1866     wrbuf_rewind(term_dict);
1867
1868     *ol = ord_list_append(stream, *ol, ord);
1869
1870     ord_len = key_SU_encode(ord, ord_buf);
1871
1872     wrbuf_putc(term_dict, '(');
1873     for (i = 0; i < ord_len; i++)
1874     {
1875         wrbuf_putc(term_dict, 1);
1876         wrbuf_putc(term_dict, ord_buf[i]);
1877     }
1878     wrbuf_putc(term_dict, ')');
1879
1880     if (!numeric_relation(zh, zapt, &termp, term_dict,
1881                           attributeSet, grep_info, &max_pos, zm,
1882                           display_term, &relation_error))
1883     {
1884         if (relation_error)
1885         {
1886             zebra_setError(zh, relation_error, 0);
1887             return ZEBRA_FAIL;
1888         }
1889         *term_sub = 0;
1890         return ZEBRA_OK;
1891     }
1892     *term_sub = termp;
1893     yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1894     return ZEBRA_OK;
1895 }
1896
1897
1898 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1899                                         Z_AttributesPlusTerm *zapt,
1900                                         const char *termz,
1901                                         const Odr_oid *attributeSet,
1902                                         zint hits_limit,
1903                                         NMEM stream,
1904                                         const char *index_type,
1905                                         int complete_flag,
1906                                         const char *rank_type,
1907                                         const char *xpath_use,
1908                                         NMEM rset_nmem,
1909                                         RSET *rset,
1910                                         struct rset_key_control *kc)
1911 {
1912     const char *termp = termz;
1913     RSET *result_sets = 0;
1914     int num_result_sets = 0;
1915     ZEBRA_RES res;
1916     struct grep_info grep_info;
1917     int alloc_sets = 0;
1918     zint hits_limit_value = hits_limit;
1919     const char *term_ref_id_str = 0;
1920
1921     zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
1922                           stream);
1923
1924     yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1925     if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1926         return ZEBRA_FAIL;
1927     while (1)
1928     {
1929         struct ord_list *ol;
1930         WRBUF term_dict = wrbuf_alloc();
1931         WRBUF display_term = wrbuf_alloc();
1932         if (alloc_sets == num_result_sets)
1933         {
1934             int add = 10;
1935             RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1936                                               sizeof(*rnew));
1937             if (alloc_sets)
1938                 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1939             alloc_sets = alloc_sets + add;
1940             result_sets = rnew;
1941         }
1942         yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1943         grep_info.isam_p_indx = 0;
1944         res = numeric_term(zh, zapt, &termp, term_dict,
1945                            attributeSet, stream, &grep_info,
1946                            index_type, complete_flag,
1947                            display_term, xpath_use, &ol);
1948         wrbuf_destroy(term_dict);
1949         if (res == ZEBRA_FAIL || termp == 0)
1950         {
1951             wrbuf_destroy(display_term);
1952             break;
1953         }
1954         yaz_log(YLOG_DEBUG, "term: %s", wrbuf_cstr(display_term));
1955         result_sets[num_result_sets] =
1956             rset_trunc(zh, grep_info.isam_p_buf,
1957                        grep_info.isam_p_indx, wrbuf_buf(display_term),
1958                        wrbuf_len(display_term), rank_type,
1959                        0 /* preserve position */,
1960                        zapt->term->which, rset_nmem,
1961                        kc, kc->scope, ol, index_type,
1962                        hits_limit_value,
1963                        term_ref_id_str);
1964         wrbuf_destroy(display_term);
1965         if (!result_sets[num_result_sets])
1966             break;
1967         num_result_sets++;
1968         if (!*termp)
1969             break;
1970     }
1971     grep_info_delete(&grep_info);
1972
1973     if (res != ZEBRA_OK)
1974         return res;
1975     if (num_result_sets == 0)
1976         *rset = rset_create_null(rset_nmem, kc, 0);
1977     else if (num_result_sets == 1)
1978         *rset = result_sets[0];
1979     else
1980         *rset = rset_create_and(rset_nmem, kc, kc->scope,
1981                                 num_result_sets, result_sets);
1982     if (!*rset)
1983         return ZEBRA_FAIL;
1984     return ZEBRA_OK;
1985 }
1986
1987 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1988                                       Z_AttributesPlusTerm *zapt,
1989                                       const char *termz,
1990                                       const Odr_oid *attributeSet,
1991                                       NMEM stream,
1992                                       const char *rank_type, NMEM rset_nmem,
1993                                       RSET *rset,
1994                                       struct rset_key_control *kc)
1995 {
1996     Record rec;
1997     zint sysno = atozint(termz);
1998
1999     if (sysno <= 0)
2000         sysno = 0;
2001     rec = rec_get(zh->reg->records, sysno);
2002     if (!rec)
2003         sysno = 0;
2004
2005     rec_free(&rec);
2006
2007     if (sysno <= 0)
2008     {
2009         *rset = rset_create_null(rset_nmem, kc, 0);
2010     }
2011     else
2012     {
2013         RSFD rsfd;
2014         struct it_key key;
2015         *rset = rset_create_temp(rset_nmem, kc, kc->scope,
2016                                  res_get(zh->res, "setTmpDir"), 0);
2017         rsfd = rset_open(*rset, RSETF_WRITE);
2018
2019         key.mem[0] = sysno;
2020         key.mem[1] = 1;
2021         key.len = 2;
2022         rset_write(rsfd, &key);
2023         rset_close(rsfd);
2024     }
2025     return ZEBRA_OK;
2026 }
2027
2028 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2029                                const Odr_oid *attributeSet, NMEM stream,
2030                                Z_SortKeySpecList *sort_sequence,
2031                                const char *rank_type,
2032                                NMEM rset_nmem,
2033                                RSET *rset,
2034                                struct rset_key_control *kc)
2035 {
2036     int i;
2037     int sort_relation_value;
2038     AttrType sort_relation_type;
2039     Z_SortKeySpec *sks;
2040     Z_SortKey *sk;
2041     char termz[20];
2042
2043     attr_init_APT(&sort_relation_type, zapt, 7);
2044     sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
2045
2046     if (!sort_sequence->specs)
2047     {
2048         sort_sequence->num_specs = 10;
2049         sort_sequence->specs = (Z_SortKeySpec **)
2050             nmem_malloc(stream, sort_sequence->num_specs *
2051                         sizeof(*sort_sequence->specs));
2052         for (i = 0; i < sort_sequence->num_specs; i++)
2053             sort_sequence->specs[i] = 0;
2054     }
2055     if (zapt->term->which != Z_Term_general)
2056         i = 0;
2057     else
2058         i = atoi_n((char *) zapt->term->u.general->buf,
2059                    zapt->term->u.general->len);
2060     if (i >= sort_sequence->num_specs)
2061         i = 0;
2062     sprintf(termz, "%d", i);
2063
2064     sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
2065     sks->sortElement = (Z_SortElement *)
2066         nmem_malloc(stream, sizeof(*sks->sortElement));
2067     sks->sortElement->which = Z_SortElement_generic;
2068     sk = sks->sortElement->u.generic = (Z_SortKey *)
2069         nmem_malloc(stream, sizeof(*sk));
2070     sk->which = Z_SortKey_sortAttributes;
2071     sk->u.sortAttributes = (Z_SortAttributes *)
2072         nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
2073
2074     sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet);
2075     sk->u.sortAttributes->list = zapt->attributes;
2076
2077     sks->sortRelation = (Odr_int *)
2078         nmem_malloc(stream, sizeof(*sks->sortRelation));
2079     if (sort_relation_value == 1)
2080         *sks->sortRelation = Z_SortKeySpec_ascending;
2081     else if (sort_relation_value == 2)
2082         *sks->sortRelation = Z_SortKeySpec_descending;
2083     else
2084         *sks->sortRelation = Z_SortKeySpec_ascending;
2085
2086     sks->caseSensitivity = (Odr_int *)
2087         nmem_malloc(stream, sizeof(*sks->caseSensitivity));
2088     *sks->caseSensitivity = 0;
2089
2090     sks->which = Z_SortKeySpec_null;
2091     sks->u.null = odr_nullval ();
2092     sort_sequence->specs[i] = sks;
2093     *rset = rset_create_null(rset_nmem, kc, 0);
2094     return ZEBRA_OK;
2095 }
2096
2097
2098 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2099                            const Odr_oid *attributeSet,
2100                            struct xpath_location_step *xpath, int max,
2101                            NMEM mem)
2102 {
2103     const Odr_oid *curAttributeSet = attributeSet;
2104     AttrType use;
2105     const char *use_string = 0;
2106
2107     attr_init_APT(&use, zapt, 1);
2108     attr_find_ex(&use, &curAttributeSet, &use_string);
2109
2110     if (!use_string || *use_string != '/')
2111         return -1;
2112
2113     return zebra_parse_xpath_str(use_string, xpath, max, mem);
2114 }
2115
2116
2117
2118 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
2119                         const char *index_type, const char *term,
2120                         const char *xpath_use,
2121                         NMEM rset_nmem,
2122                         struct rset_key_control *kc)
2123 {
2124     struct grep_info grep_info;
2125     int ord = zebraExplain_lookup_attr_str(zh->reg->zei,
2126                                            zinfo_index_category_index,
2127                                            index_type, xpath_use);
2128     if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, "0") == ZEBRA_FAIL)
2129         return rset_create_null(rset_nmem, kc, 0);
2130
2131     if (ord < 0)
2132         return rset_create_null(rset_nmem, kc, 0);
2133     else
2134     {
2135         int i, max_pos;
2136         char ord_buf[32];
2137         RSET rset;
2138         WRBUF term_dict = wrbuf_alloc();
2139         int ord_len = key_SU_encode(ord, ord_buf);
2140         int term_type = Z_Term_characterString;
2141         const char *flags = "void";
2142
2143         wrbuf_putc(term_dict, '(');
2144         for (i = 0; i < ord_len; i++)
2145         {
2146             wrbuf_putc(term_dict, 1);
2147             wrbuf_putc(term_dict, ord_buf[i]);
2148         }
2149         wrbuf_putc(term_dict, ')');
2150         wrbuf_puts(term_dict, term);
2151
2152         grep_info.isam_p_indx = 0;
2153         dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 0,
2154                          &grep_info, &max_pos, 0, grep_handle);
2155         yaz_log(YLOG_DEBUG, "%s %d positions", term,
2156                 grep_info.isam_p_indx);
2157         rset = rset_trunc(zh, grep_info.isam_p_buf,
2158                           grep_info.isam_p_indx, term, strlen(term),
2159                           flags, 1, term_type, rset_nmem,
2160                           kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2161                           0 /* term_ref_id_str */);
2162         grep_info_delete(&grep_info);
2163         wrbuf_destroy(term_dict);
2164         return rset;
2165     }
2166 }
2167
2168 static
2169 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2170                            NMEM stream, const char *rank_type, RSET rset,
2171                            int xpath_len, struct xpath_location_step *xpath,
2172                            NMEM rset_nmem,
2173                            RSET *rset_out,
2174                            struct rset_key_control *kc)
2175 {
2176     int i;
2177     int always_matches = rset ? 0 : 1;
2178
2179     if (xpath_len < 0)
2180     {
2181         *rset_out = rset;
2182         return ZEBRA_OK;
2183     }
2184
2185     yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2186     for (i = 0; i < xpath_len; i++)
2187     {
2188         yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2189
2190     }
2191
2192     /*
2193     //a    ->    a/.*
2194     //a/b  ->    b/a/.*
2195     /a     ->    a/
2196     /a/b   ->    b/a/
2197
2198     /      ->    none
2199
2200     a[@attr = value]/b[@other = othervalue]
2201
2202     /e/@a val      range(e/,range(@a,freetext(w,1015,val),@a),e/)
2203     /a/b val       range(b/a/,freetext(w,1016,val),b/a/)
2204     /a/b/@c val    range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2205     /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2206     /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2207     /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
2208
2209     */
2210
2211     dict_grep_cmap(zh->reg->dict, 0, 0);
2212
2213     {
2214         int level = xpath_len;
2215         int first_path = 1;
2216
2217         while (--level >= 0)
2218         {
2219             WRBUF xpath_rev = wrbuf_alloc();
2220             int i;
2221             RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
2222
2223             for (i = level; i >= 1; --i)
2224             {
2225                 const char *cp = xpath[i].part;
2226                 if (*cp)
2227                 {
2228                     for (; *cp; cp++)
2229                     {
2230                         if (*cp == '*')
2231                             wrbuf_puts(xpath_rev, "[^/]*");
2232                         else if (*cp == ' ')
2233                             wrbuf_puts(xpath_rev, "\001 ");
2234                         else
2235                             wrbuf_putc(xpath_rev, *cp);
2236
2237                         /* wrbuf_putc does not null-terminate , but
2238                            wrbuf_puts below ensures it does.. so xpath_rev
2239                            is OK iff length is > 0 */
2240                     }
2241                     wrbuf_puts(xpath_rev, "/");
2242                 }
2243                 else if (i == 1)  /* // case */
2244                     wrbuf_puts(xpath_rev, ".*");
2245             }
2246             if (xpath[level].predicate &&
2247                 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2248                 xpath[level].predicate->u.relation.name[0])
2249             {
2250                 WRBUF wbuf = wrbuf_alloc();
2251                 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2252                 if (xpath[level].predicate->u.relation.value)
2253                 {
2254                     const char *cp = xpath[level].predicate->u.relation.value;
2255                     wrbuf_putc(wbuf, '=');
2256
2257                     while (*cp)
2258                     {
2259                         if (strchr(REGEX_CHARS, *cp))
2260                             wrbuf_putc(wbuf, '\\');
2261                         wrbuf_putc(wbuf, *cp);
2262                         cp++;
2263                     }
2264                 }
2265                 rset_attr = xpath_trunc(
2266                     zh, stream, "0", wrbuf_cstr(wbuf),
2267                     ZEBRA_XPATH_ATTR_NAME,
2268                     rset_nmem, kc);
2269                 wrbuf_destroy(wbuf);
2270             }
2271             else
2272             {
2273                 if (!first_path)
2274                 {
2275                     wrbuf_destroy(xpath_rev);
2276                     continue;
2277                 }
2278             }
2279             yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level,
2280                     wrbuf_cstr(xpath_rev));
2281             if (wrbuf_len(xpath_rev))
2282             {
2283                 rset_start_tag = xpath_trunc(zh, stream, "0",
2284                                              wrbuf_cstr(xpath_rev),
2285                                              ZEBRA_XPATH_ELM_BEGIN,
2286                                              rset_nmem, kc);
2287                 if (always_matches)
2288                     rset = rset_start_tag;
2289                 else
2290                 {
2291                     rset_end_tag = xpath_trunc(zh, stream, "0",
2292                                                wrbuf_cstr(xpath_rev),
2293                                                ZEBRA_XPATH_ELM_END,
2294                                                rset_nmem, kc);
2295
2296                     rset = rset_create_between(rset_nmem, kc, kc->scope,
2297                                                rset_start_tag, rset,
2298                                                rset_end_tag, rset_attr);
2299                 }
2300             }
2301             wrbuf_destroy(xpath_rev);
2302             first_path = 0;
2303         }
2304     }
2305     *rset_out = rset;
2306     return ZEBRA_OK;
2307 }
2308
2309 #define MAX_XPATH_STEPS 10
2310
2311 static ZEBRA_RES rpn_search_database(ZebraHandle zh,
2312                                      Z_AttributesPlusTerm *zapt,
2313                                      const Odr_oid *attributeSet,
2314                                      zint hits_limit, NMEM stream,
2315                                      Z_SortKeySpecList *sort_sequence,
2316                                      NMEM rset_nmem,
2317                                      RSET *rset,
2318                                      struct rset_key_control *kc);
2319
2320 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2321                                 const Odr_oid *attributeSet,
2322                                 zint hits_limit, NMEM stream,
2323                                 Z_SortKeySpecList *sort_sequence,
2324                                 int num_bases, const char **basenames,
2325                                 NMEM rset_nmem,
2326                                 RSET *rset,
2327                                 struct rset_key_control *kc)
2328 {
2329     RSET *rsets = nmem_malloc(stream, num_bases * sizeof(*rsets));
2330     ZEBRA_RES res = ZEBRA_OK;
2331     int i;
2332     for (i = 0; i < num_bases; i++)
2333     {
2334
2335         if (zebraExplain_curDatabase(zh->reg->zei, basenames[i]))
2336         {
2337             zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2338                            basenames[i]);
2339             res = ZEBRA_FAIL;
2340             break;
2341         }
2342         res = rpn_search_database(zh, zapt, attributeSet, hits_limit, stream,
2343                                   sort_sequence,
2344                                   rset_nmem, rsets+i, kc);
2345         if (res != ZEBRA_OK)
2346             break;
2347     }
2348     if (res != ZEBRA_OK)
2349     {   /* must clean up the already created sets */
2350         while (--i >= 0)
2351             rset_delete(rsets[i]);
2352         *rset = 0;
2353     }
2354     else
2355     {
2356         if (num_bases == 1)
2357             *rset = rsets[0];
2358         else if (num_bases == 0)
2359             *rset = rset_create_null(rset_nmem, kc, 0);
2360         else
2361             *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* TERMID */,
2362                                    num_bases, rsets);
2363     }
2364     return res;
2365 }
2366
2367 static ZEBRA_RES rpn_search_database(ZebraHandle zh,
2368                                      Z_AttributesPlusTerm *zapt,
2369                                      const Odr_oid *attributeSet,
2370                                      zint hits_limit, NMEM stream,
2371                                      Z_SortKeySpecList *sort_sequence,
2372                                      NMEM rset_nmem,
2373                                      RSET *rset,
2374                                      struct rset_key_control *kc)
2375 {
2376     ZEBRA_RES res = ZEBRA_OK;
2377     const char *index_type;
2378     char *search_type = NULL;
2379     char rank_type[128];
2380     int complete_flag;
2381     int sort_flag;
2382     char termz[IT_MAX_WORD+1];
2383     int xpath_len;
2384     const char *xpath_use = 0;
2385     struct xpath_location_step xpath[MAX_XPATH_STEPS];
2386
2387     if (!log_level_set)
2388     {
2389         log_level_rpn = yaz_log_module_level("rpn");
2390         log_level_set = 1;
2391     }
2392     zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type,
2393                     rank_type, &complete_flag, &sort_flag);
2394
2395     yaz_log(YLOG_DEBUG, "index_type=%s", index_type);
2396     yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2397     yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2398     yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2399
2400     if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2401         return ZEBRA_FAIL;
2402
2403     if (sort_flag)
2404         return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2405                              rank_type, rset_nmem, rset, kc);
2406     /* consider if an X-Path query is used */
2407     xpath_len = rpn_check_xpath(zh, zapt, attributeSet,
2408                                 xpath, MAX_XPATH_STEPS, stream);
2409     if (xpath_len >= 0)
2410     {
2411         if (xpath[xpath_len-1].part[0] == '@')
2412             xpath_use = ZEBRA_XPATH_ATTR_CDATA;  /* last step is attribute  */
2413         else
2414             xpath_use = ZEBRA_XPATH_CDATA;  /* searching for cdata */
2415
2416         if (1)
2417         {
2418             AttrType relation;
2419             int relation_value;
2420
2421             attr_init_APT(&relation, zapt, 2);
2422             relation_value = attr_find(&relation, NULL);
2423
2424             if (relation_value == 103) /* alwaysmatches */
2425             {
2426                 *rset = 0; /* signal no "term" set */
2427                 return rpn_search_xpath(zh, stream, rank_type, *rset,
2428                                         xpath_len, xpath, rset_nmem, rset, kc);
2429             }
2430         }
2431     }
2432
2433     /* search using one of the various search type strategies
2434        termz is our UTF-8 search term
2435        attributeSet is top-level default attribute set
2436        stream is ODR for search
2437        reg_id is the register type
2438        complete_flag is 1 for complete subfield, 0 for incomplete
2439        xpath_use is use-attribute to be used for X-Path search, 0 for none
2440     */
2441     if (!strcmp(search_type, "phrase"))
2442     {
2443         res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, hits_limit,
2444                                     stream,
2445                                     index_type, complete_flag, rank_type,
2446                                     xpath_use,
2447                                     rset_nmem,
2448                                     rset, kc);
2449     }
2450     else if (!strcmp(search_type, "and-list"))
2451     {
2452         res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, hits_limit,
2453                                       stream,
2454                                       index_type, complete_flag, rank_type,
2455                                       xpath_use,
2456                                       rset_nmem,
2457                                       rset, kc);
2458     }
2459     else if (!strcmp(search_type, "or-list"))
2460     {
2461         res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, hits_limit,
2462                                      stream,
2463                                      index_type, complete_flag, rank_type,
2464                                      xpath_use,
2465                                      rset_nmem,
2466                                      rset, kc);
2467     }
2468     else if (!strcmp(search_type, "local"))
2469     {
2470         res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2471                                    rank_type, rset_nmem, rset, kc);
2472     }
2473     else if (!strcmp(search_type, "numeric"))
2474     {
2475         res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, hits_limit,
2476                                      stream,
2477                                      index_type, complete_flag, rank_type,
2478                                      xpath_use,
2479                                      rset_nmem,
2480                                      rset, kc);
2481     }
2482     else
2483     {
2484         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2485         res = ZEBRA_FAIL;
2486     }
2487     if (res != ZEBRA_OK)
2488         return res;
2489     if (!*rset)
2490         return ZEBRA_FAIL;
2491     return rpn_search_xpath(zh, stream, rank_type, *rset,
2492                             xpath_len, xpath, rset_nmem, rset, kc);
2493 }
2494
2495 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2496                                       const Odr_oid *attributeSet,
2497                                       zint hits_limit,
2498                                       NMEM stream, NMEM rset_nmem,
2499                                       Z_SortKeySpecList *sort_sequence,
2500                                       int num_bases, const char **basenames,
2501                                       RSET **result_sets, int *num_result_sets,
2502                                       Z_Operator *parent_op,
2503                                       struct rset_key_control *kc);
2504
2505 ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs,
2506                                    zint *approx_limit)
2507 {
2508     ZEBRA_RES res = ZEBRA_OK;
2509     if (zs->which == Z_RPNStructure_complex)
2510     {
2511         if (res == ZEBRA_OK)
2512             res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
2513                                            approx_limit);
2514         if (res == ZEBRA_OK)
2515             res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
2516                                            approx_limit);
2517     }
2518     else if (zs->which == Z_RPNStructure_simple)
2519     {
2520         if (zs->u.simple->which == Z_Operand_APT)
2521         {
2522             Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
2523             AttrType global_hits_limit_attr;
2524             int l;
2525
2526             attr_init_APT(&global_hits_limit_attr, zapt, 12);
2527
2528             l = attr_find(&global_hits_limit_attr, NULL);
2529             if (l != -1)
2530                 *approx_limit = l;
2531         }
2532     }
2533     return res;
2534 }
2535
2536 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2537                          const Odr_oid *attributeSet,
2538                          zint hits_limit,
2539                          NMEM stream, NMEM rset_nmem,
2540                          Z_SortKeySpecList *sort_sequence,
2541                          int num_bases, const char **basenames,
2542                          RSET *result_set)
2543 {
2544     RSET *result_sets = 0;
2545     int num_result_sets = 0;
2546     ZEBRA_RES res;
2547     struct rset_key_control *kc = zebra_key_control_create(zh);
2548
2549     res = rpn_search_structure(zh, zs, attributeSet, hits_limit,
2550                                stream, rset_nmem,
2551                                sort_sequence,
2552                                num_bases, basenames,
2553                                &result_sets, &num_result_sets,
2554                                0 /* no parent op */,
2555                                kc);
2556     if (res != ZEBRA_OK)
2557     {
2558         int i;
2559         for (i = 0; i < num_result_sets; i++)
2560             rset_delete(result_sets[i]);
2561         *result_set = 0;
2562     }
2563     else
2564     {
2565         assert(num_result_sets == 1);
2566         assert(result_sets);
2567         assert(*result_sets);
2568         *result_set = *result_sets;
2569     }
2570     (*kc->dec)(kc);
2571     return res;
2572 }
2573
2574 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2575                                const Odr_oid *attributeSet, zint hits_limit,
2576                                NMEM stream, NMEM rset_nmem,
2577                                Z_SortKeySpecList *sort_sequence,
2578                                int num_bases, const char **basenames,
2579                                RSET **result_sets, int *num_result_sets,
2580                                Z_Operator *parent_op,
2581                                struct rset_key_control *kc)
2582 {
2583     *num_result_sets = 0;
2584     if (zs->which == Z_RPNStructure_complex)
2585     {
2586         ZEBRA_RES res;
2587         Z_Operator *zop = zs->u.complex->roperator;
2588         RSET *result_sets_l = 0;
2589         int num_result_sets_l = 0;
2590         RSET *result_sets_r = 0;
2591         int num_result_sets_r = 0;
2592
2593         res = rpn_search_structure(zh, zs->u.complex->s1,
2594                                    attributeSet, hits_limit, stream, rset_nmem,
2595                                    sort_sequence,
2596                                    num_bases, basenames,
2597                                    &result_sets_l, &num_result_sets_l,
2598                                    zop, kc);
2599         if (res != ZEBRA_OK)
2600         {
2601             int i;
2602             for (i = 0; i < num_result_sets_l; i++)
2603                 rset_delete(result_sets_l[i]);
2604             return res;
2605         }
2606         res = rpn_search_structure(zh, zs->u.complex->s2,
2607                                    attributeSet, hits_limit, stream, rset_nmem,
2608                                    sort_sequence,
2609                                    num_bases, basenames,
2610                                    &result_sets_r, &num_result_sets_r,
2611                                    zop, kc);
2612         if (res != ZEBRA_OK)
2613         {
2614             int i;
2615             for (i = 0; i < num_result_sets_l; i++)
2616                 rset_delete(result_sets_l[i]);
2617             for (i = 0; i < num_result_sets_r; i++)
2618                 rset_delete(result_sets_r[i]);
2619             return res;
2620         }
2621
2622         /* make a new list of result for all children */
2623         *num_result_sets = num_result_sets_l + num_result_sets_r;
2624         *result_sets = nmem_malloc(stream, *num_result_sets *
2625                                    sizeof(**result_sets));
2626         memcpy(*result_sets, result_sets_l,
2627                num_result_sets_l * sizeof(**result_sets));
2628         memcpy(*result_sets + num_result_sets_l, result_sets_r,
2629                num_result_sets_r * sizeof(**result_sets));
2630
2631         if (!parent_op || parent_op->which != zop->which
2632             || (zop->which != Z_Operator_and &&
2633                 zop->which != Z_Operator_or))
2634         {
2635             /* parent node different from this one (or non-present) */
2636             /* we must combine result sets now */
2637             RSET rset;
2638             switch (zop->which)
2639             {
2640             case Z_Operator_and:
2641                 rset = rset_create_and(rset_nmem, kc,
2642                                        kc->scope,
2643                                        *num_result_sets, *result_sets);
2644                 break;
2645             case Z_Operator_or:
2646                 rset = rset_create_or(rset_nmem, kc,
2647                                       kc->scope, 0, /* termid */
2648                                       *num_result_sets, *result_sets);
2649                 break;
2650             case Z_Operator_and_not:
2651                 rset = rset_create_not(rset_nmem, kc,
2652                                        kc->scope,
2653                                        (*result_sets)[0],
2654                                        (*result_sets)[1]);
2655                 break;
2656             case Z_Operator_prox:
2657                 if (zop->u.prox->which != Z_ProximityOperator_known)
2658                 {
2659                     zebra_setError(zh,
2660                                    YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2661                                    0);
2662                     return ZEBRA_FAIL;
2663                 }
2664                 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2665                 {
2666                     zebra_setError_zint(zh,
2667                                         YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2668                                         *zop->u.prox->u.known);
2669                     return ZEBRA_FAIL;
2670                 }
2671                 else
2672                 {
2673                     rset = rset_create_prox(rset_nmem, kc,
2674                                             kc->scope,
2675                                             *num_result_sets, *result_sets,
2676                                             *zop->u.prox->ordered,
2677                                             (!zop->u.prox->exclusion ?
2678                                              0 : *zop->u.prox->exclusion),
2679                                             *zop->u.prox->relationType,
2680                                             *zop->u.prox->distance );
2681                 }
2682                 break;
2683             default:
2684                 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
2685                 return ZEBRA_FAIL;
2686             }
2687             *num_result_sets = 1;
2688             *result_sets = nmem_malloc(stream, *num_result_sets *
2689                                        sizeof(**result_sets));
2690             (*result_sets)[0] = rset;
2691         }
2692     }
2693     else if (zs->which == Z_RPNStructure_simple)
2694     {
2695         RSET rset;
2696         ZEBRA_RES res;
2697
2698         if (zs->u.simple->which == Z_Operand_APT)
2699         {
2700             yaz_log(YLOG_DEBUG, "rpn_search_APT");
2701             res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2702                                  attributeSet, hits_limit,
2703                                  stream, sort_sequence,
2704                                  num_bases, basenames, rset_nmem, &rset,
2705                                  kc);
2706             if (res != ZEBRA_OK)
2707                 return res;
2708         }
2709         else if (zs->u.simple->which == Z_Operand_resultSetId)
2710         {
2711             yaz_log(YLOG_DEBUG, "rpn_search_ref");
2712             rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2713             if (!rset)
2714             {
2715                 zebra_setError(zh,
2716                                YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2717                                zs->u.simple->u.resultSetId);
2718                 return ZEBRA_FAIL;
2719             }
2720             rset_dup(rset);
2721         }
2722         else
2723         {
2724             zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2725             return ZEBRA_FAIL;
2726         }
2727         *num_result_sets = 1;
2728         *result_sets = nmem_malloc(stream, *num_result_sets *
2729                                    sizeof(**result_sets));
2730         (*result_sets)[0] = rset;
2731     }
2732     else
2733     {
2734         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2735         return ZEBRA_FAIL;
2736     }
2737     return ZEBRA_OK;
2738 }
2739
2740
2741
2742 /*
2743  * Local variables:
2744  * c-basic-offset: 4
2745  * c-file-style: "Stroustrup"
2746  * indent-tabs-mode: nil
2747  * End:
2748  * vim: shiftwidth=4 tabstop=8 expandtab
2749  */
2750