ICU: support @attr 5=2, @attr 5=3 in searches
[idzebra-moved-to-github.git] / index / rpnsearch.c
index d1b9b88..f119eb5 100644 (file)
@@ -1,8 +1,5 @@
-/* $Id: rpnsearch.c,v 1.27 2007-12-07 14:09:09 adam Exp $
-   Copyright (C) 1995-2007
-   Index Data ApS
-
-This file is part of the Zebra server.
+/* This file is part of the Zebra server.
+   Copyright (C) 1994-2011 Index Data
 
 Zebra is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free
@@ -20,6 +17,9 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 
 */
 
+#if HAVE_CONFIG_H
+#include <config.h>
+#endif
 #include <stdio.h>
 #include <assert.h>
 #ifdef WIN32
@@ -157,7 +157,7 @@ static int grep_handle(char *name, const char *info, void *p)
 }
 
 static int term_pre(zebra_map_t zm, const char **src,
-                   const char *ct1, const char *ct2, int first)
+                   const char *ct1, int first)
 {
     const char *s1, *s0 = *src;
     const char **map;
@@ -167,8 +167,6 @@ static int term_pre(zebra_map_t zm, const char **src,
     {
         if (ct1 && strchr(ct1, *s0))
             break;
-        if (ct2 && strchr(ct2, *s0))
-            break;
         s1 = s0;
         map = zebra_maps_input(zm, &s1, strlen(s1), first);
         if (**map != *CHR_SPACE)
@@ -206,16 +204,16 @@ static void esc_str(char *out_buf, size_t out_size,
     }
 }
 
-#define REGEX_CHARS " []()|.*+?!"
+#define REGEX_CHARS " ^[]()|.*+?!\"$"
 
 static void add_non_space(const char *start, const char *end,
                           WRBUF term_dict,
-                          char *dst_term, int *dst_ptr,
+                          WRBUF display_term,
                           const char **map, int q_map_match)
 {
     size_t sz = end - start;
-    memcpy(dst_term + *dst_ptr, start, sz);
-    (*dst_ptr) += sz;
+
+    wrbuf_write(display_term, start, sz);
     if (!q_map_match)
     {
         while (start < end)
@@ -238,45 +236,74 @@ static void add_non_space(const char *start, const char *end,
 
+/* term_100_icu: append next ICU token to term_dict as an escaped regex;
+   mode: 0 exact, bit 0 right truncation, bit 1 left truncation. Returns 1
+   on success, 0 if the tokenizer gives no token, -1 on a bad sort key. */
 static int term_100_icu(zebra_map_t zm,
                         const char **src, WRBUF term_dict, int space_split,
-                        char *dst_term)
+                        WRBUF display_term, int mode)
 {
     int i;
     const char *res_buf = 0;
     size_t res_len = 0;
-    if (!zebra_map_tokenize_next(zm, &res_buf, &res_len))
+    const char *display_buf;
+    size_t display_len;
+    if (!zebra_map_tokenize_next(zm, &res_buf, &res_len,
+                                 &display_buf, &display_len))
     {
         *src += strlen(*src);
         return 0;
     }
-    strcat(dst_term, *src);
+    wrbuf_write(display_term, display_buf, display_len);
+    if (mode)
+    {
+        /* ICU sort keys seem to be of the form
+           basechars \x01 accents \x01 length
+           For now we just truncate after basechars, i.e. at the second
+           \x01 counted from the end. This may give false hits because
+           accents are not used. */
+        i = res_len;
+        while (--i >= 0 && res_buf[i] != '\x01')
+            ;
+        if (i > 0)
+        {
+            while (--i >= 0 && res_buf[i] != '\x01')
+                ;
+        }
+        if (i <= 0)
+        {  /* separator missing (i == -1) or no base chars: reject key */
+            return -1;
+        }
+        res_len = i; /* reduce res_len */
+    }
+    if (mode & 2)
+        wrbuf_puts(term_dict, ".*");
     for (i = 0; i < res_len; i++)
     {
-        if (strchr(REGEX_CHARS, res_buf[i]))
+        if (strchr(REGEX_CHARS "\\", res_buf[i]))
             wrbuf_putc(term_dict, '\\');
         if (res_buf[i] < 32)
             wrbuf_putc(term_dict, 1);
         wrbuf_putc(term_dict, res_buf[i]);
     }
+    if (mode & 1)
+        wrbuf_puts(term_dict, ".*");
+    else if (mode)
+        wrbuf_puts(term_dict, "\x01\x01.*");
     return 1;
 }
 
 /* term_100: handle term, where trunc = none(no operators at all) */
 static int term_100(zebra_map_t zm,
                    const char **src, WRBUF term_dict, int space_split,
-                   char *dst_term)
+                   WRBUF display_term)
 {
     const char *s0;
     const char **map;
     int i = 0;
-    int j = 0;
 
     const char *space_start = 0;
     const char *space_end = 0;
 
-    if (zebra_maps_is_icu(zm))
-        return term_100_icu(zm, src, term_dict, space_split, dst_term);
-
-    if (!term_pre(zm, src, NULL, NULL, !space_split))
+    if (!term_pre(zm, src, 0, !space_split))
         return 0;
     s0 = *src;
     while (*s0)
@@ -303,7 +330,7 @@ static int term_100(zebra_map_t zm,
                 {
                     if (strchr(REGEX_CHARS, *space_start))
                         wrbuf_putc(term_dict, '\\');
-                    dst_term[j++] = *space_start;
+                    wrbuf_putc(display_term, *space_start);
                     wrbuf_putc(term_dict, *space_start);
                     space_start++;
                                
@@ -314,10 +341,8 @@ static int term_100(zebra_map_t zm,
         }
         i++;
 
-        add_non_space(s1, s0, term_dict, dst_term, &j,
-                      map, q_map_match);
+        add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
     }
-    dst_term[j] = '\0';
     *src = s0;
     return i;
 }
@@ -325,14 +350,13 @@ static int term_100(zebra_map_t zm,
 /* term_101: handle term, where trunc = Process # */
 static int term_101(zebra_map_t zm,
                    const char **src, WRBUF term_dict, int space_split,
-                   char *dst_term)
+                   WRBUF display_term)
 {
     const char *s0;
     const char **map;
     int i = 0;
-    int j = 0;
 
-    if (!term_pre(zm, src, "#", "#", !space_split))
+    if (!term_pre(zm, src, "#", !space_split))
         return 0;
     s0 = *src;
     while (*s0)
@@ -341,7 +365,8 @@ static int term_101(zebra_map_t zm,
         {
             i++;
             wrbuf_puts(term_dict, ".*");
-            dst_term[j++] = *s0++;
+            wrbuf_putc(display_term, *s0);
+            s0++;
         }
         else
         {
@@ -352,11 +377,9 @@ static int term_101(zebra_map_t zm,
                 break;
 
             i++;
-            add_non_space(s1, s0, term_dict, dst_term, &j,
-                          map, q_map_match);
+            add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
         }
     }
-    dst_term[j++] = '\0';
     *src = s0;
     return i;
 }
@@ -364,14 +387,13 @@ static int term_101(zebra_map_t zm,
 /* term_103: handle term, where trunc = re-2 (regular expressions) */
 static int term_103(zebra_map_t zm, const char **src,
                    WRBUF term_dict, int *errors, int space_split,
-                   char *dst_term)
+                   WRBUF display_term)
 {
     int i = 0;
-    int j = 0;
     const char *s0;
     const char **map;
 
-    if (!term_pre(zm, src, "^\\()[].*+?|", "(", !space_split))
+    if (!term_pre(zm, src, "^\\()[].*+?|", !space_split))
         return 0;
     s0 = *src;
     if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
@@ -386,7 +408,7 @@ static int term_103(zebra_map_t zm, const char **src,
     {
         if (strchr("^\\()[].*+?|-", *s0))
         {
-            dst_term[j++] = *s0;
+            wrbuf_putc(display_term, *s0);
             wrbuf_putc(term_dict, *s0);
             s0++;
             i++;
@@ -400,11 +422,9 @@ static int term_103(zebra_map_t zm, const char **src,
                 break;
 
             i++;
-            add_non_space(s1, s0, term_dict, dst_term, &j,
-                          map, q_map_match);
+            add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
         }
     }
-    dst_term[j] = '\0';
     *src = s0;
     
     return i;
@@ -412,22 +432,21 @@ static int term_103(zebra_map_t zm, const char **src,
 
-/* term_103: handle term, where trunc = re-1 (regular expressions) */
+/* term_102: handle term, where trunc = re-1 (regular expressions) */
 static int term_102(zebra_map_t zm, const char **src,
-                   WRBUF term_dict, int space_split, char *dst_term)
+                   WRBUF term_dict, int space_split, WRBUF display_term)
 {
-    return term_103(zm, src, term_dict, NULL, space_split, dst_term);
+    return term_103(zm, src, term_dict, NULL, space_split, display_term);
 }
 
 
-/* term_104: handle term, process # and ! */
+/* term_104: handle term, process ?n * # */
 static int term_104(zebra_map_t zm, const char **src, 
-                    WRBUF term_dict, int space_split, char *dst_term)
+                    WRBUF term_dict, int space_split, WRBUF display_term)
 {
     const char *s0;
     const char **map;
     int i = 0;
-    int j = 0;
 
-    if (!term_pre(zm, src, "?*#", "?*#", !space_split))
+    if (!term_pre(zm, src, "?*#", !space_split))
         return 0;
     s0 = *src;
     while (*s0)
@@ -435,14 +454,16 @@ static int term_104(zebra_map_t zm, const char **src,
         if (*s0 == '?')
         {
             i++;
-            dst_term[j++] = *s0++;
+            wrbuf_putc(display_term, *s0);
+            s0++;
             if (*s0 >= '0' && *s0 <= '9')
             {
                 int limit = 0;
                 while (*s0 >= '0' && *s0 <= '9')
                 {
                     limit = limit * 10 + (*s0 - '0');
-                    dst_term[j++] = *s0++;
+                    wrbuf_putc(display_term, *s0);
+                    s0++;
                 }
                 if (limit > 20)
                     limit = 20;
@@ -460,13 +481,15 @@ static int term_104(zebra_map_t zm, const char **src,
         {
             i++;
             wrbuf_puts(term_dict, ".*");
-            dst_term[j++] = *s0++;
+            wrbuf_putc(display_term, *s0);
+            s0++;
         }
         else if (*s0 == '#')
         {
             i++;
             wrbuf_puts(term_dict, ".");
-            dst_term[j++] = *s0++;
+            wrbuf_putc(display_term, *s0);
+            s0++;
         }
        else
         {
@@ -477,26 +500,23 @@ static int term_104(zebra_map_t zm, const char **src,
                 break;
 
             i++;
-            add_non_space(s1, s0, term_dict, dst_term, &j,
-                          map, q_map_match);
+            add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
         }
     }
-    dst_term[j++] = '\0';
     *src = s0;
     return i;
 }
 
-/* term_105/106: handle term, where trunc = Process * and ! and right trunc */
+/* term_105/106: handle term, process * ! and possibly right_truncate */
 static int term_105(zebra_map_t zm, const char **src, 
                     WRBUF term_dict, int space_split,
-                   char *dst_term, int right_truncate)
+                   WRBUF display_term, int right_truncate)
 {
     const char *s0;
     const char **map;
     int i = 0;
-    int j = 0;
 
-    if (!term_pre(zm, src, "*!", "*!", !space_split))
+    if (!term_pre(zm, src, "\\*!", !space_split))
         return 0;
     s0 = *src;
     while (*s0)
@@ -505,13 +525,22 @@ static int term_105(zebra_map_t zm, const char **src,
         {
             i++;
             wrbuf_puts(term_dict, ".*");
-            dst_term[j++] = *s0++;
+            wrbuf_putc(display_term, *s0);
+            s0++;
         }
         else if (*s0 == '!')
         {
             i++;
             wrbuf_putc(term_dict, '.');
-            dst_term[j++] = *s0++;
+            wrbuf_putc(display_term, *s0);
+            s0++;
+        }
+        else if (*s0 == '\\')
+        {
+            i++;
+            wrbuf_puts(term_dict, "\\\\");
+            wrbuf_putc(display_term, *s0);
+            s0++;
         }
        else
         {
@@ -522,13 +551,11 @@ static int term_105(zebra_map_t zm, const char **src,
                 break;
 
             i++;
-            add_non_space(s1, s0, term_dict, dst_term, &j,
-                          map, q_map_match);
+            add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
         }
     }
     if (right_truncate)
         wrbuf_puts(term_dict, ".*");
-    dst_term[j++] = '\0';
     *src = s0;
     return i;
 }
@@ -673,7 +700,8 @@ void string_rel_add_char(WRBUF term_p, WRBUF wsrc, int *indx)
 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
                           const char **term_sub, WRBUF term_dict,
                           const Odr_oid *attributeSet,
-                          zebra_map_t zm, int space_split, char *term_dst,
+                          zebra_map_t zm, int space_split, 
+                           WRBUF display_term,
                           int *error_code)
 {
     AttrType relation;
@@ -689,7 +717,7 @@ static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
     switch (relation_value)
     {
     case 1:
-        if (!term_100(zm, term_sub, term_component, space_split, term_dst))
+        if (!term_100(zm, term_sub, term_component, space_split, display_term))
         {
             wrbuf_destroy(term_component);
             return 0;
@@ -723,7 +751,7 @@ static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
         wrbuf_putc(term_dict, ')');
         break;
     case 2:
-        if (!term_100(zm, term_sub, term_component, space_split, term_dst))
+        if (!term_100(zm, term_sub, term_component, space_split, display_term))
         {
             wrbuf_destroy(term_component);
             return 0;
@@ -758,7 +786,7 @@ static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
         wrbuf_putc(term_dict, ')');
         break;
     case 5:
-        if (!term_100(zm, term_sub, term_component, space_split, term_dst))
+        if (!term_100(zm, term_sub, term_component, space_split, display_term))
         {
             wrbuf_destroy(term_component);
             return 0;
@@ -791,7 +819,7 @@ static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
         wrbuf_putc(term_dict, ')');
         break;
     case 4:
-        if (!term_100(zm, term_sub, term_component, space_split, term_dst))
+        if (!term_100(zm, term_sub, term_component, space_split, display_term))
         {
             wrbuf_destroy(term_component);
             return 0;
@@ -832,7 +860,7 @@ static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
         if (!**term_sub)
             return 1;
         yaz_log(log_level_rpn, "Relation =");
-        if (!term_100(zm, term_sub, term_component, space_split, term_dst))
+        if (!term_100(zm, term_sub, term_component, space_split, display_term))
         {
             wrbuf_destroy(term_component);
             return 0;
@@ -862,7 +890,7 @@ static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
                             const Odr_oid *attributeSet, NMEM stream,
                             struct grep_info *grep_info,
                             const char *index_type, int complete_flag,
-                            char *term_dst,
+                            WRBUF display_term,
                              const char *xpath_use,
                             struct ord_list **ol,
                              zebra_map_t zm);
@@ -876,9 +904,10 @@ ZEBRA_RES zebra_term_limits_APT(ZebraHandle zh,
     AttrType term_ref_id_attr;
     AttrType hits_limit_attr;
     int term_ref_id_int;
+    zint hits_limit_from_attr;
  
     attr_init_APT(&hits_limit_attr, zapt, 11);
-    *hits_limit_value  = attr_find(&hits_limit_attr, NULL);
+    hits_limit_from_attr  = attr_find(&hits_limit_attr, NULL);
 
     attr_init_APT(&term_ref_id_attr, zapt, 10);
     term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
@@ -888,26 +917,9 @@ ZEBRA_RES zebra_term_limits_APT(ZebraHandle zh,
        sprintf(res, "%d", term_ref_id_int);
        *term_ref_id_str = res;
     }
+    if (hits_limit_from_attr != -1)
+        *hits_limit_value = hits_limit_from_attr;
 
-    /* no limit given ? */
-    if (*hits_limit_value == -1)
-    {
-       if (*term_ref_id_str)
-       {
-           /* use global if term_ref is present */
-           *hits_limit_value = zh->approx_limit;
-       }
-       else
-       {
-           /* no counting if term_ref is not present */
-           *hits_limit_value = 0;
-       }
-    }
-    else if (*hits_limit_value == 0)
-    {
-       /* 0 is the same as global limit */
-       *hits_limit_value = zh->approx_limit;
-    }
     yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
            *term_ref_id_str ? *term_ref_id_str : "none",
            *hits_limit_value);
@@ -919,10 +931,10 @@ ZEBRA_RES zebra_term_limits_APT(ZebraHandle zh,
 static ZEBRA_RES search_term(ZebraHandle zh,
                              Z_AttributesPlusTerm *zapt,
                              const char **term_sub, 
-                             const Odr_oid *attributeSet, NMEM stream,
+                             const Odr_oid *attributeSet,
+                             zint hits_limit, NMEM stream,
                              struct grep_info *grep_info,
                              const char *index_type, int complete_flag,
-                             char *term_dst,
                              const char *rank_type, 
                              const char *xpath_use,
                              NMEM rset_nmem,
@@ -932,9 +944,10 @@ static ZEBRA_RES search_term(ZebraHandle zh,
 {
     ZEBRA_RES res;
     struct ord_list *ol;
-    zint hits_limit_value;
+    zint hits_limit_value = hits_limit;
     const char *term_ref_id_str = 0;
     WRBUF term_dict = wrbuf_alloc();
+    WRBUF display_term = wrbuf_alloc();
     *rset = 0;
     zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
                           stream);
@@ -942,22 +955,23 @@ static ZEBRA_RES search_term(ZebraHandle zh,
     res = string_term(zh, zapt, term_sub, term_dict,
                       attributeSet, stream, grep_info,
                      index_type, complete_flag,
-                     term_dst, xpath_use, &ol, zm);
+                     display_term, xpath_use, &ol, zm);
     wrbuf_destroy(term_dict);
-    if (res != ZEBRA_OK)
-        return res;
-    if (!*term_sub)  /* no more terms ? */
-       return res;
-    yaz_log(log_level_rpn, "term: %s", term_dst);
-    *rset = rset_trunc(zh, grep_info->isam_p_buf,
-                      grep_info->isam_p_indx, term_dst,
-                      strlen(term_dst), rank_type, 1 /* preserve pos */,
-                      zapt->term->which, rset_nmem,
-                      kc, kc->scope, ol, index_type, hits_limit_value,
-                      term_ref_id_str);
-    if (!*rset)
-       return ZEBRA_FAIL;
-    return ZEBRA_OK;
+    if (res == ZEBRA_OK && *term_sub)
+    {
+        yaz_log(log_level_rpn, "term: %s", wrbuf_cstr(display_term));
+        *rset = rset_trunc(zh, grep_info->isam_p_buf,
+                           grep_info->isam_p_indx, wrbuf_buf(display_term),
+                           wrbuf_len(display_term), rank_type, 
+                           1 /* preserve pos */,
+                           zapt->term->which, rset_nmem,
+                           kc, kc->scope, ol, index_type, hits_limit_value,
+                           term_ref_id_str);
+        if (!*rset)
+            res = ZEBRA_FAIL;
+    }
+    wrbuf_destroy(display_term);
+    return res;
 }
 
 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
@@ -966,7 +980,7 @@ static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
                             const Odr_oid *attributeSet, NMEM stream,
                             struct grep_info *grep_info,
                             const char *index_type, int complete_flag,
-                            char *term_dst,
+                            WRBUF display_term,
                              const char *xpath_use,
                             struct ord_list **ol,
                              zebra_map_t zm)
@@ -1016,113 +1030,175 @@ static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
     wrbuf_putc(term_dict, ')');
     
     prefix_len = wrbuf_len(term_dict);
-    
-    switch (truncation_value)
-    {
-    case -1:         /* not specified */
-    case 100:        /* do not truncate */
-        if (!string_relation(zh, zapt, &termp, term_dict,
-                             attributeSet,
-                             zm, space_split, term_dst,
-                             &relation_error))
+
+    if (zebra_maps_is_icu(zm))
+    {
+        int relation_value;
+        AttrType relation;
+        
+        attr_init_APT(&relation, zapt, 2);
+        relation_value = attr_find(&relation, NULL);
+        if (relation_value == 103) /* always matches */
+            termp += strlen(termp); /* move to end of term */
+        else if (relation_value == 3 || relation_value == 102 || relation_value == -1)
         {
-            if (relation_error)
+            /* ICU case */
+            switch (truncation_value)
             {
-                zebra_setError(zh, relation_error, 0);
+            case -1:         /* not specified */
+            case 100:        /* do not truncate */
+                if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 0))
+                {
+                    *term_sub = 0;
+                    return ZEBRA_OK;
+                }
+                break;
+            case 1:          /* right truncation */
+                if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 1))
+                {
+                    *term_sub = 0;
+                    return ZEBRA_OK;
+                }
+                break;
+            case 2:
+                if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 2))
+                {
+                    *term_sub = 0;
+                    return ZEBRA_OK;
+                }
+                break;
+            case 3:
+                if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 3))
+                {
+                    *term_sub = 0;
+                    return ZEBRA_OK;
+                }
+                break;
+            default:
+                zebra_setError_zint(zh,
+                                    YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
+                                    truncation_value);
                 return ZEBRA_FAIL;
             }
-            *term_sub = 0;
-            return ZEBRA_OK;
-        }
-        break;
-    case 1:          /* right truncation */
-        wrbuf_putc(term_dict, '(');
-        if (!term_100(zm, &termp, term_dict, space_split, term_dst))
-        {
-            *term_sub = 0;
-            return ZEBRA_OK;
-        }
-        wrbuf_puts(term_dict, ".*)");
-        break;
-    case 2:          /* keft truncation */
-        wrbuf_puts(term_dict, "(.*");
-        if (!term_100(zm, &termp, term_dict, space_split, term_dst))
-        {
-            *term_sub = 0;
-            return ZEBRA_OK;
-        }
-        wrbuf_putc(term_dict, ')');
-        break;
-    case 3:          /* left&right truncation */
-        wrbuf_puts(term_dict, "(.*");
-        if (!term_100(zm, &termp, term_dict, space_split, term_dst))
-        {
-            *term_sub = 0;
-            return ZEBRA_OK;
-        }
-        wrbuf_puts(term_dict, ".*)");
-        break;
-    case 101:        /* process # in term */
-        wrbuf_putc(term_dict, '(');
-        if (!term_101(zm, &termp, term_dict, space_split, term_dst))
-        {
-            *term_sub = 0;
-            return ZEBRA_OK;
-        }
-        wrbuf_puts(term_dict, ")");
-        break;
-    case 102:        /* Regexp-1 */
-        wrbuf_putc(term_dict, '(');
-        if (!term_102(zm, &termp, term_dict, space_split, term_dst))
-        {
-            *term_sub = 0;
-            return ZEBRA_OK;
-        }
-        wrbuf_putc(term_dict, ')');
-        break;
-    case 103:       /* Regexp-2 */
-        regex_range = 1;
-        wrbuf_putc(term_dict, '(');
-        if (!term_103(zm, &termp, term_dict, &regex_range,
-                      space_split, term_dst))
-        {
-            *term_sub = 0;
-            return ZEBRA_OK;
-        }
-        wrbuf_putc(term_dict, ')');
-        break;
-    case 104:        /* process # and ! in term */
-        wrbuf_putc(term_dict, '(');
-        if (!term_104(zm, &termp, term_dict, space_split, term_dst))
-        {
-            *term_sub = 0;
-            return ZEBRA_OK;
         }
-        wrbuf_putc(term_dict, ')');
-        break;
-    case 105:        /* process * and ! in term */
-        wrbuf_putc(term_dict, '(');
-        if (!term_105(zm, &termp, term_dict, space_split, term_dst, 1))
+        else
         {
-            *term_sub = 0;
-            return ZEBRA_OK;
+            zebra_setError_zint(zh,
+                                YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE,
+                                relation_value);
+            return ZEBRA_FAIL;
         }
-        wrbuf_putc(term_dict, ')');
-        break;
-    case 106:        /* process * and ! in term */
-        wrbuf_putc(term_dict, '(');
-        if (!term_105(zm, &termp, term_dict, space_split, term_dst, 0))
+    }
+    else
+    {
+        /* non-ICU case. using string.chr and friends */
+        switch (truncation_value)
         {
-            *term_sub = 0;
-            return ZEBRA_OK;
+        case -1:         /* not specified */
+        case 100:        /* do not truncate */
+            if (!string_relation(zh, zapt, &termp, term_dict,
+                                 attributeSet,
+                                 zm, space_split, display_term,
+                                 &relation_error))
+            {
+                if (relation_error)
+                {
+                    zebra_setError(zh, relation_error, 0);
+                    return ZEBRA_FAIL;
+                }
+                *term_sub = 0;
+                return ZEBRA_OK;
+            }
+            break;
+        case 1:          /* right truncation */
+            wrbuf_putc(term_dict, '(');
+            if (!term_100(zm, &termp, term_dict, space_split, display_term))
+            {
+                *term_sub = 0;
+                return ZEBRA_OK;
+            }
+            wrbuf_puts(term_dict, ".*)");
+            break;
+        case 2:          /* left truncation */
+            wrbuf_puts(term_dict, "(.*");
+            if (!term_100(zm, &termp, term_dict, space_split, display_term))
+            {
+                *term_sub = 0;
+                return ZEBRA_OK;
+            }
+            wrbuf_putc(term_dict, ')');
+            break;
+        case 3:          /* left&right truncation */
+            wrbuf_puts(term_dict, "(.*");
+            if (!term_100(zm, &termp, term_dict, space_split, display_term))
+            {
+                *term_sub = 0;
+                return ZEBRA_OK;
+            }
+            wrbuf_puts(term_dict, ".*)");
+            break;
+        case 101:        /* process # in term */
+            wrbuf_putc(term_dict, '(');
+            if (!term_101(zm, &termp, term_dict, space_split, display_term))
+            {
+                *term_sub = 0;
+                return ZEBRA_OK;
+            }
+            wrbuf_puts(term_dict, ")");
+            break;
+        case 102:        /* Regexp-1 */
+            wrbuf_putc(term_dict, '(');
+            if (!term_102(zm, &termp, term_dict, space_split, display_term))
+            {
+                *term_sub = 0;
+                return ZEBRA_OK;
+            }
+            wrbuf_putc(term_dict, ')');
+            break;
+        case 103:       /* Regexp-2 */
+            regex_range = 1;
+            wrbuf_putc(term_dict, '(');
+            if (!term_103(zm, &termp, term_dict, &regex_range,
+                          space_split, display_term))
+            {
+                *term_sub = 0;
+                return ZEBRA_OK;
+            }
+            wrbuf_putc(term_dict, ')');
+            break;
+        case 104:        /* process ?n * # term */
+            wrbuf_putc(term_dict, '(');
+            if (!term_104(zm, &termp, term_dict, space_split, display_term))
+            {
+                *term_sub = 0;
+                return ZEBRA_OK;
+            }
+            wrbuf_putc(term_dict, ')');
+            break;
+        case 105:        /* process * ! in term and right truncate */
+            wrbuf_putc(term_dict, '(');
+            if (!term_105(zm, &termp, term_dict, space_split, display_term, 1))
+            {
+                *term_sub = 0;
+                return ZEBRA_OK;
+            }
+            wrbuf_putc(term_dict, ')');
+            break;
+        case 106:        /* process * ! in term */
+            wrbuf_putc(term_dict, '(');
+            if (!term_105(zm, &termp, term_dict, space_split, display_term, 0))
+            {
+                *term_sub = 0;
+                return ZEBRA_OK;
+            }
+            wrbuf_putc(term_dict, ')');
+            break;
+        default:
+            zebra_setError_zint(zh,
+                                YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
+                                truncation_value);
+            return ZEBRA_FAIL;
         }
-        wrbuf_putc(term_dict, ')');
-        break;
-    default:
-        zebra_setError_zint(zh,
-                            YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
-                            truncation_value);
-        return ZEBRA_FAIL;
     }
     if (1)
     {
@@ -1226,6 +1302,7 @@ static ZEBRA_RES search_terms_chrmap(ZebraHandle zh,
                                      Z_AttributesPlusTerm *zapt,
                                      const char *termz,
                                      const Odr_oid *attributeSet,
+                                     zint hits_limit,
                                      NMEM stream,
                                      const char *index_type, int complete_flag,
                                      const char *rank_type,
@@ -1235,16 +1312,14 @@ static ZEBRA_RES search_terms_chrmap(ZebraHandle zh,
                                      struct rset_key_control *kc,
                                      zebra_map_t zm)
 {
-    char term_dst[IT_MAX_WORD+1];
     struct grep_info grep_info;
     const char *termp = termz;
     int alloc_sets = 0;
     
     *num_result_sets = 0;
-    *term_dst = 0;
     if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
         return ZEBRA_FAIL;
-    while(1)
+    while (1)
     { 
        ZEBRA_RES res;
 
@@ -1258,10 +1333,10 @@ static ZEBRA_RES search_terms_chrmap(ZebraHandle zh,
            alloc_sets = alloc_sets + add;
            *result_sets = rnew;
        }
-        res = search_term(zh, zapt, &termp, attributeSet,
+        res = search_term(zh, zapt, &termp, attributeSet, hits_limit,
                           stream, &grep_info,
                           index_type, complete_flag,
-                          term_dst, rank_type,
+                          rank_type,
                           xpath_use, rset_nmem,
                           &(*result_sets)[*num_result_sets],
                           kc, zm);
@@ -1304,6 +1379,7 @@ static ZEBRA_RES search_terms_list(ZebraHandle zh,
                                    Z_AttributesPlusTerm *zapt,
                                    const char *termz,
                                    const Odr_oid *attributeSet,
+                                   zint hits_limit,
                                    NMEM stream,
                                    const char *index_type, int complete_flag,
                                    const char *rank_type,
@@ -1315,7 +1391,7 @@ static ZEBRA_RES search_terms_list(ZebraHandle zh,
     zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
     if (zebra_maps_is_icu(zm))
         zebra_map_tokenize_start(zm, termz, strlen(termz));
-    return search_terms_chrmap(zh, zapt, termz, attributeSet,
+    return search_terms_chrmap(zh, zapt, termz, attributeSet, hits_limit,
                                stream, index_type, complete_flag,
                                rank_type, xpath_use,
                                rset_nmem, result_sets, num_result_sets,
@@ -1393,6 +1469,7 @@ static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
                                       Z_AttributesPlusTerm *zapt,
                                       const char *termz_org,
                                       const Odr_oid *attributeSet,
+                                       zint hits_limit,
                                       NMEM stream,
                                       const char *index_type,
                                        int complete_flag,
@@ -1405,7 +1482,7 @@ static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
     RSET *result_sets = 0;
     int num_result_sets = 0;
     ZEBRA_RES res =
-       search_terms_list(zh, zapt, termz_org, attributeSet,
+       search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
                           stream, index_type, complete_flag,
                           rank_type, xpath_use,
                           rset_nmem,
@@ -1458,6 +1535,7 @@ static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
                                        Z_AttributesPlusTerm *zapt,
                                        const char *termz_org,
                                        const Odr_oid *attributeSet,
+                                        zint hits_limit,
                                        NMEM stream,
                                        const char *index_type, 
                                         int complete_flag,
@@ -1471,7 +1549,7 @@ static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
     int num_result_sets = 0;
     int i;
     ZEBRA_RES res =
-       search_terms_list(zh, zapt, termz_org, attributeSet,
+       search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
                           stream, index_type, complete_flag,
                           rank_type, xpath_use,
                           rset_nmem,
@@ -1525,6 +1603,7 @@ static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
                                         Z_AttributesPlusTerm *zapt,
                                         const char *termz_org,
                                         const Odr_oid *attributeSet,
+                                         zint hits_limit,
                                         NMEM stream,
                                         const char *index_type, 
                                          int complete_flag,
@@ -1538,7 +1617,7 @@ static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
     int num_result_sets = 0;
     int i;
     ZEBRA_RES res =
-       search_terms_list(zh, zapt, termz_org, attributeSet,
+       search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
                           stream, index_type, complete_flag,
                           rank_type, xpath_use,
                           rset_nmem,
@@ -1595,7 +1674,7 @@ static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
                            struct grep_info *grep_info,
                            int *max_pos,
                            zebra_map_t zm,
-                           char *term_dst,
+                           WRBUF display_term,
                            int *error_code)
 {
     AttrType relation;
@@ -1614,7 +1693,7 @@ static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
     {
     case 1:
         yaz_log(log_level_rpn, "Relation <");
-        if (!term_100(zm, term_sub, term_num, 1, term_dst))
+        if (!term_100(zm, term_sub, term_num, 1, display_term))
         { 
             wrbuf_destroy(term_num);
             return 0;
@@ -1624,7 +1703,7 @@ static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
         break;
     case 2:
         yaz_log(log_level_rpn, "Relation <=");
-        if (!term_100(zm, term_sub, term_num, 1, term_dst))
+        if (!term_100(zm, term_sub, term_num, 1, display_term))
         {
             wrbuf_destroy(term_num);
             return 0;
@@ -1634,7 +1713,7 @@ static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
         break;
     case 4:
         yaz_log(log_level_rpn, "Relation >=");
-        if (!term_100(zm, term_sub, term_num, 1, term_dst))
+        if (!term_100(zm, term_sub, term_num, 1, display_term))
         {
             wrbuf_destroy(term_num);
             return 0;
@@ -1644,7 +1723,7 @@ static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
         break;
     case 5:
         yaz_log(log_level_rpn, "Relation >");
-        if (!term_100(zm, term_sub, term_num, 1, term_dst))
+        if (!term_100(zm, term_sub, term_num, 1, display_term))
         {
             wrbuf_destroy(term_num);
             return 0;
@@ -1655,7 +1734,7 @@ static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
     case -1:
     case 3:
         yaz_log(log_level_rpn, "Relation =");
-        if (!term_100(zm, term_sub, term_num, 1, term_dst))
+        if (!term_100(zm, term_sub, term_num, 1, display_term))
         {
             wrbuf_destroy(term_num);
             return 0; 
@@ -1691,7 +1770,7 @@ static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
                              const Odr_oid *attributeSet, NMEM stream,
                              struct grep_info *grep_info,
                              const char *index_type, int complete_flag,
-                             char *term_dst, 
+                             WRBUF display_term,
                               const char *xpath_use,
                               struct ord_list **ol)
 {
@@ -1731,7 +1810,7 @@ static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
     
     if (!numeric_relation(zh, zapt, &termp, term_dict,
                           attributeSet, grep_info, &max_pos, zm,
-                          term_dst, &relation_error))
+                          display_term, &relation_error))
     {
         if (relation_error)
         {
@@ -1751,6 +1830,7 @@ static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
                                        Z_AttributesPlusTerm *zapt,
                                        const char *termz,
                                        const Odr_oid *attributeSet,
+                                        zint hits_limit,
                                        NMEM stream,
                                        const char *index_type, 
                                         int complete_flag,
@@ -1760,14 +1840,13 @@ static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
                                        RSET *rset,
                                        struct rset_key_control *kc)
 {
-    char term_dst[IT_MAX_WORD+1];
     const char *termp = termz;
     RSET *result_sets = 0;
     int num_result_sets = 0;
     ZEBRA_RES res;
     struct grep_info grep_info;
     int alloc_sets = 0;
-    zint hits_limit_value;
+    zint hits_limit_value = hits_limit;
     const char *term_ref_id_str = 0;
 
     zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
@@ -1780,6 +1859,7 @@ static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
     { 
         struct ord_list *ol;
         WRBUF term_dict = wrbuf_alloc();
+        WRBUF display_term = wrbuf_alloc();
        if (alloc_sets == num_result_sets)
        {
            int add = 10;
@@ -1795,20 +1875,24 @@ static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
         res = numeric_term(zh, zapt, &termp, term_dict,
                            attributeSet, stream, &grep_info,
                           index_type, complete_flag,
-                          term_dst, xpath_use, &ol);
+                          display_term, xpath_use, &ol);
         wrbuf_destroy(term_dict);
        if (res == ZEBRA_FAIL || termp == 0)
+        {
+            wrbuf_destroy(display_term);
            break;
-        yaz_log(YLOG_DEBUG, "term: %s", term_dst);
+        }
+        yaz_log(YLOG_DEBUG, "term: %s", wrbuf_cstr(display_term));
         result_sets[num_result_sets] =
            rset_trunc(zh, grep_info.isam_p_buf,
-                      grep_info.isam_p_indx, term_dst,
-                      strlen(term_dst), rank_type,
+                      grep_info.isam_p_indx, wrbuf_buf(display_term),
+                      wrbuf_len(display_term), rank_type,
                       0 /* preserve position */,
                       zapt->term->which, rset_nmem, 
                       kc, kc->scope, ol, index_type,
                       hits_limit_value,
                       term_ref_id_str);
+        wrbuf_destroy(display_term);
        if (!result_sets[num_result_sets])
            break;
        num_result_sets++;
@@ -1921,7 +2005,7 @@ static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
     sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet);
     sk->u.sortAttributes->list = zapt->attributes;
 
-    sks->sortRelation = (int *)
+    sks->sortRelation = (Odr_int *)
         nmem_malloc(stream, sizeof(*sks->sortRelation));
     if (sort_relation_value == 1)
         *sks->sortRelation = Z_SortKeySpec_ascending;
@@ -1930,7 +2014,7 @@ static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
     else 
         *sks->sortRelation = Z_SortKeySpec_ascending;
 
-    sks->caseSensitivity = (int *)
+    sks->caseSensitivity = (Odr_int *)
         nmem_malloc(stream, sizeof(*sks->caseSensitivity));
     *sks->caseSensitivity = 0;
 
@@ -2157,14 +2241,16 @@ ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
 
 static ZEBRA_RES rpn_search_database(ZebraHandle zh, 
                                      Z_AttributesPlusTerm *zapt,
-                                     const Odr_oid *attributeSet, NMEM stream,
+                                     const Odr_oid *attributeSet,
+                                     zint hits_limit, NMEM stream,
                                      Z_SortKeySpecList *sort_sequence,
                                      NMEM rset_nmem,
                                      RSET *rset,
                                      struct rset_key_control *kc);
 
 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
-                               const Odr_oid *attributeSet, NMEM stream,
+                               const Odr_oid *attributeSet,
+                                zint hits_limit, NMEM stream,
                                Z_SortKeySpecList *sort_sequence,
                                int num_bases, const char **basenames, 
                                NMEM rset_nmem,
@@ -2184,7 +2270,7 @@ static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
             res = ZEBRA_FAIL;
             break;
         }
-        res = rpn_search_database(zh, zapt, attributeSet, stream,
+        res = rpn_search_database(zh, zapt, attributeSet, hits_limit, stream,
                                   sort_sequence,
                                   rset_nmem, rsets+i, kc);
         if (res != ZEBRA_OK)
@@ -2211,7 +2297,8 @@ static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
 
 static ZEBRA_RES rpn_search_database(ZebraHandle zh, 
                                      Z_AttributesPlusTerm *zapt,
-                                     const Odr_oid *attributeSet, NMEM stream,
+                                     const Odr_oid *attributeSet,
+                                     zint hits_limit, NMEM stream,
                                      Z_SortKeySpecList *sort_sequence,
                                      NMEM rset_nmem,
                                      RSET *rset,
@@ -2284,7 +2371,8 @@ static ZEBRA_RES rpn_search_database(ZebraHandle zh,
     */
     if (!strcmp(search_type, "phrase"))
     {
-        res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
+        res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, hits_limit,
+                                    stream,
                                    index_type, complete_flag, rank_type,
                                    xpath_use,
                                    rset_nmem,
@@ -2292,7 +2380,8 @@ static ZEBRA_RES rpn_search_database(ZebraHandle zh,
     }
     else if (!strcmp(search_type, "and-list"))
     {
-        res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
+        res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, hits_limit,
+                                      stream,
                                      index_type, complete_flag, rank_type,
                                      xpath_use,
                                      rset_nmem,
@@ -2300,7 +2389,8 @@ static ZEBRA_RES rpn_search_database(ZebraHandle zh,
     }
     else if (!strcmp(search_type, "or-list"))
     {
-        res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
+        res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, hits_limit,
+                                     stream,
                                     index_type, complete_flag, rank_type,
                                     xpath_use,
                                      rset_nmem,
@@ -2313,7 +2403,8 @@ static ZEBRA_RES rpn_search_database(ZebraHandle zh,
     }
     else if (!strcmp(search_type, "numeric"))
     {
-        res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
+        res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, hits_limit,
+                                     stream,
                                     index_type, complete_flag, rank_type,
                                     xpath_use,
                                     rset_nmem,
@@ -2333,7 +2424,8 @@ static ZEBRA_RES rpn_search_database(ZebraHandle zh,
 }
 
 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
-                                     const Odr_oid *attributeSet, 
+                                     const Odr_oid *attributeSet,
+                                      zint hits_limit,
                                      NMEM stream, NMEM rset_nmem,
                                      Z_SortKeySpecList *sort_sequence,
                                      int num_bases, const char **basenames,
@@ -2374,6 +2466,7 @@ ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs,
 
 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
                         const Odr_oid *attributeSet, 
+                         zint hits_limit,
                         NMEM stream, NMEM rset_nmem,
                         Z_SortKeySpecList *sort_sequence,
                         int num_bases, const char **basenames,
@@ -2384,7 +2477,7 @@ ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
     ZEBRA_RES res;
     struct rset_key_control *kc = zebra_key_control_create(zh);
 
-    res = rpn_search_structure(zh, zs, attributeSet,
+    res = rpn_search_structure(zh, zs, attributeSet, hits_limit,
                               stream, rset_nmem,
                               sort_sequence, 
                               num_bases, basenames,
@@ -2410,7 +2503,7 @@ ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
 }
 
 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
-                              const Odr_oid *attributeSet, 
+                              const Odr_oid *attributeSet, zint hits_limit,
                               NMEM stream, NMEM rset_nmem,
                               Z_SortKeySpecList *sort_sequence,
                               int num_bases, const char **basenames,
@@ -2429,7 +2522,7 @@ ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
        int num_result_sets_r = 0;
 
         res = rpn_search_structure(zh, zs->u.complex->s1,
-                                  attributeSet, stream, rset_nmem,
+                                  attributeSet, hits_limit, stream, rset_nmem,
                                   sort_sequence,
                                   num_bases, basenames,
                                   &result_sets_l, &num_result_sets_l,
@@ -2442,7 +2535,7 @@ ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
            return res;
        }
         res = rpn_search_structure(zh, zs->u.complex->s2,
-                                  attributeSet, stream, rset_nmem,
+                                  attributeSet, hits_limit, stream, rset_nmem,
                                   sort_sequence,
                                   num_bases, basenames,
                                   &result_sets_r, &num_result_sets_r,
@@ -2537,7 +2630,8 @@ ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
         {
             yaz_log(YLOG_DEBUG, "rpn_search_APT");
             res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
-                                attributeSet, stream, sort_sequence,
+                                attributeSet, hits_limit,
+                                 stream, sort_sequence,
                                 num_bases, basenames, rset_nmem, &rset,
                                 kc);
            if (res != ZEBRA_OK)
@@ -2579,6 +2673,7 @@ ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
 /*
  * Local variables:
  * c-basic-offset: 4
+ * c-file-style: "Stroustrup"
  * indent-tabs-mode: nil
  * End:
  * vim: shiftwidth=4 tabstop=8 expandtab