Honor position attribute, i.e. allow first-in-field search. To
[idzebra-moved-to-github.git] / index / zrpn.c
index 8cc6515..4ae0937 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: zrpn.c,v 1.217 2006-06-07 10:50:08 adam Exp $
+/* $Id: zrpn.c,v 1.228 2006-09-08 14:40:53 adam Exp $
    Copyright (C) 1995-2006
    Index Data ApS
 
@@ -15,9 +15,9 @@ FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 for more details.
 
 You should have received a copy of the GNU General Public License
-along with Zebra; see the file LICENSE.zebra.  If not, write to the
-Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
-02111-1307, USA.
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
 */
 
 #include <stdio.h>
@@ -46,6 +46,8 @@ struct rpn_char_map_info
 static int log_level_set = 0;
 static int log_level_rpn = 0;
 
+#define TERMSET_DISABLE 1
+
 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
 {
     struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
@@ -66,7 +68,7 @@ static const char **rpn_char_map_handler(void *vp, const char **from, int len)
 }
 
 static void rpn_char_map_prepare(struct zebra_register *reg, int reg_type,
-                                  struct rpn_char_map_info *map_info)
+                                 struct rpn_char_map_info *map_info)
 {
     map_info->zm = reg->zebra_maps;
     map_info->reg_type = reg_type;
@@ -147,7 +149,6 @@ static void add_isam_p(const char *name, const char *info,
     assert(*info == sizeof(*p->isam_p_buf));
     memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
 
-#if 1
     if (p->termset)
     {
         const char *db;
@@ -156,7 +157,7 @@ static void add_isam_p(const char *name, const char *info,
         const char *index_name;
         int len = key_SU_decode (&ord, (const unsigned char *) name);
         
-        zebra_term_untrans  (p->zh, p->reg_type, term_tmp, name+len+1);
+        zebra_term_untrans  (p->zh, p->reg_type, term_tmp, name+len);
         yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
         zebraExplain_lookup_ord(p->zh->reg->zei,
                                 ord, 0 /* index_type */, &db, &index_name);
@@ -165,7 +166,6 @@ static void add_isam_p(const char *name, const char *info,
         resultSetAddTerm(p->zh, p->termset, name[len], db,
                         index_name, term_tmp);
     }
-#endif
     (p->isam_p_indx)++;
 }
 
@@ -883,7 +883,6 @@ static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
         break;
     case 3:
     case 102:
-    case 103:
     case -1:
         if (!**term_sub)
             return 1;
@@ -895,6 +894,12 @@ static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
         strcat(term_tmp, term_component);
         strcat(term_tmp, ")");
        break;
+    case 103:
+        yaz_log(log_level_rpn, "Relation always matches");
+        /* skip to end of term (we don't care what it is) */
+        while (**term_sub != '\0')
+            (*term_sub)++;
+        break;
     default:
        *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
        return 0;
@@ -922,7 +927,7 @@ static ZEBRA_RES term_limits_APT(ZebraHandle zh,
     AttrType hits_limit_attr;
     int term_ref_id_int;
  
-    attr_init_APT(&hits_limit_attr, zapt, 9);
+    attr_init_APT(&hits_limit_attr, zapt, 11);
     *hits_limit_value  = attr_find(&hits_limit_attr, NULL);
 
     attr_init_APT(&term_ref_id_attr, zapt, 10);
@@ -979,8 +984,7 @@ static ZEBRA_RES term_trunc(ZebraHandle zh,
     const char *term_ref_id_str = 0;
     *rset = 0;
 
-    term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
-                   stream);
+    term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
     grep_info->isam_p_indx = 0;
     res = string_term(zh, zapt, term_sub, attributeSet, stream, grep_info,
                      reg_type, complete_flag, num_bases, basenames,
@@ -1015,7 +1019,6 @@ static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
     int j, r, base_no;
     AttrType truncation;
     int truncation_value;
-    oid_value curAttributeSet = attributeSet;
     const char *termp;
     struct rpn_char_map_info rcmi;
     int space_split = complete_flag ? 0 : 1;
@@ -1032,20 +1035,13 @@ static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
     for (base_no = 0; base_no < num_bases; base_no++)
     {
        int ord = -1;
-       int attr_ok = 0;
        int regex_range = 0;
-       int init_pos = 0;
-#if 0
-        attent attp;
-        data1_local_attribute id_xpath_attr;
-        data1_local_attribute *local_attr;
-#endif
         int max_pos, prefix_len = 0;
        int relation_error;
         char ord_buf[32];
         int ord_len, i;
 
-        termp = *term_sub;
+        termp = *term_sub; /* start of term for each database */
 
         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
         {
@@ -1054,32 +1050,21 @@ static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
             return ZEBRA_FAIL;
         }
         
-        if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use, 
-                              curAttributeSet, &ord) 
-            != ZEBRA_OK)
-        {
-            break;
-        }
+        if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use,
+                              attributeSet, &ord) != ZEBRA_OK)
+            continue;
+
+       bases_ok++;
+
         *ol = ord_list_append(stream, *ol, ord);
-        
-        if (prefix_len)
-            term_dict[prefix_len++] = '|';
-        else
-            term_dict[prefix_len++] = '(';
-        
         ord_len = key_SU_encode (ord, ord_buf);
+        
+        term_dict[prefix_len++] = '(';
         for (i = 0; i<ord_len; i++)
         {
-            term_dict[prefix_len++] = 1;
-               term_dict[prefix_len++] = ord_buf[i];
+            term_dict[prefix_len++] = 1;  /* our internal regexp escape char */
+            term_dict[prefix_len++] = ord_buf[i];
         }
-        if (ord_len > init_pos)
-            init_pos = ord_len;
-        
-       bases_ok++;
-        if (prefix_len)
-           attr_ok = 1;
-
         term_dict[prefix_len++] = ')';
         term_dict[prefix_len] = '\0';
         j = prefix_len;
@@ -1087,10 +1072,10 @@ static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
         {
         case -1:         /* not specified */
         case 100:        /* do not truncate */
-            if (!string_relation (zh, zapt, &termp, term_dict,
-                                  attributeSet,
-                                  reg_type, space_split, term_dst,
-                                 &relation_error))
+            if (!string_relation(zh, zapt, &termp, term_dict,
+                                 attributeSet,
+                                 reg_type, space_split, term_dst,
+                                &relation_error))
            {
                if (relation_error)
                {
@@ -1199,21 +1184,19 @@ static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
                                truncation_value);
            return ZEBRA_FAIL;
         }
-       if (attr_ok)
+       if (1)
        {
            char buf[80];
            const char *input = term_dict + prefix_len;
            esc_str(buf, sizeof(buf), input, strlen(input));
        }
-       if (attr_ok)
-       {
-           yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_dict+prefix_len);
-           r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
-                                grep_info, &max_pos, init_pos,
-                                grep_handle);
-           if (r)
-               yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
-       }
+        yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_dict+prefix_len);
+        r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
+                             grep_info, &max_pos, 
+                             ord_len /* number of "exact" chars */,
+                             grep_handle);
+        if (r)
+            yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
     }
     if (!bases_ok)
         return ZEBRA_FAIL;
@@ -1340,7 +1323,6 @@ static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
     grep_info->zh = zh;
     grep_info->reg_type = reg_type;
     grep_info->termset = 0;
-
     if (!zapt)
         return ZEBRA_OK;
     attr_init_APT(&termset, zapt, 8);
@@ -1348,6 +1330,10 @@ static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
         attr_find_ex(&termset, NULL, &termset_value_string);
     if (termset_value_numeric != -1)
     {
+#if TERMSET_DISABLE
+        zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
+        return ZEBRA_FAIL;
+#else
         char resname[32];
         const char *termset_name = 0;
         if (termset_value_numeric != -2)
@@ -1365,6 +1351,7 @@ static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
            zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
             return ZEBRA_FAIL;
         }
+#endif
     }
     return ZEBRA_OK;
 }
@@ -1372,6 +1359,7 @@ static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
 /**
   \brief Create result set(s) for list of terms
   \param zh Zebra Handle
+  \param zapt Attributes Plus Term (RPN leaf)
   \param termz term as used in query but converted to UTF-8
   \param attributeSet default attribute set
   \param stream memory for result
@@ -1381,9 +1369,9 @@ static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
   \param xpath_use use attribute for X-Path (-1 for no X-path)
   \param num_bases number of databases
   \param basenames array of databases
-  \param rset_mem memory for result sets
+  \param rset_nmem memory for result sets
   \param result_sets output result set for each term in list (output)
-  \param number number of output result sets
+  \param num_result_sets number of output result sets
   \param kc rset key control to be used for created result sets
 */
 static ZEBRA_RES term_list_trunc(ZebraHandle zh,
@@ -1403,9 +1391,7 @@ static ZEBRA_RES term_list_trunc(ZebraHandle zh,
     struct grep_info grep_info;
     const char *termp = termz;
     int alloc_sets = 0;
-    int empty_term = *termz ? 0 : 1;
 
-    empty_term = 0;
     *num_result_sets = 0;
     *term_dst = 0;
     if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
@@ -1444,8 +1430,6 @@ static ZEBRA_RES term_list_trunc(ZebraHandle zh,
            break;
        (*num_result_sets)++;
 
-        if (empty_term)
-            break;
         if (!*termp)
             break;
     }
@@ -1453,6 +1437,93 @@ static ZEBRA_RES term_list_trunc(ZebraHandle zh,
     return ZEBRA_OK;
 }
 
+/**
+  \brief Create result set for the position attribute (first-in-field)
+  \param zh Zebra Handle
+  \param zapt Attributes Plus Term (RPN leaf)
+  \param attributeSet default attribute set
+  \param reg_type register type
+  \param num_bases number of databases
+  \param basenames array of databases
+  \param rset_nmem memory for result sets
+  \param rset resulting set (output); left untouched when no set is made
+  \param kc rset key control to be used for created result sets
+
+  Position values 3 and "not given" (-1) need no extra set: ZEBRA_OK is
+  returned and *rset is not modified. Values 1 and 2 look up the
+  FIRST_IN_FIELD_STR entry in the dictionary for each database and OR the
+  hits together. Any other value, a register type without first-in-field
+  support, or a register without isamb yields
+  YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE.
+*/
+static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh,
+                                         Z_AttributesPlusTerm *zapt,
+                                         oid_value attributeSet,
+                                         int reg_type,
+                                         int num_bases, char **basenames,
+                                         NMEM rset_nmem,
+                                         RSET *rset,
+                                         struct rset_key_control *kc)
+{
+    RSET *f_set;
+    int base_no;
+    int position_value;
+    int num_sets = 0;
+    AttrType position;
+
+    /* attribute type 3 is the position attribute */
+    attr_init_APT(&position, zapt, 3);
+    position_value = attr_find(&position, NULL);
+    switch(position_value)
+    {
+    case 3:
+    case -1:
+        /* nothing to restrict on; caller keeps its own sets */
+        return ZEBRA_OK;
+    case 1:
+    case 2:
+        break;
+    default:
+        zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
+                            position_value);
+        return ZEBRA_FAIL;
+    }
+
+    /* register type must have first-in-field indexing enabled */
+    if (!zebra_maps_is_first_in_field(zh->reg->zebra_maps, reg_type))
+    {
+        zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
+                            position_value);
+        return ZEBRA_FAIL;
+    }
+
+    /* only implemented on top of isamb (rsisamb_create below) */
+    if (!zh->reg->isamb)
+    {
+        zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
+                            position_value);
+        return ZEBRA_FAIL;
+    }
+
+    /* scratch array: at most one set per database */
+    f_set = xmalloc(sizeof(RSET) * num_bases);
+    for (base_no = 0; base_no < num_bases; base_no++)
+    {
+        int ord = -1;
+        char ord_buf[32];
+        char term_dict[100];
+        int ord_len;
+        char *val;
+        ISAM_P isam_p;
+
+        if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
+        {
+            zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
+                           basenames[base_no]);
+            /* release scratch array; it used to leak on this path */
+            xfree(f_set);
+            return ZEBRA_FAIL;
+        }
+
+        /* databases where the attribute does not resolve are skipped */
+        if (zebra_apt_get_ord(zh, zapt, reg_type, 0,
+                              attributeSet, &ord) != ZEBRA_OK)
+            continue;
+
+        /* dictionary key is: encoded ordinal + FIRST_IN_FIELD_STR */
+        ord_len = key_SU_encode (ord, ord_buf);
+        memcpy(term_dict, ord_buf, ord_len);
+        strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
+        val = dict_lookup(zh->reg->dict, term_dict);
+        if (!val)
+            continue;
+        /* first byte of the entry is the payload length */
+        assert(*val == sizeof(ISAM_P));
+        memcpy(&isam_p, val+1, sizeof(isam_p));
+
+        f_set[num_sets++] = rsisamb_create(rset_nmem, kc, kc->scope,
+                                           zh->reg->isamb, isam_p, 0);
+    }
+    if (num_sets)
+    {
+        *rset = rset_create_or(rset_nmem, kc, kc->scope,
+                               0 /* termid */, num_sets, f_set);
+    }
+    /* NOTE(review): f_set is freed right after rset_create_or, as in the
+       original code; presumably rset_create_or keeps its own copy of the
+       array - confirm */
+    xfree(f_set);
+    return ZEBRA_OK;
+}
+                                         
 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
                                       Z_AttributesPlusTerm *zapt,
                                       const char *termz_org,
@@ -1475,8 +1546,30 @@ static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
                        num_bases, basenames,
                        rset_nmem,
                        &result_sets, &num_result_sets, kc);
+
     if (res != ZEBRA_OK)
        return res;
+
+    if (num_result_sets > 0)
+    {
+        RSET first_set = 0;
+        res = rpn_search_APT_position(zh, zapt, attributeSet, 
+                                      reg_type,
+                                      num_bases, basenames,
+                                      rset_nmem, &first_set,
+                                      kc);
+        if (res != ZEBRA_OK)
+            return res;
+        if (first_set)
+        {
+            RSET *nsets = nmem_malloc(stream,
+                                      sizeof(RSET) * (num_result_sets+1));
+            nsets[0] = first_set;
+            memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
+            result_sets = nsets;
+            num_result_sets++;
+        }
+    }
     if (num_result_sets == 0)
        *rset = rset_create_null(rset_nmem, kc, 0); 
     else if (num_result_sets == 1)
@@ -1586,33 +1679,54 @@ static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
 
     yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
 
-    if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
-                 term_dst))
-        return 0;
-    term_value = atoi (term_tmp);
     switch (relation_value)
     {
     case 1:
         yaz_log(log_level_rpn, "Relation <");
+        if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
+                      term_dst))
+            return 0;
+        term_value = atoi (term_tmp);
         gen_regular_rel(term_tmp, term_value-1, 1);
         break;
     case 2:
         yaz_log(log_level_rpn, "Relation <=");
+        if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
+                      term_dst))
+            return 0;
+        term_value = atoi (term_tmp);
         gen_regular_rel(term_tmp, term_value, 1);
         break;
     case 4:
         yaz_log(log_level_rpn, "Relation >=");
+        if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
+                      term_dst))
+            return 0;
+        term_value = atoi (term_tmp);
         gen_regular_rel(term_tmp, term_value, 0);
         break;
     case 5:
         yaz_log(log_level_rpn, "Relation >");
+        if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
+                      term_dst))
+            return 0;
+        term_value = atoi (term_tmp);
         gen_regular_rel(term_tmp, term_value+1, 0);
         break;
     case -1:
     case 3:
         yaz_log(log_level_rpn, "Relation =");
+        if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
+                      term_dst))
+            return 0;
+        term_value = atoi (term_tmp);
         sprintf(term_tmp, "(0*%d)", term_value);
        break;
+    case 103:
+        /* term_tmp untouched.. */
+        while (**term_sub != '\0')
+            (*term_sub)++;
+        break;
     default:
        *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
        return 0;
@@ -1628,22 +1742,23 @@ static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
 
 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
                              const char **term_sub, 
-                             oid_value attributeSet,
+                             oid_value attributeSet, NMEM stream,
                              struct grep_info *grep_info,
                              int reg_type, int complete_flag,
                              int num_bases, char **basenames,
                              char *term_dst, 
                               const char *xpath_use,
-                              NMEM stream)
+                              struct ord_list **ol)
 {
     char term_dict[2*IT_MAX_WORD+2];
     int base_no;
-    oid_value curAttributeSet = attributeSet;
     const char *termp;
     struct rpn_char_map_info rcmi;
 
     int bases_ok = 0;     /* no of databases with OK attribute */
 
+    *ol = ord_list_create(stream);
+
     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
 
     for (base_no = 0; base_no < num_bases; base_no++)
@@ -1663,26 +1778,23 @@ static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
         }
 
         if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use,
-                              curAttributeSet, &ord) 
-            != ZEBRA_OK)
-        {
-            break;
-        }
+                              attributeSet, &ord) != ZEBRA_OK)
+            continue;
+        bases_ok++;
+
+        *ol = ord_list_append(stream, *ol, ord);
 
-        if (prefix_len)
-            term_dict[prefix_len++] = '|';
-        else
-            term_dict[prefix_len++] = '(';
-        
         ord_len = key_SU_encode (ord, ord_buf);
+
+        term_dict[prefix_len++] = '(';
         for (i = 0; i < ord_len; i++)
         {
             term_dict[prefix_len++] = 1;
-               term_dict[prefix_len++] = ord_buf[i];
+            term_dict[prefix_len++] = ord_buf[i];
         }
-        bases_ok++;
         term_dict[prefix_len++] = ')';
         term_dict[prefix_len] = '\0';
+
         if (!numeric_relation(zh, zapt, &termp, term_dict,
                              attributeSet, grep_info, &max_pos, reg_type,
                              term_dst, &relation_error))
@@ -1734,6 +1846,7 @@ static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
         return ZEBRA_FAIL;
     while (1)
     { 
+        struct ord_list *ol;
        if (alloc_sets == num_result_sets)
        {
            int add = 10;
@@ -1746,10 +1859,9 @@ static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
        }
         yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
         grep_info.isam_p_indx = 0;
-        res = numeric_term(zh, zapt, &termp, attributeSet, &grep_info,
+        res = numeric_term(zh, zapt, &termp, attributeSet, stream, &grep_info,
                           reg_type, complete_flag, num_bases, basenames,
-                          term_dst, xpath_use,
-                          stream);
+                          term_dst, xpath_use, &ol);
        if (res == ZEBRA_FAIL || termp == 0)
            break;
         yaz_log(YLOG_DEBUG, "term: %s", term_dst);
@@ -1759,30 +1871,28 @@ static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
                       strlen(term_dst), rank_type,
                       0 /* preserve position */,
                       zapt->term->which, rset_nmem, 
-                      kc, kc->scope, 0, reg_type,
+                      kc, kc->scope, ol, reg_type,
                       hits_limit_value,
                       term_ref_id_str);
        if (!result_sets[num_result_sets])
            break;
        num_result_sets++;
+        if (!*termp)
+            break;
     }
     grep_info_delete(&grep_info);
-    if (termp)
-    {
-       int i;
-       for (i = 0; i<num_result_sets; i++)
-           rset_delete(result_sets[i]);
-       return ZEBRA_FAIL;
-    }
+
+    if (res != ZEBRA_OK)
+        return res;
     if (num_result_sets == 0)
         *rset = rset_create_null(rset_nmem, kc, 0);
-    if (num_result_sets == 1)
+    else if (num_result_sets == 1)
         *rset = result_sets[0];
     else
-       *rset = rset_create_and(rset_nmem, kc, kc->scope,
+        *rset = rset_create_and(rset_nmem, kc, kc->scope,
                                 num_result_sets, result_sets);
     if (!*rset)
-       return ZEBRA_FAIL;
+        return ZEBRA_FAIL;
     return ZEBRA_OK;
 }
 
@@ -1891,9 +2001,10 @@ static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
 }
 
 
-static int parse_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
-                       oid_value attributeSet,
-                       struct xpath_location_step *xpath, int max, NMEM mem)
+static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
+                           oid_value attributeSet,
+                           struct xpath_location_step *xpath, int max,
+                           NMEM mem)
 {
     oid_value curAttributeSet = attributeSet;
     AttrType use;
@@ -1913,7 +2024,7 @@ static int parse_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
                         int reg_type, const char *term, 
                         const char *xpath_use,
-                        oid_value curAttributeSet, NMEM rset_nmem,
+                        NMEM rset_nmem,
                        struct rset_key_control *kc)
 {
     RSET rset;
@@ -1921,7 +2032,9 @@ static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
     char term_dict[2048];
     char ord_buf[32];
     int prefix_len = 0;
-    int ord = zebraExplain_lookup_attr_str(zh->reg->zei, reg_type,
+    int ord = zebraExplain_lookup_attr_str(zh->reg->zei, 
+                                           zinfo_index_category_index,
+                                           reg_type,
                                            xpath_use);
     int ord_len, i, r, max_pos;
     int term_type = Z_Term_characterString;
@@ -1962,7 +2075,6 @@ static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
 
 static
 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
-                          oid_value attributeSet,
                           int num_bases, char **basenames,
                           NMEM stream, const char *rank_type, RSET rset,
                           int xpath_len, struct xpath_location_step *xpath,
@@ -1970,9 +2082,9 @@ ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
                           RSET *rset_out,
                           struct rset_key_control *kc)
 {
-    oid_value curAttributeSet = attributeSet;
     int base_no;
     int i;
+    int always_matches = rset ? 0 : 1;
 
     if (xpath_len < 0)
     {
@@ -1987,8 +2099,6 @@ ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
 
     }
 
-    curAttributeSet = VAL_IDXPATH;
-
     /*
       //a    ->    a/.*
       //a/b  ->    b/a/.*
@@ -2024,42 +2134,33 @@ ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
         }
         while (--level >= 0)
         {
-            char xpath_rev[128];
-            int i, len;
+            WRBUF xpath_rev = wrbuf_alloc();
+            int i;
             RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
 
-            *xpath_rev = 0;
-            len = 0;
             for (i = level; i >= 1; --i)
             {
                 const char *cp = xpath[i].part;
                 if (*cp)
                 {
-                    for (;*cp; cp++)
+                    for (; *cp; cp++)
+                    {
                         if (*cp == '*')
-                        {
-                            memcpy (xpath_rev + len, "[^/]*", 5);
-                            len += 5;
-                        }
+                            wrbuf_puts(xpath_rev, "[^/]*");
                         else if (*cp == ' ')
-                        {
-
-                            xpath_rev[len++] = 1;
-                            xpath_rev[len++] = ' ';
-                        }
-
+                            wrbuf_puts(xpath_rev, "\001 ");
                         else
-                            xpath_rev[len++] = *cp;
-                    xpath_rev[len++] = '/';
+                            wrbuf_putc(xpath_rev, *cp);
+
+                        /* wrbuf_putc does not null-terminate, but
+                           wrbuf_puts below ensures it does, so xpath_rev
+                           is OK iff length is > 0 */
+                    }
+                    wrbuf_puts(xpath_rev, "/");
                 }
                 else if (i == 1)  /* // case */
-                {
-                    xpath_rev[len++] = '.';
-                    xpath_rev[len++] = '*';
-                }
+                    wrbuf_puts(xpath_rev, ".*");
             }
-            xpath_rev[len] = 0;
-
             if (xpath[level].predicate &&
                 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
                 xpath[level].predicate->u.relation.name[0])
@@ -2082,33 +2183,40 @@ ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
                 wrbuf_puts(wbuf, "");
                 rset_attr = xpath_trunc(
                     zh, stream, '0', wrbuf_buf(wbuf), ZEBRA_XPATH_ATTR_NAME, 
-                    curAttributeSet, rset_nmem, kc);
+                    rset_nmem, kc);
                 wrbuf_free(wbuf, 1);
             } 
             else 
             {
                 if (!first_path)
+                {
+                    wrbuf_free(xpath_rev, 1);
                     continue;
+                }
             }
-            yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level, xpath_rev);
-            if (strlen(xpath_rev))
+            yaz_log(log_level_rpn, "xpath_rev (%d) = %.*s", level, 
+                    wrbuf_len(xpath_rev), wrbuf_buf(xpath_rev));
+            if (wrbuf_len(xpath_rev))
             {
                 rset_start_tag = xpath_trunc(zh, stream, '0', 
-                                             xpath_rev, 
+                                             wrbuf_buf(xpath_rev),
                                              ZEBRA_XPATH_ELM_BEGIN, 
-                                             curAttributeSet,
                                              rset_nmem, kc);
-            
-                rset_end_tag = xpath_trunc(zh, stream, '0', 
-                                           xpath_rev, 
-                                           ZEBRA_XPATH_ELM_END, 
-                                           curAttributeSet,
-                                           rset_nmem, kc);
-
-                rset = rset_create_between(rset_nmem, kc, kc->scope,
-                                           rset_start_tag, rset,
-                                           rset_end_tag, rset_attr);
+                if (always_matches)
+                    rset = rset_start_tag;
+                else
+                {
+                    rset_end_tag = xpath_trunc(zh, stream, '0', 
+                                               wrbuf_buf(xpath_rev),
+                                               ZEBRA_XPATH_ELM_END, 
+                                               rset_nmem, kc);
+                    
+                    rset = rset_create_between(rset_nmem, kc, kc->scope,
+                                               rset_start_tag, rset,
+                                               rset_end_tag, rset_attr);
+                }
             }
+            wrbuf_free(xpath_rev, 1);
             first_path = 0;
         }
     }
@@ -2116,6 +2224,8 @@ ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
     return ZEBRA_OK;
 }
 
+#define MAX_XPATH_STEPS 10
+
 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
                                oid_value attributeSet, NMEM stream,
                                Z_SortKeySpecList *sort_sequence,
@@ -2133,7 +2243,7 @@ static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
     char termz[IT_MAX_WORD+1];
     int xpath_len;
     const char *xpath_use = 0;
-    struct xpath_location_step xpath[10];
+    struct xpath_location_step xpath[MAX_XPATH_STEPS];
 
     if (!log_level_set)
     {
@@ -2155,13 +2265,31 @@ static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
         return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
                             rank_type, rset_nmem, rset, kc);
     /* consider if an X-Path query is used */
-    xpath_len = parse_xpath(zh, zapt, attributeSet, xpath, 10, stream);
+    xpath_len = rpn_check_xpath(zh, zapt, attributeSet, 
+                                xpath, MAX_XPATH_STEPS, stream);
     if (xpath_len >= 0)
     {
         if (xpath[xpath_len-1].part[0] == '@') 
             xpath_use = ZEBRA_XPATH_ATTR_CDATA;  /* last step is attribute  */
         else
             xpath_use = ZEBRA_XPATH_CDATA;  /* searching for cdata */        
+
+        if (1)
+        {
+            AttrType relation;
+            int relation_value;
+
+            attr_init_APT(&relation, zapt, 2);
+            relation_value = attr_find(&relation, NULL);
+
+            if (relation_value == 103) /* alwaysmatches */
+            {
+                *rset = 0; /* signal no "term" set */
+                return rpn_search_xpath(zh, num_bases, basenames,
+                                        stream, rank_type, *rset, 
+                                        xpath_len, xpath, rset_nmem, rset, kc);
+            }
+        }
     }
 
     /* search using one of the various search type strategies
@@ -2209,25 +2337,16 @@ static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
                                     num_bases, basenames, rset_nmem,
                                     rset, kc);
     }
-    else if (!strcmp(search_type, "always"))
-    {
-        *termz = '\0';
-        res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
-                                    reg_id, complete_flag, rank_type,
-                                    xpath_use,
-                                    num_bases, basenames, rset_nmem,
-                                    rset, kc);
-    }
     else
     {
-       zebra_setError(zh, YAZ_BIB1_UNSUPP_STRUCTURE_ATTRIBUTE, 0);
+       zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
        res = ZEBRA_FAIL;
     }
     if (res != ZEBRA_OK)
        return res;
     if (!*rset)
        return ZEBRA_FAIL;
-    return rpn_search_xpath(zh, attributeSet, num_bases, basenames,
+    return rpn_search_xpath(zh, num_bases, basenames,
                            stream, rank_type, *rset, 
                            xpath_len, xpath, rset_nmem, rset, kc);
 }
@@ -2468,6 +2587,10 @@ static int scan_handle (char *name, const char *info, int pos, void *client)
     else
         idx = - pos - 1;
 
+    /* skip special terms.. of no interest */
+    if (name[len_prefix] < 4)
+        return 1;
+
     if (idx < 0)
        return 0;
     scan_info->list[idx].term = (char *)
@@ -2535,6 +2658,8 @@ static void count_set(ZebraHandle zh, RSET rset, zint *count)
     *count = rset->hits_count;
 }
 
+#define RPN_MAX_ORDS 32
+
 ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
                   oid_value attributeset,
                   int num_bases, char **basenames,
@@ -2550,12 +2675,8 @@ ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
     char termz[IT_MAX_WORD+20];
     struct scan_info *scan_info_array;
     ZebraScanEntry *glist;
-    int ords[32], ord_no = 0;
-    int ptr[32];
-
-    int bases_ok = 0;     /* no of databases with OK attribute */
-    int errCode = 0;      /* err code (if any is not OK) */
-    char *errString = 0;  /* addinfo */
+    int ords[RPN_MAX_ORDS], ord_no = 0;
+    int ptr[RPN_MAX_ORDS];
 
     unsigned index_type;
     char *search_type = NULL;
@@ -2607,7 +2728,7 @@ ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
        zebra_setError(zh, YAZ_BIB1_UNSUPP_ATTRIBUTE_TYPE, 0);
         return ZEBRA_FAIL;
     }
-    for (base_no = 0; base_no < num_bases && ord_no < 32; base_no++)
+    for (base_no = 0; base_no < num_bases && ord_no < RPN_MAX_ORDS; base_no++)
     {
        int ord;
 
@@ -2618,24 +2739,15 @@ ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
            *num_entries = 0;
            return ZEBRA_FAIL;
        }
-
         if (zebra_apt_get_ord(zh, zapt, index_type, 0, attributeset, &ord) 
             != ZEBRA_OK)
-        {
-            break;
-        }
+            continue;
         ords[ord_no++] = ord;
     }
-    if (!bases_ok && errCode)
-    {
-       zebra_setError(zh, errCode, errString);
-        *num_entries = 0;
-       return ZEBRA_FAIL;
-    }
     if (ord_no == 0)
     {
         *num_entries = 0;
-        return ZEBRA_OK;
+        return ZEBRA_FAIL;
     }
     /* prepare dictionary scanning */
     if (num < 1)