X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=index%2Frpnsearch.c;h=47c1c234c84500c4dda68aab85c7a8301fc3cba5;hp=f5aa6698741ff10c706cd199bf421da6f64f0d82;hb=c5971ebf8a88865ed9a1f7c8cf9daa22544f07be;hpb=2b86700a060db9a529b7227a1d3a9175564cd931

diff --git a/index/rpnsearch.c b/index/rpnsearch.c
index f5aa669..47c1c23 100644
--- a/index/rpnsearch.c
+++ b/index/rpnsearch.c
@@ -1,5 +1,5 @@
 /* This file is part of the Zebra server.
-   Copyright (C) 2004-2013 Index Data
+   Copyright (C) Index Data
 
 Zebra is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free
@@ -187,7 +187,7 @@ static void esc_str(char *out_buf, size_t out_size,
     assert(in_buf);
     assert(out_size > 20);
     *out_buf = '\0';
-    for (k = 0; k<in_size; k++)
+    for (k = 0; k < in_size; k++)
     {
 	int c = in_buf[k] & 0xff;
 	int pc;
@@ -234,6 +234,20 @@ static void add_non_space(const char *start, const char *end,
 }
 
 
+/* ICU sort keys seem to be of the form
+     basechars \x01 accents \x01 length
+   Given a key of i bytes in buf, scan backwards past the last two \x01
+   separators and return the size of the basechars part (0 if not found).
+   Truncating to basechars may give false hits, as accents are ignored. */
+static size_t icu_basechars(const char *buf, size_t i)
+{
+    while (i > 0 && buf[--i] != '\x01') /* back over length field */
+        ;
+    while (i > 0 && buf[--i] != '\x01') /* back over accents field */
+        ;
+    return i; /* index of the separator that ends basechars, or 0 */
+}
+
 static int term_102_icu(zebra_map_t zm,
                         const char **src, WRBUF term_dict, int space_split,
                         WRBUF display_term)
@@ -264,12 +278,8 @@ static int term_102_icu(zebra_map_t zm,
                 if (zebra_map_tokenize_next(zm, &res_buf, &res_len,
                                             &display_buf, &display_len))
                 {
-                    size_t i = res_len;
-                    while (i > 0 && res_buf[--i] != '\x01')
-                        ;
-                    while (i > 0 && res_buf[--i] != '\x01')
-                        ;
-                    res_len = i; /* reduce res_len */
+                    size_t i;
+                    res_len = icu_basechars(res_buf, res_len);
                     for (i = 0; i < res_len; i++)
                     {
                         if (strchr(REGEX_CHARS "\\", res_buf[i]))
@@ -301,59 +311,28 @@ static int term_102_icu(zebra_map_t zm,
 }
 
 static int term_100_icu(zebra_map_t zm,
-                        const char **src, WRBUF term_dict, int space_split,
+                        const char **src, WRBUF term_dict,
                         WRBUF display_term,
-                        int mode)
+                        int mode,
+                        size_t token_number)
 {
     size_t i;
     const char *res_buf = 0;
     size_t res_len = 0;
     const char *display_buf;
     size_t display_len;
-    const char *s0 = *src, *s1;
 
-    while (*s0 == ' ')
-        s0++;
-
-    if (*s0 == '\0')
-        return 0;
-
-    if (space_split)
+    zebra_map_tokenize_start(zm, *src, strlen(*src));
+    for (i = 0; i <= token_number; i++)
     {
-        s1 = s0;
-        while (*s1 && *s1 != ' ')
-            s1++;
-    }
-    else
-        s1 = s0 + strlen(s0);
-
-    *src = s1;
-
-    zebra_map_tokenize_start(zm, s0, s1 - s0);
-
-    if (!zebra_map_tokenize_next(zm, &res_buf, &res_len,
-                                 &display_buf, &display_len))
-    {
-        return 0;
+        if (!zebra_map_tokenize_next(zm, &res_buf, &res_len,
+                                     &display_buf, &display_len))
+            return 0;
     }
     wrbuf_write(display_term, display_buf, display_len);
     if (mode)
     {
-        /* ICU sort keys seem to be of the form
-           basechars \x01 accents \x01 length
-           For now we'll just right truncate from basechars . This
-           may give false hits due to accents not being used.
-        */
-        i = res_len;
-        while (i > 0 && res_buf[--i] != '\x01')
-            ;
-        while (i > 0 && res_buf[--i] != '\x01')
-            ;
-        if (i == 0)
-        {  /* did not find base chars at all. Throw error */
-            return -1;
-        }
-        res_len = i; /* reduce res_len */
+        res_len = icu_basechars(res_buf, res_len);
     }
     if (mode & 2)
         wrbuf_puts(term_dict, ".*");
@@ -728,7 +707,7 @@ static void gen_regular_rel(WRBUF term_dict, int val, int islt)
             else
                 dst[dst_p++] = d;
         }
-        for (i = 0; i<pos; i++)
+        for (i = 0; i < pos; i++)
         {
             dst[dst_p++] = '[';
             dst[dst_p++] = '0';
@@ -743,7 +722,7 @@ static void gen_regular_rel(WRBUF term_dict, int val, int islt)
     {
         /* match everything less than 10^(pos-1) */
         strcat(dst, "0*");
-        for (i = 1; i<pos; i++)
+        for (i = 1; i < pos; i++)
             strcat(dst, "[0-9]?");
     }
     else
@@ -975,7 +954,7 @@ static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
 			     WRBUF display_term,
                              const char *xpath_use,
 			     struct ord_list **ol,
-                             zebra_map_t zm);
+                             zebra_map_t zm, size_t token_number);
 
 ZEBRA_RES zebra_term_limits_APT(ZebraHandle zh,
                                 Z_AttributesPlusTerm *zapt,
@@ -1022,7 +1001,8 @@ static ZEBRA_RES search_term(ZebraHandle zh,
                              NMEM rset_nmem,
                              RSET *rset,
                              struct rset_key_control *kc,
-                             zebra_map_t zm)
+                             zebra_map_t zm,
+                             size_t token_number)
 {
     ZEBRA_RES res;
     struct ord_list *ol;
@@ -1037,7 +1017,7 @@ static ZEBRA_RES search_term(ZebraHandle zh,
     res = string_term(zh, zapt, term_sub, term_dict,
                       attributeSet, stream, grep_info,
 		      index_type, complete_flag,
-		      display_term, xpath_use, &ol, zm);
+		      display_term, xpath_use, &ol, zm, token_number);
     wrbuf_destroy(term_dict);
     if (res == ZEBRA_OK && *term_sub)
     {
@@ -1065,7 +1045,7 @@ static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
 			     WRBUF display_term,
                              const char *xpath_use,
 			     struct ord_list **ol,
-                             zebra_map_t zm)
+                             zebra_map_t zm, size_t token_number)
 {
     int r;
     AttrType truncation;
@@ -1104,7 +1084,7 @@ static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
 
     wrbuf_putc(term_dict, '(');
 
-    for (i = 0; i<ord_len; i++)
+    for (i = 0; i < ord_len; i++)
     {
         wrbuf_putc(term_dict, 1);  /* our internal regexp escape char */
         wrbuf_putc(term_dict, ord_buf[i]);
@@ -1129,7 +1109,7 @@ static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
             {
             case -1:         /* not specified */
             case 100:        /* do not truncate */
-                if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 0))
+                if (!term_100_icu(zm, &termp, term_dict, display_term, 0, token_number))
                 {
                     *term_sub = 0;
                     return ZEBRA_OK;
@@ -1143,21 +1123,21 @@ static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
                 }
                 break;
             case 1:          /* right truncation */
-                if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 1))
+                if (!term_100_icu(zm, &termp, term_dict, display_term, 1, token_number))
                 {
                     *term_sub = 0;
                     return ZEBRA_OK;
                 }
                 break;
             case 2:
-                if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 2))
+                if (!term_100_icu(zm, &termp, term_dict, display_term, 2, token_number))
                 {
                     *term_sub = 0;
                     return ZEBRA_OK;
                 }
                 break;
             case 3:
-                if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 3))
+                if (!term_100_icu(zm, &termp, term_dict, display_term, 3, token_number))
                 {
                     *term_sub = 0;
                     return ZEBRA_OK;
@@ -1428,7 +1408,8 @@ static ZEBRA_RES search_terms_chrmap(ZebraHandle zh,
                           rank_type,
                           xpath_use, rset_nmem,
                           &(*result_sets)[*num_result_sets],
-                          kc, zm);
+                          kc, zm,
+                          *num_result_sets);
 	if (res != ZEBRA_OK)
 	{
 	    int i;
@@ -1588,7 +1569,7 @@ static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
         if (res != ZEBRA_OK)
         {
             int i;
-            for (i = 0; i<num_result_sets; i++)
+            for (i = 0; i < num_result_sets; i++)
                 rset_delete(result_sets[i]);
             return res;
         }
@@ -1644,7 +1625,7 @@ static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
     if (res != ZEBRA_OK)
 	return res;
 
-    for (i = 0; i<num_result_sets; i++)
+    for (i = 0; i < num_result_sets; i++)
     {
         RSET first_set = 0;
         res = search_position(zh, zapt, attributeSet,
@@ -1653,7 +1634,7 @@ static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
                               kc);
         if (res != ZEBRA_OK)
         {
-            for (i = 0; i<num_result_sets; i++)
+            for (i = 0; i < num_result_sets; i++)
                 rset_delete(result_sets[i]);
             return res;
         }
@@ -1712,7 +1693,7 @@ static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
                           kc);
     if (res != ZEBRA_OK)
 	return res;
-    for (i = 0; i<num_result_sets; i++)
+    for (i = 0; i < num_result_sets; i++)
     {
         RSET first_set = 0;
         res = search_position(zh, zapt, attributeSet,
@@ -1721,7 +1702,7 @@ static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
                               kc);
         if (res != ZEBRA_OK)
         {
-            for (i = 0; i<num_result_sets; i++)
+            for (i = 0; i < num_result_sets; i++)
                 rset_delete(result_sets[i]);
             return res;
         }
@@ -1819,6 +1800,7 @@ static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
         gen_regular_rel(term_dict, term_value+1, 0);
         break;
     case -1:
+    case 102:
     case 3:
         yaz_log(log_level_rpn, "Relation =");
         if (!term_100(zm, term_sub, term_num, 1, display_term))
@@ -2067,7 +2049,7 @@ static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
         sort_sequence->specs = (Z_SortKeySpec **)
             nmem_malloc(stream, sort_sequence->num_specs *
                         sizeof(*sort_sequence->specs));
-        for (i = 0; i<sort_sequence->num_specs; i++)
+        for (i = 0; i < sort_sequence->num_specs; i++)
             sort_sequence->specs[i] = 0;
     }
     if (zapt->term->which != Z_Term_general)
@@ -2159,7 +2141,7 @@ static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
         const char *flags = "void";
 
         wrbuf_putc(term_dict, '(');
-        for (i = 0; i<ord_len; i++)
+        for (i = 0; i < ord_len; i++)
         {
             wrbuf_putc(term_dict, 1);
             wrbuf_putc(term_dict, ord_buf[i]);
@@ -2201,7 +2183,7 @@ ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
     }
 
     yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
-    for (i = 0; i<xpath_len; i++)
+    for (i = 0; i < xpath_len; i++)
     {
         yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
 
@@ -2574,7 +2556,7 @@ ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
     if (res != ZEBRA_OK)
     {
 	int i;
-	for (i = 0; i<num_result_sets; i++)
+	for (i = 0; i < num_result_sets; i++)
 	    rset_delete(result_sets[i]);
 	*result_set = 0;
     }
@@ -2617,7 +2599,7 @@ ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
 	if (res != ZEBRA_OK)
 	{
 	    int i;
-	    for (i = 0; i<num_result_sets_l; i++)
+	    for (i = 0; i < num_result_sets_l; i++)
 		rset_delete(result_sets_l[i]);
 	    return res;
 	}
@@ -2630,9 +2612,9 @@ ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
 	if (res != ZEBRA_OK)
 	{
 	    int i;
-	    for (i = 0; i<num_result_sets_l; i++)
+	    for (i = 0; i < num_result_sets_l; i++)
 		rset_delete(result_sets_l[i]);
-	    for (i = 0; i<num_result_sets_r; i++)
+	    for (i = 0; i < num_result_sets_r; i++)
 		rset_delete(result_sets_r[i]);
 	    return res;
 	}