projects
/
idzebra-moved-to-github.git
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (parent:
2b86700
)
Refactor ICU basechar search into function
author
Adam Dickmeiss
<adam@indexdata.dk>
Mon, 21 Jan 2013 11:32:58 +0000
(12:32 +0100)
committer
Adam Dickmeiss
<adam@indexdata.dk>
Mon, 21 Jan 2013 11:32:58 +0000
(12:32 +0100)
index/rpnsearch.c
patch
|
blob
|
history
diff --git
a/index/rpnsearch.c
b/index/rpnsearch.c
index
f5aa669
..
1109a60
100644
(file)
--- a/
index/rpnsearch.c
+++ b/
index/rpnsearch.c
@@
-234,6
+234,20
@@
static void add_non_space(const char *start, const char *end,
}
}
+/* Return the length of the basechars prefix of an ICU sort key.
+
+ ICU sort keys seem to be of the form
+ basechars \x01 accents \x01 length
+ For now we'll just right truncate after basechars. This
+ may give false hits due to accents not being used.
+
+ buf: sort key bytes; only bytes below index i are examined.
+ i: number of bytes in buf to consider (the key length).
+
+ Scans backwards from the end of buf for two \x01 separators and
+ returns the index of the second one found, i.e. the number of
+ bytes that precede the accents section. Returns 0 when fewer than
+ two separators are present, or when nothing precedes the second
+ one. A 0 result is not reported as an error here, so callers that
+ need to reject such keys must test for it themselves.
+*/
+static size_t icu_basechars(const char *buf, size_t i)
+{
+ while (i > 0 && buf[--i] != '\x01') /* skip length */
+ ;
+ while (i > 0 && buf[--i] != '\x01') /* skip accents */
+ ;
+ return i; /* only basechars left */
+}
+
static int term_102_icu(zebra_map_t zm,
const char **src, WRBUF term_dict, int space_split,
WRBUF display_term)
static int term_102_icu(zebra_map_t zm,
const char **src, WRBUF term_dict, int space_split,
WRBUF display_term)
@@
-264,12
+278,8
@@
static int term_102_icu(zebra_map_t zm,
if (zebra_map_tokenize_next(zm, &res_buf, &res_len,
&display_buf, &display_len))
{
if (zebra_map_tokenize_next(zm, &res_buf, &res_len,
&display_buf, &display_len))
{
- size_t i = res_len;
- while (i > 0 && res_buf[--i] != '\x01')
- ;
- while (i > 0 && res_buf[--i] != '\x01')
- ;
- res_len = i; /* reduce res_len */
+ size_t i;
+ res_len = icu_basechars(res_buf, res_len);
for (i = 0; i < res_len; i++)
{
if (strchr(REGEX_CHARS "\\", res_buf[i]))
for (i = 0; i < res_len; i++)
{
if (strchr(REGEX_CHARS "\\", res_buf[i]))
@@
-339,21
+349,7
@@
static int term_100_icu(zebra_map_t zm,
wrbuf_write(display_term, display_buf, display_len);
if (mode)
{
wrbuf_write(display_term, display_buf, display_len);
if (mode)
{
- /* ICU sort keys seem to be of the form
- basechars \x01 accents \x01 length
- For now we'll just right truncate from basechars . This
- may give false hits due to accents not being used.
- */
- i = res_len;
- while (i > 0 && res_buf[--i] != '\x01')
- ;
- while (i > 0 && res_buf[--i] != '\x01')
- ;
- if (i == 0)
- { /* did not find base chars at all. Throw error */
- return -1;
- }
- res_len = i; /* reduce res_len */
+ res_len = icu_basechars(res_buf, res_len);
}
if (mode & 2)
wrbuf_puts(term_dict, ".*");
}
if (mode & 2)
wrbuf_puts(term_dict, ".*");