X-Git-Url: http://git.indexdata.com/?p=yaz-moved-to-github.git;a=blobdiff_plain;f=src%2Ficu_tokenizer.c;h=7e2fc3f0656b275fa57e814fb3b20a031b9795a1;hp=e09960f9b05e6407fd6eb41b361bcd839349524b;hb=94b1547e5951e1e01bf5180159e74095cd0527f4;hpb=0c46d2e66bdeea1600e700124a81a5d0a65d349e diff --git a/src/icu_tokenizer.c b/src/icu_tokenizer.c index e09960f..7e2fc3f 100644 --- a/src/icu_tokenizer.c +++ b/src/icu_tokenizer.c @@ -78,7 +78,7 @@ struct icu_tokenizer *icu_tokenizer_clone(struct icu_tokenizer *old) struct icu_tokenizer *icu_tokenizer_create(const char *locale, char action, UErrorCode *status) { - struct icu_tokenizer * tokenizer + struct icu_tokenizer *tokenizer = (struct icu_tokenizer *) xmalloc(sizeof(struct icu_tokenizer)); icu_tokenizer_reset(tokenizer, action); @@ -119,7 +119,7 @@ struct icu_tokenizer *icu_tokenizer_create(const char *locale, char action, return 0; } -void icu_tokenizer_destroy(struct icu_tokenizer * tokenizer) +void icu_tokenizer_destroy(struct icu_tokenizer *tokenizer) { if (tokenizer) { @@ -130,8 +130,8 @@ void icu_tokenizer_destroy(struct icu_tokenizer * tokenizer) } } -int icu_tokenizer_attach(struct icu_tokenizer * tokenizer, - struct icu_buf_utf16 * src16, +int icu_tokenizer_attach(struct icu_tokenizer *tokenizer, + struct icu_buf_utf16 *src16, UErrorCode *status) { if (!tokenizer || !tokenizer->bi || !src16) @@ -153,9 +153,10 @@ int icu_tokenizer_attach(struct icu_tokenizer * tokenizer, return 1; } -int32_t icu_tokenizer_next_token(struct icu_tokenizer * tokenizer, - struct icu_buf_utf16 * tkn16, - UErrorCode *status) +int32_t icu_tokenizer_next_token(struct icu_tokenizer *tokenizer, + struct icu_buf_utf16 *tkn16, + UErrorCode *status, + size_t *start, size_t *len) { int32_t tkn_start = 0; int32_t tkn_end = 0; @@ -202,6 +203,9 @@ int32_t icu_tokenizer_next_token(struct icu_tokenizer * tokenizer, tokenizer->token_start = tkn_start; tokenizer->token_end = tkn_end; + *start = tkn_start; + *len = tkn_end - tkn_start; + /* copying into token buffer if it exists */ if (tkn16) { @@ -217,7 +221,7 @@ int32_t icu_tokenizer_next_token(struct icu_tokenizer * tokenizer, return tkn_len; } -int32_t icu_tokenizer_token_count(struct icu_tokenizer * tokenizer) +int32_t icu_tokenizer_token_count(struct icu_tokenizer *tokenizer) { return tokenizer->token_count; }