X-Git-Url: http://git.indexdata.com/?p=yaz-moved-to-github.git;a=blobdiff_plain;f=src%2Ficu_tokenizer.c;h=7e2fc3f0656b275fa57e814fb3b20a031b9795a1;hp=06970297cc9568313e87a867af95949d9818bb7d;hb=94b1547e5951e1e01bf5180159e74095cd0527f4;hpb=5242cb5a8634bfa38b9333ff7f903e718ac6e292 diff --git a/src/icu_tokenizer.c b/src/icu_tokenizer.c index 0697029..7e2fc3f 100644 --- a/src/icu_tokenizer.c +++ b/src/icu_tokenizer.c @@ -1,5 +1,5 @@ /* This file is part of the YAZ toolkit. - * Copyright (C) 1995-2012 Index Data + * Copyright (C) 1995-2013 Index Data * See the file LICENSE for details. */ @@ -78,7 +78,7 @@ struct icu_tokenizer *icu_tokenizer_clone(struct icu_tokenizer *old) struct icu_tokenizer *icu_tokenizer_create(const char *locale, char action, UErrorCode *status) { - struct icu_tokenizer * tokenizer + struct icu_tokenizer *tokenizer = (struct icu_tokenizer *) xmalloc(sizeof(struct icu_tokenizer)); icu_tokenizer_reset(tokenizer, action); @@ -119,7 +119,7 @@ struct icu_tokenizer *icu_tokenizer_create(const char *locale, char action, return 0; } -void icu_tokenizer_destroy(struct icu_tokenizer * tokenizer) +void icu_tokenizer_destroy(struct icu_tokenizer *tokenizer) { if (tokenizer) { @@ -130,8 +130,8 @@ void icu_tokenizer_destroy(struct icu_tokenizer * tokenizer) } } -int icu_tokenizer_attach(struct icu_tokenizer * tokenizer, - struct icu_buf_utf16 * src16, +int icu_tokenizer_attach(struct icu_tokenizer *tokenizer, + struct icu_buf_utf16 *src16, UErrorCode *status) { if (!tokenizer || !tokenizer->bi || !src16) @@ -153,9 +153,10 @@ int icu_tokenizer_attach(struct icu_tokenizer * tokenizer, return 1; } -int32_t icu_tokenizer_next_token(struct icu_tokenizer * tokenizer, - struct icu_buf_utf16 * tkn16, - UErrorCode *status) +int32_t icu_tokenizer_next_token(struct icu_tokenizer *tokenizer, + struct icu_buf_utf16 *tkn16, + UErrorCode *status, + size_t *start, size_t *len) { int32_t tkn_start = 0; int32_t tkn_end = 0; @@ -202,6 +203,9 @@ int32_t icu_tokenizer_next_token(struct icu_tokenizer * tokenizer, tokenizer->token_start = tkn_start; tokenizer->token_end = tkn_end; + *start = tkn_start; + *len = tkn_end - tkn_start; + /* copying into token buffer if it exists */ if (tkn16) { @@ -217,7 +221,7 @@ int32_t icu_tokenizer_next_token(struct icu_tokenizer * tokenizer, return tkn_len; } -int32_t icu_tokenizer_token_count(struct icu_tokenizer * tokenizer) +int32_t icu_tokenizer_token_count(struct icu_tokenizer *tokenizer) { return tokenizer->token_count; }