X-Git-Url: http://git.indexdata.com/?p=yaz-moved-to-github.git;a=blobdiff_plain;f=include%2Fyaz%2Ficu.h;h=007e29b5e50ac2810525e0ea903a6c46d81cd99f;hp=488071b336dbbeb07c643933c9830bbbd2671501;hb=94b1547e5951e1e01bf5180159e74095cd0527f4;hpb=05e94adc1a717e85ba5b775468c3c9e2c8a26034 diff --git a/include/yaz/icu.h b/include/yaz/icu.h index 488071b..007e29b 100644 --- a/include/yaz/icu.h +++ b/include/yaz/icu.h @@ -109,6 +109,14 @@ YAZ_EXPORT const char * icu_chain_token_norm(yaz_icu_chain_t chain); */ YAZ_EXPORT const char * icu_chain_token_sortkey(yaz_icu_chain_t chain); +/** \brief returns token as it relates to original text + \param chain ICU chain + \param start offset in original text + \param len number of uchars in original text +*/ +YAZ_EXPORT void icu_chain_get_org_info(yaz_icu_chain_t chain, + size_t *start, size_t *len); + /** \brief ICU tokenizer iterator type (opaque) */ typedef struct icu_iter *yaz_icu_iter_t; @@ -170,6 +178,14 @@ const char *icu_iter_get_display(yaz_icu_iter_t iter); YAZ_EXPORT int icu_iter_get_token_number(yaz_icu_iter_t iter); +/** \brief returns ICU original token start (offset) and length + \param iter ICU tokenizer iterator + \param start offset of last token in original text + \param len length of last token in original text +*/ +YAZ_EXPORT +void icu_iter_get_org_info(yaz_icu_iter_t iter, size_t *start, size_t *len); + YAZ_END_CDECL #endif /* YAZ_ICU_H */