X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=include%2Fyaz%2Ficu.h;h=176e2999d8fa741907202e3ede9ab23d03ebd4af;hb=d654b817f2bdb80102dd663d78f31ab3eea4f6bd;hp=23364b39547a1e737e239ba29f05f6c3b8d00c03;hpb=43a9d38d20c1b1bcd1a03b2445a501d27526bd35;p=yaz-moved-to-github.git diff --git a/include/yaz/icu.h b/include/yaz/icu.h index 23364b3..176e299 100644 --- a/include/yaz/icu.h +++ b/include/yaz/icu.h @@ -1,5 +1,5 @@ /* This file is part of the YAZ toolkit. - * Copyright (C) 1995-2011 Index Data. + * Copyright (C) Index Data. * All rights reserved. * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -55,7 +55,7 @@ YAZ_EXPORT void icu_chain_destroy(yaz_icu_chain_t chain); */ YAZ_EXPORT yaz_icu_chain_t icu_chain_xml_config(const xmlNode *xml_node, int sort, - UErrorCode * status); + UErrorCode *status); /** \brief pass string to ICU for parsing/tokenization/etc \param chain ICU chain to be used for parsing \param src8cstr input C string (null-terminated) @@ -64,7 +64,7 @@ YAZ_EXPORT yaz_icu_chain_t icu_chain_xml_config(const xmlNode *xml_node, \retval 1 success */ YAZ_EXPORT int icu_chain_assign_cstr(yaz_icu_chain_t chain, - const char * src8cstr, + const char *src8cstr, UErrorCode *status); /** \brief returns one token (if any) @@ -109,6 +109,24 @@ YAZ_EXPORT const char * icu_chain_token_norm(yaz_icu_chain_t chain); */ YAZ_EXPORT const char * icu_chain_token_sortkey(yaz_icu_chain_t chain); +/** \brief returns token as it relates to original text (legacy) + \param chain ICU chain + \param start offset in original text + \param len number of uchars in original text +*/ +YAZ_EXPORT void icu_chain_get_org_info(yaz_icu_chain_t chain, + size_t *start, size_t *len); + +/** \brief returns token as it relates to original text (2nd version) + \param chain ICU chain + \param start offset in original text + \param len number of uchars in original text + \param cstr if not-null, 
the original string is stored there +*/ +YAZ_EXPORT void icu_chain_get_org_info2(yaz_icu_chain_t chain, + size_t *start, size_t *len, + const char **cstr); + /** \brief ICU tokenizer iterator type (opaque) */ typedef struct icu_iter *yaz_icu_iter_t; @@ -122,7 +140,7 @@ yaz_icu_iter_t icu_iter_create(struct icu_chain *chain); /** \brief starts iteration over string \param iter ICU tokenizer iterator \param src8cstr input string (0-terminated) - + Call icu_iter_next to iterate over each token. */ YAZ_EXPORT @@ -170,6 +188,24 @@ const char *icu_iter_get_display(yaz_icu_iter_t iter); YAZ_EXPORT int icu_iter_get_token_number(yaz_icu_iter_t iter); +/** \brief returns ICU original token start (offset) and length (legacy) + \param iter ICU tokenizer iterator + \param start offset of last token in original text + \param len length of last token in original text +*/ +YAZ_EXPORT +void icu_iter_get_org_info(yaz_icu_iter_t iter, size_t *start, size_t *len); + +/** \brief returns ICU original token start (offset) and length + \param iter ICU tokenizer iterator + \param start offset of last token in original text + \param len length of last token in original text + \param cstr if non-null: original string +*/ +YAZ_EXPORT +void icu_iter_get_org_info2(yaz_icu_iter_t iter, size_t *start, size_t *len, + const char **cstr); + YAZ_END_CDECL #endif /* YAZ_ICU_H */