/* This file is part of the YAZ toolkit.
- * Copyright (C) 1995-2012 Index Data.
+ * Copyright (C) Index Data.
* All rights reserved.
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*/
YAZ_EXPORT yaz_icu_chain_t icu_chain_xml_config(const xmlNode *xml_node,
int sort,
- UErrorCode * status);
+ UErrorCode *status);
/** \brief pass string to ICU for parsing/tokenization/etc
\param chain ICU chain to be used for parsing
\param src8cstr input C string (null-terminated)
\retval 1 success
*/
YAZ_EXPORT int icu_chain_assign_cstr(yaz_icu_chain_t chain,
- const char * src8cstr,
+ const char *src8cstr,
UErrorCode *status);
/** \brief returns one token (if any)
*/
YAZ_EXPORT const char * icu_chain_token_sortkey(yaz_icu_chain_t chain);
+/** \brief returns token as it relates to original text
+ \param chain ICU chain
+ \param start receives the offset of the token in the original text
+ \param len receives the number of uchars of the token in the original text
+
+ NOTE(review): presumably describes the most recently returned token,
+ as documented for icu_iter_get_org_info -- confirm against implementation.
+*/
+YAZ_EXPORT void icu_chain_get_org_info(yaz_icu_chain_t chain,
+ size_t *start, size_t *len);
+
/** \brief ICU tokenizer iterator type (opaque) */
typedef struct icu_iter *yaz_icu_iter_t;
/** \brief starts iteration over string
\param iter ICU tokenizer iterator
\param src8cstr input string (0-terminated)
-
+
Call icu_iter_next to iterate over each token.
*/
YAZ_EXPORT
YAZ_EXPORT
int icu_iter_get_token_number(yaz_icu_iter_t iter);
+/** \brief returns ICU original token start (offset) and length
+ \param iter ICU tokenizer iterator
+ \param start receives the offset of last token in original text
+ \param len receives the length of last token in original text
+
+ NOTE(review): unit of len is presumably uchars, as documented for
+ icu_chain_get_org_info -- confirm against implementation.
+*/
+YAZ_EXPORT
+void icu_iter_get_org_info(yaz_icu_iter_t iter, size_t *start, size_t *len);
+
YAZ_END_CDECL
#endif /* YAZ_ICU_H */