X-Git-Url: http://git.indexdata.com/?p=yaz-moved-to-github.git;a=blobdiff_plain;f=include%2Fyaz%2Ficu_I18N.h;h=d61c00702aa2a7e458672b2dd32431f7277798f4;hp=f69714c34c2ce59c2174b3e50f8be8a49b79528b;hb=94b1547e5951e1e01bf5180159e74095cd0527f4;hpb=674986e09b1e2c5ea9670355aef21c813323d186 diff --git a/include/yaz/icu_I18N.h b/include/yaz/icu_I18N.h index f69714c..d61c007 100644 --- a/include/yaz/icu_I18N.h +++ b/include/yaz/icu_I18N.h @@ -1,5 +1,5 @@ -/* - * Copyright (c) 1995-2007, Index Data +/* This file is part of the YAZ toolkit. + * Copyright (C) 1995-2013 Index Data. * All rights reserved. * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -25,21 +25,25 @@ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +/** \file + \brief Internal header for ICU utilities + + These functions, while non-static, are considered unstable and internal + and may be renamed for each YAZ release. +*/ + #ifndef ICU_I18NL_H #define ICU_I18NL_H -#include - -#include -#include +#include #include /* Basic ICU data types */ #include /* char names */ -#include +#include #include -#include +#include /* declared structs and functions */ @@ -47,9 +51,9 @@ int icu_check_status (UErrorCode status); struct icu_buf_utf16 { - UChar * utf16; - int32_t utf16_len; - int32_t utf16_cap; + UChar * utf16; + int32_t utf16_len; + int32_t utf16_cap; }; struct icu_buf_utf16 * icu_buf_utf16_create(size_t capacity); @@ -59,12 +63,17 @@ struct icu_buf_utf16 * icu_buf_utf16_clear(struct icu_buf_utf16 * buf16); struct icu_buf_utf16 * icu_buf_utf16_resize(struct icu_buf_utf16 * buf16, size_t capacity); -struct icu_buf_utf16 * icu_buf_utf16_copy(struct icu_buf_utf16 * dest16, - struct icu_buf_utf16 * src16); +struct icu_buf_utf16 *icu_buf_utf16_copy(struct icu_buf_utf16 * dest16, + const struct icu_buf_utf16 * src16); -void icu_buf_utf16_destroy(struct icu_buf_utf16 * buf16); +struct icu_buf_utf16 *icu_buf_utf16_append(struct icu_buf_utf16 *dest16, + const struct icu_buf_utf16 * src16); +void icu_buf_utf16_log(const char *lead, struct icu_buf_utf16 *src16); + +void icu_buf_utf16_destroy(struct icu_buf_utf16 * buf16); +struct icu_buf_utf8; struct icu_buf_utf8 { @@ -83,210 +92,87 @@ struct icu_buf_utf8 * icu_buf_utf8_resize(struct icu_buf_utf8 * buf8, void icu_buf_utf8_destroy(struct icu_buf_utf8 * buf8); -UErrorCode icu_utf16_from_utf8(struct icu_buf_utf16 * dest16, - struct icu_buf_utf8 * src8, - UErrorCode * status); - UErrorCode icu_utf16_from_utf8_cstr(struct icu_buf_utf16 * dest16, const char * src8cstr, UErrorCode * status); +const char *icu_buf_utf8_to_cstr(struct icu_buf_utf8 *src8); -UErrorCode icu_utf16_to_utf8(struct icu_buf_utf8 * dest8, - struct icu_buf_utf16 * src16, + +UErrorCode icu_utf16_to_utf8(struct icu_buf_utf8 *dest8, + const struct icu_buf_utf16 *src16, UErrorCode * status); -struct icu_casemap -{ - char locale[16]; - char action; -}; +struct icu_casemap; -struct icu_casemap * icu_casemap_create(const char *locale, char action, - UErrorCode *status); +struct icu_casemap * icu_casemap_create(char action, UErrorCode *status); + +struct icu_casemap *icu_casemap_clone(struct icu_casemap *old); void icu_casemap_destroy(struct icu_casemap * casemap); int icu_casemap_casemap(struct icu_casemap * casemap, struct icu_buf_utf16 * dest16, struct icu_buf_utf16 * src16, - UErrorCode *status); + UErrorCode *status, + const char *locale); int icu_utf16_casemap(struct icu_buf_utf16 * dest16, struct icu_buf_utf16 * src16, const char *locale, char action, UErrorCode *status); -UErrorCode icu_sortkey8_from_utf16(UCollator *coll, - struct icu_buf_utf8 * dest8, - struct icu_buf_utf16 * src16, - UErrorCode * status); - -struct icu_tokenizer -{ - char locale[16]; - char action; - UBreakIterator* bi; - struct icu_buf_utf16 * buf16; - int32_t token_count; - int32_t token_id; - int32_t token_start; - int32_t token_end; -/* - keep always invariant - 0 <= token_start - <= token_end - <= buf16->utf16_len - and invariant - 0 <= token_id <= token_count -*/ -}; +void icu_sortkey8_from_utf16(UCollator *coll, + struct icu_buf_utf8 * dest8, + struct icu_buf_utf16 * src16, + UErrorCode * status); +struct icu_tokenizer; struct icu_tokenizer * icu_tokenizer_create(const char *locale, char action, UErrorCode *status); +struct icu_tokenizer *icu_tokenizer_clone(struct icu_tokenizer *old); void icu_tokenizer_destroy(struct icu_tokenizer * tokenizer); -int icu_tokenizer_attach(struct icu_tokenizer * tokenizer, +int icu_tokenizer_attach(struct icu_tokenizer * tokenizer, struct icu_buf_utf16 * src16, UErrorCode *status); -int32_t icu_tokenizer_next_token(struct icu_tokenizer * tokenizer, - struct icu_buf_utf16 * tkn16, - UErrorCode *status); +int32_t icu_tokenizer_next_token(struct icu_tokenizer * tokenizer, + struct icu_buf_utf16 * tkn16, + UErrorCode *status, + size_t *start, size_t *len); -int32_t icu_tokenizer_token_id(struct icu_tokenizer * tokenizer); -int32_t icu_tokenizer_token_start(struct icu_tokenizer * tokenizer); -int32_t icu_tokenizer_token_end(struct icu_tokenizer * tokenizer); -int32_t icu_tokenizer_token_length(struct icu_tokenizer * tokenizer); int32_t icu_tokenizer_token_count(struct icu_tokenizer * tokenizer); +struct icu_transform; +struct icu_transform * icu_transform_create(const char *id, char action, + const char *rules, + UErrorCode *status); +struct icu_transform *icu_transform_clone(struct icu_transform *old); +void icu_transform_destroy(struct icu_transform * transform); -struct icu_normalizer -{ - char action; - struct icu_buf_utf16 * rules16; - UParseError parse_error[256]; - UTransliterator * trans; -}; - -struct icu_normalizer * icu_normalizer_create(const char *rules, char action, - UErrorCode *status); - - -void icu_normalizer_destroy(struct icu_normalizer * normalizer); - -int icu_normalizer_normalize(struct icu_normalizer * normalizer, - struct icu_buf_utf16 * dest16, - struct icu_buf_utf16 * src16, - UErrorCode *status); - -enum icu_chain_step_type { - ICU_chain_step_type_none, - ICU_chain_step_type_display, /* convert to utf8 display format */ - ICU_chain_step_type_casemap, /* apply utf16 charmap */ - ICU_chain_step_type_normalize, /* apply utf16 normalization */ - ICU_chain_step_type_tokenize /* apply utf16 tokenization */ -}; - - - -struct icu_chain_step -{ - /* type and action object */ - enum icu_chain_step_type type; - union { - struct icu_casemap * casemap; - struct icu_normalizer * normalizer; - struct icu_tokenizer * tokenizer; - } u; - /* temprary post-action utf16 buffer */ - struct icu_buf_utf16 * buf16; - struct icu_chain_step * previous; - int more_tokens; - int need_new_token; -}; - - -struct icu_chain; - -struct icu_chain_step * icu_chain_step_create(struct icu_chain * chain, - enum icu_chain_step_type type, - const uint8_t * rule, - struct icu_buf_utf16 * buf16, - UErrorCode *status); - - -void icu_chain_step_destroy(struct icu_chain_step * step); - - -struct icu_chain -{ - uint8_t locale[16]; - int sort; - - const char * src8cstr; - - UCollator * coll; - - /* number of tokens returned so far */ - int32_t token_count; - - /* utf8 output buffers */ - struct icu_buf_utf8 * display8; - struct icu_buf_utf8 * norm8; - struct icu_buf_utf8 * sort8; - - /* utf16 source buffer */ - struct icu_buf_utf16 * src16; - - /* linked list of chain steps */ - struct icu_chain_step * steps; -}; - -struct icu_chain * icu_chain_create(const uint8_t * locale, - int sort, - UErrorCode * status); - -void icu_chain_destroy(struct icu_chain * chain); - -struct icu_chain * icu_chain_xml_config(xmlNode *xml_node, - const uint8_t * locale, - int sort, - UErrorCode * status); - -struct icu_chain_step * icu_chain_insert_step(struct icu_chain * chain, - enum icu_chain_step_type type, - const uint8_t * rule, - UErrorCode *status); - -int icu_chain_step_next_token(struct icu_chain * chain, - struct icu_chain_step * step, - UErrorCode *status); - -int icu_chain_assign_cstr(struct icu_chain * chain, - const char * src8cstr, - UErrorCode *status); - -int icu_chain_next_token(struct icu_chain * chain, - UErrorCode *status); - -int icu_chain_get_token_count(struct icu_chain * chain); - -const char * icu_chain_get_display(struct icu_chain * chain); +int icu_transform_trans(struct icu_transform * transform, + struct icu_buf_utf16 * dest16, + const struct icu_buf_utf16 * src16, + UErrorCode *status); -const char * icu_chain_get_norm(struct icu_chain * chain); +struct icu_chain_step; -const char * icu_chain_get_sort(struct icu_chain * chain); +int icu_chain_token_number(yaz_icu_chain_t chain); -const UCollator * icu_chain_get_coll(struct icu_chain * chain); +yaz_icu_chain_t icu_chain_create(const char * locale, + int sort, + UErrorCode * status); #endif /* ICU_I18NL_H */ /* * Local variables: * c-basic-offset: 4 + * c-file-style: "Stroustrup" * indent-tabs-mode: nil * End: * vim: shiftwidth=4 tabstop=8 expandtab */ +