X-Git-Url: http://git.indexdata.com/?p=yaz-moved-to-github.git;a=blobdiff_plain;f=include%2Fyaz%2Ficu_I18N.h;h=c5703840f297d17bdee53755c28052c788c9e009;hp=c0af4cf0d96b8de910ed28168a57d4c61ccdf6ef;hb=30af0a1ebbea75c3e757eb03d3f988c61fce8511;hpb=765b94caaa93566a9792019ec5d2bf56fc8100a3 diff --git a/include/yaz/icu_I18N.h b/include/yaz/icu_I18N.h index c0af4cf..c570384 100644 --- a/include/yaz/icu_I18N.h +++ b/include/yaz/icu_I18N.h @@ -1,5 +1,5 @@ -/* - * Copyright (c) 1995-2007, Index Data +/* This file is part of the YAZ toolkit. + * Copyright (C) Index Data. * All rights reserved. * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -27,6 +27,9 @@ /** \file \brief Internal header for ICU utilities + + These functions, while non-static, are considered unstable and internal + and may be renamed for each YAZ release. */ #ifndef ICU_I18NL_H @@ -34,20 +37,17 @@ #include -#include - #include /* Basic ICU data types */ #include /* char names */ -#include +#include #include -#include #include /* declared structs and functions */ -int icu_check_status (UErrorCode status); +int icu_check_status(UErrorCode status); struct icu_buf_utf16 { @@ -63,12 +63,17 @@ struct icu_buf_utf16 * icu_buf_utf16_clear(struct icu_buf_utf16 * buf16); struct icu_buf_utf16 * icu_buf_utf16_resize(struct icu_buf_utf16 * buf16, size_t capacity); -struct icu_buf_utf16 * icu_buf_utf16_copy(struct icu_buf_utf16 * dest16, - struct icu_buf_utf16 * src16); +struct icu_buf_utf16 *icu_buf_utf16_copy(struct icu_buf_utf16 * dest16, + const struct icu_buf_utf16 * src16); -void icu_buf_utf16_destroy(struct icu_buf_utf16 * buf16); +struct icu_buf_utf16 *icu_buf_utf16_append(struct icu_buf_utf16 *dest16, + const struct icu_buf_utf16 * src16); + +void icu_buf_utf16_log(const char *lead, struct icu_buf_utf16 *src16); +void icu_buf_utf16_destroy(struct icu_buf_utf16 * buf16); +struct icu_buf_utf8; struct icu_buf_utf8 { @@ -87,184 +92,86 @@ struct icu_buf_utf8 * icu_buf_utf8_resize(struct icu_buf_utf8 * buf8, void icu_buf_utf8_destroy(struct icu_buf_utf8 * buf8); -UErrorCode icu_utf16_from_utf8(struct icu_buf_utf16 * dest16, - struct icu_buf_utf8 * src8, - UErrorCode * status); - UErrorCode icu_utf16_from_utf8_cstr(struct icu_buf_utf16 * dest16, const char * src8cstr, UErrorCode * status); +const char *icu_buf_utf8_to_cstr(struct icu_buf_utf8 *src8); + -UErrorCode icu_utf16_to_utf8(struct icu_buf_utf8 * dest8, - struct icu_buf_utf16 * src16, +UErrorCode icu_utf16_to_utf8(struct icu_buf_utf8 *dest8, + const struct icu_buf_utf16 *src16, UErrorCode * status); -struct icu_casemap -{ - char action; -}; +struct icu_casemap; + +struct icu_casemap *icu_casemap_create(char action, UErrorCode *status); -struct icu_casemap * icu_casemap_create(char action, UErrorCode *status); +struct icu_casemap *icu_casemap_clone(struct icu_casemap *old); -void icu_casemap_destroy(struct icu_casemap * casemap); +void icu_casemap_destroy(struct icu_casemap *casemap); -int icu_casemap_casemap(struct icu_casemap * casemap, - struct icu_buf_utf16 * dest16, - struct icu_buf_utf16 * src16, +int icu_casemap_casemap(struct icu_casemap *casemap, + struct icu_buf_utf16 *dest16, + struct icu_buf_utf16 *src16, UErrorCode *status, const char *locale); -int icu_utf16_casemap(struct icu_buf_utf16 * dest16, - struct icu_buf_utf16 * src16, +int icu_utf16_casemap(struct icu_buf_utf16 *dest16, + struct icu_buf_utf16 *src16, const char *locale, char action, UErrorCode *status); void icu_sortkey8_from_utf16(UCollator *coll, - struct icu_buf_utf8 * dest8, - struct icu_buf_utf16 * src16, - UErrorCode * status); - -struct icu_tokenizer -{ - char action; - UBreakIterator* bi; - struct icu_buf_utf16 * buf16; - int32_t token_count; - int32_t token_id; - int32_t token_start; - int32_t token_end; -/* - keep always invariant - 0 <= token_start - <= token_end - <= buf16->utf16_len - and invariant - 0 <= token_id <= token_count -*/ -}; + struct icu_buf_utf8 *dest8, + struct icu_buf_utf16 *src16, + UErrorCode *status); +struct icu_tokenizer; struct icu_tokenizer * icu_tokenizer_create(const char *locale, char action, UErrorCode *status); -void icu_tokenizer_destroy(struct icu_tokenizer * tokenizer); +struct icu_tokenizer *icu_tokenizer_clone(struct icu_tokenizer *old); +void icu_tokenizer_destroy(struct icu_tokenizer *tokenizer); -int icu_tokenizer_attach(struct icu_tokenizer * tokenizer, - struct icu_buf_utf16 * src16, UErrorCode *status); +int icu_tokenizer_attach(struct icu_tokenizer *tokenizer, + struct icu_buf_utf16 *src16, UErrorCode *status); -int32_t icu_tokenizer_next_token(struct icu_tokenizer * tokenizer, - struct icu_buf_utf16 * tkn16, - UErrorCode *status); +int32_t icu_tokenizer_next_token(struct icu_tokenizer *tokenizer, + struct icu_buf_utf16 *tkn16, + UErrorCode *status, + size_t *start, size_t *len); -int32_t icu_tokenizer_token_id(struct icu_tokenizer * tokenizer); -int32_t icu_tokenizer_token_start(struct icu_tokenizer * tokenizer); -int32_t icu_tokenizer_token_end(struct icu_tokenizer * tokenizer); -int32_t icu_tokenizer_token_length(struct icu_tokenizer * tokenizer); int32_t icu_tokenizer_token_count(struct icu_tokenizer * tokenizer); +struct icu_transform; +struct icu_transform * icu_transform_create(const char *id, char action, + const char *rules, + UErrorCode *status); +struct icu_transform *icu_transform_clone(struct icu_transform *old); +void icu_transform_destroy(struct icu_transform * transform); -struct icu_normalizer -{ - char action; - struct icu_buf_utf16 * rules16; - UParseError parse_error[256]; - UTransliterator * trans; -}; - -struct icu_normalizer * icu_normalizer_create(const char *rules, char action, - UErrorCode *status); - - -void icu_normalizer_destroy(struct icu_normalizer * normalizer); - -int icu_normalizer_normalize(struct icu_normalizer * normalizer, - struct icu_buf_utf16 * dest16, - struct icu_buf_utf16 * src16, - UErrorCode *status); - -enum icu_chain_step_type { - ICU_chain_step_type_none, - ICU_chain_step_type_display, /* convert to utf8 display format */ - ICU_chain_step_type_casemap, /* apply utf16 charmap */ - ICU_chain_step_type_normalize, /* apply utf16 normalization */ - ICU_chain_step_type_tokenize /* apply utf16 tokenization */ -}; - - - -struct icu_chain_step -{ - /* type and action object */ - enum icu_chain_step_type type; - union { - struct icu_casemap * casemap; - struct icu_normalizer * normalizer; - struct icu_tokenizer * tokenizer; - } u; - /* temprary post-action utf16 buffer */ - struct icu_buf_utf16 * buf16; - struct icu_chain_step * previous; - int more_tokens; - int need_new_token; -}; - - -struct icu_chain; - -struct icu_chain_step * icu_chain_step_create(yaz_icu_chain_t chain, - enum icu_chain_step_type type, - const uint8_t * rule, - struct icu_buf_utf16 * buf16, - UErrorCode *status); - - -void icu_chain_step_destroy(struct icu_chain_step * step); - - -struct icu_chain -{ - char *locale; - int sort; - - const char * src8cstr; - - UCollator * coll; - - /* number of tokens returned so far */ - int32_t token_count; - - /* utf8 output buffers */ - struct icu_buf_utf8 * display8; - struct icu_buf_utf8 * norm8; - struct icu_buf_utf8 * sort8; - - /* utf16 source buffer */ - struct icu_buf_utf16 * src16; - - /* linked list of chain steps */ - struct icu_chain_step * steps; -}; - -struct icu_chain_step * icu_chain_insert_step(yaz_icu_chain_t chain, - enum icu_chain_step_type type, - const uint8_t * rule, - UErrorCode *status); +int icu_transform_trans(struct icu_transform *transform, + struct icu_buf_utf16 *dest16, + const struct icu_buf_utf16 *src16, + UErrorCode *status); -int icu_chain_step_next_token(yaz_icu_chain_t chain, - struct icu_chain_step * step, - UErrorCode *status); +struct icu_chain_step; int icu_chain_token_number(yaz_icu_chain_t chain); -const UCollator * icu_chain_get_coll(yaz_icu_chain_t chain); +yaz_icu_chain_t icu_chain_create(const char *locale, + int sort, UErrorCode *status); #endif /* ICU_I18NL_H */ /* * Local variables: * c-basic-offset: 4 + * c-file-style: "Stroustrup" * indent-tabs-mode: nil * End: * vim: shiftwidth=4 tabstop=8 expandtab */ +