X-Git-Url: http://git.indexdata.com/?p=yaz-moved-to-github.git;a=blobdiff_plain;f=include%2Fyaz%2Ficu_I18N.h;h=16d8aaa7ca289be3ec6130a2b3235ac3e170b759;hp=457f767b176c56dee9d55d3376566501c5ac7a51;hb=ad88b93c8fbb00728acd0b49b4079167304ed58d;hpb=64d15b4ed1a24c3064f4fa3c50668ea99b58111b diff --git a/include/yaz/icu_I18N.h b/include/yaz/icu_I18N.h index 457f767..16d8aaa 100644 --- a/include/yaz/icu_I18N.h +++ b/include/yaz/icu_I18N.h @@ -1,5 +1,5 @@ /* This file is part of the YAZ toolkit. - * Copyright (C) 1995-2009 Index Data. + * Copyright (C) 1995-2013 Index Data. * All rights reserved. * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -40,15 +40,14 @@ #include /* Basic ICU data types */ #include /* char names */ -#include +#include #include -#include #include /* declared structs and functions */ -int icu_check_status (UErrorCode status); +int icu_check_status(UErrorCode status); struct icu_buf_utf16 { @@ -64,12 +63,17 @@ struct icu_buf_utf16 * icu_buf_utf16_clear(struct icu_buf_utf16 * buf16); struct icu_buf_utf16 * icu_buf_utf16_resize(struct icu_buf_utf16 * buf16, size_t capacity); -struct icu_buf_utf16 * icu_buf_utf16_copy(struct icu_buf_utf16 * dest16, - struct icu_buf_utf16 * src16); +struct icu_buf_utf16 *icu_buf_utf16_copy(struct icu_buf_utf16 * dest16, + const struct icu_buf_utf16 * src16); -void icu_buf_utf16_destroy(struct icu_buf_utf16 * buf16); +struct icu_buf_utf16 *icu_buf_utf16_append(struct icu_buf_utf16 *dest16, + const struct icu_buf_utf16 * src16); + +void icu_buf_utf16_log(const char *lead, struct icu_buf_utf16 *src16); +void icu_buf_utf16_destroy(struct icu_buf_utf16 * buf16); +struct icu_buf_utf8; struct icu_buf_utf8 { @@ -88,182 +92,77 @@ struct icu_buf_utf8 * icu_buf_utf8_resize(struct icu_buf_utf8 * buf8, void icu_buf_utf8_destroy(struct icu_buf_utf8 * buf8); -UErrorCode icu_utf16_from_utf8(struct icu_buf_utf16 * dest16, - struct icu_buf_utf8 * src8, - UErrorCode * status); - UErrorCode icu_utf16_from_utf8_cstr(struct icu_buf_utf16 * dest16, const char * src8cstr, UErrorCode * status); +const char *icu_buf_utf8_to_cstr(struct icu_buf_utf8 *src8); + -UErrorCode icu_utf16_to_utf8(struct icu_buf_utf8 * dest8, - struct icu_buf_utf16 * src16, +UErrorCode icu_utf16_to_utf8(struct icu_buf_utf8 *dest8, + const struct icu_buf_utf16 *src16, UErrorCode * status); -struct icu_casemap -{ - char action; -}; +struct icu_casemap; + +struct icu_casemap *icu_casemap_create(char action, UErrorCode *status); -struct icu_casemap * icu_casemap_create(char action, UErrorCode *status); +struct icu_casemap *icu_casemap_clone(struct icu_casemap *old); -void icu_casemap_destroy(struct icu_casemap * casemap); +void icu_casemap_destroy(struct icu_casemap *casemap); -int icu_casemap_casemap(struct icu_casemap * casemap, - struct icu_buf_utf16 * dest16, - struct icu_buf_utf16 * src16, +int icu_casemap_casemap(struct icu_casemap *casemap, + struct icu_buf_utf16 *dest16, + struct icu_buf_utf16 *src16, UErrorCode *status, const char *locale); -int icu_utf16_casemap(struct icu_buf_utf16 * dest16, - struct icu_buf_utf16 * src16, +int icu_utf16_casemap(struct icu_buf_utf16 *dest16, + struct icu_buf_utf16 *src16, const char *locale, char action, UErrorCode *status); void icu_sortkey8_from_utf16(UCollator *coll, - struct icu_buf_utf8 * dest8, - struct icu_buf_utf16 * src16, - UErrorCode * status); - -struct icu_tokenizer -{ - char action; - UBreakIterator* bi; - struct icu_buf_utf16 * buf16; - int32_t token_count; - int32_t token_id; - int32_t token_start; - int32_t token_end; -/* - keep always invariant - 0 <= token_start - <= token_end - <= buf16->utf16_len - and invariant - 0 <= token_id <= token_count -*/ -}; + struct icu_buf_utf8 *dest8, + struct icu_buf_utf16 *src16, + UErrorCode *status); +struct icu_tokenizer; struct icu_tokenizer * icu_tokenizer_create(const char *locale, char action, UErrorCode *status); -void icu_tokenizer_destroy(struct icu_tokenizer * tokenizer); +struct icu_tokenizer *icu_tokenizer_clone(struct icu_tokenizer *old); +void icu_tokenizer_destroy(struct icu_tokenizer *tokenizer); -int icu_tokenizer_attach(struct icu_tokenizer * tokenizer, - struct icu_buf_utf16 * src16, UErrorCode *status); +int icu_tokenizer_attach(struct icu_tokenizer *tokenizer, + struct icu_buf_utf16 *src16, UErrorCode *status); -int32_t icu_tokenizer_next_token(struct icu_tokenizer * tokenizer, - struct icu_buf_utf16 * tkn16, - UErrorCode *status); +int32_t icu_tokenizer_next_token(struct icu_tokenizer *tokenizer, + struct icu_buf_utf16 *tkn16, + UErrorCode *status, + size_t *start, size_t *len); -int32_t icu_tokenizer_token_id(struct icu_tokenizer * tokenizer); -int32_t icu_tokenizer_token_start(struct icu_tokenizer * tokenizer); -int32_t icu_tokenizer_token_end(struct icu_tokenizer * tokenizer); -int32_t icu_tokenizer_token_length(struct icu_tokenizer * tokenizer); int32_t icu_tokenizer_token_count(struct icu_tokenizer * tokenizer); - - -struct icu_transform -{ - char action; - UParseError parse_error; - UTransliterator * trans; -}; +struct icu_transform; struct icu_transform * icu_transform_create(const char *id, char action, const char *rules, UErrorCode *status); - +struct icu_transform *icu_transform_clone(struct icu_transform *old); void icu_transform_destroy(struct icu_transform * transform); -int icu_transform_trans(struct icu_transform * transform, - struct icu_buf_utf16 * dest16, - struct icu_buf_utf16 * src16, +int icu_transform_trans(struct icu_transform *transform, + struct icu_buf_utf16 *dest16, + const struct icu_buf_utf16 *src16, UErrorCode *status); -enum icu_chain_step_type { - ICU_chain_step_type_none, - ICU_chain_step_type_display, /* convert to utf8 display format */ - ICU_chain_step_type_casemap, /* apply utf16 charmap */ - ICU_chain_step_type_transform, /* apply utf16 transform */ - ICU_chain_step_type_tokenize, /* apply utf16 tokenization */ - ICU_chain_step_type_transliterate /* apply utf16 tokenization */ -}; - - - -struct icu_chain_step -{ - /* type and action object */ - enum icu_chain_step_type type; - union { - struct icu_casemap * casemap; - struct icu_transform * transform; - struct icu_tokenizer * tokenizer; - } u; - /* temprary post-action utf16 buffer */ - struct icu_buf_utf16 * buf16; - struct icu_chain_step * previous; - int more_tokens; - int need_new_token; -}; - - -struct icu_chain; - -struct icu_chain_step * icu_chain_step_create(yaz_icu_chain_t chain, - enum icu_chain_step_type type, - const uint8_t * rule, - struct icu_buf_utf16 * buf16, - UErrorCode *status); - - -void icu_chain_step_destroy(struct icu_chain_step * step); - - -struct icu_chain -{ - char *locale; - int sort; - - const char * src8cstr; - - UCollator * coll; - - /* number of tokens returned so far */ - int32_t token_count; - - /* utf8 output buffers */ - struct icu_buf_utf8 * display8; - struct icu_buf_utf8 * norm8; - struct icu_buf_utf8 * sort8; - - /* utf16 source buffer */ - struct icu_buf_utf16 * src16; - - /* linked list of chain steps */ - struct icu_chain_step * steps; -}; - -struct icu_chain_step * icu_chain_insert_step(yaz_icu_chain_t chain, - enum icu_chain_step_type type, - const uint8_t * rule, - UErrorCode *status); - -int icu_chain_step_next_token(yaz_icu_chain_t chain, - struct icu_chain_step * step, - UErrorCode *status); +struct icu_chain_step; int icu_chain_token_number(yaz_icu_chain_t chain); -const UCollator * icu_chain_get_coll(yaz_icu_chain_t chain); - -yaz_icu_chain_t icu_chain_create(const char * locale, - int sort, - UErrorCode * status); - +yaz_icu_chain_t icu_chain_create(const char *locale, + int sort, UErrorCode *status); #endif /* ICU_I18NL_H */