X-Git-Url: http://git.indexdata.com/?p=yaz-moved-to-github.git;a=blobdiff_plain;f=include%2Fyaz%2Ficu_I18N.h;h=457f767b176c56dee9d55d3376566501c5ac7a51;hp=3abe6bb83351665b22e094fcc801a496dd2633c4;hb=64d15b4ed1a24c3064f4fa3c50668ea99b58111b;hpb=b59f3a79326742a4432bda999670f0c85e2c83b7 diff --git a/include/yaz/icu_I18N.h b/include/yaz/icu_I18N.h index 3abe6bb..457f767 100644 --- a/include/yaz/icu_I18N.h +++ b/include/yaz/icu_I18N.h @@ -1,5 +1,5 @@ -/* - * Copyright (c) 1995-2007, Index Data +/* This file is part of the YAZ toolkit. + * Copyright (C) 1995-2009 Index Data. * All rights reserved. * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -25,13 +25,17 @@ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +/** \file + \brief Internal header for ICU utilities + + These functions, while non-static, are considered unstable and internal + and may be renamed for each YAZ release. 
+*/ + #ifndef ICU_I18NL_H #define ICU_I18NL_H -#include - -#include -#include +#include #include /* Basic ICU data types */ #include /* char names */ @@ -40,6 +44,7 @@ #include #include +#include /* declared structs and functions */ @@ -47,9 +52,9 @@ int icu_check_status (UErrorCode status); struct icu_buf_utf16 { - UChar * utf16; - int32_t utf16_len; - int32_t utf16_cap; + UChar * utf16; + int32_t utf16_len; + int32_t utf16_cap; }; struct icu_buf_utf16 * icu_buf_utf16_create(size_t capacity); @@ -98,33 +103,31 @@ UErrorCode icu_utf16_to_utf8(struct icu_buf_utf8 * dest8, struct icu_casemap { - char locale[16]; char action; }; -struct icu_casemap * icu_casemap_create(const char *locale, char action, - UErrorCode *status); +struct icu_casemap * icu_casemap_create(char action, UErrorCode *status); void icu_casemap_destroy(struct icu_casemap * casemap); int icu_casemap_casemap(struct icu_casemap * casemap, struct icu_buf_utf16 * dest16, struct icu_buf_utf16 * src16, - UErrorCode *status); + UErrorCode *status, + const char *locale); int icu_utf16_casemap(struct icu_buf_utf16 * dest16, struct icu_buf_utf16 * src16, const char *locale, char action, UErrorCode *status); -UErrorCode icu_sortkey8_from_utf16(UCollator *coll, - struct icu_buf_utf8 * dest8, - struct icu_buf_utf16 * src16, - UErrorCode * status); +void icu_sortkey8_from_utf16(UCollator *coll, + struct icu_buf_utf8 * dest8, + struct icu_buf_utf16 * src16, + UErrorCode * status); struct icu_tokenizer { - char locale[16]; char action; UBreakIterator* bi; struct icu_buf_utf16 * buf16; @@ -162,31 +165,31 @@ int32_t icu_tokenizer_token_count(struct icu_tokenizer * tokenizer); -struct icu_normalizer +struct icu_transform { char action; - struct icu_buf_utf16 * rules16; - UParseError parse_error[256]; + UParseError parse_error; UTransliterator * trans; }; -struct icu_normalizer * icu_normalizer_create(const char *rules, char action, - UErrorCode *status); - +struct icu_transform * icu_transform_create(const char *id, 
char action, + const char *rules, + UErrorCode *status); -void icu_normalizer_destroy(struct icu_normalizer * normalizer); +void icu_transform_destroy(struct icu_transform * transform); -int icu_normalizer_normalize(struct icu_normalizer * normalizer, - struct icu_buf_utf16 * dest16, - struct icu_buf_utf16 * src16, - UErrorCode *status); +int icu_transform_trans(struct icu_transform * transform, + struct icu_buf_utf16 * dest16, + struct icu_buf_utf16 * src16, + UErrorCode *status); enum icu_chain_step_type { ICU_chain_step_type_none, ICU_chain_step_type_display, /* convert to utf8 display format */ ICU_chain_step_type_casemap, /* apply utf16 charmap */ - ICU_chain_step_type_normalize, /* apply utf16 normalization */ - ICU_chain_step_type_tokenize /* apply utf16 tokenization */ + ICU_chain_step_type_transform, /* apply utf16 transform */ + ICU_chain_step_type_tokenize, /* apply utf16 tokenization */ + ICU_chain_step_type_transliterate /* apply utf16 transliteration */ }; @@ -197,7 +200,7 @@ struct icu_chain_step enum icu_chain_step_type type; union { struct icu_casemap * casemap; - struct icu_normalizer * normalizer; + struct icu_transform * transform; struct icu_tokenizer * tokenizer; } u; /* temprary post-action utf16 buffer */ @@ -210,7 +213,7 @@ struct icu_chain_step struct icu_chain; -struct icu_chain_step * icu_chain_step_create(struct icu_chain * chain, +struct icu_chain_step * icu_chain_step_create(yaz_icu_chain_t chain, enum icu_chain_step_type type, const uint8_t * rule, struct icu_buf_utf16 * buf16, @@ -222,9 +225,11 @@ void icu_chain_step_destroy(struct icu_chain_step * step); struct icu_chain { - uint8_t locale[16]; + char *locale; int sort; + const char * src8cstr; + UCollator * coll; /* number of tokens returned so far */ @@ -242,49 +247,32 @@ struct icu_chain struct icu_chain_step * steps; }; -struct icu_chain * icu_chain_create(const uint8_t * locale, - int sort, - UErrorCode * status); - -void icu_chain_destroy(struct icu_chain * chain); - -struct 
icu_chain * icu_chain_xml_config(xmlNode *xml_node, - const uint8_t * locale, - int sort, - UErrorCode * status); - -struct icu_chain_step * icu_chain_insert_step(struct icu_chain * chain, +struct icu_chain_step * icu_chain_insert_step(yaz_icu_chain_t chain, enum icu_chain_step_type type, const uint8_t * rule, UErrorCode *status); -int icu_chain_step_next_token(struct icu_chain * chain, +int icu_chain_step_next_token(yaz_icu_chain_t chain, struct icu_chain_step * step, UErrorCode *status); -int icu_chain_assign_cstr(struct icu_chain * chain, - const char * src8cstr, - UErrorCode *status); - -int icu_chain_next_token(struct icu_chain * chain, - UErrorCode *status); +int icu_chain_token_number(yaz_icu_chain_t chain); -int icu_chain_get_token_count(struct icu_chain * chain); +const UCollator * icu_chain_get_coll(yaz_icu_chain_t chain); -const char * icu_chain_get_display(struct icu_chain * chain); +yaz_icu_chain_t icu_chain_create(const char * locale, + int sort, + UErrorCode * status); -const char * icu_chain_get_norm(struct icu_chain * chain); - -const char * icu_chain_get_sort(struct icu_chain * chain); - -const UCollator * icu_chain_get_coll(struct icu_chain * chain); #endif /* ICU_I18NL_H */ /* * Local variables: * c-basic-offset: 4 + * c-file-style: "Stroustrup" * indent-tabs-mode: nil * End: * vim: shiftwidth=4 tabstop=8 expandtab */ +