Bump copyright year
[yaz-moved-to-github.git] / include / yaz / icu_I18N.h
1 /* This file is part of the YAZ toolkit.
2  * Copyright (C) 1995-2010 Index Data.
3  * All rights reserved.
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *
7  *     * Redistributions of source code must retain the above copyright
8  *       notice, this list of conditions and the following disclaimer.
9  *     * Redistributions in binary form must reproduce the above copyright
10  *       notice, this list of conditions and the following disclaimer in the
11  *       documentation and/or other materials provided with the distribution.
12  *     * Neither the name of Index Data nor the names of its contributors
13  *       may be used to endorse or promote products derived from this
14  *       software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27
28 /** \file
29     \brief Internal header for ICU utilities
30
31     These functions, while non-static, are considered unstable and internal
32     and may be renamed for each YAZ release.
33 */
34
35 #ifndef ICU_I18NL_H
36 #define ICU_I18NL_H
37
38 #include <yaz/yconfig.h>
39
40 #include <unicode/utypes.h>   /* Basic ICU data types */
41 #include <unicode/uchar.h>    /* char names           */
42
43 #include <unicode/ucol.h>
44 #include <unicode/ubrk.h>
45
46 #include <yaz/icu.h>
47
48 /* declared structs and functions */
49
/** \brief Inspects an ICU status code (implementation not in this header).
    \param status ICU error code, passed by value
    \returns an int; NOTE(review): presumably a boolean-style success flag,
    but the convention cannot be determined from this declaration - confirm
    against the implementation before relying on it.
*/
50 int icu_check_status (UErrorCode status);
51
/** \brief UTF-16 buffer: a UChar pointer plus 32-bit length and capacity.

    NOTE(review): this header does not say whether utf16_len / utf16_cap
    count UChar units or bytes; presumably UChar units - confirm against the
    implementation.
*/
52 struct icu_buf_utf16
53 {
54     UChar * utf16;       /* pointer to the UChar storage */
55     int32_t utf16_len;   /* presumably the amount of storage currently in use */
56     int32_t utf16_cap;   /* presumably the amount of storage allocated */
57 };
58
/** \brief Creates a UTF-16 buffer.
    \param capacity requested capacity (NOTE(review): unit presumably UChar
    elements - confirm)
    \returns pointer to a struct icu_buf_utf16; presumably released with
    icu_buf_utf16_destroy() - confirm ownership in the implementation
*/
59 struct icu_buf_utf16 * icu_buf_utf16_create(size_t capacity);
60
/** \brief Clears a UTF-16 buffer.
    \param buf16 buffer to operate on
    \returns a struct icu_buf_utf16 pointer; presumably \a buf16 itself - confirm
*/
61 struct icu_buf_utf16 * icu_buf_utf16_clear(struct icu_buf_utf16 * buf16);
62
/** \brief Resizes a UTF-16 buffer.
    \param buf16 buffer to operate on
    \param capacity requested new capacity (same unit caveat as for create)
    \returns a struct icu_buf_utf16 pointer; presumably \a buf16 itself - confirm
*/
63 struct icu_buf_utf16 * icu_buf_utf16_resize(struct icu_buf_utf16 * buf16,
64                                             size_t capacity);
65
/** \brief Copies one UTF-16 buffer into another.
    \param dest16 destination buffer (written)
    \param src16 source buffer (const, not modified through this pointer)
    \returns a struct icu_buf_utf16 pointer; presumably \a dest16 - confirm
*/
66 struct icu_buf_utf16 *icu_buf_utf16_copy(struct icu_buf_utf16 * dest16,
67                                          const struct icu_buf_utf16 * src16);
68
/** \brief Destroys a UTF-16 buffer.
    \param buf16 buffer to destroy; NOTE(review): whether NULL is accepted is
    not visible here
*/
69 void icu_buf_utf16_destroy(struct icu_buf_utf16 * buf16);
70
/* Forward declaration; the full definition follows immediately below. */
71 struct icu_buf_utf8;
72
/** \brief UTF-8 buffer: a byte pointer plus 32-bit length and capacity.

    Mirrors the field layout of struct icu_buf_utf16, using uint8_t storage
    instead of UChar.
*/
73 struct icu_buf_utf8
74 {
75     uint8_t * utf8;      /* pointer to the byte storage */
76     int32_t utf8_len;    /* presumably the number of bytes currently in use */
77     int32_t utf8_cap;    /* presumably the number of bytes allocated */
78 };
79
/** \brief Creates a UTF-8 buffer.
    \param capacity requested capacity (NOTE(review): presumably bytes - confirm)
    \returns pointer to a struct icu_buf_utf8; presumably released with
    icu_buf_utf8_destroy() - confirm ownership in the implementation
*/
80 struct icu_buf_utf8 * icu_buf_utf8_create(size_t capacity);
81
/** \brief Clears a UTF-8 buffer.
    \param buf8 buffer to operate on
    \returns a struct icu_buf_utf8 pointer; presumably \a buf8 itself - confirm
*/
82 struct icu_buf_utf8 * icu_buf_utf8_clear(struct icu_buf_utf8 * buf8);
83
/** \brief Resizes a UTF-8 buffer.
    \param buf8 buffer to operate on
    \param capacity requested new capacity
    \returns a struct icu_buf_utf8 pointer; presumably \a buf8 itself - confirm
*/
84 struct icu_buf_utf8 * icu_buf_utf8_resize(struct icu_buf_utf8 * buf8,
85                                           size_t capacity);
86
/** \brief Destroys a UTF-8 buffer.
    \param buf8 buffer to destroy
*/
87 void icu_buf_utf8_destroy(struct icu_buf_utf8 * buf8);
88
89
/** \brief Converts a UTF-8 C string into a UTF-16 buffer.
    \param dest16 destination UTF-16 buffer (written)
    \param src8cstr source C string (const)
    \param status ICU error code, passed by pointer
    \returns a UErrorCode; NOTE(review): presumably the same value stored
    through \a status - confirm
*/
90 UErrorCode icu_utf16_from_utf8_cstr(struct icu_buf_utf16 * dest16,
91                                     const char * src8cstr,
92                                     UErrorCode * status);
93
/** \brief Gives access to a UTF-8 buffer as a C string.
    \param src8 UTF-8 buffer (non-const, so the call may modify it)
    \returns const char pointer; NOTE(review): presumably points into
    \a src8 storage and shares its lifetime - confirm before keeping it
*/
94 const char *icu_buf_utf8_to_cstr(struct icu_buf_utf8 *src8);
95
96
/** \brief Converts a UTF-16 buffer into a UTF-8 buffer.
    \param dest8 destination UTF-8 buffer (written)
    \param src16 source UTF-16 buffer (const)
    \param status ICU error code, passed by pointer
    \returns a UErrorCode; NOTE(review): presumably the same value stored
    through \a status - confirm
*/
97 UErrorCode icu_utf16_to_utf8(struct icu_buf_utf8 *dest8,
98                              const struct icu_buf_utf16 *src16,
99                              UErrorCode * status);
100
/* Opaque in this header: only pointers to struct icu_casemap are used. */
101 struct icu_casemap;
102
/** \brief Creates a case-mapping object.
    \param action single-character action code; NOTE(review): the set of
    valid codes is not listed in this header
    \param status ICU error code, passed by pointer
    \returns pointer to a struct icu_casemap; presumably released with
    icu_casemap_destroy() - confirm
*/
103 struct icu_casemap * icu_casemap_create(char action, UErrorCode *status);
104
/** \brief Destroys a case-mapping object.
    \param casemap object to destroy
*/
105 void icu_casemap_destroy(struct icu_casemap * casemap);
106
/** \brief Case-maps \a src16 into \a dest16 using a casemap object.

    Note that \a status precedes \a locale here, unlike icu_utf16_casemap()
    where \a status comes last.

    \param casemap case-mapping object
    \param dest16 destination UTF-16 buffer
    \param src16 source UTF-16 buffer (non-const)
    \param status ICU error code, passed by pointer
    \param locale locale string
    \returns an int; NOTE(review): success/failure convention not visible here
*/
107 int icu_casemap_casemap(struct icu_casemap * casemap,
108                         struct icu_buf_utf16 * dest16,
109                         struct icu_buf_utf16 * src16,
110                         UErrorCode *status,
111                         const char *locale);
112
/** \brief Case-maps \a src16 into \a dest16 without a casemap object.
    \param dest16 destination UTF-16 buffer
    \param src16 source UTF-16 buffer (non-const)
    \param locale locale string
    \param action single-character action code (valid codes not listed here)
    \param status ICU error code, passed by pointer
    \returns an int; NOTE(review): success/failure convention not visible here
*/
113 int icu_utf16_casemap(struct icu_buf_utf16 * dest16,
114                       struct icu_buf_utf16 * src16,
115                       const char *locale, char action,
116                       UErrorCode *status);
117
/** \brief Produces a sort key in a UTF-8 buffer from UTF-16 input.
    \param coll ICU collator
    \param dest8 destination buffer for the sort key
    \param src16 source UTF-16 buffer (non-const)
    \param status ICU error code, passed by pointer; the function returns
    void, so this is the only error channel in the signature
*/
118 void icu_sortkey8_from_utf16(UCollator *coll,
119                              struct icu_buf_utf8 * dest8, 
120                              struct icu_buf_utf16 * src16,
121                              UErrorCode * status);
122
/** \brief Tokenizer state: an ICU break iterator, the UTF-16 text it works
    on, and token bookkeeping counters.

    The fields are public in this header; the index invariants are listed in
    the comment at the end of the struct.
*/
123 struct icu_tokenizer
124 {
125     char action;                    /* action code; valid values not listed in this header */
126     UBreakIterator* bi;             /* ICU break iterator handle */
127     struct icu_buf_utf16 * buf16;   /* UTF-16 buffer that bounds token_end (see invariants) */
128     int32_t token_count;            /* upper bound for token_id (see invariants) */
129     int32_t token_id;               /* kept within 0..token_count */
130     int32_t token_start;            /* kept within 0..token_end */
131     int32_t token_end;              /* kept within token_start..buf16->utf16_len */
132 /*
133   Invariants that must always hold:
134   0 <= token_start
135   <= token_end
136   <= buf16->utf16_len
137   and
138   0 <= token_id <= token_count
139 */
140 };
141
/** \brief Creates a tokenizer.
    \param locale locale string
    \param action single-character action code; NOTE(review): valid codes are
    not listed in this header
    \param status ICU error code, passed by pointer
    \returns pointer to a struct icu_tokenizer; presumably released with
    icu_tokenizer_destroy() - confirm
*/
142 struct icu_tokenizer * icu_tokenizer_create(const char *locale, char action,
143                                             UErrorCode *status);
144
/** \brief Destroys a tokenizer.
    \param tokenizer tokenizer to destroy
*/
145 void icu_tokenizer_destroy(struct icu_tokenizer * tokenizer);
146
/** \brief Attaches UTF-16 text to a tokenizer.
    \param tokenizer tokenizer to operate on
    \param src16 UTF-16 text; NOTE(review): whether it is copied or referenced
    is not visible here - confirm before freeing \a src16
    \param status ICU error code, passed by pointer
    \returns an int; NOTE(review): success/failure convention not visible here
*/
147 int icu_tokenizer_attach(struct icu_tokenizer * tokenizer, 
148                          struct icu_buf_utf16 * src16, UErrorCode *status);
149
/** \brief Fetches the next token.
    \param tokenizer tokenizer to advance
    \param tkn16 UTF-16 buffer that receives the token
    \param status ICU error code, passed by pointer
    \returns an int32_t; NOTE(review): presumably the token length with 0
    meaning no more tokens - confirm against the implementation
*/
150 int32_t icu_tokenizer_next_token(struct icu_tokenizer * tokenizer, 
151                                  struct icu_buf_utf16 * tkn16, 
152                                  UErrorCode *status);
153
/** \brief Reports the tokenizer's token count.
    \param tokenizer tokenizer to query
    \returns an int32_t; presumably the token_count field - confirm
*/
154 int32_t icu_tokenizer_token_count(struct icu_tokenizer * tokenizer);
155
/* Opaque in this header: only pointers to struct icu_transform are used. */
156 struct icu_transform;
157
/** \brief Creates a transform object.
    \param id transform identifier string
    \param action single-character action code; NOTE(review): valid codes are
    not listed in this header
    \param rules rules string; NOTE(review): whether NULL is allowed, and how
    \a id and \a rules interact, is not visible here
    \param status ICU error code, passed by pointer
    \returns pointer to a struct icu_transform; presumably released with
    icu_transform_destroy() - confirm
*/
158 struct icu_transform * icu_transform_create(const char *id, char action,
159                                             const char *rules,
160                                             UErrorCode *status);
161
/** \brief Destroys a transform object.
    \param transform object to destroy
*/
162 void icu_transform_destroy(struct icu_transform * transform);
163
/** \brief Applies a transform to \a src16, writing to \a dest16.
    \param transform transform object
    \param dest16 destination UTF-16 buffer
    \param src16 source UTF-16 buffer (const)
    \param status ICU error code, passed by pointer
    \returns an int; NOTE(review): success/failure convention not visible here
*/
164 int icu_transform_trans(struct icu_transform * transform,
165                         struct icu_buf_utf16 * dest16,
166                         const struct icu_buf_utf16 * src16,
167                         UErrorCode *status);
168
/* Forward declaration only; struct icu_chain_step is not otherwise used in
   this header. */
169 struct icu_chain_step;
170
/** \brief Reports a token number for a chain.
    \param chain chain handle (yaz_icu_chain_t is not declared in this file;
    presumably it comes from <yaz/icu.h>)
    \returns an int; NOTE(review): exact meaning not visible here
*/
171 int icu_chain_token_number(yaz_icu_chain_t chain);
172
/** \brief Creates a chain.
    \param locale locale string
    \param sort int flag; NOTE(review): presumably enables sort-key
    generation - confirm
    \param status ICU error code, passed by pointer
    \returns a yaz_icu_chain_t handle
*/
173 yaz_icu_chain_t icu_chain_create(const char * locale,
174                                  int sort,
175                                  UErrorCode * status);
176
177
/* Opaque in this header: only pointers to struct icu_iter are used. */
178 struct icu_iter;
/** \brief Creates an iterator over a chain and a C string.
    \param chain chain, passed as struct icu_chain pointer
    \param src8cstr source C string (const); NOTE(review): presumably UTF-8,
    and whether it is copied is not visible here
    \returns pointer to a struct icu_iter; presumably released with
    icu_iter_destroy() - confirm
*/
179 struct icu_iter *icu_iter_create(struct icu_chain *chain,
180                                  const char *src8cstr);
/** \brief Destroys an iterator.
    \param iter iterator to destroy
*/
181 void icu_iter_destroy(struct icu_iter *iter);
/** \brief Advances the iterator, writing output to \a result.
    \param iter iterator to advance
    \param result UTF-8 buffer that receives the output
    \returns an int; NOTE(review): presumably non-zero while results remain -
    confirm against the implementation
*/
182 int icu_iter_next(struct icu_iter *iter, struct icu_buf_utf8 *result);
/** \brief Returns a sort key string for the iterator.
    \param iter iterator to query
    \returns const char pointer; NOTE(review): lifetime presumably tied to
    \a iter - confirm
*/
183 const char *icu_iter_get_sortkey(struct icu_iter *iter);
/** \brief Returns a display string for the iterator.
    \param iter iterator to query
    \returns const char pointer; NOTE(review): lifetime presumably tied to
    \a iter - confirm
*/
184 const char *icu_iter_get_display(struct icu_iter *iter);
185
186 #endif /* ICU_I18NL_H */
187
188 /*
189  * Local variables:
190  * c-basic-offset: 4
191  * c-file-style: "Stroustrup"
192  * indent-tabs-mode: nil
193  * End:
194  * vim: shiftwidth=4 tabstop=8 expandtab
195  */
196