Added icu_buf_utf8_copy() and icu_buf_utf16_copy() functions.
[pazpar2-moved-to-github.git] / src / icu_I18N.h
1 /* $Id: icu_I18N.h,v 1.11 2007-05-11 10:38:42 marc Exp $
2    Copyright (c) 2006-2007, Index Data.
3
4    This file is part of Pazpar2.
5
6    Pazpar2 is free software; you can redistribute it and/or modify it under
7    the terms of the GNU General Public License as published by the Free
8    Software Foundation; either version 2, or (at your option) any later
9    version.
10
11    Pazpar2 is distributed in the hope that it will be useful, but WITHOUT ANY
12    WARRANTY; without even the implied warranty of MERCHANTABILITY or
13    FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14    for more details.
15
16    You should have received a copy of the GNU General Public License
17    along with Pazpar2; see the file LICENSE.  If not, write to the
18    Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19    02111-1307, USA.
20 */
21
22 #ifndef ICU_I18NL_H
23 #define ICU_I18NL_H
24
25 #ifdef HAVE_ICU
26
27 #include <yaz/nmem.h>
28
29
30 #include <unicode/utypes.h>   /* Basic ICU data types */
31 #include <unicode/uchar.h>    /* char names           */
32
33 //#include <unicode/ustdio.h>
34 #include <unicode/ucol.h> 
35 //#include <unicode/ucnv.h>     /* C   Converter API    */
36 //#include <unicode/ustring.h>  /* some more string fcns*/
37 //#include <unicode/uloc.h>
38 #include <unicode/ubrk.h>
39 //#include <unicode/unistr.h>
40 #include <unicode/utrans.h>
41
42
43
44 // forward declarations
45 //struct UBreakIterator;
46
47
48
49
50 // declared structs and functions
51
52
// Examines an ICU status code passed by value.
// NOTE(review): the meaning of the int result is not visible in this header;
// confirm against the implementation whether non-zero means success or failure.
53 int icu_check_status (UErrorCode status);
54
// Buffer of UTF-16 code units (ICU UChar) together with a length and a
// capacity field.
// NOTE(review): presumably utf16_len is the number of UChars in use and
// utf16_cap the number allocated (not bytes) - confirm against the
// implementation. Both are signed 32-bit while the create/resize functions
// take size_t.
55 struct icu_buf_utf16
56 {
57   UChar * utf16;        // storage for the UTF-16 code units
58   int32_t utf16_len;    // current length (see note above about units)
59   int32_t utf16_cap;    // current capacity (see note above about units)
60 };
61
// Lifecycle functions for struct icu_buf_utf16.
//
// icu_buf_utf16_create:  takes a requested capacity and returns a buffer
//                        pointer.
// icu_buf_utf16_resize:  takes an existing buffer and a new capacity and
//                        returns a buffer pointer.
// icu_buf_utf16_copy:    takes a destination and a source buffer and returns
//                        a buffer pointer.
// icu_buf_utf16_destroy: takes a buffer and returns nothing.
//
// NOTE(review): this header does not show whether the returned pointer is
// the same object as the first argument, what happens on allocation failure,
// or whether NULL arguments are accepted - confirm in the implementation.
62 struct icu_buf_utf16 * icu_buf_utf16_create(size_t capacity);
63 struct icu_buf_utf16 * icu_buf_utf16_resize(struct icu_buf_utf16 * buf16,
64                                             size_t capacity);
65 struct icu_buf_utf16 * icu_buf_utf16_copy(struct icu_buf_utf16 * dest16,
66                                           struct icu_buf_utf16 * src16);
67 void icu_buf_utf16_destroy(struct icu_buf_utf16 * buf16);
68
69
70
// Buffer of bytes (uint8_t) together with a length and a capacity field;
// the 8-bit counterpart of struct icu_buf_utf16.
// NOTE(review): presumably utf8_len is the number of bytes in use and
// utf8_cap the number allocated - confirm against the implementation,
// including whether the content is kept NUL-terminated.
71 struct icu_buf_utf8
72 {
73   uint8_t * utf8;       // storage for the bytes
74   int32_t utf8_len;     // current length (see note above)
75   int32_t utf8_cap;     // current capacity (see note above)
76 };
77
// Lifecycle functions for struct icu_buf_utf8.
//
// icu_buf_utf8_create:  takes a requested capacity and returns a buffer
//                       pointer.
// icu_buf_utf8_resize:  takes an existing buffer and a new capacity and
//                       returns a buffer pointer.
// icu_buf_utf8_destroy: takes a buffer and returns nothing.
//
// NOTE(review): failure behaviour and NULL handling are not visible in this
// header - confirm in the implementation. Unlike the UTF-16 buffer, no copy
// function is declared here; check whether one should be exported.
78 struct icu_buf_utf8 * icu_buf_utf8_create(size_t capacity);
79 struct icu_buf_utf8 * icu_buf_utf8_resize(struct icu_buf_utf8 * buf8,
80                                           size_t capacity);
81 void icu_buf_utf8_destroy(struct icu_buf_utf8 * buf8);
82
83
// Conversion taking a UTF-16 destination buffer, a UTF-8 source buffer and
// an in/out ICU status pointer; also returns a UErrorCode by value.
// NOTE(review): presumably fills dest16 from src8 and the return value
// mirrors *status - confirm, and confirm whether dest16 is grown on demand.
84 UErrorCode icu_utf16_from_utf8(struct icu_buf_utf16 * dest16,
85                                struct icu_buf_utf8 * src8,
86                                UErrorCode * status);
87
// Variant of icu_utf16_from_utf8 whose source is a plain const char pointer
// instead of a struct icu_buf_utf8.
// NOTE(review): presumably src8cstr must be a NUL-terminated UTF-8 string
// (no length parameter is passed) - confirm in the implementation.
88 UErrorCode icu_utf16_from_utf8_cstr(struct icu_buf_utf16 * dest16,
89                                     const char * src8cstr,
90                                     UErrorCode * status);
91
92
// Conversion taking a UTF-8 destination buffer, a UTF-16 source buffer and
// an in/out ICU status pointer; also returns a UErrorCode by value.
// NOTE(review): presumably the inverse of icu_utf16_from_utf8 - confirm,
// and confirm whether dest8 is grown on demand.
93 UErrorCode icu_utf16_to_utf8(struct icu_buf_utf8 * dest8,
94                              struct icu_buf_utf16 * src16,
95                              UErrorCode * status);
96
// Case-mapping entry point: takes a destination and a source UTF-16 buffer,
// a locale name, a single-character action selector and an ICU status
// pointer; returns int.
// NOTE(review): the accepted values of 'action' and the meaning of the int
// result are defined by the implementation and are not visible here.
97 int icu_utf16_casemap(struct icu_buf_utf16 * dest16,
98                       struct icu_buf_utf16 * src16,
99                       const char *locale, char action,
100                       UErrorCode *status);
101
// Takes an ICU collator, an 8-bit destination buffer, a UTF-16 source buffer
// and an ICU status pointer; returns a UErrorCode by value.
// NOTE(review): judging by the name this stores a collation sort key for
// src16 in dest8 - confirm; sort keys are binary data, so dest8 should not
// be assumed to hold valid UTF-8 text afterwards.
102 UErrorCode icu_sortkey8_from_utf16(UCollator *coll,
103                                    struct icu_buf_utf8 * dest8, 
104                                    struct icu_buf_utf16 * src16,
105                                    UErrorCode * status);
106
// State of a tokenizer built on an ICU break iterator: a locale name, an
// action selector, the break iterator, a UTF-16 buffer pointer, and
// counters/offsets describing the current token.
// NOTE(review): icu_tokenizer_create() takes an unbounded const char *locale
// while this field holds only 16 bytes - verify the copy is bounded and
// NUL-terminated. Ownership of buf16 is not visible in this header.
107 struct icu_tokenizer
108 {
109   char locale[16];              // locale name, fixed 16-byte storage
110   char action;                  // selector; values defined by the implementation
111   UBreakIterator* bi;           // ICU break iterator handle
112   struct icu_buf_utf16 * buf16; // UTF-16 text the offsets below refer to
113   int32_t token_count;          // upper bound for token_id (see invariant)
114   int32_t token_id;             // id of the current token
115   int32_t token_start;          // start offset of the current token in buf16
116   int32_t token_end;            // end offset of the current token in buf16
117   // invariant that must always hold:
118   // 0 <= token_start 
119   //   <= token_end 
120   //   <= buf16->utf16_len
121   // and the second invariant:
122   // 0 <= token_id <= token_count
123 };
124
// Takes a locale name, an action selector and an ICU status pointer and
// returns a tokenizer pointer.
// NOTE(review): accepted 'action' values and the result on failure
// (presumably NULL with *status set) are not visible here - confirm.
125 struct icu_tokenizer * icu_tokenizer_create(const char *locale, char action,
126                                             UErrorCode *status);
127
// Takes a tokenizer pointer and returns nothing.
// NOTE(review): presumably releases everything the tokenizer owns; whether
// NULL is accepted is not visible in this header.
128 void icu_tokenizer_destroy(struct icu_tokenizer * tokenizer);
129
// Takes a tokenizer, a UTF-16 source buffer and an ICU status pointer;
// returns int.
// NOTE(review): presumably sets src16 as the text to be tokenized; whether
// the text is copied or merely referenced (and so must outlive the
// tokenizer) and the meaning of the int result need confirming.
130 int icu_tokenizer_attach(struct icu_tokenizer * tokenizer, 
131                          struct icu_buf_utf16 * src16, UErrorCode *status);
132
// Takes a tokenizer, a UTF-16 output buffer and an ICU status pointer;
// returns int32_t.
// NOTE(review): presumably advances to the next token and stores it in
// tkn16; the meaning of the return value (length, id, or 0 at end of input)
// is not visible in this header - confirm.
133 int32_t icu_tokenizer_next_token(struct icu_tokenizer * tokenizer, 
134                                  struct icu_buf_utf16 * tkn16, 
135                                  UErrorCode *status);
136
// Read accessors on a tokenizer, each returning int32_t.
// NOTE(review): by their names these correspond to the token_id,
// token_start, token_end and token_count fields of struct icu_tokenizer,
// with token_length presumably being token_end - token_start; confirm, and
// confirm the result for a NULL tokenizer.
137 int32_t icu_tokenizer_token_id(struct icu_tokenizer * tokenizer);
138 int32_t icu_tokenizer_token_start(struct icu_tokenizer * tokenizer);
139 int32_t icu_tokenizer_token_end(struct icu_tokenizer * tokenizer);
140 int32_t icu_tokenizer_token_length(struct icu_tokenizer * tokenizer);
141 int32_t icu_tokenizer_token_count(struct icu_tokenizer * tokenizer);
142
143
144
// State of a normalizer built on an ICU transliterator: an action selector,
// the rules as a UTF-16 buffer, parse-error storage and the transliterator
// handle.
// NOTE(review): parse_error is an array of 256 UParseError structs. ICU's
// transliterator open call takes a pointer to a single UParseError, so
// check whether one element would be enough here.
145 struct icu_normalizer
146 {
147   char action;                    // selector; values defined by the implementation
148   struct icu_buf_utf16 * rules16; // rules in UTF-16 form
149   UParseError parse_error[256];   // parse-error storage (see note above)
150   UTransliterator * trans;        // ICU transliterator handle
151 };
152
// Takes a rules string, an action selector and an ICU status pointer and
// returns a normalizer pointer.
// NOTE(review): the expected syntax/encoding of 'rules', the accepted
// 'action' values and the result on failure are not visible here - confirm.
153 struct icu_normalizer * icu_normalizer_create(const char *rules, char action,
154                                               UErrorCode *status);
155
156
// Takes a normalizer pointer and returns nothing.
// NOTE(review): presumably releases everything the normalizer owns; whether
// NULL is accepted is not visible in this header.
157 void icu_normalizer_destroy(struct icu_normalizer * normalizer);
158
// Takes a normalizer, a destination and a source UTF-16 buffer and an ICU
// status pointer; returns int.
// NOTE(review): presumably writes the normalized form of src16 to dest16;
// whether dest16 and src16 may be the same buffer and the meaning of the
// int result need confirming against the implementation.
159 int icu_normalizer_normalize(struct icu_normalizer * normalizer,
160                              struct icu_buf_utf16 * dest16,
161                              struct icu_buf_utf16 * src16,
162                              UErrorCode *status);
163
164
165
166
167 #endif // HAVE_ICU
168 #endif // ICU_I18NL_H