c4685d556b3733f4f8789b1da522d349428a7cf7
[yaz-moved-to-github.git] / src / icu_utf8.c
1 /* This file is part of the YAZ toolkit.
2  * Copyright (C) 1995-2012 Index Data
3  * See the file LICENSE for details.
4  */
5
6 /**
7  * \file
8  * \brief UTF-8 string utilities for ICU
9  */
10
11 #if HAVE_CONFIG_H
12 #include "config.h"
13 #endif
14
15 #if YAZ_HAVE_ICU
16 #include <yaz/xmalloc.h>
17
18 #include <yaz/icu_I18N.h>
19
20 #include <yaz/log.h>
21
22 #include <string.h>
23 #include <stdlib.h>
24 #include <stdio.h>
25
26 #include <unicode/ustring.h>  /* some more string fcns*/
27 #include <unicode/uchar.h>    /* char names           */
28
29 struct icu_buf_utf8 *icu_buf_utf8_create(size_t capacity)
30 {
31     struct icu_buf_utf8 * buf8 
32         = (struct icu_buf_utf8 *) xmalloc(sizeof(struct icu_buf_utf8));
33
34     buf8->utf8 = 0;
35     buf8->utf8_len = 0;
36     buf8->utf8_cap = 0;
37
38     if (capacity > 0)
39     {
40         buf8->utf8 = (uint8_t *) xmalloc(sizeof(uint8_t) * capacity);
41         buf8->utf8[0] = (uint8_t) 0;
42         buf8->utf8_cap = capacity;
43     }
44     return buf8;
45 }
46
47 struct icu_buf_utf8 * icu_buf_utf8_clear(struct icu_buf_utf8 * buf8)
48 {
49     if (buf8)
50     {
51         if (buf8->utf8)
52             buf8->utf8[0] = (uint8_t) 0;
53         buf8->utf8_len = 0;
54     }
55     return buf8;
56 }
57
58 struct icu_buf_utf8 * icu_buf_utf8_resize(struct icu_buf_utf8 * buf8,
59                                           size_t capacity)
60 {
61     if (!buf8)
62         return 0;
63
64     if (capacity > 0)
65     {
66         if (0 == buf8->utf8)
67             buf8->utf8 = (uint8_t *) xmalloc(sizeof(uint8_t) * capacity);
68         else
69             buf8->utf8 
70                 = (uint8_t *) xrealloc(buf8->utf8, sizeof(uint8_t) * capacity);
71         
72         buf8->utf8_cap = capacity;
73     } 
74     else
75     { 
76         xfree(buf8->utf8);
77         buf8->utf8 = 0;
78         buf8->utf8_len = 0;
79         buf8->utf8_cap = 0;
80     }
81     
82     return buf8;
83 }
84
85 const char *icu_buf_utf8_to_cstr(struct icu_buf_utf8 *src8)
86 {
87     if (!src8 || src8->utf8_len == 0)
88         return "";
89
90     if (src8->utf8_len == src8->utf8_cap)
91         src8 = icu_buf_utf8_resize(src8, src8->utf8_len * 2 + 1);
92
93     src8->utf8[src8->utf8_len] = '\0';
94
95     return (const char *) src8->utf8;
96 }
97
98 void icu_buf_utf8_destroy(struct icu_buf_utf8 * buf8)
99 {
100     if (buf8)
101         xfree(buf8->utf8);
102     xfree(buf8);
103 }
104
105 UErrorCode icu_utf16_from_utf8_cstr(struct icu_buf_utf16 * dest16,
106                                     const char * src8cstr,
107                                     UErrorCode * status)
108 {
109     size_t src8cstr_len = 0;
110     int32_t utf16_len = 0;
111
112     *status = U_ZERO_ERROR;
113     src8cstr_len = strlen(src8cstr);
114   
115     u_strFromUTF8(dest16->utf16, dest16->utf16_cap,
116                   &utf16_len,
117                   src8cstr, src8cstr_len, status);
118   
119     /* check for buffer overflow, resize and retry */
120     if (*status == U_BUFFER_OVERFLOW_ERROR)
121     {
122         icu_buf_utf16_resize(dest16, utf16_len * 2);
123         *status = U_ZERO_ERROR;
124         u_strFromUTF8(dest16->utf16, dest16->utf16_cap,
125                       &utf16_len,
126                       src8cstr, src8cstr_len, status);
127     }
128
129     if (U_SUCCESS(*status) && utf16_len <= dest16->utf16_cap)
130         dest16->utf16_len = utf16_len;
131     else 
132         icu_buf_utf16_clear(dest16);
133   
134     return *status;
135 }
136
137 UErrorCode icu_utf16_to_utf8(struct icu_buf_utf8 *dest8,
138                              const struct icu_buf_utf16 *src16,
139                              UErrorCode * status)
140 {
141     int32_t utf8_len = 0;
142   
143     u_strToUTF8((char *) dest8->utf8, dest8->utf8_cap,
144                 &utf8_len,
145                 src16->utf16, src16->utf16_len, status);
146   
147     /* check for buffer overflow, resize and retry */
148     if (*status == U_BUFFER_OVERFLOW_ERROR)
149     {
150         icu_buf_utf8_resize(dest8, utf8_len * 2);
151         *status = U_ZERO_ERROR;
152         u_strToUTF8((char *) dest8->utf8, dest8->utf8_cap,
153                     &utf8_len,
154                     src16->utf16, src16->utf16_len, status);
155     }
156
157     if (U_SUCCESS(*status) && utf8_len <= dest8->utf8_cap)
158         dest8->utf8_len = utf8_len;
159     else 
160         icu_buf_utf8_clear(dest8);
161   
162     return *status;
163 }
164
165 #endif /* YAZ_HAVE_ICU */
166
167 /*
168  * Local variables:
169  * c-basic-offset: 4
170  * c-file-style: "Stroustrup"
171  * indent-tabs-mode: nil
172  * End:
173  * vim: shiftwidth=4 tabstop=8 expandtab
174  */
175