GFS: fix sizeof: saved by the bell
[yaz-moved-to-github.git] / src / icu_utf8.c
1 /* This file is part of the YAZ toolkit.
2  * Copyright (C) 1995-2010 Index Data
3  * See the file LICENSE for details.
4  */
5
6 /**
7  * \file
8  * \brief UTF-8 string utilities for ICU
9  */
10
11 #if HAVE_CONFIG_H
12 #include "config.h"
13 #endif
14
15 #if YAZ_HAVE_ICU
16 #include <yaz/xmalloc.h>
17
18 #include <yaz/icu_I18N.h>
19
20 #include <yaz/log.h>
21
22 #include <string.h>
23 #include <stdlib.h>
24 #include <stdio.h>
25
26 #include <unicode/ustring.h>  /* some more string fcns*/
27 #include <unicode/uchar.h>    /* char names           */
28
29 struct icu_buf_utf8 *icu_buf_utf8_create(size_t capacity)
30 {
31     struct icu_buf_utf8 * buf8 
32         = (struct icu_buf_utf8 *) xmalloc(sizeof(struct icu_buf_utf8));
33
34     buf8->utf8 = 0;
35     buf8->utf8_len = 0;
36     buf8->utf8_cap = 0;
37
38     if (capacity > 0)
39     {
40         buf8->utf8 = (uint8_t *) xmalloc(sizeof(uint8_t) * capacity);
41         buf8->utf8[0] = (uint8_t) 0;
42         buf8->utf8_cap = capacity;
43     }
44     return buf8;
45 }
46
47 struct icu_buf_utf8 * icu_buf_utf8_clear(struct icu_buf_utf8 * buf8)
48 {
49     if (buf8)
50     {
51         if (buf8->utf8)
52             buf8->utf8[0] = (uint8_t) 0;
53         buf8->utf8_len = 0;
54     }
55     return buf8;
56 }
57
58 struct icu_buf_utf8 * icu_buf_utf8_resize(struct icu_buf_utf8 * buf8,
59                                           size_t capacity)
60 {
61     if (!buf8)
62         return 0;
63
64     if (capacity >  0)
65     {
66         if (0 == buf8->utf8)
67             buf8->utf8 = (uint8_t *) xmalloc(sizeof(uint8_t) * capacity);
68         else
69             buf8->utf8 
70                 = (uint8_t *) xrealloc(buf8->utf8, sizeof(uint8_t) * capacity);
71         
72         buf8->utf8_cap = capacity;
73     } 
74     else { 
75         xfree(buf8->utf8);
76         buf8->utf8 = 0;
77         buf8->utf8_len = 0;
78         buf8->utf8_cap = 0;
79     }
80     
81     return buf8;
82 }
83
84 const char *icu_buf_utf8_to_cstr(struct icu_buf_utf8 *src8)
85 {
86     if (!src8 || src8->utf8_len == 0)
87         return "";
88
89     if (src8->utf8_len == src8->utf8_cap)
90         src8 = icu_buf_utf8_resize(src8, src8->utf8_len * 2 + 1);
91
92     src8->utf8[src8->utf8_len] = '\0';
93
94     return (const char *) src8->utf8;
95 }
96
97 void icu_buf_utf8_destroy(struct icu_buf_utf8 * buf8)
98 {
99     if (buf8)
100         xfree(buf8->utf8);
101     xfree(buf8);
102 }
103
104 UErrorCode icu_utf16_from_utf8_cstr(struct icu_buf_utf16 * dest16,
105                                     const char * src8cstr,
106                                     UErrorCode * status)
107 {
108     size_t src8cstr_len = 0;
109     int32_t utf16_len = 0;
110
111     *status = U_ZERO_ERROR;
112     src8cstr_len = strlen(src8cstr);
113   
114     u_strFromUTF8(dest16->utf16, dest16->utf16_cap,
115                   &utf16_len,
116                   src8cstr, src8cstr_len, status);
117   
118     /* check for buffer overflow, resize and retry */
119     if (*status == U_BUFFER_OVERFLOW_ERROR)
120     {
121         icu_buf_utf16_resize(dest16, utf16_len * 2);
122         *status = U_ZERO_ERROR;
123         u_strFromUTF8(dest16->utf16, dest16->utf16_cap,
124                       &utf16_len,
125                       src8cstr, src8cstr_len, status);
126     }
127
128     if (U_SUCCESS(*status)  
129         && utf16_len <= dest16->utf16_cap)
130         dest16->utf16_len = utf16_len;
131     else 
132         icu_buf_utf16_clear(dest16);
133   
134     return *status;
135 }
136
137 UErrorCode icu_utf16_to_utf8(struct icu_buf_utf8 *dest8,
138                              const struct icu_buf_utf16 *src16,
139                              UErrorCode * status)
140 {
141     int32_t utf8_len = 0;
142   
143     u_strToUTF8((char *) dest8->utf8, dest8->utf8_cap,
144                 &utf8_len,
145                 src16->utf16, src16->utf16_len, status);
146   
147     /* check for buffer overflow, resize and retry */
148     if (*status == U_BUFFER_OVERFLOW_ERROR)
149     {
150         icu_buf_utf8_resize(dest8, utf8_len * 2);
151         *status = U_ZERO_ERROR;
152         u_strToUTF8((char *) dest8->utf8, dest8->utf8_cap,
153                     &utf8_len,
154                     src16->utf16, src16->utf16_len, status);
155     }
156
157     if (U_SUCCESS(*status)  
158         && utf8_len <= dest8->utf8_cap)
159         dest8->utf8_len = utf8_len;
160     else 
161         icu_buf_utf8_clear(dest8);
162   
163     return *status;
164 }
165
166 #endif /* YAZ_HAVE_ICU */
167
168 /*
169  * Local variables:
170  * c-basic-offset: 4
171  * c-file-style: "Stroustrup"
172  * indent-tabs-mode: nil
173  * End:
174  * vim: shiftwidth=4 tabstop=8 expandtab
175  */
176