Provide yaz_log_{lock,unlock}
[yaz-moved-to-github.git] / src / icu_utf8.c
1 /* This file is part of the YAZ toolkit.
2  * Copyright (C) Index Data
3  * See the file LICENSE for details.
4  */
5
6 /**
7  * \file
8  * \brief UTF-8 string utilities for ICU
9  */
10
11 #if HAVE_CONFIG_H
12 #include "config.h"
13 #endif
14
15 #if YAZ_HAVE_ICU
16 #include <yaz/xmalloc.h>
17
18 #include <yaz/icu_I18N.h>
19
20 #include <yaz/log.h>
21
22 #include <string.h>
23 #include <stdlib.h>
24 #include <stdio.h>
25 #include <assert.h>
26
27 #include <unicode/ustring.h>  /* some more string fcns*/
28 #include <unicode/uchar.h>    /* char names           */
29
30 struct icu_buf_utf8 *icu_buf_utf8_create(size_t capacity)
31 {
32     struct icu_buf_utf8 *buf8
33         = (struct icu_buf_utf8 *) xmalloc(sizeof(struct icu_buf_utf8));
34
35     buf8->utf8_len = 0;
36     buf8->utf8_cap = capacity;
37     if (capacity > 0)
38     {
39         buf8->utf8 = (uint8_t *) xmalloc(sizeof(uint8_t) * capacity);
40         buf8->utf8[0] = (uint8_t) 0;
41     }
42     else
43         buf8->utf8 = 0;
44     return buf8;
45 }
46
47 struct icu_buf_utf8 *icu_buf_utf8_clear(struct icu_buf_utf8 *buf8)
48 {
49     assert(buf8);
50     if (buf8->utf8)
51         buf8->utf8[0] = (uint8_t) 0;
52     buf8->utf8_len = 0;
53     return buf8;
54 }
55
56 struct icu_buf_utf8 *icu_buf_utf8_resize(struct icu_buf_utf8 *buf8,
57                                          size_t capacity)
58 {
59     assert(buf8);
60     if (capacity > 0)
61     {
62         if (0 == buf8->utf8)
63             buf8->utf8 = (uint8_t *) xmalloc(sizeof(uint8_t) * capacity);
64         else
65             buf8->utf8
66                 = (uint8_t *) xrealloc(buf8->utf8, sizeof(uint8_t) * capacity);
67
68         buf8->utf8_cap = capacity;
69     }
70     return buf8;
71 }
72
73 const char *icu_buf_utf8_to_cstr(struct icu_buf_utf8 *src8)
74 {
75     assert(src8);
76     if (src8->utf8_len == 0)
77         return "";
78
79     if (src8->utf8_len == src8->utf8_cap)
80         src8 = icu_buf_utf8_resize(src8, src8->utf8_len * 2 + 1);
81
82     src8->utf8[src8->utf8_len] = '\0';
83
84     return (const char *) src8->utf8;
85 }
86
87 void icu_buf_utf8_destroy(struct icu_buf_utf8 *buf8)
88 {
89     if (buf8)
90         xfree(buf8->utf8);
91     xfree(buf8);
92 }
93
94 UErrorCode icu_utf16_from_utf8_cstr(struct icu_buf_utf16 *dest16,
95                                     const char *src8cstr,
96                                     UErrorCode *status)
97 {
98     size_t src8cstr_len = 0;
99     int32_t utf16_len = 0;
100
101     *status = U_ZERO_ERROR;
102     src8cstr_len = strlen(src8cstr);
103
104     u_strFromUTF8(dest16->utf16, dest16->utf16_cap,
105                   &utf16_len,
106                   src8cstr, src8cstr_len, status);
107
108     /* check for buffer overflow, resize and retry */
109     if (*status == U_BUFFER_OVERFLOW_ERROR)
110     {
111         icu_buf_utf16_resize(dest16, utf16_len * 2);
112         *status = U_ZERO_ERROR;
113         u_strFromUTF8(dest16->utf16, dest16->utf16_cap,
114                       &utf16_len,
115                       src8cstr, src8cstr_len, status);
116     }
117
118     if (U_SUCCESS(*status) && utf16_len <= dest16->utf16_cap)
119         dest16->utf16_len = utf16_len;
120     else
121         icu_buf_utf16_clear(dest16);
122
123     return *status;
124 }
125
126 UErrorCode icu_utf16_to_utf8(struct icu_buf_utf8 *dest8,
127                              const struct icu_buf_utf16 *src16,
128                              UErrorCode *status)
129 {
130     int32_t utf8_len = 0;
131
132     u_strToUTF8((char *) dest8->utf8, dest8->utf8_cap,
133                 &utf8_len,
134                 src16->utf16, src16->utf16_len, status);
135
136     /* check for buffer overflow, resize and retry */
137     if (*status == U_BUFFER_OVERFLOW_ERROR)
138     {
139         icu_buf_utf8_resize(dest8, utf8_len * 2);
140         *status = U_ZERO_ERROR;
141         u_strToUTF8((char *) dest8->utf8, dest8->utf8_cap,
142                     &utf8_len,
143                     src16->utf16, src16->utf16_len, status);
144     }
145
146     if (U_SUCCESS(*status) && utf8_len <= dest8->utf8_cap)
147         dest8->utf8_len = utf8_len;
148     else
149         icu_buf_utf8_clear(dest8);
150
151     return *status;
152 }
153
154 #endif /* YAZ_HAVE_ICU */
155
156 /*
157  * Local variables:
158  * c-basic-offset: 4
159  * c-file-style: "Stroustrup"
160  * indent-tabs-mode: nil
161  * End:
162  * vim: shiftwidth=4 tabstop=8 expandtab
163  */
164