Optimize speed of icu_iter_get_org_info
[yaz-moved-to-github.git] / src / siconv.c
1 /* This file is part of the YAZ toolkit.
2  * Copyright (C) Index Data
3  * See the file LICENSE for details.
4  */
5 /**
6  * \file
7  * \brief Implements simple ICONV
8  *
9  * This implements an interface similar to that of iconv and
10  * is used by YAZ to interface with iconv (if present).
11  * For systems where iconv is not present, this layer
12  * provides a few important conversions: UTF-8, MARC-8, Latin-1.
13  *
14  */
15
16 #if HAVE_CONFIG_H
17 #include <config.h>
18 #endif
19
20 #include <assert.h>
21 #include <errno.h>
22 #include <string.h>
23
24 #if HAVE_ICONV_H
25 #include <iconv.h>
26 #endif
27
28 #include <yaz/xmalloc.h>
29 #include <yaz/errno.h>
30 #include "iconv-p.h"
31
32 struct yaz_iconv_struct {
33     int my_errno;
34     int init_flag;
35     size_t no_read_x;
36     unsigned long unget_x;
37 #if HAVE_ICONV_H
38     iconv_t iconv_cd;
39 #endif
40     struct yaz_iconv_encoder_s encoder;
41     struct yaz_iconv_decoder_s decoder;
42 };
43
44
45 int yaz_iconv_isbuiltin(yaz_iconv_t cd)
46 {
47     return cd->decoder.read_handle && cd->encoder.write_handle;
48 }
49
50
51 static int prepare_encoders(yaz_iconv_t cd, const char *tocode)
52 {
53     if (yaz_marc8_encoder(tocode, &cd->encoder))
54         return 1;
55     if (yaz_utf8_encoder(tocode, &cd->encoder))
56         return 1;
57     if (yaz_ucs4_encoder(tocode, &cd->encoder))
58         return 1;
59     if (yaz_iso_8859_1_encoder(tocode, &cd->encoder))
60         return 1;
61     if (yaz_iso_5428_encoder(tocode, &cd->encoder))
62         return 1;
63     if (yaz_advancegreek_encoder(tocode, &cd->encoder))
64         return 1;
65     if (yaz_wchar_encoder(tocode, &cd->encoder))
66         return 1;
67     if (yaz_danmarc_encoder(tocode, &cd->encoder))
68         return 1;
69     return 0;
70 }
71
72 static int prepare_decoders(yaz_iconv_t cd, const char *tocode)
73 {
74     if (yaz_marc8_decoder(tocode, &cd->decoder))
75         return 1;
76     if (yaz_iso5426_decoder(tocode, &cd->decoder))
77         return 1;
78     if (yaz_utf8_decoder(tocode, &cd->decoder))
79         return 1;
80     if (yaz_ucs4_decoder(tocode, &cd->decoder))
81         return 1;
82     if (yaz_iso_8859_1_decoder(tocode, &cd->decoder))
83         return 1;
84     if (yaz_iso_5428_decoder(tocode, &cd->decoder))
85         return 1;
86     if (yaz_advancegreek_decoder(tocode, &cd->decoder))
87         return 1;
88     if (yaz_wchar_decoder(tocode, &cd->decoder))
89         return 1;
90     if (yaz_danmarc_decoder(tocode, &cd->decoder))
91         return 1;
92     return 0;
93 }
94
95 yaz_iconv_t yaz_iconv_open(const char *tocode, const char *fromcode)
96 {
97     yaz_iconv_t cd = (yaz_iconv_t) xmalloc(sizeof(*cd));
98
99     cd->encoder.data = 0;
100     cd->encoder.write_handle = 0;
101     cd->encoder.flush_handle = 0;
102     cd->encoder.init_handle = 0;
103     cd->encoder.destroy_handle = 0;
104
105     cd->decoder.data = 0;
106     cd->decoder.read_handle = 0;
107     cd->decoder.init_handle = 0;
108     cd->decoder.destroy_handle = 0;
109
110     cd->my_errno = YAZ_ICONV_UNKNOWN;
111
112     /* a useful hack: if fromcode has leading @,
113        the library not use YAZ's own conversions .. */
114     if (fromcode[0] == '@')
115         fromcode++;
116     else
117     {
118         prepare_encoders(cd, tocode);
119         prepare_decoders(cd, fromcode);
120     }
121     if (cd->decoder.read_handle && cd->encoder.write_handle)
122     {
123 #if HAVE_ICONV_H
124         cd->iconv_cd = (iconv_t) (-1);
125 #endif
126         ;
127     }
128     else
129     {
130 #if HAVE_ICONV_H
131         cd->iconv_cd = iconv_open(tocode, fromcode);
132         if (cd->iconv_cd == (iconv_t) (-1))
133         {
134             yaz_iconv_close(cd);
135             return 0;
136         }
137 #else
138         yaz_iconv_close(cd);
139         return 0;
140 #endif
141     }
142     cd->init_flag = 1;
143     return cd;
144 }
145
146 size_t yaz_iconv(yaz_iconv_t cd, char **inbuf, size_t *inbytesleft,
147                  char **outbuf, size_t *outbytesleft)
148 {
149     char *inbuf0 = 0;
150     size_t r = 0;
151
152 #if HAVE_ICONV_H
153     if (cd->iconv_cd != (iconv_t) (-1))
154     {
155         size_t r =
156             iconv(cd->iconv_cd, inbuf, inbytesleft, outbuf, outbytesleft);
157         if (r == (size_t)(-1))
158         {
159             switch (yaz_errno())
160             {
161             case E2BIG:
162                 cd->my_errno = YAZ_ICONV_E2BIG;
163                 break;
164             case EINVAL:
165                 cd->my_errno = YAZ_ICONV_EINVAL;
166                 break;
167             case EILSEQ:
168                 cd->my_errno = YAZ_ICONV_EILSEQ;
169                 break;
170             default:
171                 cd->my_errno = YAZ_ICONV_UNKNOWN;
172             }
173         }
174         return r;
175     }
176 #endif
177
178     if (inbuf)
179         inbuf0 = *inbuf;
180
181     if (cd->init_flag)
182     {
183         cd->my_errno = YAZ_ICONV_UNKNOWN;
184
185         if (cd->encoder.init_handle)
186             (*cd->encoder.init_handle)(&cd->encoder);
187
188         cd->unget_x = 0;
189         cd->no_read_x = 0;
190
191         if (cd->decoder.init_handle)
192         {
193             size_t no_read = 0;
194             size_t r = (cd->decoder.init_handle)(
195                 cd, &cd->decoder,
196                 inbuf ? (unsigned char *) *inbuf : 0,
197                 inbytesleft ? *inbytesleft : 0,
198                 &no_read);
199             if (r)
200             {
201                 if (cd->my_errno == YAZ_ICONV_EINVAL)
202                     return r;
203                 cd->init_flag = 0;
204                 return r;
205             }
206             if (inbytesleft)
207                 *inbytesleft -= no_read;
208             if (inbuf)
209                 *inbuf += no_read;
210         }
211     }
212     cd->init_flag = 0;
213
214     if (!inbuf || !*inbuf)
215     {
216         if (outbuf && *outbuf)
217         {
218             if (cd->unget_x)
219                 r = (*cd->encoder.write_handle)(cd, &cd->encoder,
220                                                 cd->unget_x, outbuf, outbytesleft);
221             if (cd->encoder.flush_handle)
222                 r = (*cd->encoder.flush_handle)(cd, &cd->encoder,
223                                                 outbuf, outbytesleft);
224         }
225         if (r == 0)
226             cd->init_flag = 1;
227         cd->unget_x = 0;
228         return r;
229     }
230     while (1)
231     {
232         unsigned long x;
233         size_t no_read;
234
235         if (cd->unget_x)
236         {
237             x = cd->unget_x;
238             no_read = cd->no_read_x;
239         }
240         else
241         {
242             if (*inbytesleft == 0)
243             {
244                 r = *inbuf - inbuf0;
245                 break;
246             }
247             x = (*cd->decoder.read_handle)(
248                 cd, &cd->decoder,
249                 (unsigned char *) *inbuf, *inbytesleft, &no_read);
250             if (no_read == 0)
251             {
252                 r = (size_t)(-1);
253                 break;
254             }
255         }
256         if (x)
257         {
258             r = (*cd->encoder.write_handle)(cd, &cd->encoder,
259                                             x, outbuf, outbytesleft);
260             if (r)
261             {
262                 /* unable to write it. save it because read_handle cannot
263                    rewind .. */
264                 if (cd->my_errno == YAZ_ICONV_E2BIG)
265                 {
266                     cd->unget_x = x;
267                     cd->no_read_x = no_read;
268                     break;
269                 }
270             }
271             cd->unget_x = 0;
272         }
273         *inbytesleft -= no_read;
274         (*inbuf) += no_read;
275     }
276     return r;
277 }
278
279 int yaz_iconv_error(yaz_iconv_t cd)
280 {
281     return cd->my_errno;
282 }
283
284 int yaz_iconv_close(yaz_iconv_t cd)
285 {
286 #if HAVE_ICONV_H
287     if (cd->iconv_cd != (iconv_t) (-1))
288         iconv_close(cd->iconv_cd);
289 #endif
290     if (cd->encoder.destroy_handle)
291         (*cd->encoder.destroy_handle)(&cd->encoder);
292     if (cd->decoder.destroy_handle)
293         (*cd->decoder.destroy_handle)(&cd->decoder);
294     xfree(cd);
295     return 0;
296 }
297
298 void yaz_iconv_set_errno(yaz_iconv_t cd, int no)
299 {
300     cd->my_errno = no;
301 }
302
303 /*
304  * Local variables:
305  * c-basic-offset: 4
306  * c-file-style: "Stroustrup"
307  * indent-tabs-mode: nil
308  * End:
309  * vim: shiftwidth=4 tabstop=8 expandtab
310  */
311