be6eae5ed8beac3799bd18eb809982784ef9359b
[yaz-moved-to-github.git] / src / siconv.c
1 /* This file is part of the YAZ toolkit.
2  * Copyright (C) 1995-2013 Index Data
3  * See the file LICENSE for details.
4  */
5 /**
6  * \file
7  * \brief Implements simple ICONV
8  *
9  * This implements an interface similar to that of iconv and
10  * is used by YAZ to interface with iconv (if present).
11  * For systems where iconv is not present, this layer
12  * provides a few important conversions: UTF-8, MARC-8, Latin-1.
13  *
14  */
15
16 #if HAVE_CONFIG_H
17 #include <config.h>
18 #endif
19
20 #include <assert.h>
21 #include <errno.h>
22 #include <string.h>
23
24 #if HAVE_ICONV_H
25 #include <iconv.h>
26 #endif
27
28 #include <yaz/xmalloc.h>
29 #include <yaz/errno.h>
30 #include "iconv-p.h"
31
32 struct yaz_iconv_struct {
33     int my_errno;
34     int init_flag;
35     size_t no_read_x;
36     unsigned long unget_x;
37 #if HAVE_ICONV_H
38     iconv_t iconv_cd;
39 #endif
40     struct yaz_iconv_encoder_s encoder;
41     struct yaz_iconv_decoder_s decoder;
42 };
43
44
45 int yaz_iconv_isbuiltin(yaz_iconv_t cd)
46 {
47     return cd->decoder.read_handle && cd->encoder.write_handle;
48 }
49
50
51 static int prepare_encoders(yaz_iconv_t cd, const char *tocode)
52 {
53     if (yaz_marc8_encoder(tocode, &cd->encoder))
54         return 1;
55     if (yaz_utf8_encoder(tocode, &cd->encoder))
56         return 1;
57     if (yaz_ucs4_encoder(tocode, &cd->encoder))
58         return 1;
59     if (yaz_iso_8859_1_encoder(tocode, &cd->encoder))
60         return 1;
61     if (yaz_iso_5428_encoder(tocode, &cd->encoder))
62         return 1;
63     if (yaz_advancegreek_encoder(tocode, &cd->encoder))
64         return 1;
65     if (yaz_wchar_encoder(tocode, &cd->encoder))
66         return 1;
67     return 0;
68 }
69
70 static int prepare_decoders(yaz_iconv_t cd, const char *tocode)
71 {
72     if (yaz_marc8_decoder(tocode, &cd->decoder))
73         return 1;
74     if (yaz_iso5426_decoder(tocode, &cd->decoder))
75         return 1;
76     if (yaz_utf8_decoder(tocode, &cd->decoder))
77         return 1;
78     if (yaz_ucs4_decoder(tocode, &cd->decoder))
79         return 1;
80     if (yaz_iso_8859_1_decoder(tocode, &cd->decoder))
81         return 1;
82     if (yaz_iso_5428_decoder(tocode, &cd->decoder))
83         return 1;
84     if (yaz_advancegreek_decoder(tocode, &cd->decoder))
85         return 1;
86     if (yaz_wchar_decoder(tocode, &cd->decoder))
87         return 1;
88     if (yaz_danmarc_decoder(tocode, &cd->decoder))
89         return 1;
90     return 0;
91 }
92
93 yaz_iconv_t yaz_iconv_open(const char *tocode, const char *fromcode)
94 {
95     yaz_iconv_t cd = (yaz_iconv_t) xmalloc(sizeof(*cd));
96
97     cd->encoder.data = 0;
98     cd->encoder.write_handle = 0;
99     cd->encoder.flush_handle = 0;
100     cd->encoder.init_handle = 0;
101     cd->encoder.destroy_handle = 0;
102
103     cd->decoder.data = 0;
104     cd->decoder.read_handle = 0;
105     cd->decoder.init_handle = 0;
106     cd->decoder.destroy_handle = 0;
107
108     cd->my_errno = YAZ_ICONV_UNKNOWN;
109
110     /* a useful hack: if fromcode has leading @,
111        the library not use YAZ's own conversions .. */
112     if (fromcode[0] == '@')
113         fromcode++;
114     else
115     {
116         prepare_encoders(cd, tocode);
117         prepare_decoders(cd, fromcode);
118     }
119     if (cd->decoder.read_handle && cd->encoder.write_handle)
120     {
121 #if HAVE_ICONV_H
122         cd->iconv_cd = (iconv_t) (-1);
123 #endif
124         ;
125     }
126     else
127     {
128 #if HAVE_ICONV_H
129         cd->iconv_cd = iconv_open(tocode, fromcode);
130         if (cd->iconv_cd == (iconv_t) (-1))
131         {
132             yaz_iconv_close(cd);
133             return 0;
134         }
135 #else
136         yaz_iconv_close(cd);
137         return 0;
138 #endif
139     }
140     cd->init_flag = 1;
141     return cd;
142 }
143
144 size_t yaz_iconv(yaz_iconv_t cd, char **inbuf, size_t *inbytesleft,
145                  char **outbuf, size_t *outbytesleft)
146 {
147     char *inbuf0 = 0;
148     size_t r = 0;
149
150 #if HAVE_ICONV_H
151     if (cd->iconv_cd != (iconv_t) (-1))
152     {
153         size_t r =
154             iconv(cd->iconv_cd, inbuf, inbytesleft, outbuf, outbytesleft);
155         if (r == (size_t)(-1))
156         {
157             switch (yaz_errno())
158             {
159             case E2BIG:
160                 cd->my_errno = YAZ_ICONV_E2BIG;
161                 break;
162             case EINVAL:
163                 cd->my_errno = YAZ_ICONV_EINVAL;
164                 break;
165             case EILSEQ:
166                 cd->my_errno = YAZ_ICONV_EILSEQ;
167                 break;
168             default:
169                 cd->my_errno = YAZ_ICONV_UNKNOWN;
170             }
171         }
172         return r;
173     }
174 #endif
175
176     if (inbuf)
177         inbuf0 = *inbuf;
178
179     if (cd->init_flag)
180     {
181         cd->my_errno = YAZ_ICONV_UNKNOWN;
182
183         if (cd->encoder.init_handle)
184             (*cd->encoder.init_handle)(&cd->encoder);
185
186         cd->unget_x = 0;
187         cd->no_read_x = 0;
188
189         if (cd->decoder.init_handle)
190         {
191             size_t no_read = 0;
192             size_t r = (cd->decoder.init_handle)(
193                 cd, &cd->decoder,
194                 inbuf ? (unsigned char *) *inbuf : 0,
195                 inbytesleft ? *inbytesleft : 0,
196                 &no_read);
197             if (r)
198             {
199                 if (cd->my_errno == YAZ_ICONV_EINVAL)
200                     return r;
201                 cd->init_flag = 0;
202                 return r;
203             }
204             if (inbytesleft)
205                 *inbytesleft -= no_read;
206             if (inbuf)
207                 *inbuf += no_read;
208         }
209     }
210     cd->init_flag = 0;
211
212     if (!inbuf || !*inbuf)
213     {
214         if (outbuf && *outbuf)
215         {
216             if (cd->unget_x)
217                 r = (*cd->encoder.write_handle)(cd, &cd->encoder,
218                                                 cd->unget_x, outbuf, outbytesleft);
219             if (cd->encoder.flush_handle)
220                 r = (*cd->encoder.flush_handle)(cd, &cd->encoder,
221                                                 outbuf, outbytesleft);
222         }
223         if (r == 0)
224             cd->init_flag = 1;
225         cd->unget_x = 0;
226         return r;
227     }
228     while (1)
229     {
230         unsigned long x;
231         size_t no_read;
232
233         if (cd->unget_x)
234         {
235             x = cd->unget_x;
236             no_read = cd->no_read_x;
237         }
238         else
239         {
240             if (*inbytesleft == 0)
241             {
242                 r = *inbuf - inbuf0;
243                 break;
244             }
245             x = (*cd->decoder.read_handle)(
246                 cd, &cd->decoder,
247                 (unsigned char *) *inbuf, *inbytesleft, &no_read);
248             if (no_read == 0)
249             {
250                 r = (size_t)(-1);
251                 break;
252             }
253         }
254         if (x)
255         {
256             r = (*cd->encoder.write_handle)(cd, &cd->encoder,
257                                             x, outbuf, outbytesleft);
258             if (r)
259             {
260                 /* unable to write it. save it because read_handle cannot
261                    rewind .. */
262                 if (cd->my_errno == YAZ_ICONV_E2BIG)
263                 {
264                     cd->unget_x = x;
265                     cd->no_read_x = no_read;
266                     break;
267                 }
268             }
269             cd->unget_x = 0;
270         }
271         *inbytesleft -= no_read;
272         (*inbuf) += no_read;
273     }
274     return r;
275 }
276
277 int yaz_iconv_error(yaz_iconv_t cd)
278 {
279     return cd->my_errno;
280 }
281
282 int yaz_iconv_close(yaz_iconv_t cd)
283 {
284 #if HAVE_ICONV_H
285     if (cd->iconv_cd != (iconv_t) (-1))
286         iconv_close(cd->iconv_cd);
287 #endif
288     if (cd->encoder.destroy_handle)
289         (*cd->encoder.destroy_handle)(&cd->encoder);
290     if (cd->decoder.destroy_handle)
291         (*cd->decoder.destroy_handle)(&cd->decoder);
292     xfree(cd);
293     return 0;
294 }
295
296 void yaz_iconv_set_errno(yaz_iconv_t cd, int no)
297 {
298     cd->my_errno = no;
299 }
300
301 /*
302  * Local variables:
303  * c-basic-offset: 4
304  * c-file-style: "Stroustrup"
305  * indent-tabs-mode: nil
306  * End:
307  * vim: shiftwidth=4 tabstop=8 expandtab
308  */
309