697afe857740507c1e604d51a2cad768edc770fd
[yaz-moved-to-github.git] / src / siconv.c
1 /* This file is part of the YAZ toolkit.
2  * Copyright (C) 1995-2008 Index Data
3  * See the file LICENSE for details.
4  */
5 /**
6  * \file
7  * \brief Implements simple ICONV
8  *
9  * This implements an interface similar to that of iconv and
10  * is used by YAZ to interface with iconv (if present).
11  * For systems where iconv is not present, this layer
12  * provides a few important conversions: UTF-8, MARC-8, Latin-1.
13  *
14  */
15
16 #if HAVE_CONFIG_H
17 #include <config.h>
18 #endif
19
20 #include <assert.h>
21 #include <errno.h>
22 #include <string.h>
23 #include <ctype.h>
24
25 #if HAVE_ICONV_H
26 #include <iconv.h>
27 #endif
28
29 #include <yaz/xmalloc.h>
30 #include <yaz/nmem.h>
31 #include "iconv-p.h"
32
33 struct yaz_iconv_struct {
34     int my_errno;
35     int init_flag;
36 #if 0
37     size_t (*init_handle)(yaz_iconv_t cd, unsigned char *inbuf,
38                             size_t inbytesleft, size_t *no_read);
39     unsigned long (*read_handle)(yaz_iconv_t cd, unsigned char *inbuf,
40                                  size_t inbytesleft, size_t *no_read);
41 #endif
42     size_t no_read_x;
43     unsigned long unget_x;
44 #if HAVE_ICONV_H
45     iconv_t iconv_cd;
46 #endif
47     struct yaz_iconv_encoder_s encoder;
48     struct yaz_iconv_decoder_s decoder;
49 };
50
51
52 int yaz_iconv_isbuiltin(yaz_iconv_t cd)
53 {
54     return cd->decoder.read_handle && cd->encoder.write_handle;
55 }
56
57
58 static int prepare_encoders(yaz_iconv_t cd, const char *tocode)
59 {
60     if (yaz_marc8_encoder(tocode, &cd->encoder))
61         return 1;
62     if (yaz_utf8_encoder(tocode, &cd->encoder))
63         return 1;
64     if (yaz_ucs4_encoder(tocode, &cd->encoder))
65         return 1;
66     if (yaz_iso_8859_1_encoder(tocode, &cd->encoder))
67         return 1;
68     if (yaz_iso_5428_encoder(tocode, &cd->encoder))
69         return 1;
70     if (yaz_advancegreek_encoder(tocode, &cd->encoder))
71         return 1;
72     if (yaz_wchar_encoder(tocode, &cd->encoder))
73         return 1;
74     return 0;
75 }
76
77 static int prepare_decoders(yaz_iconv_t cd, const char *tocode)
78 {
79     if (yaz_marc8_decoder(tocode, &cd->decoder))
80         return 1;
81     if (yaz_iso5426_decoder(tocode, &cd->decoder))
82         return 1;
83     if (yaz_utf8_decoder(tocode, &cd->decoder))
84         return 1;
85     if (yaz_ucs4_decoder(tocode, &cd->decoder))
86         return 1;
87     if (yaz_iso_8859_1_decoder(tocode, &cd->decoder))
88         return 1;
89     if (yaz_iso_5428_decoder(tocode, &cd->decoder))
90         return 1;
91     if (yaz_advancegreek_decoder(tocode, &cd->decoder))
92         return 1;
93     if (yaz_wchar_decoder(tocode, &cd->decoder))
94         return 1;
95     if (yaz_danmarc_decoder(tocode, &cd->decoder))
96         return 1;
97     return 0;
98 }
99
100 yaz_iconv_t yaz_iconv_open(const char *tocode, const char *fromcode)
101 {
102     yaz_iconv_t cd = (yaz_iconv_t) xmalloc (sizeof(*cd));
103
104     cd->encoder.data = 0;
105     cd->encoder.write_handle = 0;
106     cd->encoder.flush_handle = 0;
107     cd->encoder.init_handle = 0;
108     cd->encoder.destroy_handle = 0;
109
110     cd->decoder.data = 0;
111     cd->decoder.read_handle = 0;
112     cd->decoder.init_handle = 0;
113     cd->decoder.destroy_handle = 0;
114
115     cd->my_errno = YAZ_ICONV_UNKNOWN;
116
117     /* a useful hack: if fromcode has leading @,
118        the library not use YAZ's own conversions .. */
119     if (fromcode[0] == '@')
120         fromcode++;
121     else
122     {
123         prepare_encoders(cd, tocode);
124         prepare_decoders(cd, fromcode);
125     }
126     if (cd->decoder.read_handle && cd->encoder.write_handle)
127     {
128 #if HAVE_ICONV_H
129         cd->iconv_cd = (iconv_t) (-1);
130 #endif
131         ;
132     }
133     else
134     {
135 #if HAVE_ICONV_H
136         cd->iconv_cd = iconv_open(tocode, fromcode);
137         if (cd->iconv_cd == (iconv_t) (-1))
138         {
139             yaz_iconv_close(cd);
140             return 0;
141         }
142 #else
143         yaz_iconv_close(cd);
144         return 0;
145 #endif
146     }
147     cd->init_flag = 1;
148     return cd;
149 }
150
151 size_t yaz_iconv(yaz_iconv_t cd, char **inbuf, size_t *inbytesleft,
152                  char **outbuf, size_t *outbytesleft)
153 {
154     char *inbuf0 = 0;
155     size_t r = 0;
156
157 #if HAVE_ICONV_H
158     if (cd->iconv_cd != (iconv_t) (-1))
159     {
160         size_t r =
161             iconv(cd->iconv_cd, inbuf, inbytesleft, outbuf, outbytesleft);
162         if (r == (size_t)(-1))
163         {
164             switch (yaz_errno())
165             {
166             case E2BIG:
167                 cd->my_errno = YAZ_ICONV_E2BIG;
168                 break;
169             case EINVAL:
170                 cd->my_errno = YAZ_ICONV_EINVAL;
171                 break;
172             case EILSEQ:
173                 cd->my_errno = YAZ_ICONV_EILSEQ;
174                 break;
175             default:
176                 cd->my_errno = YAZ_ICONV_UNKNOWN;
177             }
178         }
179         return r;
180     }
181 #endif
182
183     if (inbuf)
184         inbuf0 = *inbuf;
185
186     if (cd->init_flag)
187     {
188         cd->my_errno = YAZ_ICONV_UNKNOWN;
189         
190         if (cd->encoder.init_handle)
191             (*cd->encoder.init_handle)(&cd->encoder);
192         
193         cd->unget_x = 0;
194         cd->no_read_x = 0;
195
196         if (cd->decoder.init_handle)
197         {
198             size_t no_read = 0;
199             size_t r = (cd->decoder.init_handle)(
200                 cd, &cd->decoder,
201                 inbuf ? (unsigned char *) *inbuf : 0,
202                 inbytesleft ? *inbytesleft : 0, 
203                 &no_read);
204             if (r)
205             {
206                 if (cd->my_errno == YAZ_ICONV_EINVAL)
207                     return r;
208                 cd->init_flag = 0;
209                 return r;
210             }
211             if (inbytesleft)
212                 *inbytesleft -= no_read;
213             if (inbuf)
214                 *inbuf += no_read;
215         }
216     }
217     cd->init_flag = 0;
218
219     if (!inbuf || !*inbuf)
220     {
221         if (outbuf && *outbuf)
222         {
223             if (cd->unget_x)
224                 r = (*cd->encoder.write_handle)(cd, &cd->encoder,
225                                                 cd->unget_x, outbuf, outbytesleft);
226             if (cd->encoder.flush_handle)
227                 r = (*cd->encoder.flush_handle)(cd, &cd->encoder,
228                                                 outbuf, outbytesleft);
229         }
230         if (r == 0)
231             cd->init_flag = 1;
232         cd->unget_x = 0;
233         return r;
234     }
235     while (1)
236     {
237         unsigned long x;
238         size_t no_read;
239
240         if (cd->unget_x)
241         {
242             x = cd->unget_x;
243             no_read = cd->no_read_x;
244         }
245         else
246         {
247             if (*inbytesleft == 0)
248             {
249                 r = *inbuf - inbuf0;
250                 break;
251             }
252             x = (*cd->decoder.read_handle)(
253                 cd, &cd->decoder, 
254                 (unsigned char *) *inbuf, *inbytesleft, &no_read);
255             if (no_read == 0)
256             {
257                 r = (size_t)(-1);
258                 break;
259             }
260         }
261         if (x)
262         {
263             r = (*cd->encoder.write_handle)(cd, &cd->encoder,
264                                             x, outbuf, outbytesleft);
265             if (r)
266             {
267                 /* unable to write it. save it because read_handle cannot
268                    rewind .. */
269                 if (cd->my_errno == YAZ_ICONV_E2BIG)
270                 {
271                     cd->unget_x = x;
272                     cd->no_read_x = no_read;
273                     break;
274                 }
275             }
276             cd->unget_x = 0;
277         }
278         *inbytesleft -= no_read;
279         (*inbuf) += no_read;
280     }
281     return r;
282 }
283
284 int yaz_iconv_error(yaz_iconv_t cd)
285 {
286     return cd->my_errno;
287 }
288
289 int yaz_iconv_close(yaz_iconv_t cd)
290 {
291 #if HAVE_ICONV_H
292     if (cd->iconv_cd != (iconv_t) (-1))
293         iconv_close(cd->iconv_cd);
294 #endif
295     if (cd->encoder.destroy_handle)
296         (*cd->encoder.destroy_handle)(&cd->encoder);
297     if (cd->decoder.destroy_handle)
298         (*cd->decoder.destroy_handle)(&cd->decoder);
299     xfree(cd);
300     return 0;
301 }
302
303 void yaz_iconv_set_errno(yaz_iconv_t cd, int no)
304 {
305     cd->my_errno = no;
306 }
307
308 /*
309  * Local variables:
310  * c-basic-offset: 4
311  * indent-tabs-mode: nil
312  * End:
313  * vim: shiftwidth=4 tabstop=8 expandtab
314  */