Update headers and omit CVS Ids.
[yaz-moved-to-github.git] / src / siconv.c
1 /* This file is part of the YAZ toolkit.
2  * Copyright (C) 1995-2008 Index Data
3  * See the file LICENSE for details.
4  */
5 /**
6  * \file
7  * \brief Implements simple ICONV
8  *
9  * This implements an interface similar to that of iconv and
10  * is used by YAZ to interface with iconv (if present).
11  * For systems where iconv is not present, this layer
12  * provides a few important conversions: UTF-8, MARC-8, Latin-1.
13  *
14  */
15
16 #if HAVE_CONFIG_H
17 #include <config.h>
18 #endif
19
20 #include <assert.h>
21 #include <errno.h>
22 #include <string.h>
23 #include <ctype.h>
24
25 #if HAVE_ICONV_H
26 #include <iconv.h>
27 #endif
28
29 #include <yaz/xmalloc.h>
30 #include <yaz/nmem.h>
31 #include "iconv-p.h"
32
33 struct yaz_iconv_struct {
34     int my_errno;
35     int init_flag;
36 #if 0
37     size_t (*init_handle)(yaz_iconv_t cd, unsigned char *inbuf,
38                             size_t inbytesleft, size_t *no_read);
39     unsigned long (*read_handle)(yaz_iconv_t cd, unsigned char *inbuf,
40                                  size_t inbytesleft, size_t *no_read);
41 #endif
42     size_t no_read_x;
43     unsigned long unget_x;
44 #if HAVE_ICONV_H
45     iconv_t iconv_cd;
46 #endif
47     struct yaz_iconv_encoder_s encoder;
48     struct yaz_iconv_decoder_s decoder;
49 };
50
51
52 int yaz_iconv_isbuiltin(yaz_iconv_t cd)
53 {
54     return cd->decoder.read_handle && cd->encoder.write_handle;
55 }
56
57
58 static int prepare_encoders(yaz_iconv_t cd, const char *tocode)
59 {
60     if (yaz_marc8_encoder(tocode, &cd->encoder))
61         return 1;
62     if (yaz_utf8_encoder(tocode, &cd->encoder))
63         return 1;
64     if (yaz_ucs4_encoder(tocode, &cd->encoder))
65         return 1;
66     if (yaz_iso_8859_1_encoder(tocode, &cd->encoder))
67         return 1;
68     if (yaz_iso_5428_encoder(tocode, &cd->encoder))
69         return 1;
70     if (yaz_advancegreek_encoder(tocode, &cd->encoder))
71         return 1;
72     if (yaz_wchar_encoder(tocode, &cd->encoder))
73         return 1;
74     return 0;
75 }
76
77 static int prepare_decoders(yaz_iconv_t cd, const char *tocode)
78 {
79     if (yaz_marc8_decoder(tocode, &cd->decoder))
80         return 1;
81     if (yaz_utf8_decoder(tocode, &cd->decoder))
82         return 1;
83     if (yaz_ucs4_decoder(tocode, &cd->decoder))
84         return 1;
85     if (yaz_iso_8859_1_decoder(tocode, &cd->decoder))
86         return 1;
87     if (yaz_iso_5428_decoder(tocode, &cd->decoder))
88         return 1;
89     if (yaz_advancegreek_decoder(tocode, &cd->decoder))
90         return 1;
91     if (yaz_wchar_decoder(tocode, &cd->decoder))
92         return 1;
93     return 0;
94 }
95
96 yaz_iconv_t yaz_iconv_open(const char *tocode, const char *fromcode)
97 {
98     yaz_iconv_t cd = (yaz_iconv_t) xmalloc (sizeof(*cd));
99
100     cd->encoder.data = 0;
101     cd->encoder.write_handle = 0;
102     cd->encoder.flush_handle = 0;
103     cd->encoder.init_handle = 0;
104     cd->encoder.destroy_handle = 0;
105
106     cd->decoder.data = 0;
107     cd->decoder.read_handle = 0;
108     cd->decoder.init_handle = 0;
109     cd->decoder.destroy_handle = 0;
110
111     cd->my_errno = YAZ_ICONV_UNKNOWN;
112
113     /* a useful hack: if fromcode has leading @,
114        the library not use YAZ's own conversions .. */
115     if (fromcode[0] == '@')
116         fromcode++;
117     else
118     {
119         prepare_encoders(cd, tocode);
120         prepare_decoders(cd, fromcode);
121     }
122     if (cd->decoder.read_handle && cd->encoder.write_handle)
123     {
124 #if HAVE_ICONV_H
125         cd->iconv_cd = (iconv_t) (-1);
126 #endif
127         ;
128     }
129     else
130     {
131 #if HAVE_ICONV_H
132         cd->iconv_cd = iconv_open(tocode, fromcode);
133         if (cd->iconv_cd == (iconv_t) (-1))
134         {
135             yaz_iconv_close(cd);
136             return 0;
137         }
138 #else
139         yaz_iconv_close(cd);
140         return 0;
141 #endif
142     }
143     cd->init_flag = 1;
144     return cd;
145 }
146
147 size_t yaz_iconv(yaz_iconv_t cd, char **inbuf, size_t *inbytesleft,
148                  char **outbuf, size_t *outbytesleft)
149 {
150     char *inbuf0 = 0;
151     size_t r = 0;
152
153 #if HAVE_ICONV_H
154     if (cd->iconv_cd != (iconv_t) (-1))
155     {
156         size_t r =
157             iconv(cd->iconv_cd, inbuf, inbytesleft, outbuf, outbytesleft);
158         if (r == (size_t)(-1))
159         {
160             switch (yaz_errno())
161             {
162             case E2BIG:
163                 cd->my_errno = YAZ_ICONV_E2BIG;
164                 break;
165             case EINVAL:
166                 cd->my_errno = YAZ_ICONV_EINVAL;
167                 break;
168             case EILSEQ:
169                 cd->my_errno = YAZ_ICONV_EILSEQ;
170                 break;
171             default:
172                 cd->my_errno = YAZ_ICONV_UNKNOWN;
173             }
174         }
175         return r;
176     }
177 #endif
178
179     if (inbuf)
180         inbuf0 = *inbuf;
181
182     if (cd->init_flag)
183     {
184         cd->my_errno = YAZ_ICONV_UNKNOWN;
185         
186         if (cd->encoder.init_handle)
187             (*cd->encoder.init_handle)(&cd->encoder);
188         
189         cd->unget_x = 0;
190         cd->no_read_x = 0;
191
192         if (cd->decoder.init_handle)
193         {
194             size_t no_read = 0;
195             size_t r = (cd->decoder.init_handle)(
196                 cd, &cd->decoder,
197                 inbuf ? (unsigned char *) *inbuf : 0,
198                 inbytesleft ? *inbytesleft : 0, 
199                 &no_read);
200             if (r)
201             {
202                 if (cd->my_errno == YAZ_ICONV_EINVAL)
203                     return r;
204                 cd->init_flag = 0;
205                 return r;
206             }
207             if (inbytesleft)
208                 *inbytesleft -= no_read;
209             if (inbuf)
210                 *inbuf += no_read;
211         }
212     }
213     cd->init_flag = 0;
214
215     if (!inbuf || !*inbuf)
216     {
217         if (outbuf && *outbuf)
218         {
219             if (cd->unget_x)
220                 r = (*cd->encoder.write_handle)(cd, &cd->encoder,
221                                                 cd->unget_x, outbuf, outbytesleft);
222             if (cd->encoder.flush_handle)
223                 r = (*cd->encoder.flush_handle)(cd, &cd->encoder,
224                                                 outbuf, outbytesleft);
225         }
226         if (r == 0)
227             cd->init_flag = 1;
228         cd->unget_x = 0;
229         return r;
230     }
231     while (1)
232     {
233         unsigned long x;
234         size_t no_read;
235
236         if (cd->unget_x)
237         {
238             x = cd->unget_x;
239             no_read = cd->no_read_x;
240         }
241         else
242         {
243             if (*inbytesleft == 0)
244             {
245                 r = *inbuf - inbuf0;
246                 break;
247             }
248             x = (*cd->decoder.read_handle)(
249                 cd, &cd->decoder, 
250                 (unsigned char *) *inbuf, *inbytesleft, &no_read);
251             if (no_read == 0)
252             {
253                 r = (size_t)(-1);
254                 break;
255             }
256         }
257         if (x)
258         {
259             r = (*cd->encoder.write_handle)(cd, &cd->encoder,
260                                             x, outbuf, outbytesleft);
261             if (r)
262             {
263                 /* unable to write it. save it because read_handle cannot
264                    rewind .. */
265                 if (cd->my_errno == YAZ_ICONV_E2BIG)
266                 {
267                     cd->unget_x = x;
268                     cd->no_read_x = no_read;
269                     break;
270                 }
271             }
272             cd->unget_x = 0;
273         }
274         *inbytesleft -= no_read;
275         (*inbuf) += no_read;
276     }
277     return r;
278 }
279
280 int yaz_iconv_error(yaz_iconv_t cd)
281 {
282     return cd->my_errno;
283 }
284
285 int yaz_iconv_close(yaz_iconv_t cd)
286 {
287 #if HAVE_ICONV_H
288     if (cd->iconv_cd != (iconv_t) (-1))
289         iconv_close(cd->iconv_cd);
290 #endif
291     if (cd->encoder.destroy_handle)
292         (*cd->encoder.destroy_handle)(&cd->encoder);
293     if (cd->decoder.destroy_handle)
294         (*cd->decoder.destroy_handle)(&cd->decoder);
295     xfree(cd);
296     return 0;
297 }
298
299 void yaz_iconv_set_errno(yaz_iconv_t cd, int no)
300 {
301     cd->my_errno = no;
302 }
303
304 /*
305  * Local variables:
306  * c-basic-offset: 4
307  * indent-tabs-mode: nil
308  * End:
309  * vim: shiftwidth=4 tabstop=8 expandtab
310  */