a01b10393b661244fab84ba692c074ea656b7b51
[yaz-moved-to-github.git] / util / siconv.c
1 /*
2  * Copyright (c) 1997-2002, Index Data
3  * See the file LICENSE for details.
4  *
5  * $Id: siconv.c,v 1.1 2002-08-27 14:02:13 adam Exp $
6  */
7
8 #if HAVE_CONFIG_H
9 #include <config.h>
10 #endif
11
#include <ctype.h>
#include <errno.h>
#include <stdint.h>
#include <string.h>
15
16 #if HAVE_ICONV_H
17 #include <iconv.h>
18 #endif
19
20 #include <yaz/yaz-util.h>
21
/*
 * State of one conversion descriptor (allocated by yaz_iconv_open and
 * released by yaz_iconv_close).
 */
struct yaz_iconv_struct {
    /* Last error recorded by a conversion; a YAZ_ICONV_* code that is
       reported by yaz_iconv_error. */
    int my_errno;
    /* Built-in decoder: reads one character from *inbuf and returns its
       code; yaz_iconv treats a return value of 0 as failure. */
    unsigned long (*read_handle)(yaz_iconv_t cd, char **inbuf,
                                 size_t *inbytesleft);
    /* Built-in encoder: writes character x to *outbuf; returns 0 on
       success and (size_t)(-1) on failure. */
    size_t (*write_handle)(yaz_iconv_t cd, unsigned long x,
                           char **outbuf, size_t *outbytesleft);
#if HAVE_ICONV_H
    /* System iconv descriptor; left at 0 when both built-in handlers
       are available and the system iconv is not needed. */
    iconv_t iconv_cd;
#endif
};
32
33
34 static unsigned long yaz_read_ISO8859_1 (yaz_iconv_t cd,
35                                          char **inbuf, size_t *inbytesleft)
36 {
37     unsigned char *inp = *inbuf;
38     unsigned long x = 0;
39     x = inp[0];
40     (*inbytesleft)--;
41     inp++;
42     *inbuf = inp;
43     return x;
44 }
45
46 static unsigned long yaz_read_UTF8 (yaz_iconv_t cd,
47                                     char **inbuf, size_t *inbytesleft)
48 {
49     unsigned char *inp = *inbuf;
50     unsigned long x = 0;
51     if (inp[0] <= 0x7f)
52     {
53         x = inp[0];
54         
55         (*inbytesleft)--;
56         inp++;
57     }
58     else if (inp[0] <= 0xdf && *inbytesleft >= 2)
59     {
60         x = ((inp[0] & 0x1f) << 6) + (inp[1] & 0x3f);
61         
62         (*inbytesleft) -= 2;
63         inp += 2;
64     }
65     else if (inp[0] <= 0xef && *inbytesleft >= 3)
66     {
67         x =  ((inp[0] & 0x0f) << 12) +
68             ((inp[1] & 0x3f) << 6) +  (inp[1] & 0x3f);
69         
70         (*inbytesleft) -= 3;
71         inp += 3;
72     }
73     else if (inp[0] <= 0xef && *inbytesleft >= 4)
74     {
75         x =  ((inp[0] & 0x07) << 18) +
76             ((inp[1] & 0x3f) << 12) + ((inp[2] & 0x3f) << 6) +
77             (inp[3] & 0x3f);
78         
79         (*inbytesleft) -= 4;
80         inp += 4;
81     }
82     else
83     {
84         cd->my_errno = YAZ_ICONV_EINVAL;
85     }
86     *inbuf = inp;
87     return x;
88 }
89
90 static unsigned long yaz_read_UCS4 (yaz_iconv_t cd,
91                                     char **inbuf, size_t *inbytesleft)
92 {
93     unsigned char *inp = *inbuf;
94     unsigned long x = 0;
95     
96     if (*inbytesleft < 4)
97     {
98         cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
99         return 0;
100     }
101     memcpy (&x, inp, sizeof(x));
102     (*inbytesleft) -= 4;
103     inp += 4;
104     *inbuf = inp;
105     return x;
106 }
107
108 static size_t yaz_write_UTF8 (yaz_iconv_t cd, unsigned long x,
109                               char **outbuf, size_t *outbytesleft)
110 {
111     unsigned char *outp = *outbuf;
112     if (x <= 0x7f && *outbytesleft >= 1)
113     {
114         *outp++ = x;
115         (*outbytesleft)--;
116     } 
117     else if (x <= 0x7ff && *outbytesleft >= 2)
118     {
119         *outp++ = (x >> 6) | 0xc0;
120         *outp++ = (x & 0x3f) | 0x80;
121         (*outbytesleft) -= 2;
122     }
123     else if (x <= 0xffff && *outbytesleft >= 3)
124     {
125         *outp++ = (x >> 12) | 0xe0;
126         *outp++ = ((x >> 6) & 0x3f) | 0x80;
127         *outp++ = (x & 0x3f) | 0x80;
128         (*outbytesleft) -= 3;
129     }
130     else if (x <= 0x1fffff && *outbytesleft >= 4)
131     {
132         *outp++ = (x >> 18) | 0xf0;
133         *outp++ = ((x >> 12) & 0x3f) | 0x80;
134         *outp++ = ((x >> 6) & 0x3f) | 0x80;
135         *outp++ = (x & 0x3f) | 0x80;
136         (*outbytesleft) -= 4;
137     }
138     else if (x > 0x1fffff)
139     {
140         cd->my_errno = YAZ_ICONV_EILSEQ;  /* invalid sequence */
141         return (size_t)(-1);
142     }
143     else 
144     {
145         cd->my_errno = YAZ_ICONV_E2BIG;  /* not room for output */
146         return (size_t)(-1);
147     }
148     *outbuf = outp;
149     return 0;
150 }
151
152 static size_t yaz_write_ISO8859_1 (yaz_iconv_t cd, unsigned long x,
153                                    char **outbuf, size_t *outbytesleft)
154 {
155     unsigned char *outp = *outbuf;
156     if (x > 255 || x < 1)
157     {
158         cd->my_errno = YAZ_ICONV_EILSEQ;
159         return (size_t) -1;
160     }
161     else if (*outbytesleft >= 1)
162     {
163         *outp++ = x;
164         (*outbytesleft)--;
165     }
166     else 
167     {
168         cd->my_errno = YAZ_ICONV_E2BIG;
169         return (size_t)(-1);
170     }
171     *outbuf = outp;
172     return 0;
173 }
174
175
176 static size_t yaz_write_UCS4 (yaz_iconv_t cd, unsigned long x,
177                               char **outbuf, size_t *outbytesleft)
178 {
179     unsigned char *outp = *outbuf;
180     if (x < 1 || x > 0x1fffff)
181     {
182         cd->my_errno = YAZ_ICONV_EILSEQ;
183         return (size_t)(-1);
184     }
185     else if (*outbytesleft >= 4)
186     {
187         memcpy (outp, &x, sizeof(x));
188         outp += 4;
189         (*outbytesleft) -= 4;
190     }
191     else
192     {
193         cd->my_errno = YAZ_ICONV_E2BIG;
194         return (size_t)(-1);
195     }
196     *outbuf = outp;
197     return 0;
198 }
199
200 yaz_iconv_t yaz_iconv_open (const char *tocode, const char *fromcode)
201 {
202     yaz_iconv_t cd = xmalloc (sizeof(*cd));
203
204     cd->write_handle = 0;
205     cd->read_handle = 0;
206     cd->my_errno = YAZ_ICONV_UNKNOWN;
207
208     if (!strcmp(fromcode, "UTF-8"))
209         cd->read_handle = yaz_read_UTF8;
210     else if (!strcmp(fromcode, "ISO-8859-1"))
211         cd->read_handle = yaz_read_ISO8859_1;
212     else if (!strcmp(fromcode, "UCS-4"))
213         cd->read_handle = yaz_read_UCS4;
214
215
216     if (!strcmp(tocode, "UTF-8"))
217         cd->write_handle = yaz_write_UTF8;
218     else if (!strcmp (tocode, "ISO-8859-1"))
219         cd->write_handle = yaz_write_ISO8859_1;
220     else if (!strcmp (tocode, "UCS-4"))
221         cd->write_handle = yaz_write_UCS4;
222
223 #if HAVE_ICONV_H
224     cd->iconv_cd = 0;
225     if (!cd->read_handle || !cd->write_handle)
226     {
227         cd->iconv_cd = iconv_open (tocode, fromcode);
228         if (cd->iconv_cd == (iconv_t) (-1))
229         {
230             xfree (cd);
231             return 0;
232         }
233     }
234 #else
235     if (!cd->to_UCS4 || !cd->from_UCS4)
236     {
237         xfree (cd);
238         return 0;
239     }
240 #endif
241     return cd;
242 }
243
244 size_t yaz_iconv (yaz_iconv_t cd, char **inbuf, size_t *inbytesleft,
245                   char **outbuf, size_t *outbytesleft)
246 {
247     char *inbuf0;
248     size_t r = 0;
249 #if HAVE_ICONV_H
250     if (cd->iconv_cd)
251     {
252         size_t r =
253             iconv(cd->iconv_cd, inbuf, inbytesleft, outbuf, outbytesleft);
254         if (r == (size_t)(-1))
255         {
256             switch (errno)
257             {
258             case E2BIG:
259                 cd->my_errno = YAZ_ICONV_E2BIG;
260                 break;
261             case EINVAL:
262                 cd->my_errno = YAZ_ICONV_EINVAL;
263                 break;
264             case EILSEQ:
265                 cd->my_errno = YAZ_ICONV_EILSEQ;
266                 break;
267             default:
268                 cd->my_errno = YAZ_ICONV_UNKNOWN;
269             }
270         }
271         return r;
272     }
273 #endif
274     if (inbuf == 0 || *inbuf == 0)
275         return 0;
276     inbuf0 = *inbuf;
277     while (1)
278     {
279         unsigned long x;
280
281         if (*inbytesleft == 0)
282         {
283             r = *inbuf - inbuf0;
284             break;
285         }
286         
287         x = (cd->read_handle)(cd, inbuf, inbytesleft);
288         if (x == 0)
289         {
290             r = (size_t)(-1);
291             break;
292         }
293         r = (cd->write_handle)(cd, x, outbuf, outbytesleft);
294         if (r)
295             break;
296     }
297     return r;
298 }
299
300 int yaz_iconv_error (yaz_iconv_t cd)
301 {
302     return cd->my_errno;
303 }
304
305 int yaz_iconv_close (yaz_iconv_t cd)
306 {
307 #if HAVE_ICONV_H
308     if (cd->iconv_cd)
309         iconv_close (cd->iconv_cd);
310 #endif
311     xfree (cd);
312     return 0;
313 }
314
315