2 * Copyright (c) 1997-2002, Index Data
3 * See the file LICENSE for details.
5 * $Id: siconv.c,v 1.7 2002-12-10 10:59:28 adam Exp $
8 /* mini iconv and wrapper for system iconv library (if present) */
22 #include <yaz/yaz-util.h>
/* State behind a yaz_iconv_t conversion descriptor.  Only the three handler
   slots are visible in this excerpt; any further members and the closing
   brace are not shown here. */
24 struct yaz_iconv_struct {
/* init_handle: hook that yaz_iconv calls on the input buffer; the byte count
   it stores in *no_read is subtracted from the caller's *inbytesleft. */
27 size_t (*init_handle)(yaz_iconv_t cd, unsigned char *inbuf,
28 size_t inbytesleft, size_t *no_read);
/* read_handle: decodes from inbuf and returns an unsigned long code value,
   which yaz_iconv passes on to write_handle; *no_read receives the number
   of input bytes used. */
29 unsigned long (*read_handle)(yaz_iconv_t cd, unsigned char *inbuf,
30 size_t inbytesleft, size_t *no_read);
/* write_handle: encodes code value x through *outbuf / *outbytesleft. */
31 size_t (*write_handle)(yaz_iconv_t cd, unsigned long x,
32 char **outbuf, size_t *outbytesleft);
/* Built-in reader for ISO-8859-1 input: the code value is taken directly
   from the first input byte.
   NOTE(review): the lines that set *no_read and return the value are not
   visible in this excerpt. */
38 static unsigned long yaz_read_ISO8859_1 (yaz_iconv_t cd, unsigned char *inp,
39 size_t inbytesleft, size_t *no_read)
41 unsigned long x = inp[0];
/* Init hook installed together with the UTF-8 reader.  The visible part
   inspects the second and third input bytes for 0xBB 0xBF (the tail of a
   UTF-8 byte order mark) and flags a mismatch as YAZ_ICONV_EILSEQ.
   NOTE(review): the test on inp[0], the condition guarding the EINVAL
   assignment, and the return statements are not visible here. */
46 static size_t yaz_init_UTF8 (yaz_iconv_t cd, unsigned char *inp,
47 size_t inbytesleft, size_t *no_read)
/* presumably reached when too few bytes are available -- confirm */
56 cd->my_errno = YAZ_ICONV_EINVAL;
/* bytes 2 and 3 must be 0xBB 0xBF */
59 if (inp[1] != 0xbb || inp[2] != 0xbf)
61 cd->my_errno = YAZ_ICONV_EILSEQ;
/* Built-in reader for UTF-8 input.  Dispatches on the lead byte inp[0] and
   on how many bytes are available, and assembles the code value from the
   low bits of the lead byte plus 6 bits from each following byte.
   Sequences of 2 to 6 bytes are handled by the visible branches.
   NOTE(review): the first branch of the chain, the conditions guarding the
   EILSEQ assignments, the tails of two expressions and the lines that set
   *no_read / return are not visible in this excerpt. */
68 static unsigned long yaz_read_UTF8 (yaz_iconv_t cd, unsigned char *inp,
69 size_t inbytesleft, size_t *no_read)
/* 0x80..0xbf cannot start a sequence and 0xfe/0xff are never lead bytes */
78 else if (inp[0] <= 0xbf || inp[0] >= 0xfe)
81 cd->my_errno = YAZ_ICONV_EILSEQ;
/* lead byte up to 0xdf: 2-byte sequence, 5 + 6 payload bits */
83 else if (inp[0] <= 0xdf && inbytesleft >= 2)
85 x = ((inp[0] & 0x1f) << 6) | (inp[1] & 0x3f);
91 cd->my_errno = YAZ_ICONV_EILSEQ;
/* lead byte up to 0xef: 3-byte sequence (expression continues on a line
   that is not visible) */
94 else if (inp[0] <= 0xef && inbytesleft >= 3)
96 x = ((inp[0] & 0x0f) << 12) | ((inp[1] & 0x3f) << 6) |
103 cd->my_errno = YAZ_ICONV_EILSEQ;
/* lead byte up to 0xf7: 4-byte sequence, 3 + 3*6 payload bits */
106 else if (inp[0] <= 0xf7 && inbytesleft >= 4)
108 x = ((inp[0] & 0x07) << 18) | ((inp[1] & 0x3f) << 12) |
109 ((inp[2] & 0x3f) << 6) | (inp[3] & 0x3f);
115 cd->my_errno = YAZ_ICONV_EILSEQ;
/* lead byte up to 0xfb: 5-byte sequence (expression continues on a line
   that is not visible) */
118 else if (inp[0] <= 0xfb && inbytesleft >= 5)
120 x = ((inp[0] & 0x03) << 24) | ((inp[1] & 0x3f) << 18) |
121 ((inp[2] & 0x3f) << 12) | ((inp[3] & 0x3f) << 6) |
128 cd->my_errno = YAZ_ICONV_EILSEQ;
/* lead byte up to 0xfd: 6-byte sequence, 1 + 5*6 payload bits */
131 else if (inp[0] <= 0xfd && inbytesleft >= 6)
133 x = ((inp[0] & 0x01) << 30) | ((inp[1] & 0x3f) << 24) |
134 ((inp[2] & 0x3f) << 18) | ((inp[3] & 0x3f) << 12) |
135 ((inp[4] & 0x3f) << 6) | (inp[5] & 0x3f);
141 cd->my_errno = YAZ_ICONV_EILSEQ;
/* presumably the final fall-through: a valid lead byte but fewer input
   bytes than the sequence needs -- confirm against the full source */
147 cd->my_errno = YAZ_ICONV_EINVAL;
152 static unsigned long yaz_read_UCS4 (yaz_iconv_t cd, unsigned char *inp,
153 size_t inbytesleft, size_t *no_read)
159 cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
164 x = (inp[0]<<24) | (inp[1]<<16) | (inp[2]<<8) | inp[3];
170 static unsigned long yaz_read_UCS4LE (yaz_iconv_t cd, unsigned char *inp,
171 size_t inbytesleft, size_t *no_read)
177 cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
182 x = (inp[3]<<24) | (inp[2]<<16) | (inp[1]<<8) | inp[0];
/* Built-in writer for UTF-8 output.  Picks the shortest encoding whose
   range covers x (1 to 6 bytes), provided *outbytesleft has room for it:
   the lead byte carries the length marker and the top bits of x, each
   following byte carries 6 bits tagged with 0x80.  If no branch matches,
   my_errno is set to YAZ_ICONV_E2BIG.  The advanced output pointer is stored
   back into *outbuf at the end.
   NOTE(review): the 1-byte branch's counter update and the return
   statements are not visible in this excerpt. */
188 static size_t yaz_write_UTF8 (yaz_iconv_t cd, unsigned long x,
189 char **outbuf, size_t *outbytesleft)
191 unsigned char *outp = (unsigned char *) *outbuf;
/* 1 byte: values up to 0x7f are written as-is */
192 if (x <= 0x7f && *outbytesleft >= 1)
194 *outp++ = (unsigned char) x;
/* 2 bytes: lead marker 0xc0 */
197 else if (x <= 0x7ff && *outbytesleft >= 2)
199 *outp++ = (unsigned char) ((x >> 6) | 0xc0);
200 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
201 (*outbytesleft) -= 2;
/* 3 bytes: lead marker 0xe0 */
203 else if (x <= 0xffff && *outbytesleft >= 3)
205 *outp++ = (unsigned char) ((x >> 12) | 0xe0);
206 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
207 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
208 (*outbytesleft) -= 3;
/* 4 bytes: lead marker 0xf0 */
210 else if (x <= 0x1fffff && *outbytesleft >= 4)
212 *outp++ = (unsigned char) ((x >> 18) | 0xf0);
213 *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
214 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
215 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
216 (*outbytesleft) -= 4;
/* 5 bytes: lead marker 0xf8 */
218 else if (x <= 0x3ffffff && *outbytesleft >= 5)
220 *outp++ = (unsigned char) ((x >> 24) | 0xf8);
221 *outp++ = (unsigned char) (((x >> 18) & 0x3f) | 0x80);
222 *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
223 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
224 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
225 (*outbytesleft) -= 5;
/* 6 bytes: lead marker 0xfc; no upper bound is tested on x here */
227 else if (*outbytesleft >= 6)
229 *outp++ = (unsigned char) ((x >> 30) | 0xfc);
230 *outp++ = (unsigned char) (((x >> 24) & 0x3f) | 0x80);
231 *outp++ = (unsigned char) (((x >> 18) & 0x3f) | 0x80);
232 *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
233 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
234 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
235 (*outbytesleft) -= 6;
239 cd->my_errno = YAZ_ICONV_E2BIG; /* no room for output */
/* publish the advanced write position to the caller */
242 *outbuf = (char *) outp;
/* Built-in writer for ISO-8859-1 output.  Code values outside 1..255 cannot
   be represented and set my_errno to YAZ_ICONV_EILSEQ; otherwise one byte is
   written if *outbytesleft allows it, else my_errno becomes YAZ_ICONV_E2BIG.
   NOTE(review): the *outbytesleft update and the return statements are not
   visible in this excerpt. */
246 static size_t yaz_write_ISO8859_1 (yaz_iconv_t cd, unsigned long x,
247 char **outbuf, size_t *outbytesleft)
249 unsigned char *outp = (unsigned char *) *outbuf;
/* value 0 is rejected as well as anything above 255 */
250 if (x > 255 || x < 1)
252 cd->my_errno = YAZ_ICONV_EILSEQ;
255 else if (*outbytesleft >= 1)
257 *outp++ = (unsigned char) x;
262 cd->my_errno = YAZ_ICONV_E2BIG;
/* publish the advanced write position to the caller */
265 *outbuf = (char *) outp;
270 static size_t yaz_write_UCS4 (yaz_iconv_t cd, unsigned long x,
271 char **outbuf, size_t *outbytesleft)
273 unsigned char *outp = (unsigned char *) *outbuf;
274 if (*outbytesleft >= 4)
276 *outp++ = (unsigned char) (x<<24);
277 *outp++ = (unsigned char) (x<<16);
278 *outp++ = (unsigned char) (x<<8);
279 *outp++ = (unsigned char) x;
280 (*outbytesleft) -= 4;
284 cd->my_errno = YAZ_ICONV_E2BIG;
287 *outbuf = (char *) outp;
291 static size_t yaz_write_UCS4LE (yaz_iconv_t cd, unsigned long x,
292 char **outbuf, size_t *outbytesleft)
294 unsigned char *outp = (unsigned char *) *outbuf;
295 if (*outbytesleft >= 4)
297 *outp++ = (unsigned char) x;
298 *outp++ = (unsigned char) (x<<8);
299 *outp++ = (unsigned char) (x<<16);
300 *outp++ = (unsigned char) (x<<24);
301 (*outbytesleft) -= 4;
305 cd->my_errno = YAZ_ICONV_E2BIG;
308 *outbuf = (char *) outp;
/* Returns non-zero when both the read and the write handler of cd are set,
   i.e. the descriptor has a built-in handler for each direction. */
312 int yaz_iconv_isbuiltin(yaz_iconv_t cd)
314 return cd->read_handle && cd->write_handle;
/* Allocates a conversion descriptor for fromcode -> tocode.  Built-in
   read/write handlers are selected by name (UTF8, ISO88591, UCS4, UCS4LE via
   yaz_matchstr); if either side has no built-in handler, iconv_open is
   tried with the same names.
   NOTE(review): several lines (other field initialisation, handling of the
   '@' prefix, the failure paths and the return) are not visible here. */
317 yaz_iconv_t yaz_iconv_open (const char *tocode, const char *fromcode)
319 yaz_iconv_t cd = (yaz_iconv_t) xmalloc (sizeof(*cd));
321 cd->write_handle = 0;
324 cd->my_errno = YAZ_ICONV_UNKNOWN;
326 /* a useful hack: if fromcode has a leading @,
327 the library will not use YAZ's own conversions .. */
328 if (fromcode[0] == '@')
/* pick the built-in reader; a handler is chosen when yaz_matchstr yields 0 */
332 if (!yaz_matchstr(fromcode, "UTF8"))
334 cd->read_handle = yaz_read_UTF8;
/* UTF-8 is the only source encoding that also installs an init hook */
335 cd->init_handle = yaz_init_UTF8;
337 else if (!yaz_matchstr(fromcode, "ISO88591"))
338 cd->read_handle = yaz_read_ISO8859_1;
339 else if (!yaz_matchstr(fromcode, "UCS4"))
340 cd->read_handle = yaz_read_UCS4;
341 else if (!yaz_matchstr(fromcode, "UCS4LE"))
342 cd->read_handle = yaz_read_UCS4LE;
/* pick the built-in writer the same way */
344 if (!yaz_matchstr(tocode, "UTF8"))
345 cd->write_handle = yaz_write_UTF8;
346 else if (!yaz_matchstr(tocode, "ISO88591"))
347 cd->write_handle = yaz_write_ISO8859_1;
348 else if (!yaz_matchstr (tocode, "UCS4"))
349 cd->write_handle = yaz_write_UCS4;
350 else if (!yaz_matchstr(tocode, "UCS4LE"))
351 cd->write_handle = yaz_write_UCS4LE;
/* at least one direction lacks a built-in handler: try iconv_open */
355 if (!cd->read_handle || !cd->write_handle)
357 cd->iconv_cd = iconv_open (tocode, fromcode);
/* (iconv_t)(-1) is iconv_open's failure value; the handling is not visible */
358 if (cd->iconv_cd == (iconv_t) (-1))
/* NOTE(review): second test of the same condition -- presumably the variant
   used when the system iconv is not compiled in; confirm */
365 if (!cd->read_handle || !cd->write_handle)
/* Converts input from *inbuf to *outbuf with descriptor cd.  The visible
   code shows two paths: a call to the system iconv with its failure turned
   into one of the YAZ_ICONV_* codes in my_errno, and a built-in path that
   runs the init hook and then uses read_handle / write_handle, reducing
   *inbytesleft by the bytes each step consumed.
   NOTE(review): the conditions choosing between the paths, the errno tests,
   the loop construct and all return statements are not visible here. */
375 size_t yaz_iconv (yaz_iconv_t cd, char **inbuf, size_t *inbytesleft,
376 char **outbuf, size_t *outbytesleft)
/* system iconv path */
384 iconv(cd->iconv_cd, inbuf, inbytesleft, outbuf, outbytesleft);
/* (size_t)(-1) signals failure; record a YAZ_ICONV_* code in my_errno */
385 if (r == (size_t)(-1))
390 cd->my_errno = YAZ_ICONV_E2BIG;
393 cd->my_errno = YAZ_ICONV_EINVAL;
396 cd->my_errno = YAZ_ICONV_EILSEQ;
399 cd->my_errno = YAZ_ICONV_UNKNOWN;
/* a null inbuf or null *inbuf is handled separately */
405 if (inbuf == 0 || *inbuf == 0)
408 cd->my_errno = YAZ_ICONV_UNKNOWN;
/* run the init hook on the input buffer */
418 size_t r = (cd->init_handle)(cd, (unsigned char *) *inbuf,
419 *inbytesleft, &no_read);
422 if (cd->my_errno == YAZ_ICONV_EINVAL)
/* account for the bytes the init hook consumed */
427 *inbytesleft -= no_read;
/* input exhausted */
437 if (*inbytesleft == 0)
/* decode a code value, then encode it to the output */
443 x = (cd->read_handle)(cd, (unsigned char *) *inbuf, *inbytesleft,
450 r = (cd->write_handle)(cd, x, outbuf, outbytesleft);
/* account for the bytes the reader consumed */
453 *inbytesleft -= no_read;
/* Takes a conversion descriptor and returns an int.
   NOTE(review): the body is not visible in this excerpt; presumably it
   returns cd->my_errno (one of the YAZ_ICONV_* codes) -- confirm. */
459 int yaz_iconv_error (yaz_iconv_t cd)
/* Shuts down a conversion descriptor.  The visible part closes the system
   iconv handle held in cd->iconv_cd.
   NOTE(review): any guard around that call, the release of cd itself and
   the return value are not visible in this excerpt. */
464 int yaz_iconv_close (yaz_iconv_t cd)
468 iconv_close (cd->iconv_cd);