Fix bug #260: Multi combining MARC-8 chars not converted properly
[yaz-moved-to-github.git] / src / siconv.c
1 /*
2  * Copyright (C) 1995-2005, Index Data ApS
3  * See the file LICENSE for details.
4  *
5  * $Id: siconv.c,v 1.10 2005-02-02 23:26:38 adam Exp $
6  */
7 /**
8  * \file siconv.c
9  * \brief Implements simple ICONV
10  *
11  * This implements an interface similar to that of iconv and
12  * is used by YAZ to interface with iconv (if present).
13  * For systems where iconv is not present, this layer
14  * provides a few important conversions: UTF-8, MARC-8, Latin-1.
15  */
16
17 #if HAVE_CONFIG_H
18 #include <config.h>
19 #endif
20
21 #include <errno.h>
22 #include <string.h>
23 #include <ctype.h>
24 #if HAVE_WCHAR_H
25 #include <wchar.h>
26 #endif
27
28 #if HAVE_ICONV_H
29 #include <iconv.h>
30 #endif
31
32 #include <yaz/yaz-util.h>
33
/*
 * MARC-8 table converters, defined outside this file.  The readers below
 * pick one of them according to the escape mode currently in effect and
 * pass along the input pointer, the remaining byte count, and two output
 * parameters (bytes consumed and a "combining" flag).
 */
unsigned long yaz_marc8_1_conv(unsigned char *inp, size_t inbytesleft,
                               size_t *no_read, int *combining);
unsigned long yaz_marc8_2_conv(unsigned char *inp, size_t inbytesleft,
                               size_t *no_read, int *combining);
unsigned long yaz_marc8_3_conv(unsigned char *inp, size_t inbytesleft,
                               size_t *no_read, int *combining);
unsigned long yaz_marc8_4_conv(unsigned char *inp, size_t inbytesleft,
                               size_t *no_read, int *combining);
unsigned long yaz_marc8_5_conv(unsigned char *inp, size_t inbytesleft,
                               size_t *no_read, int *combining);
unsigned long yaz_marc8_6_conv(unsigned char *inp, size_t inbytesleft,
                               size_t *no_read, int *combining);
unsigned long yaz_marc8_7_conv(unsigned char *inp, size_t inbytesleft,
                               size_t *no_read, int *combining);
unsigned long yaz_marc8_8_conv(unsigned char *inp, size_t inbytesleft,
                               size_t *no_read, int *combining);
unsigned long yaz_marc8_9_conv(unsigned char *inp, size_t inbytesleft,
                               size_t *no_read, int *combining);
52     
53 #define NEW_COMB 1
54
55 struct yaz_iconv_struct {
56     int my_errno;
57     int init_flag;
58     size_t (*init_handle)(yaz_iconv_t cd, unsigned char *inbuf,
59                           size_t inbytesleft, size_t *no_read);
60     unsigned long (*read_handle)(yaz_iconv_t cd, unsigned char *inbuf,
61                                  size_t inbytesleft, size_t *no_read);
62     size_t (*write_handle)(yaz_iconv_t cd, unsigned long x,
63                            char **outbuf, size_t *outbytesleft);
64     int marc8_esc_mode;
65 #if NEW_COMB
66     int comb_offset;
67     int comb_size;
68     unsigned long comb_x[8];
69     size_t comb_no_read[8];
70 #else
71     int marc8_comb_x;
72     int marc8_comb_no_read;
73 #endif
74     size_t no_read_x;
75     unsigned unget_x;
76 #if HAVE_ICONV_H
77     iconv_t iconv_cd;
78 #endif
79 };
80
81 static unsigned long yaz_read_ISO8859_1 (yaz_iconv_t cd, unsigned char *inp,
82                                          size_t inbytesleft, size_t *no_read)
83 {
84     unsigned long x = inp[0];
85     *no_read = 1;
86     return x;
87 }
88
89 static size_t yaz_init_UTF8 (yaz_iconv_t cd, unsigned char *inp,
90                              size_t inbytesleft, size_t *no_read)
91 {
92     if (inp[0] != 0xef)
93     {
94         *no_read = 0;
95         return 0;
96     }
97     if (inbytesleft < 3)
98     {
99         cd->my_errno = YAZ_ICONV_EINVAL;
100         return (size_t) -1;
101     }
102     if (inp[1] != 0xbb || inp[2] != 0xbf)
103     {
104         cd->my_errno = YAZ_ICONV_EILSEQ;
105         return (size_t) -1;
106     }
107     *no_read = 3;
108     return 0;
109 }
110
111 static unsigned long yaz_read_UTF8 (yaz_iconv_t cd, unsigned char *inp,
112                                     size_t inbytesleft, size_t *no_read)
113 {
114     unsigned long x = 0;
115
116     if (inp[0] <= 0x7f)
117     {
118         x = inp[0];
119         *no_read = 1;
120     }
121     else if (inp[0] <= 0xbf || inp[0] >= 0xfe)
122     {
123         *no_read = 0;
124         cd->my_errno = YAZ_ICONV_EILSEQ;
125     }
126     else if (inp[0] <= 0xdf && inbytesleft >= 2)
127     {
128         x = ((inp[0] & 0x1f) << 6) | (inp[1] & 0x3f);
129         if (x >= 0x80)
130             *no_read = 2;
131         else
132         {
133             *no_read = 0;
134             cd->my_errno = YAZ_ICONV_EILSEQ;
135         }
136     }
137     else if (inp[0] <= 0xef && inbytesleft >= 3)
138     {
139         x = ((inp[0] & 0x0f) << 12) | ((inp[1] & 0x3f) << 6) |
140             (inp[1] & 0x3f);
141         if (x >= 0x800)
142             *no_read = 3;
143         else
144         {
145             *no_read = 0;
146             cd->my_errno = YAZ_ICONV_EILSEQ;
147         }
148     }
149     else if (inp[0] <= 0xf7 && inbytesleft >= 4)
150     {
151         x =  ((inp[0] & 0x07) << 18) | ((inp[1] & 0x3f) << 12) |
152             ((inp[2] & 0x3f) << 6) | (inp[3] & 0x3f);
153         if (x >= 0x10000)
154             *no_read = 4;
155         else
156         {
157             *no_read = 0;
158             cd->my_errno = YAZ_ICONV_EILSEQ;
159         }
160     }
161     else if (inp[0] <= 0xfb && inbytesleft >= 5)
162     {
163         x =  ((inp[0] & 0x03) << 24) | ((inp[1] & 0x3f) << 18) |
164             ((inp[2] & 0x3f) << 12) | ((inp[3] & 0x3f) << 6) |
165             (inp[4] & 0x3f);
166         if (x >= 0x200000)
167             *no_read = 5;
168         else
169         {
170             *no_read = 0;
171             cd->my_errno = YAZ_ICONV_EILSEQ;
172         }
173     }
174     else if (inp[0] <= 0xfd && inbytesleft >= 6)
175     {
176         x =  ((inp[0] & 0x01) << 30) | ((inp[1] & 0x3f) << 24) |
177             ((inp[2] & 0x3f) << 18) | ((inp[3] & 0x3f) << 12) |
178             ((inp[4] & 0x3f) << 6) | (inp[5] & 0x3f);
179         if (x >= 0x4000000)
180             *no_read = 6;
181         else
182         {
183             *no_read = 0;
184             cd->my_errno = YAZ_ICONV_EILSEQ;
185         }
186     }
187     else
188     {
189         *no_read = 0;
190         cd->my_errno = YAZ_ICONV_EINVAL;
191     }
192     return x;
193 }
194
195 static unsigned long yaz_read_UCS4 (yaz_iconv_t cd, unsigned char *inp,
196                                     size_t inbytesleft, size_t *no_read)
197 {
198     unsigned long x = 0;
199     
200     if (inbytesleft < 4)
201     {
202         cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
203         *no_read = 0;
204     }
205     else
206     {
207         x = (inp[0]<<24) | (inp[1]<<16) | (inp[2]<<8) | inp[3];
208         *no_read = 4;
209     }
210     return x;
211 }
212
213 static unsigned long yaz_read_UCS4LE (yaz_iconv_t cd, unsigned char *inp,
214                                       size_t inbytesleft, size_t *no_read)
215 {
216     unsigned long x = 0;
217     
218     if (inbytesleft < 4)
219     {
220         cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
221         *no_read = 0;
222     }
223     else
224     {
225         x = (inp[3]<<24) | (inp[2]<<16) | (inp[1]<<8) | inp[0];
226         *no_read = 4;
227     }
228     return x;
229 }
230
231 #if HAVE_WCHAR_H
232 static unsigned long yaz_read_wchar_t (yaz_iconv_t cd, unsigned char *inp,
233                                        size_t inbytesleft, size_t *no_read)
234 {
235     unsigned long x = 0;
236     
237     if (inbytesleft < sizeof(wchar_t))
238     {
239         cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
240         *no_read = 0;
241     }
242     else
243     {
244         wchar_t wch;
245         memcpy (&wch, inp, sizeof(wch));
246         x = wch;
247         *no_read = sizeof(wch);
248     }
249     return x;
250 }
251 #endif
252
253
254 #if NEW_COMB
255 static unsigned long yaz_read_marc8_comb (yaz_iconv_t cd, unsigned char *inp,
256                                           size_t inbytesleft, size_t *no_read,
257                                           int *comb);
258
259 static unsigned long yaz_read_marc8 (yaz_iconv_t cd, unsigned char *inp,
260                                      size_t inbytesleft, size_t *no_read)
261 {
262     unsigned long x;
263     if (cd->comb_offset < cd->comb_size)
264     {
265         *no_read = cd->comb_no_read[cd->comb_offset];
266         x = cd->comb_x[cd->comb_offset];
267         cd->comb_offset++;
268         return x;
269     }
270
271     cd->comb_offset = 0;
272     for (cd->comb_size = 0; cd->comb_size < 8; cd->comb_size++)
273     {
274         int comb = 0;
275         x = yaz_read_marc8_comb(cd, inp, inbytesleft, no_read, &comb);
276         if (!comb || !x)
277             break;
278         cd->comb_x[cd->comb_size] = x;
279         cd->comb_no_read[cd->comb_size] = *no_read;
280         inp += *no_read;
281         inbytesleft = inbytesleft - *no_read;
282     }
283     return x;
284 }
285
286 static unsigned long yaz_read_marc8_comb (yaz_iconv_t cd, unsigned char *inp,
287                                           size_t inbytesleft, size_t *no_read,
288                                           int *comb)
289 {
290     *no_read = 0;
291     while(inbytesleft >= 1 && inp[0] == 27)
292     {
293         size_t inbytesleft0 = inbytesleft;
294         inp++;
295         inbytesleft--;
296         while(inbytesleft > 0 && strchr("(,$!", *inp))
297         {
298             inbytesleft--;
299             inp++;
300         }
301         if (inbytesleft <= 0)
302         {
303             *no_read = 0;
304             cd->my_errno = YAZ_ICONV_EINVAL;
305             return 0;
306         }
307         cd->marc8_esc_mode = *inp++;
308         inbytesleft--;
309         (*no_read) += inbytesleft0 - inbytesleft;
310     }
311     if (inbytesleft <= 0)
312         return 0;
313     else
314     {
315         unsigned long x;
316         *comb = 0;
317         size_t no_read_sub = 0;
318
319         switch(cd->marc8_esc_mode)
320         {
321         case 'B':  /* Basic ASCII */
322         case 'E':  /* ANSEL */
323         case 's':  /* ASCII */
324             x = yaz_marc8_1_conv(inp, inbytesleft, &no_read_sub, comb);
325             break;
326         case 'g':  /* Greek */
327             x = yaz_marc8_2_conv(inp, inbytesleft, &no_read_sub, comb);
328             break;
329         case 'b':  /* Subscripts */
330             x = yaz_marc8_3_conv(inp, inbytesleft, &no_read_sub, comb);
331             break;
332         case 'p':  /* Superscripts */
333             x = yaz_marc8_4_conv(inp, inbytesleft, &no_read_sub, comb);
334             break;
335         case '2':  /* Basic Hebrew */
336             x = yaz_marc8_5_conv(inp, inbytesleft, &no_read_sub, comb);
337             break;
338         case 'N':  /* Basic Cyrillic */
339         case 'Q':  /* Extended Cyrillic */
340             x = yaz_marc8_6_conv(inp, inbytesleft, &no_read_sub, comb);
341             break;
342         case '3':  /* Basic Arabic */
343         case '4':  /* Extended Arabic */
344             x = yaz_marc8_7_conv(inp, inbytesleft, &no_read_sub, comb);
345             break;
346         case 'S':  /* Greek */
347             x = yaz_marc8_8_conv(inp, inbytesleft, &no_read_sub, comb);
348             break;
349         case '1':  /* Chinese, Japanese, Korean (EACC) */
350             x = yaz_marc8_9_conv(inp, inbytesleft, &no_read_sub, comb);
351             break;
352         default:
353             *no_read = 0;
354             cd->my_errno = YAZ_ICONV_EILSEQ;
355             return 0;
356         }
357         *no_read += no_read_sub;
358         return x;
359     }
360 }
361 #else
362 static unsigned long yaz_read_marc8 (yaz_iconv_t cd, unsigned char *inp,
363                                      size_t inbytesleft, size_t *no_read)
364 {
365     if (cd->marc8_comb_x)
366     {
367         unsigned long x = cd->marc8_comb_x;
368         *no_read = cd->marc8_comb_no_read;
369         cd->marc8_comb_x = 0;
370         return x;
371     }
372     *no_read = 0;
373     while(inbytesleft >= 1 && inp[0] == 27)
374     {
375         size_t inbytesleft0 = inbytesleft;
376         inp++;
377         inbytesleft--;
378         while(inbytesleft > 0 && strchr("(,$!", *inp))
379         {
380             inbytesleft--;
381             inp++;
382         }
383         if (inbytesleft <= 0)
384         {
385             *no_read = 0;
386             cd->my_errno = YAZ_ICONV_EINVAL;
387             return 0;
388         }
389         cd->marc8_esc_mode = *inp++;
390         inbytesleft--;
391         (*no_read) += inbytesleft0 - inbytesleft;
392     }
393     if (inbytesleft <= 0)
394         return 0;
395     else
396     {
397         unsigned long x;
398         int comb = 0;
399         size_t no_read_sub = 0;
400
401         switch(cd->marc8_esc_mode)
402         {
403         case 'B':  /* Basic ASCII */
404         case 'E':  /* ANSEL */
405         case 's':  /* ASCII */
406             x = yaz_marc8_1_conv(inp, inbytesleft, &no_read_sub, &comb);
407             break;
408         case 'g':  /* Greek */
409             x = yaz_marc8_2_conv(inp, inbytesleft, &no_read_sub, &comb);
410             break;
411         case 'b':  /* Subscripts */
412             x = yaz_marc8_3_conv(inp, inbytesleft, &no_read_sub, &comb);
413             break;
414         case 'p':  /* Superscripts */
415             x = yaz_marc8_4_conv(inp, inbytesleft, &no_read_sub, &comb);
416             break;
417         case '2':  /* Basic Hebrew */
418             x = yaz_marc8_5_conv(inp, inbytesleft, &no_read_sub, &comb);
419             break;
420         case 'N':  /* Basic Cyrillic */
421         case 'Q':  /* Extended Cyrillic */
422             x = yaz_marc8_6_conv(inp, inbytesleft, &no_read_sub, &comb);
423             break;
424         case '3':  /* Basic Arabic */
425         case '4':  /* Extended Arabic */
426             x = yaz_marc8_7_conv(inp, inbytesleft, &no_read_sub, &comb);
427             break;
428         case 'S':  /* Greek */
429             x = yaz_marc8_8_conv(inp, inbytesleft, &no_read_sub, &comb);
430             break;
431         case '1':  /* Chinese, Japanese, Korean (EACC) */
432             x = yaz_marc8_9_conv(inp, inbytesleft, &no_read_sub, &comb);
433             break;
434         default:
435             *no_read = 0;
436             cd->my_errno = YAZ_ICONV_EILSEQ;
437             return 0;
438         }
439 #if 0
440         printf ("esc mode=%c x=%04lX comb=%d\n", cd->marc8_esc_mode, x, comb);
441 #endif
442         *no_read += no_read_sub;
443
444         if (comb && cd->marc8_comb_x == 0)
445         {
446             size_t tmp_read = 0;
447             unsigned long next_x;
448
449             /* read next char .. */
450             next_x = yaz_read_marc8(cd, inp + *no_read,
451                                     inbytesleft - *no_read, &tmp_read);
452             /* save this x for later .. */
453             cd->marc8_comb_x = x;
454             /* save next read for later .. */
455             cd->marc8_comb_no_read = tmp_read;
456             /* return next x - thereby swap */
457             x = next_x;
458         }
459         return x;
460     }
461 }
462 #endif
463
464 static size_t yaz_write_UTF8 (yaz_iconv_t cd, unsigned long x,
465                               char **outbuf, size_t *outbytesleft)
466 {
467     unsigned char *outp = (unsigned char *) *outbuf;
468     if (x <= 0x7f && *outbytesleft >= 1)
469     {
470         *outp++ = (unsigned char) x;
471         (*outbytesleft)--;
472     } 
473     else if (x <= 0x7ff && *outbytesleft >= 2)
474     {
475         *outp++ = (unsigned char) ((x >> 6) | 0xc0);
476         *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
477         (*outbytesleft) -= 2;
478     }
479     else if (x <= 0xffff && *outbytesleft >= 3)
480     {
481         *outp++ = (unsigned char) ((x >> 12) | 0xe0);
482         *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
483         *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
484         (*outbytesleft) -= 3;
485     }
486     else if (x <= 0x1fffff && *outbytesleft >= 4)
487     {
488         *outp++ = (unsigned char) ((x >> 18) | 0xf0);
489         *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
490         *outp++ = (unsigned char) (((x >> 6)  & 0x3f) | 0x80);
491         *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
492         (*outbytesleft) -= 4;
493     }
494     else if (x <= 0x3ffffff && *outbytesleft >= 5)
495     {
496         *outp++ = (unsigned char) ((x >> 24) | 0xf8);
497         *outp++ = (unsigned char) (((x >> 18) & 0x3f) | 0x80);
498         *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
499         *outp++ = (unsigned char) (((x >> 6)  & 0x3f) | 0x80);
500         *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
501         (*outbytesleft) -= 5;
502     }
503     else if (*outbytesleft >= 6)
504     {
505         *outp++ = (unsigned char) ((x >> 30) | 0xfc);
506         *outp++ = (unsigned char) (((x >> 24) & 0x3f) | 0x80);
507         *outp++ = (unsigned char) (((x >> 18) & 0x3f) | 0x80);
508         *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
509         *outp++ = (unsigned char) (((x >> 6)  & 0x3f) | 0x80);
510         *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
511         (*outbytesleft) -= 6;
512     }
513     else 
514     {
515         cd->my_errno = YAZ_ICONV_E2BIG;  /* not room for output */
516         return (size_t)(-1);
517     }
518     *outbuf = (char *) outp;
519     return 0;
520 }
521
522 static size_t yaz_write_ISO8859_1 (yaz_iconv_t cd, unsigned long x,
523                                    char **outbuf, size_t *outbytesleft)
524 {
525     unsigned char *outp = (unsigned char *) *outbuf;
526     if (x > 255 || x < 1)
527     {
528         cd->my_errno = YAZ_ICONV_EILSEQ;
529         return (size_t) -1;
530     }
531     else if (*outbytesleft >= 1)
532     {
533         *outp++ = (unsigned char) x;
534         (*outbytesleft)--;
535     }
536     else 
537     {
538         cd->my_errno = YAZ_ICONV_E2BIG;
539         return (size_t)(-1);
540     }
541     *outbuf = (char *) outp;
542     return 0;
543 }
544
545
546 static size_t yaz_write_UCS4 (yaz_iconv_t cd, unsigned long x,
547                               char **outbuf, size_t *outbytesleft)
548 {
549     unsigned char *outp = (unsigned char *) *outbuf;
550     if (*outbytesleft >= 4)
551     {
552         *outp++ = (unsigned char) (x>>24);
553         *outp++ = (unsigned char) (x>>16);
554         *outp++ = (unsigned char) (x>>8);
555         *outp++ = (unsigned char) x;
556         (*outbytesleft) -= 4;
557     }
558     else
559     {
560         cd->my_errno = YAZ_ICONV_E2BIG;
561         return (size_t)(-1);
562     }
563     *outbuf = (char *) outp;
564     return 0;
565 }
566
567 static size_t yaz_write_UCS4LE (yaz_iconv_t cd, unsigned long x,
568                                 char **outbuf, size_t *outbytesleft)
569 {
570     unsigned char *outp = (unsigned char *) *outbuf;
571     if (*outbytesleft >= 4)
572     {
573         *outp++ = (unsigned char) x;
574         *outp++ = (unsigned char) (x>>8);
575         *outp++ = (unsigned char) (x>>16);
576         *outp++ = (unsigned char) (x>>24);
577         (*outbytesleft) -= 4;
578     }
579     else
580     {
581         cd->my_errno = YAZ_ICONV_E2BIG;
582         return (size_t)(-1);
583     }
584     *outbuf = (char *) outp;
585     return 0;
586 }
587
588 #if HAVE_WCHAR_H
589 static size_t yaz_write_wchar_t (yaz_iconv_t cd, unsigned long x,
590                                  char **outbuf, size_t *outbytesleft)
591 {
592     unsigned char *outp = (unsigned char *) *outbuf;
593
594     if (*outbytesleft >= sizeof(wchar_t))
595     {
596         wchar_t wch = x;
597         memcpy(outp, &wch, sizeof(wch));
598         outp += sizeof(wch);
599         (*outbytesleft) -= sizeof(wch);
600     }
601     else
602     {
603         cd->my_errno = YAZ_ICONV_E2BIG;
604         return (size_t)(-1);
605     }
606     *outbuf = (char *) outp;
607     return 0;
608 }
609 #endif
610
611 int yaz_iconv_isbuiltin(yaz_iconv_t cd)
612 {
613     return cd->read_handle && cd->write_handle;
614 }
615
616 yaz_iconv_t yaz_iconv_open (const char *tocode, const char *fromcode)
617 {
618     yaz_iconv_t cd = (yaz_iconv_t) xmalloc (sizeof(*cd));
619
620     cd->write_handle = 0;
621     cd->read_handle = 0;
622     cd->init_handle = 0;
623     cd->my_errno = YAZ_ICONV_UNKNOWN;
624     cd->marc8_esc_mode = 'B';
625 #if NEW_COMB
626     cd->comb_offset = cd->comb_size = 0;
627 #else
628     cd->marc8_comb_x = 0;
629 #endif
630
631     /* a useful hack: if fromcode has leading @,
632        the library not use YAZ's own conversions .. */
633     if (fromcode[0] == '@')
634         fromcode++;
635     else
636     {
637         if (!yaz_matchstr(fromcode, "UTF8"))
638         {
639             cd->read_handle = yaz_read_UTF8;
640             cd->init_handle = yaz_init_UTF8;
641         }
642         else if (!yaz_matchstr(fromcode, "ISO88591"))
643             cd->read_handle = yaz_read_ISO8859_1;
644         else if (!yaz_matchstr(fromcode, "UCS4"))
645             cd->read_handle = yaz_read_UCS4;
646         else if (!yaz_matchstr(fromcode, "UCS4LE"))
647             cd->read_handle = yaz_read_UCS4LE;
648         else if (!yaz_matchstr(fromcode, "MARC8"))
649             cd->read_handle = yaz_read_marc8;
650 #if HAVE_WCHAR_H
651         else if (!yaz_matchstr(fromcode, "WCHAR_T"))
652             cd->read_handle = yaz_read_wchar_t;
653 #endif
654         
655         if (!yaz_matchstr(tocode, "UTF8"))
656             cd->write_handle = yaz_write_UTF8;
657         else if (!yaz_matchstr(tocode, "ISO88591"))
658             cd->write_handle = yaz_write_ISO8859_1;
659         else if (!yaz_matchstr (tocode, "UCS4"))
660             cd->write_handle = yaz_write_UCS4;
661         else if (!yaz_matchstr(tocode, "UCS4LE"))
662             cd->write_handle = yaz_write_UCS4LE;
663 #if HAVE_WCHAR_H
664         else if (!yaz_matchstr(tocode, "WCHAR_T"))
665             cd->write_handle = yaz_write_wchar_t;
666 #endif
667     }
668 #if HAVE_ICONV_H
669     cd->iconv_cd = 0;
670     if (!cd->read_handle || !cd->write_handle)
671     {
672         cd->iconv_cd = iconv_open (tocode, fromcode);
673         if (cd->iconv_cd == (iconv_t) (-1))
674         {
675             xfree (cd);
676             return 0;
677         }
678     }
679 #else
680     if (!cd->read_handle || !cd->write_handle)
681     {
682         xfree (cd);
683         return 0;
684     }
685 #endif
686     cd->init_flag = 1;
687     return cd;
688 }
689
690 size_t yaz_iconv(yaz_iconv_t cd, char **inbuf, size_t *inbytesleft,
691                  char **outbuf, size_t *outbytesleft)
692 {
693     char *inbuf0;
694     size_t r = 0;
695 #if HAVE_ICONV_H
696     if (cd->iconv_cd)
697     {
698         size_t r =
699             iconv(cd->iconv_cd, inbuf, inbytesleft, outbuf, outbytesleft);
700         if (r == (size_t)(-1))
701         {
702             switch (yaz_errno())
703             {
704             case E2BIG:
705                 cd->my_errno = YAZ_ICONV_E2BIG;
706                 break;
707             case EINVAL:
708                 cd->my_errno = YAZ_ICONV_EINVAL;
709                 break;
710             case EILSEQ:
711                 cd->my_errno = YAZ_ICONV_EILSEQ;
712                 break;
713             default:
714                 cd->my_errno = YAZ_ICONV_UNKNOWN;
715             }
716         }
717         return r;
718     }
719 #endif
720     if (inbuf == 0 || *inbuf == 0)
721     {
722         cd->init_flag = 1;
723         cd->my_errno = YAZ_ICONV_UNKNOWN;
724         return 0;
725     }
726     inbuf0 = *inbuf;
727
728     if (cd->init_flag)
729     {
730         if (cd->init_handle)
731         {
732             size_t no_read;
733             size_t r = (cd->init_handle)(cd, (unsigned char *) *inbuf,
734                                          *inbytesleft, &no_read);
735             if (r)
736             {
737                 if (cd->my_errno == YAZ_ICONV_EINVAL)
738                     return r;
739                 cd->init_flag = 0;
740                 return r;
741             }
742             *inbytesleft -= no_read;
743             *inbuf += no_read;
744         }
745         cd->init_flag = 0;
746         cd->unget_x = 0;
747         cd->no_read_x = 0;
748     }
749     while (1)
750     {
751         unsigned long x;
752         size_t no_read;
753
754         if (*inbytesleft == 0)
755         {
756             r = *inbuf - inbuf0;
757             break;
758         }
759         if (!cd->unget_x)
760         {
761             x = (cd->read_handle)(cd, (unsigned char *) *inbuf, *inbytesleft,
762                                   &no_read);
763             if (no_read == 0)
764             {
765                 r = (size_t)(-1);
766                 break;
767             }
768         }
769         else
770         {
771             x = cd->unget_x;
772             no_read = cd->no_read_x;
773         }
774         if (x)
775         {
776             r = (cd->write_handle)(cd, x, outbuf, outbytesleft);
777             if (r)
778             {
779                 /* unable to write it. save it because read_handle cannot
780                    rewind .. */
781                 cd->unget_x = x;
782                 cd->no_read_x = no_read;
783                 break;
784             }
785             cd->unget_x = 0;
786         }
787         *inbytesleft -= no_read;
788         (*inbuf) += no_read;
789     }
790     return r;
791 }
792
793 int yaz_iconv_error (yaz_iconv_t cd)
794 {
795     return cd->my_errno;
796 }
797
798 int yaz_iconv_close (yaz_iconv_t cd)
799 {
800 #if HAVE_ICONV_H
801     if (cd->iconv_cd)
802         iconv_close (cd->iconv_cd);
803 #endif
804     xfree (cd);
805     return 0;
806 }
807
808