Re-organized MARC-8 charset creating code a bit.
[yaz-moved-to-github.git] / src / siconv.c
1 /*
2  * Copyright (C) 1995-2007, Index Data ApS
3  * See the file LICENSE for details.
4  *
5  * $Id: siconv.c,v 1.47 2007-10-12 14:22:19 adam Exp $
6  */
7 /**
8  * \file siconv.c
9  * \brief Implements simple ICONV
10  *
11  * This implements an interface similar to that of iconv and
12  * is used by YAZ to interface with iconv (if present).
13  * For systems where iconv is not present, this layer
14  * provides a few important conversions: UTF-8, MARC-8, Latin-1.
15  *
16  * MARC-8 reference:
17  *  http://www.loc.gov/marc/specifications/speccharmarc8.html
18  */
19
20 #if HAVE_CONFIG_H
21 #include <config.h>
22 #endif
23
24 #include <assert.h>
25 #include <errno.h>
26 #include <string.h>
27 #include <ctype.h>
28 #if HAVE_WCHAR_H
29 #include <wchar.h>
30 #endif
31
32 #if HAVE_ICONV_H
33 #include <iconv.h>
34 #endif
35
36
37 #include <yaz/yaz-util.h>
38
/*
 * Prototypes for the per-character-set MARC-8 conversion routines.
 * They are only declared here; their definitions are not in the visible
 * part of this file.  Every routine has the same signature: an input
 * pointer, the number of input bytes available, and two out-parameters
 * (no_read, combining).
 * NOTE(review): the hex suffix (42, 45, 67, ...) presumably names the
 * MARC-8 character set each table handles -- confirm against the
 * definitions.
 */
39 unsigned long yaz_marc8_42_conv(unsigned char *inp, size_t inbytesleft,
40                                size_t *no_read, int *combining);
41 unsigned long yaz_marc8_45_conv(unsigned char *inp, size_t inbytesleft,
42                                size_t *no_read, int *combining);
43 unsigned long yaz_marc8_67_conv(unsigned char *inp, size_t inbytesleft,
44                                size_t *no_read, int *combining);
45 unsigned long yaz_marc8_62_conv(unsigned char *inp, size_t inbytesleft,
46                                size_t *no_read, int *combining);
47 unsigned long yaz_marc8_70_conv(unsigned char *inp, size_t inbytesleft,
48                                size_t *no_read, int *combining);
49 unsigned long yaz_marc8_32_conv(unsigned char *inp, size_t inbytesleft,
50                                size_t *no_read, int *combining);
51 unsigned long yaz_marc8_4E_conv(unsigned char *inp, size_t inbytesleft,
52                                size_t *no_read, int *combining);
53 unsigned long yaz_marc8_51_conv(unsigned char *inp, size_t inbytesleft,
54                                size_t *no_read, int *combining);
55 unsigned long yaz_marc8_33_conv(unsigned char *inp, size_t inbytesleft,
56                                size_t *no_read, int *combining);
57 unsigned long yaz_marc8_34_conv(unsigned char *inp, size_t inbytesleft,
58                                size_t *no_read, int *combining);
59 unsigned long yaz_marc8_53_conv(unsigned char *inp, size_t inbytesleft,
60                                size_t *no_read, int *combining);
61 unsigned long yaz_marc8_31_conv(unsigned char *inp, size_t inbytesleft,
62                                size_t *no_read, int *combining);
63
64
/*
 * Second family with an "r" in the name and the same signature.
 * NOTE(review): presumably the reverse-direction counterparts of the
 * routines above -- confirm against the definitions.
 */
65 unsigned long yaz_marc8r_42_conv(unsigned char *inp, size_t inbytesleft,
66                                  size_t *no_read, int *combining);
67 unsigned long yaz_marc8r_45_conv(unsigned char *inp, size_t inbytesleft,
68                                  size_t *no_read, int *combining);
69 unsigned long yaz_marc8r_67_conv(unsigned char *inp, size_t inbytesleft,
70                                  size_t *no_read, int *combining);
71 unsigned long yaz_marc8r_62_conv(unsigned char *inp, size_t inbytesleft,
72                                  size_t *no_read, int *combining);
73 unsigned long yaz_marc8r_70_conv(unsigned char *inp, size_t inbytesleft,
74                                  size_t *no_read, int *combining);
75 unsigned long yaz_marc8r_32_conv(unsigned char *inp, size_t inbytesleft,
76                                  size_t *no_read, int *combining);
77 unsigned long yaz_marc8r_4E_conv(unsigned char *inp, size_t inbytesleft,
78                                  size_t *no_read, int *combining);
79 unsigned long yaz_marc8r_51_conv(unsigned char *inp, size_t inbytesleft,
80                                  size_t *no_read, int *combining);
81 unsigned long yaz_marc8r_33_conv(unsigned char *inp, size_t inbytesleft,
82                                  size_t *no_read, int *combining);
83 unsigned long yaz_marc8r_34_conv(unsigned char *inp, size_t inbytesleft,
84                                  size_t *no_read, int *combining);
85 unsigned long yaz_marc8r_53_conv(unsigned char *inp, size_t inbytesleft,
86                                  size_t *no_read, int *combining);
87 unsigned long yaz_marc8r_31_conv(unsigned char *inp, size_t inbytesleft,
88                                  size_t *no_read, int *combining);
89
90 #define ESC "\033"
91
/*
 * Per-conversion state behind a yaz_iconv_t handle.  The handlers in
 * this file receive it as "cd" and report failures through my_errno.
 */
92 struct yaz_iconv_struct {
93     int my_errno; /* last error; handlers store YAZ_ICONV_* codes here */
94     int init_flag; /* NOTE(review): use not visible here -- confirm */
    /* Optional start-of-input hook (same signature as yaz_init_UTF8):
       stores the number of leading bytes to skip in *no_read and
       returns 0, or (size_t) -1 with my_errno set. */
95     size_t (*init_handle)(yaz_iconv_t cd, unsigned char *inbuf,
96                           size_t inbytesleft, size_t *no_read);
    /* Decoder (same signature as the yaz_read_* functions): returns
       one character code and stores the bytes consumed in *no_read. */
97     unsigned long (*read_handle)(yaz_iconv_t cd, unsigned char *inbuf,
98                                  size_t inbytesleft, size_t *no_read);
    /* Encoder (same signature as the yaz_write_* functions): writes
       character x at *outbuf, advancing *outbuf and decreasing
       *outbytesleft; returns 0, or (size_t) -1 with my_errno set. */
99     size_t (*write_handle)(yaz_iconv_t cd, unsigned long x,
100                            char **outbuf, size_t *outbytesleft);
    /* NOTE(review): presumably emits output still buffered in this
       state -- no implementation is visible here, confirm. */
101     size_t (*flush_handle)(yaz_iconv_t cd,
102                            char **outbuf, size_t *outbytesleft);
103     int marc8_esc_mode; /* NOTE(review): MARC-8 reader state -- confirm */
104
    /* NOTE(review): the comb_* fields and unget_x look like look-ahead
       buffers for the MARC-8 reader; their use is outside the visible
       code -- confirm. */
105     int comb_offset;
106     int comb_size;
107     unsigned long comb_x[8];
108     size_t comb_no_read[8];
109     size_t no_read_x;
110     unsigned long unget_x;
111 #if HAVE_ICONV_H
112     iconv_t iconv_cd; /* only present when built with <iconv.h> */
113 #endif
114     unsigned long compose_char; /* NOTE(review): use not visible here */
115
    /* NOTE(review): state of the MARC-8 writer; its use is outside the
       visible code -- confirm. */
116     unsigned long write_marc8_comb_ch[8];
117     size_t write_marc8_comb_no;
118     unsigned write_marc8_second_half_char;
119     unsigned long write_marc8_last;
120     const char *write_marc8_g0;
121     const char *write_marc8_g1;
122 };
123
/*
 * Composition table for Latin-1: each entry pairs a base letter (x1)
 * and a combining mark code point (x2) with the single Latin-1 code
 * (y) of the precomposed character named in the entry's comment.
 * The table ends with an all-zero entry.
 * NOTE(review): presumably consulted by the Latin-1 writer; that code
 * is outside the visible part of the file -- confirm.
 */
124 static struct {
125     unsigned long x1, x2;
126     unsigned y;
127 } latin1_comb[] = {
128     { 'A', 0x0300, 0xc0}, /* LATIN CAPITAL LETTER A WITH GRAVE */
129     { 'A', 0x0301, 0xc1}, /* LATIN CAPITAL LETTER A WITH ACUTE */
130     { 'A', 0x0302, 0xc2}, /* LATIN CAPITAL LETTER A WITH CIRCUMFLEX */
131     { 'A', 0x0303, 0xc3}, /* LATIN CAPITAL LETTER A WITH TILDE */
132     { 'A', 0x0308, 0xc4}, /* LATIN CAPITAL LETTER A WITH DIAERESIS */
133     { 'A', 0x030a, 0xc5}, /* LATIN CAPITAL LETTER A WITH RING ABOVE */
134     /* no need for 0xc6      LATIN CAPITAL LETTER AE */
135     { 'C', 0x0327, 0xc7}, /* LATIN CAPITAL LETTER C WITH CEDILLA */
136     { 'E', 0x0300, 0xc8}, /* LATIN CAPITAL LETTER E WITH GRAVE */
137     { 'E', 0x0301, 0xc9}, /* LATIN CAPITAL LETTER E WITH ACUTE */
138     { 'E', 0x0302, 0xca}, /* LATIN CAPITAL LETTER E WITH CIRCUMFLEX */
139     { 'E', 0x0308, 0xcb}, /* LATIN CAPITAL LETTER E WITH DIAERESIS */
140     { 'I', 0x0300, 0xcc}, /* LATIN CAPITAL LETTER I WITH GRAVE */
141     { 'I', 0x0301, 0xcd}, /* LATIN CAPITAL LETTER I WITH ACUTE */
142     { 'I', 0x0302, 0xce}, /* LATIN CAPITAL LETTER I WITH CIRCUMFLEX */
143     { 'I', 0x0308, 0xcf}, /* LATIN CAPITAL LETTER I WITH DIAERESIS */
144     { 'N', 0x0303, 0xd1}, /* LATIN CAPITAL LETTER N WITH TILDE */
145     { 'O', 0x0300, 0xd2}, /* LATIN CAPITAL LETTER O WITH GRAVE */
146     { 'O', 0x0301, 0xd3}, /* LATIN CAPITAL LETTER O WITH ACUTE */
147     { 'O', 0x0302, 0xd4}, /* LATIN CAPITAL LETTER O WITH CIRCUMFLEX */
148     { 'O', 0x0303, 0xd5}, /* LATIN CAPITAL LETTER O WITH TILDE */
149     { 'O', 0x0308, 0xd6}, /* LATIN CAPITAL LETTER O WITH DIAERESIS */
150     /* omitted:    0xd7      MULTIPLICATION SIGN */
151     /* omitted:    0xd8      LATIN CAPITAL LETTER O WITH STROKE */
152     { 'U', 0x0300, 0xd9}, /* LATIN CAPITAL LETTER U WITH GRAVE */
153     { 'U', 0x0301, 0xda}, /* LATIN CAPITAL LETTER U WITH ACUTE */
154     { 'U', 0x0302, 0xdb}, /* LATIN CAPITAL LETTER U WITH CIRCUMFLEX */
155     { 'U', 0x0308, 0xdc}, /* LATIN CAPITAL LETTER U WITH DIAERESIS */
156     { 'Y', 0x0301, 0xdd}, /* LATIN CAPITAL LETTER Y WITH ACUTE */
157     /* omitted:    0xde      LATIN CAPITAL LETTER THORN */
158     /* omitted:    0xdf      LATIN SMALL LETTER SHARP S */
159     { 'a', 0x0300, 0xe0}, /* LATIN SMALL LETTER A WITH GRAVE */
160     { 'a', 0x0301, 0xe1}, /* LATIN SMALL LETTER A WITH ACUTE */
161     { 'a', 0x0302, 0xe2}, /* LATIN SMALL LETTER A WITH CIRCUMFLEX */
162     { 'a', 0x0303, 0xe3}, /* LATIN SMALL LETTER A WITH TILDE */
163     { 'a', 0x0308, 0xe4}, /* LATIN SMALL LETTER A WITH DIAERESIS */
164     { 'a', 0x030a, 0xe5}, /* LATIN SMALL LETTER A WITH RING ABOVE */
165     /* omitted:    0xe6      LATIN SMALL LETTER AE */
166     { 'c', 0x0327, 0xe7}, /* LATIN SMALL LETTER C WITH CEDILLA */
167     { 'e', 0x0300, 0xe8}, /* LATIN SMALL LETTER E WITH GRAVE */
168     { 'e', 0x0301, 0xe9}, /* LATIN SMALL LETTER E WITH ACUTE */
169     { 'e', 0x0302, 0xea}, /* LATIN SMALL LETTER E WITH CIRCUMFLEX */
170     { 'e', 0x0308, 0xeb}, /* LATIN SMALL LETTER E WITH DIAERESIS */
171     { 'i', 0x0300, 0xec}, /* LATIN SMALL LETTER I WITH GRAVE */
172     { 'i', 0x0301, 0xed}, /* LATIN SMALL LETTER I WITH ACUTE */
173     { 'i', 0x0302, 0xee}, /* LATIN SMALL LETTER I WITH CIRCUMFLEX */
174     { 'i', 0x0308, 0xef}, /* LATIN SMALL LETTER I WITH DIAERESIS */
175     /* omitted:    0xf0      LATIN SMALL LETTER ETH */
176     { 'n', 0x0303, 0xf1}, /* LATIN SMALL LETTER N WITH TILDE */
177     { 'o', 0x0300, 0xf2}, /* LATIN SMALL LETTER O WITH GRAVE */
178     { 'o', 0x0301, 0xf3}, /* LATIN SMALL LETTER O WITH ACUTE */
179     { 'o', 0x0302, 0xf4}, /* LATIN SMALL LETTER O WITH CIRCUMFLEX */
180     { 'o', 0x0303, 0xf5}, /* LATIN SMALL LETTER O WITH TILDE */
181     { 'o', 0x0308, 0xf6}, /* LATIN SMALL LETTER O WITH DIAERESIS */
182     /* omitted:    0xf7      DIVISION SIGN */
183     /* omitted:    0xf8      LATIN SMALL LETTER O WITH STROKE */
184     { 'u', 0x0300, 0xf9}, /* LATIN SMALL LETTER U WITH GRAVE */
185     { 'u', 0x0301, 0xfa}, /* LATIN SMALL LETTER U WITH ACUTE */
186     { 'u', 0x0302, 0xfb}, /* LATIN SMALL LETTER U WITH CIRCUMFLEX */
187     { 'u', 0x0308, 0xfc}, /* LATIN SMALL LETTER U WITH DIAERESIS */
188     { 'y', 0x0301, 0xfd}, /* LATIN SMALL LETTER Y WITH ACUTE */
189     /* omitted:    0xfe      LATIN SMALL LETTER THORN */
190     { 'y', 0x0308, 0xff}, /* LATIN SMALL LETTER Y WITH DIAERESIS */
191     
192     { 0, 0, 0} /* end-of-table sentinel */
193 };
194
195 static unsigned long yaz_read_ISO8859_1 (yaz_iconv_t cd, unsigned char *inp,
196                                          size_t inbytesleft, size_t *no_read)
197 {
198     unsigned long x = inp[0];
199     *no_read = 1;
200     return x;
201 }
202
203
204 static size_t yaz_init_UTF8 (yaz_iconv_t cd, unsigned char *inp,
205                              size_t inbytesleft, size_t *no_read)
206 {
207     if (inp[0] != 0xef)
208     {
209         *no_read = 0;
210         return 0;
211     }
212     if (inbytesleft < 3)
213     {
214         cd->my_errno = YAZ_ICONV_EINVAL;
215         return (size_t) -1;
216     }
217     if (inp[1] != 0xbb && inp[2] == 0xbf)
218         *no_read = 3;
219     else
220         *no_read = 0;
221     return 0;
222 }
223
224 unsigned long yaz_read_UTF8_char(unsigned char *inp,
225                                  size_t inbytesleft, size_t *no_read,
226                                  int *error)
227 {
228     unsigned long x = 0;
229
230     *no_read = 0; /* by default */
231     if (inp[0] <= 0x7f)
232     {
233         x = inp[0];
234         *no_read = 1;
235     }
236     else if (inp[0] <= 0xbf || inp[0] >= 0xfe)
237     {
238         *error = YAZ_ICONV_EILSEQ;
239     }
240     else if (inp[0] <= 0xdf && inbytesleft >= 2)
241     {
242         if ((inp[1] & 0xc0) == 0x80)
243         {
244             x = ((inp[0] & 0x1f) << 6) | (inp[1] & 0x3f);
245             if (x >= 0x80)
246                 *no_read = 2;
247             else
248                 *error = YAZ_ICONV_EILSEQ;
249         }
250         else
251             *error = YAZ_ICONV_EILSEQ;
252     }
253     else if (inp[0] <= 0xef && inbytesleft >= 3)
254     {
255         if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80)
256         {
257             x = ((inp[0] & 0x0f) << 12) | ((inp[1] & 0x3f) << 6) |
258                 (inp[2] & 0x3f);
259             if (x >= 0x800)
260                 *no_read = 3;
261             else
262                 *error = YAZ_ICONV_EILSEQ;
263         }
264         else
265             *error = YAZ_ICONV_EILSEQ;
266     }            
267     else if (inp[0] <= 0xf7 && inbytesleft >= 4)
268     {
269         if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80
270             && (inp[3] & 0xc0) == 0x80)
271         {
272             x = ((inp[0] & 0x07) << 18) | ((inp[1] & 0x3f) << 12) |
273                 ((inp[2] & 0x3f) << 6) | (inp[3] & 0x3f);
274             if (x >= 0x10000)
275                 *no_read = 4;
276             else
277                 *error = YAZ_ICONV_EILSEQ;
278         }
279         else
280             *error = YAZ_ICONV_EILSEQ;
281     }
282     else if (inp[0] <= 0xfb && inbytesleft >= 5)
283     {
284         if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80
285             && (inp[3] & 0xc0) == 0x80 && (inp[4] & 0xc0) == 0x80)
286         {
287             x = ((inp[0] & 0x03) << 24) | ((inp[1] & 0x3f) << 18) |
288                 ((inp[2] & 0x3f) << 12) | ((inp[3] & 0x3f) << 6) |
289                 (inp[4] & 0x3f);
290             if (x >= 0x200000)
291                 *no_read = 5;
292             else
293                 *error = YAZ_ICONV_EILSEQ;
294         }
295         else
296             *error = YAZ_ICONV_EILSEQ;
297     }
298     else if (inp[0] <= 0xfd && inbytesleft >= 6)
299     {
300         if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80
301             && (inp[3] & 0xc0) == 0x80 && (inp[4] & 0xc0) == 0x80
302             && (inp[5] & 0xc0) == 0x80)
303         {
304             x = ((inp[0] & 0x01) << 30) | ((inp[1] & 0x3f) << 24) |
305                 ((inp[2] & 0x3f) << 18) | ((inp[3] & 0x3f) << 12) |
306                 ((inp[4] & 0x3f) << 6) | (inp[5] & 0x3f);
307             if (x >= 0x4000000)
308                 *no_read = 6;
309             else
310                 *error = YAZ_ICONV_EILSEQ;
311         }
312         else
313             *error = YAZ_ICONV_EILSEQ;
314     }
315     else
316         *error = YAZ_ICONV_EINVAL;  /* incomplete sentence */
317
318     return x;
319 }
320
321 static unsigned long yaz_read_UTF8 (yaz_iconv_t cd, unsigned char *inp,
322                                     size_t inbytesleft, size_t *no_read)
323 {
324     return yaz_read_UTF8_char(inp, inbytesleft, no_read, &cd->my_errno);
325 }
326
327 static unsigned long yaz_read_UCS4 (yaz_iconv_t cd, unsigned char *inp,
328                                     size_t inbytesleft, size_t *no_read)
329 {
330     unsigned long x = 0;
331     
332     if (inbytesleft < 4)
333     {
334         cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
335         *no_read = 0;
336     }
337     else
338     {
339         x = (inp[0]<<24) | (inp[1]<<16) | (inp[2]<<8) | inp[3];
340         *no_read = 4;
341     }
342     return x;
343 }
344
345 static unsigned long yaz_read_UCS4LE (yaz_iconv_t cd, unsigned char *inp,
346                                       size_t inbytesleft, size_t *no_read)
347 {
348     unsigned long x = 0;
349     
350     if (inbytesleft < 4)
351     {
352         cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
353         *no_read = 0;
354     }
355     else
356     {
357         x = (inp[3]<<24) | (inp[2]<<16) | (inp[1]<<8) | inp[0];
358         *no_read = 4;
359     }
360     return x;
361 }
362
363 #if HAVE_WCHAR_H
364 static unsigned long yaz_read_wchar_t (yaz_iconv_t cd, unsigned char *inp,
365                                        size_t inbytesleft, size_t *no_read)
366 {
367     unsigned long x = 0;
368     
369     if (inbytesleft < sizeof(wchar_t))
370     {
371         cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
372         *no_read = 0;
373     }
374     else
375     {
376         wchar_t wch;
377         memcpy (&wch, inp, sizeof(wch));
378         x = wch;
379         *no_read = sizeof(wch);
380     }
381     return x;
382 }
383 #endif
384
385 static unsigned long yaz_read_iso5428_1984(yaz_iconv_t cd, unsigned char *inp,
386                                            size_t inbytesleft, size_t *no_read)
387 {
388     unsigned long x = 0;
389     int tonos = 0;
390     int dialitika = 0;
391
392     *no_read = 0;
393     while (inbytesleft > 0)
394     {
395         if (*inp == 0xa2)
396         {
397             tonos = 1;
398         }
399         else if (*inp == 0xa3)
400         {
401             dialitika = 1;
402         }
403         else
404             break;
405         inp++;
406         --inbytesleft;
407         (*no_read)++;
408     }    
409     if (inbytesleft == 0)
410     {
411         cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
412         *no_read = 0;
413         return 0;
414     }
415     switch (*inp) {
416     case 0xe1: /*  alpha small */
417             if (tonos) 
418                 x = 0x03ac;
419             else 
420                 x = 0x03b1;
421             break;
422     case 0xc1: /*  alpha capital */
423             if (tonos) 
424                 x = 0x0386;
425             else 
426                 x = 0x0391;
427             break;
428
429     case 0xe2: /*  Beta small */
430             x = 0x03b2;
431             break;
432     case 0xc2: /*  Beta capital */
433             x = 0x0392;
434             break;
435
436     case 0xe4: /*  Gamma small */
437             x = 0x03b3;
438             break;
439     case 0xc4: /*  Gamma capital */
440             x = 0x0393;
441             break;
442
443     case 0xe5: /*  Delta small */
444             x = 0x03b4;
445             break;
446     case 0xc5: /*  Delta capital */
447             x = 0x0394;
448             break;
449     case 0xe6: /*  epsilon small */
450             if (tonos) 
451                 x = 0x03ad;
452             else 
453                 x = 0x03b5;
454             break;
455     case 0xc6: /*  epsilon capital */
456             if (tonos) 
457                 x = 0x0388;
458             else 
459                 x = 0x0395;
460             break;
461     case 0xe9: /*  Zeta small */
462             x = 0x03b6;
463             break;
464     case 0xc9: /*  Zeta capital */
465             x = 0x0396;
466             break;
467     case 0xea: /*  Eta small */
468             if (tonos) 
469                 x = 0x03ae;
470             else 
471                 x = 0x03b7;
472             break;
473     case 0xca: /*  Eta capital */
474             if (tonos) 
475                 x = 0x0389;
476             else 
477                 x = 0x0397;
478             break;
479     case 0xeb: /*  Theta small */
480             x = 0x03b8;
481             break;
482     case 0xcb: /*  Theta capital */
483             x = 0x0398;
484             break;
485     case 0xec: /*  Iota small */
486             if (tonos) 
487                 if (dialitika) 
488                     x = 0x0390;
489                 else 
490                     x = 0x03af;
491             else 
492                 if (dialitika) 
493                     x = 0x03ca;
494                 else 
495                     x = 0x03b9;
496             break;
497     case 0xcc: /*  Iota capital */
498             if (tonos) 
499                 x = 0x038a;
500             else 
501                 if (dialitika) 
502                     x = 0x03aa;
503                 else 
504                     x = 0x0399;
505             break;
506     case 0xed: /*  Kappa small */
507             x = 0x03ba;
508             break;
509     case 0xcd: /*  Kappa capital */
510             x = 0x039a;
511             break;
512     case 0xee: /*  Lambda small */
513             x = 0x03bb;
514             break;
515     case 0xce: /*  Lambda capital */
516             x = 0x039b;
517             break;
518     case 0xef: /*  Mu small */
519             x = 0x03bc;
520             break;
521     case 0xcf: /*  Mu capital */
522             x = 0x039c;
523             break;
524     case 0xf0: /*  Nu small */
525             x = 0x03bd;
526             break;
527     case 0xd0: /*  Nu capital */
528             x = 0x039d;
529             break;
530     case 0xf1: /*  Xi small */
531             x = 0x03be;
532             break;
533     case 0xd1: /*  Xi capital */
534             x = 0x039e;
535             break;
536     case 0xf2: /*  Omicron small */
537             if (tonos) 
538                 x = 0x03cc;
539             else 
540                 x = 0x03bf;
541             break;
542     case 0xd2: /*  Omicron capital */
543             if (tonos) 
544                 x = 0x038c;
545             else 
546                 x = 0x039f;
547             break;
548     case 0xf3: /*  Pi small */
549             x = 0x03c0;
550             break;
551     case 0xd3: /*  Pi capital */
552             x = 0x03a0;
553             break;
554     case 0xf5: /*  Rho small */
555             x = 0x03c1;
556             break;
557     case 0xd5: /*  Rho capital */
558             x = 0x03a1;
559             break;
560     case 0xf7: /*  Sigma small (end of words) */
561             x = 0x03c2;
562             break;
563     case 0xf6: /*  Sigma small */
564             x = 0x03c3;
565             break;
566     case 0xd6: /*  Sigma capital */
567             x = 0x03a3;
568             break;
569     case 0xf8: /*  Tau small */
570             x = 0x03c4;
571             break;
572     case 0xd8: /*  Tau capital */
573             x = 0x03a4;
574             break;
575     case 0xf9: /*  Upsilon small */
576             if (tonos) 
577                 if (dialitika) 
578                     x = 0x03b0;
579                 else 
580                     x = 0x03cd;
581             else 
582                 if (dialitika) 
583                     x = 0x03cb;
584                 else 
585                     x = 0x03c5;
586             break;
587     case 0xd9: /*  Upsilon capital */
588             if (tonos) 
589                 x = 0x038e;
590             else 
591                 if (dialitika) 
592                     x = 0x03ab;
593                 else 
594                     x = 0x03a5;
595             break;
596     case 0xfa: /*  Phi small */
597             x = 0x03c6;
598             break;
599     case 0xda: /*  Phi capital */
600             x = 0x03a6;
601             break;
602     case 0xfb: /*  Chi small */
603             x = 0x03c7;
604             break;
605     case 0xdb: /*  Chi capital */
606             x = 0x03a7;
607             break;
608     case 0xfc: /*  Psi small */
609             x = 0x03c8;
610             break;
611     case 0xdc: /*  Psi capital */
612             x = 0x03a8;
613             break;
614     case 0xfd: /*  Omega small */
615             if (tonos) 
616                 x = 0x03ce;
617             else 
618                 x = 0x03c9;
619             break;
620     case 0xdd: /*  Omega capital */
621             if (tonos) 
622                 x = 0x038f;
623             else 
624                 x = 0x03a9;
625             break;
626     default:
627         x = *inp;
628         break;
629     }
630     (*no_read)++;
631     
632     return x;
633 }
634
635 static size_t yaz_write_iso5428_1984(yaz_iconv_t cd, unsigned long x,
636                                      char **outbuf, size_t *outbytesleft)
637 {
638     size_t k = 0;
639     unsigned char *out = (unsigned char*) *outbuf;
640     if (*outbytesleft < 3)
641     {
642         cd->my_errno = YAZ_ICONV_E2BIG;  /* not room for output */
643         return (size_t)(-1);
644     }
645     switch (x)
646     {
647     case 0x03ac : out[k++]=0xa2; out[k++]=0xe1; break;
648     case 0x03b1 : out[k++]=0xe1; break;
649     case 0x0386 : out[k++]=0xa2; out[k++]=0xc1; break;
650     case 0x0391 : out[k++]=0xc1; break;
651     case 0x03b2 : out[k++]=0xe2; break;
652     case 0x0392 : out[k++]=0xc2; break;
653     case 0x03b3 : out[k++]=0xe4; break;
654     case 0x0393 : out[k++]=0xc4; break;
655     case 0x03b4 : out[k++]=0xe5; break;
656     case 0x0394 : out[k++]=0xc5; break;
657     case 0x03ad : out[k++]=0xa2; out[k++]=0xe6; break;
658     case 0x03b5 : out[k++]=0xe6; break;
659     case 0x0388 : out[k++]=0xa2; out[k++]=0xc6; break;
660     case 0x0395 : out[k++]=0xc6; break;
661     case 0x03b6 : out[k++]=0xe9; break;
662     case 0x0396 : out[k++]=0xc9; break;
663     case 0x03ae : out[k++]=0xa2; out[k++]=0xea; break;
664     case 0x03b7 : out[k++]=0xea; break;
665     case 0x0389 : out[k++]=0xa2; out[k++]=0xca; break;
666     case 0x0397 : out[k++]=0xca; break;
667     case 0x03b8 : out[k++]=0xeb; break;
668     case 0x0398 : out[k++]=0xcb; break;
669     case 0x0390 : out[k++]=0xa2; out[k++]=0xa3; out[k++]=0xec; break;
670     case 0x03af : out[k++]=0xa2; out[k++]=0xec; break;
671     case 0x03ca : out[k++]=0xa3; out[k++]=0xec; break;
672     case 0x03b9 : out[k++]=0xec; break;
673     case 0x038a : out[k++]=0xa2; out[k++]=0xcc; break;
674     case 0x03aa : out[k++]=0xa3; out[k++]=0xcc; break;
675     case 0x0399 : out[k++]=0xcc; break;
676     case 0x03ba : out[k++]=0xed; break;
677     case 0x039a : out[k++]=0xcd; break;
678     case 0x03bb : out[k++]=0xee; break;
679     case 0x039b : out[k++]=0xce; break;
680     case 0x03bc : out[k++]=0xef; break;
681     case 0x039c : out[k++]=0xcf; break;
682     case 0x03bd : out[k++]=0xf0; break;
683     case 0x039d : out[k++]=0xd0; break;
684     case 0x03be : out[k++]=0xf1; break;
685     case 0x039e : out[k++]=0xd1; break;
686     case 0x03cc : out[k++]=0xa2; out[k++]=0xf2; break;
687     case 0x03bf : out[k++]=0xf2; break;
688     case 0x038c : out[k++]=0xa2; out[k++]=0xd2; break;
689     case 0x039f : out[k++]=0xd2; break;
690     case 0x03c0 : out[k++]=0xf3; break;
691     case 0x03a0 : out[k++]=0xd3; break;
692     case 0x03c1 : out[k++]=0xf5; break;
693     case 0x03a1 : out[k++]=0xd5; break;
694     case 0x03c2 : out[k++]=0xf7; break;
695     case 0x03c3 : out[k++]=0xf6; break;
696     case 0x03a3 : out[k++]=0xd6; break;
697     case 0x03c4 : out[k++]=0xf8; break;
698     case 0x03a4 : out[k++]=0xd8; break;
699     case 0x03b0 : out[k++]=0xa2; out[k++]=0xa3; out[k++]=0xf9; break;
700     case 0x03cd : out[k++]=0xa2; out[k++]=0xf9; break;
701     case 0x03cb : out[k++]=0xa3; out[k++]=0xf9; break;
702     case 0x03c5 : out[k++]=0xf9; break;
703     case 0x038e : out[k++]=0xa2; out[k++]=0xd9; break;
704     case 0x03ab : out[k++]=0xa3; out[k++]=0xd9; break;
705     case 0x03a5 : out[k++]=0xd9; break;
706     case 0x03c6 : out[k++]=0xfa; break;
707     case 0x03a6 : out[k++]=0xda; break;
708     case 0x03c7 : out[k++]=0xfb; break;
709     case 0x03a7 : out[k++]=0xdb; break;
710     case 0x03c8 : out[k++]=0xfc; break;
711     case 0x03a8 : out[k++]=0xdc; break;
712     case 0x03ce : out[k++]=0xa2; out[k++]=0xfd; break;
713     case 0x03c9 : out[k++]=0xfd; break;
714     case 0x038f : out[k++]=0xa2; out[k++]=0xdd; break;
715     case 0x03a9 : out[k++]=0xdd; break;
716     default:
717         if (x > 255)
718         {
719             cd->my_errno = YAZ_ICONV_EILSEQ;
720             return (size_t) -1;
721         }
722         out[k++] = x;
723         break;
724     }
725     *outbytesleft -= k;
726     (*outbuf) += k;
727     return 0;
728 }
729
730 static unsigned long yaz_read_advancegreek(yaz_iconv_t cd, unsigned char *inp,
731                                            size_t inbytesleft, size_t *no_read)
732 {
733     unsigned long x = 0;
734     int shift = 0;
735     int tonos = 0;
736     int dialitika = 0;
737
738     *no_read = 0;
739     while (inbytesleft > 0)
740     {
741         if (*inp == 0x9d)
742         {
743             tonos = 1;
744         }
745         else if (*inp == 0x9e)
746         {
747             dialitika = 1;
748         }
749         else if (*inp == 0x9f)
750         {
751             shift = 1;
752         }
753         else
754             break;
755         inp++;
756         --inbytesleft;
757         (*no_read)++;
758     }    
759     if (inbytesleft == 0)
760     {
761         cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
762         *no_read = 0;
763         return 0;
764     }
765     switch (*inp) {
766     case 0x81:
767         if (shift) 
768             if (tonos) 
769                 x = 0x0386;
770             else 
771                 x = 0x0391;
772         else 
773             if (tonos) 
774                 x = 0x03ac;
775             else 
776                 x = 0x03b1;
777         break;
778     case 0x82:
779         if (shift) 
780             x = 0x0392;
781         else 
782             x = 0x03b2;
783         
784         break;
785     case 0x83:
786         if (shift) 
787             x = 0x0393;
788         else 
789             x = 0x03b3;
790         break;
791     case 0x84:
792         if (shift) 
793             x = 0x0394;
794         else 
795             x = 0x03b4;
796         break;
797     case 0x85:
798         if (shift) 
799             if (tonos) 
800                 x = 0x0388;
801             else 
802                 x = 0x0395;
803         else 
804             if (tonos) 
805                 x = 0x03ad;
806             else 
807                 x = 0x03b5;
808         break;
809     case 0x86:
810         if (shift) 
811             x = 0x0396;
812         else 
813             x = 0x03b6;
814         break;
815     case 0x87:
816         if (shift) 
817             if (tonos) 
818                 x = 0x0389;
819             else 
820                 x = 0x0397;
821         else 
822             if (tonos) 
823                 x = 0x03ae;
824             else 
825                 x = 0x03b7;
826         break;
827     case 0x88:
828         if (shift) 
829             x = 0x0398;
830         else 
831             x = 0x03b8;
832         break;
833     case 0x89:
834         if (shift) 
835             if (tonos) 
836                 x = 0x038a;
837             else 
838                 if (dialitika) 
839                     x = 0x03aa;
840                 else 
841                     x = 0x0399;
842         else 
843             if (tonos) 
844                 if (dialitika) 
845                     x = 0x0390;
846                 else 
847                     x = 0x03af;
848         
849             else 
850                 if (dialitika) 
851                     x = 0x03ca;
852                 else 
853                     x = 0x03b9;
854         break;
855     case 0x8a:
856         if (shift) 
857             x = 0x039a;
858         else 
859             x = 0x03ba;
860         
861         break;
862     case 0x8b:
863         if (shift) 
864             x = 0x039b;
865         else 
866             x = 0x03bb;
867         break;
868     case 0x8c:
869         if (shift) 
870             x = 0x039c;
871         else 
872             x = 0x03bc;
873         
874         break;
875     case 0x8d:
876         if (shift) 
877             x = 0x039d;
878         else 
879             x = 0x03bd;
880         break;
881     case 0x8e:
882         if (shift) 
883             x = 0x039e;
884         else 
885             x = 0x03be;
886         break;
887     case 0x8f:
888         if (shift) 
889             if (tonos) 
890                 x = 0x038c;
891             else 
892                 x = 0x039f;
893         else 
894             if (tonos) 
895                 x = 0x03cc;
896             else 
897                 x = 0x03bf;
898         break;
899     case 0x90:
900         if (shift) 
901             x = 0x03a0;
902         else 
903             x = 0x03c0;
904         break;
905     case 0x91:
906         if (shift) 
907             x = 0x03a1;
908         else 
909             x = 0x03c1;
910         break;
911     case 0x92:
912         x = 0x03c2;
913         break;
914     case 0x93:
915         if (shift) 
916             x = 0x03a3;
917         else 
918             x = 0x03c3;
919         break;
920     case 0x94:
921         if (shift) 
922             x = 0x03a4;
923         else 
924             x = 0x03c4;
925         break;
926     case 0x95:
927         if (shift) 
928             if (tonos) 
929                 x = 0x038e;
930             else 
931                 if (dialitika) 
932                     x = 0x03ab;
933                 else 
934                     x = 0x03a5;
935         else 
936             if (tonos) 
937                 if (dialitika) 
938                     x = 0x03b0;
939                 else 
940                     x = 0x03cd;
941         
942             else 
943                 if (dialitika) 
944                     x = 0x03cb;
945                 else 
946                     x = 0x03c5;
947         break;
948     case 0x96:
949         if (shift) 
950             x = 0x03a6;
951         else 
952             x = 0x03c6;
953         break;
954     case 0x97:
955         if (shift) 
956             x = 0x03a7;
957         else 
958             x = 0x03c7;
959         break;
960     case 0x98:
961         if (shift) 
962             x = 0x03a8;
963         else 
964             x = 0x03c8;
965         
966         break;
967         
968     case 0x99:
969         if (shift) 
970             if (tonos) 
971                 x = 0x038f;
972             else 
973                 x = 0x03a9;
974         else 
975             if (tonos) 
976                 x = 0x03ce;
977             else 
978                 x = 0x03c9;
979         break;
980     default:
981         x = *inp;
982         break;
983     }
984     (*no_read)++;
985     
986     return x;
987 }
988
989 static size_t yaz_write_advancegreek(yaz_iconv_t cd, unsigned long x,
990                                      char **outbuf, size_t *outbytesleft)
991 {
992     size_t k = 0;
993     unsigned char *out = (unsigned char*) *outbuf;
994     if (*outbytesleft < 3)
995     {
996         cd->my_errno = YAZ_ICONV_E2BIG;  /* not room for output */
997         return (size_t)(-1);
998     }
999     switch (x)
1000     {
1001     case 0x03ac : out[k++]=0x9d; out[k++]=0x81; break;
1002     case 0x03ad : out[k++]=0x9d; out[k++]=0x85; break;
1003     case 0x03ae : out[k++]=0x9d; out[k++]=0x87; break;
1004     case 0x03af : out[k++]=0x9d; out[k++]=0x89; break;
1005     case 0x03cc : out[k++]=0x9d; out[k++]=0x8f; break;
1006     case 0x03cd : out[k++]=0x9d; out[k++]=0x95; break;
1007     case 0x03ce : out[k++]=0x9d; out[k++]=0x99; break;
1008     case 0x0390 : out[k++]=0x9d; out[k++]=0x9e; out[k++]=0x89; break;
1009     case 0x03b0 : out[k++]=0x9d; out[k++]=0x9e; out[k++]=0x95; break;
1010     case 0x0386 : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x81; break;
1011     case 0x0388 : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x85; break;
1012     case 0x0389 : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x87; break;
1013     case 0x038a : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x89; break;
1014     case 0x038c : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x8f; break;
1015     case 0x038e : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x95; break;
1016     case 0x038f : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x99; break;
1017     case 0x03ca : out[k++]=0x9e; out[k++]=0x89; break;
1018     case 0x03cb : out[k++]=0x9e; out[k++]=0x95; break;
1019     case 0x03aa : out[k++]=0x9e; out[k++]=0x9f; out[k++]=0x89; break;
1020     case 0x03ab : out[k++]=0x9e; out[k++]=0x9f; out[k++]=0x95; break;
1021     case 0x0391 : out[k++]=0x9f; out[k++]=0x81; break;
1022     case 0x0392 : out[k++]=0x9f; out[k++]=0x82; break;
1023     case 0x0393 : out[k++]=0x9f; out[k++]=0x83; break;
1024     case 0x0394 : out[k++]=0x9f; out[k++]=0x84; break;
1025     case 0x0395 : out[k++]=0x9f; out[k++]=0x85; break;
1026     case 0x0396 : out[k++]=0x9f; out[k++]=0x86; break;
1027     case 0x0397 : out[k++]=0x9f; out[k++]=0x87; break;
1028     case 0x0398 : out[k++]=0x9f; out[k++]=0x88; break;
1029     case 0x0399 : out[k++]=0x9f; out[k++]=0x89; break;
1030     case 0x039a : out[k++]=0x9f; out[k++]=0x8a; break;
1031     case 0x039b : out[k++]=0x9f; out[k++]=0x8b; break;
1032     case 0x039c : out[k++]=0x9f; out[k++]=0x8c; break;
1033     case 0x039d : out[k++]=0x9f; out[k++]=0x8d; break;
1034     case 0x039e : out[k++]=0x9f; out[k++]=0x8e; break;
1035     case 0x039f : out[k++]=0x9f; out[k++]=0x8f; break;
1036     case 0x03a0 : out[k++]=0x9f; out[k++]=0x90; break;
1037     case 0x03a1 : out[k++]=0x9f; out[k++]=0x91; break;
1038     case 0x03a3 : out[k++]=0x9f; out[k++]=0x93; break;
1039     case 0x03a4 : out[k++]=0x9f; out[k++]=0x94; break;
1040     case 0x03a5 : out[k++]=0x9f; out[k++]=0x95; break;
1041     case 0x03a6 : out[k++]=0x9f; out[k++]=0x96; break;
1042     case 0x03a7 : out[k++]=0x9f; out[k++]=0x97; break;
1043     case 0x03a8 : out[k++]=0x9f; out[k++]=0x98; break;
1044     case 0x03a9 : out[k++]=0x9f; out[k++]=0x99; break;
1045     case 0x03b1 : out[k++]=0x81; break;
1046     case 0x03b2 : out[k++]=0x82; break;
1047     case 0x03b3 : out[k++]=0x83; break;
1048     case 0x03b4 : out[k++]=0x84; break;
1049     case 0x03b5 : out[k++]=0x85; break;
1050     case 0x03b6 : out[k++]=0x86; break;
1051     case 0x03b7 : out[k++]=0x87; break;
1052     case 0x03b8 : out[k++]=0x88; break;
1053     case 0x03b9 : out[k++]=0x89; break;
1054     case 0x03ba : out[k++]=0x8a; break;
1055     case 0x03bb : out[k++]=0x8b; break;
1056     case 0x03bc : out[k++]=0x8c; break;
1057     case 0x03bd : out[k++]=0x8d; break;
1058     case 0x03be : out[k++]=0x8e; break;
1059     case 0x03bf : out[k++]=0x8f; break;
1060     case 0x03c0 : out[k++]=0x90; break;
1061     case 0x03c1 : out[k++]=0x91; break;
1062     case 0x03c2 : out[k++]=0x92; break;
1063     case 0x03c3 : out[k++]=0x93; break;
1064     case 0x03c4 : out[k++]=0x94; break;
1065     case 0x03c5 : out[k++]=0x95; break;
1066     case 0x03c6 : out[k++]=0x96; break;
1067     case 0x03c7 : out[k++]=0x96; break;
1068     case 0x03c8 : out[k++]=0x98; break;
1069     case 0x03c9 : out[k++]=0x99; break;
1070     default:
1071         if (x > 255)
1072         {
1073             cd->my_errno = YAZ_ICONV_EILSEQ;
1074             return (size_t) -1;
1075         }
1076         out[k++] = x;
1077         break;
1078     }
1079     *outbytesleft -= k;
1080     (*outbuf) += k;
1081     return 0;
1082 }
1083
1084
1085 static unsigned long yaz_read_marc8_comb (yaz_iconv_t cd, unsigned char *inp,
1086                                           size_t inbytesleft, size_t *no_read,
1087                                           int *comb);
1088
1089 static unsigned long yaz_read_marc8 (yaz_iconv_t cd, unsigned char *inp,
1090                                      size_t inbytesleft, size_t *no_read)
1091 {
1092     unsigned long x;
1093     if (cd->comb_offset < cd->comb_size)
1094     {
1095         *no_read = cd->comb_no_read[cd->comb_offset];
1096         x = cd->comb_x[cd->comb_offset];
1097
1098         /* special case for double-diacritic combining characters, 
1099            INVERTED BREVE and DOUBLE TILDE.
1100            We'll increment the no_read counter by 1, since we want to skip over
1101            the processing of the closing ligature character
1102         */
1103         /* this code is no longer necessary.. our handlers code in
1104            yaz_marc8_?_conv (generated by charconv.tcl) now returns
1105            0 and no_read=1 when a sequence does not match the input.
1106            The SECOND HALFs in codetables.xml produces a non-existant
1107            entry in the conversion trie.. Hence when met, the input byte is
1108            skipped as it should (in yaz_iconv)
1109         */
1110 #if 0
1111         if (x == 0x0361 || x == 0x0360)
1112             *no_read += 1;
1113 #endif
1114         cd->comb_offset++;
1115         return x;
1116     }
1117
1118     cd->comb_offset = 0;
1119     for (cd->comb_size = 0; cd->comb_size < 8; cd->comb_size++)
1120     {
1121         int comb = 0;
1122
1123         if (inbytesleft == 0 && cd->comb_size)
1124         {
1125             cd->my_errno = YAZ_ICONV_EINVAL;
1126             x = 0;
1127             *no_read = 0;
1128             break;
1129         }
1130         x = yaz_read_marc8_comb(cd, inp, inbytesleft, no_read, &comb);
1131         if (!comb || !x)
1132             break;
1133         cd->comb_x[cd->comb_size] = x;
1134         cd->comb_no_read[cd->comb_size] = *no_read;
1135         inp += *no_read;
1136         inbytesleft = inbytesleft - *no_read;
1137     }
1138     return x;
1139 }
1140
1141 static unsigned long yaz_read_marc8s(yaz_iconv_t cd, unsigned char *inp,
1142                                      size_t inbytesleft, size_t *no_read)
1143 {
1144     unsigned long x = yaz_read_marc8(cd, inp, inbytesleft, no_read);
1145     if (x && cd->comb_size == 1)
1146     {
1147         /* For MARC8s we try to get a Latin-1 page code out of it */
1148         int i;
1149         for (i = 0; latin1_comb[i].x1; i++)
1150             if (cd->comb_x[0] == latin1_comb[i].x2 && x == latin1_comb[i].x1)
1151             {
1152                 *no_read += cd->comb_no_read[0];
1153                 cd->comb_size = 0;
1154                 x = latin1_comb[i].y;
1155                 break;
1156             }
1157     }
1158     return x;
1159 }
1160
1161 static unsigned long yaz_read_marc8_comb(yaz_iconv_t cd, unsigned char *inp,
1162                                          size_t inbytesleft, size_t *no_read,
1163                                          int *comb)
1164 {
1165     *no_read = 0;
1166     while(inbytesleft >= 1 && inp[0] == 27)
1167     {
1168         size_t inbytesleft0 = inbytesleft;
1169         inp++;
1170         inbytesleft--;
1171         while(inbytesleft > 0 && strchr("(,$!)-", *inp))
1172         {
1173             inbytesleft--;
1174             inp++;
1175         }
1176         if (inbytesleft <= 0)
1177         {
1178             *no_read = 0;
1179             cd->my_errno = YAZ_ICONV_EINVAL;
1180             return 0;
1181         }
1182         cd->marc8_esc_mode = *inp++;
1183         inbytesleft--;
1184         (*no_read) += inbytesleft0 - inbytesleft;
1185     }
1186     if (inbytesleft <= 0)
1187         return 0;
1188     else if (*inp == ' ')
1189     {
1190         *no_read += 1;
1191         return ' ';
1192     }
1193     else
1194     {
1195         unsigned long x;
1196         size_t no_read_sub = 0;
1197         *comb = 0;
1198
1199         switch(cd->marc8_esc_mode)
1200         {
1201         case 'B':  /* Basic ASCII */
1202         case 's':  /* ASCII */
1203         case 'E':  /* ANSEL */
1204             x = yaz_marc8_42_conv(inp, inbytesleft, &no_read_sub, comb);
1205             if (!x)
1206             {
1207                 no_read_sub = 0;
1208                 x = yaz_marc8_45_conv(inp, inbytesleft, &no_read_sub, comb);
1209             }
1210             break;
1211         case 'g':  /* Greek */
1212             x = yaz_marc8_67_conv(inp, inbytesleft, &no_read_sub, comb);
1213             break;
1214         case 'b':  /* Subscripts */
1215             x = yaz_marc8_62_conv(inp, inbytesleft, &no_read_sub, comb);
1216             break;
1217         case 'p':  /* Superscripts */
1218             x = yaz_marc8_70_conv(inp, inbytesleft, &no_read_sub, comb);
1219             break;
1220         case '2':  /* Basic Hebrew */
1221             x = yaz_marc8_32_conv(inp, inbytesleft, &no_read_sub, comb);
1222             break;
1223         case 'N':  /* Basic Cyrillic */
1224             x = yaz_marc8_4E_conv(inp, inbytesleft, &no_read_sub, comb);
1225             break;
1226         case 'Q':  /* Extended Cyrillic */
1227             x = yaz_marc8_51_conv(inp, inbytesleft, &no_read_sub, comb);
1228             break;
1229         case '3':  /* Basic Arabic */
1230             x = yaz_marc8_33_conv(inp, inbytesleft, &no_read_sub, comb);
1231             break;
1232         case '4':  /* Extended Arabic */
1233             x = yaz_marc8_34_conv(inp, inbytesleft, &no_read_sub, comb);
1234             break;
1235         case 'S':  /* Greek */
1236             x = yaz_marc8_53_conv(inp, inbytesleft, &no_read_sub, comb);
1237             break;
1238         case '1':  /* Chinese, Japanese, Korean (EACC) */
1239             x = yaz_marc8_31_conv(inp, inbytesleft, &no_read_sub, comb);
1240             break;
1241         default:
1242             *no_read = 0;
1243             cd->my_errno = YAZ_ICONV_EILSEQ;
1244             return 0;
1245         }
1246         *no_read += no_read_sub;
1247         return x;
1248     }
1249 }
1250
1251 static size_t yaz_write_UTF8(yaz_iconv_t cd, unsigned long x,
1252                              char **outbuf, size_t *outbytesleft)
1253 {
1254     return yaz_write_UTF8_char(x, outbuf, outbytesleft, &cd->my_errno);
1255 }
1256
1257 size_t yaz_write_UTF8_char(unsigned long x,
1258                            char **outbuf, size_t *outbytesleft,
1259                            int *error)
1260 {
1261     unsigned char *outp = (unsigned char *) *outbuf;
1262
1263     if (x <= 0x7f && *outbytesleft >= 1)
1264     {
1265         *outp++ = (unsigned char) x;
1266         (*outbytesleft)--;
1267     } 
1268     else if (x <= 0x7ff && *outbytesleft >= 2)
1269     {
1270         *outp++ = (unsigned char) ((x >> 6) | 0xc0);
1271         *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1272         (*outbytesleft) -= 2;
1273     }
1274     else if (x <= 0xffff && *outbytesleft >= 3)
1275     {
1276         *outp++ = (unsigned char) ((x >> 12) | 0xe0);
1277         *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
1278         *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1279         (*outbytesleft) -= 3;
1280     }
1281     else if (x <= 0x1fffff && *outbytesleft >= 4)
1282     {
1283         *outp++ = (unsigned char) ((x >> 18) | 0xf0);
1284         *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
1285         *outp++ = (unsigned char) (((x >> 6)  & 0x3f) | 0x80);
1286         *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1287         (*outbytesleft) -= 4;
1288     }
1289     else if (x <= 0x3ffffff && *outbytesleft >= 5)
1290     {
1291         *outp++ = (unsigned char) ((x >> 24) | 0xf8);
1292         *outp++ = (unsigned char) (((x >> 18) & 0x3f) | 0x80);
1293         *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
1294         *outp++ = (unsigned char) (((x >> 6)  & 0x3f) | 0x80);
1295         *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1296         (*outbytesleft) -= 5;
1297     }
1298     else if (*outbytesleft >= 6)
1299     {
1300         *outp++ = (unsigned char) ((x >> 30) | 0xfc);
1301         *outp++ = (unsigned char) (((x >> 24) & 0x3f) | 0x80);
1302         *outp++ = (unsigned char) (((x >> 18) & 0x3f) | 0x80);
1303         *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
1304         *outp++ = (unsigned char) (((x >> 6)  & 0x3f) | 0x80);
1305         *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1306         (*outbytesleft) -= 6;
1307     }
1308     else 
1309     {
1310         *error = YAZ_ICONV_E2BIG;  /* not room for output */
1311         return (size_t)(-1);
1312     }
1313     *outbuf = (char *) outp;
1314     return 0;
1315 }
1316
1317 static size_t yaz_write_ISO8859_1 (yaz_iconv_t cd, unsigned long x,
1318                                    char **outbuf, size_t *outbytesleft)
1319 {
1320     /* list of two char unicode sequence that, when combined, are
1321        equivalent to single unicode chars that can be represented in
1322        ISO-8859-1/Latin-1.
1323        Regular iconv on Linux at least does not seem to convert these,
1324        but since MARC-8 to UTF-8 generates these composed sequence
1325        we get a better chance of a successful MARC-8 -> ISO-8859-1
1326        conversion */
1327     unsigned char *outp = (unsigned char *) *outbuf;
1328
1329     if (cd->compose_char)
1330     {
1331         int i;
1332         for (i = 0; latin1_comb[i].x1; i++)
1333             if (cd->compose_char == latin1_comb[i].x1 && x == latin1_comb[i].x2)
1334             {
1335                 x = latin1_comb[i].y;
1336                 break;
1337             }
1338         if (*outbytesleft < 1)
1339         {  /* no room. Retain compose_char and bail out */
1340             cd->my_errno = YAZ_ICONV_E2BIG;
1341             return (size_t)(-1);
1342         }
1343         if (!latin1_comb[i].x1) 
1344         {   /* not found. Just write compose_char */
1345             *outp++ = (unsigned char) cd->compose_char;
1346             (*outbytesleft)--;
1347             *outbuf = (char *) outp;
1348         }
1349         /* compose_char used so reset it. x now holds current char */
1350         cd->compose_char = 0;
1351     }
1352
1353     if (x > 32 && x < 127 && cd->compose_char == 0)
1354     {
1355         cd->compose_char = x;
1356         return 0;
1357     }
1358     else if (x > 255 || x < 1)
1359     {
1360         cd->my_errno = YAZ_ICONV_EILSEQ;
1361         return (size_t) -1;
1362     }
1363     else if (*outbytesleft < 1)
1364     {
1365         cd->my_errno = YAZ_ICONV_E2BIG;
1366         return (size_t)(-1);
1367     }
1368     *outp++ = (unsigned char) x;
1369     (*outbytesleft)--;
1370     *outbuf = (char *) outp;
1371     return 0;
1372 }
1373
1374 static size_t yaz_flush_ISO8859_1(yaz_iconv_t cd,
1375                                   char **outbuf, size_t *outbytesleft)
1376 {
1377     if (cd->compose_char)
1378     {
1379         unsigned char *outp = (unsigned char *) *outbuf;
1380         if (*outbytesleft < 1)
1381         {
1382             cd->my_errno = YAZ_ICONV_E2BIG;
1383             return (size_t)(-1);
1384         }
1385         *outp++ = (unsigned char) cd->compose_char;
1386         (*outbytesleft)--;
1387         *outbuf = (char *) outp;
1388         cd->compose_char = 0;
1389     }
1390     return 0;
1391 }
1392
1393 static size_t yaz_write_UCS4 (yaz_iconv_t cd, unsigned long x,
1394                               char **outbuf, size_t *outbytesleft)
1395 {
1396     unsigned char *outp = (unsigned char *) *outbuf;
1397     if (*outbytesleft >= 4)
1398     {
1399         *outp++ = (unsigned char) (x>>24);
1400         *outp++ = (unsigned char) (x>>16);
1401         *outp++ = (unsigned char) (x>>8);
1402         *outp++ = (unsigned char) x;
1403         (*outbytesleft) -= 4;
1404     }
1405     else
1406     {
1407         cd->my_errno = YAZ_ICONV_E2BIG;
1408         return (size_t)(-1);
1409     }
1410     *outbuf = (char *) outp;
1411     return 0;
1412 }
1413
1414 static size_t yaz_write_UCS4LE (yaz_iconv_t cd, unsigned long x,
1415                                 char **outbuf, size_t *outbytesleft)
1416 {
1417     unsigned char *outp = (unsigned char *) *outbuf;
1418     if (*outbytesleft >= 4)
1419     {
1420         *outp++ = (unsigned char) x;
1421         *outp++ = (unsigned char) (x>>8);
1422         *outp++ = (unsigned char) (x>>16);
1423         *outp++ = (unsigned char) (x>>24);
1424         (*outbytesleft) -= 4;
1425     }
1426     else
1427     {
1428         cd->my_errno = YAZ_ICONV_E2BIG;
1429         return (size_t)(-1);
1430     }
1431     *outbuf = (char *) outp;
1432     return 0;
1433 }
1434
1435 static unsigned long lookup_marc8(yaz_iconv_t cd,
1436                                   unsigned long x, int *comb,
1437                                   const char **page_chr)
1438 {
1439     char utf8_buf[7];
1440     char *utf8_outbuf = utf8_buf;
1441     size_t utf8_outbytesleft = sizeof(utf8_buf)-1, r;
1442
1443     r = yaz_write_UTF8(cd, x, &utf8_outbuf, &utf8_outbytesleft);
1444     if (r == (size_t)(-1))
1445     {
1446         cd->my_errno = YAZ_ICONV_EILSEQ;
1447         return 0;
1448     }
1449     else
1450     {
1451         unsigned char *inp;
1452         size_t inbytesleft, no_read_sub = 0;
1453         unsigned long x;
1454
1455         *utf8_outbuf = '\0';        
1456         inp = (unsigned char *) utf8_buf;
1457         inbytesleft = strlen(utf8_buf);
1458
1459         x = yaz_marc8r_42_conv(inp, inbytesleft, &no_read_sub, comb);
1460         if (x)
1461         {
1462             *page_chr = ESC "(B";
1463             return x;
1464         }
1465         x = yaz_marc8r_45_conv(inp, inbytesleft, &no_read_sub, comb);
1466         if (x)
1467         {
1468 #if 1
1469             *page_chr = ESC "(B";
1470 #else
1471             /* this possibly solves bug #1778 */
1472             *page_chr = ESC ")!E";
1473 #endif
1474             return x;
1475         }
1476         x = yaz_marc8r_67_conv(inp, inbytesleft, &no_read_sub, comb);
1477         if (x)
1478         {
1479             *page_chr = ESC "g";
1480             return x;
1481         }
1482         x = yaz_marc8r_62_conv(inp, inbytesleft, &no_read_sub, comb);
1483         if (x)
1484         {
1485             *page_chr = ESC "b";
1486             return x;
1487         }
1488         x = yaz_marc8r_70_conv(inp, inbytesleft, &no_read_sub, comb);
1489         if (x)
1490         {
1491             *page_chr = ESC "p";
1492             return x;
1493         }
1494         x = yaz_marc8r_32_conv(inp, inbytesleft, &no_read_sub, comb);
1495         if (x)
1496         {
1497             *page_chr = ESC "(2";
1498             return x;
1499         }
1500         x = yaz_marc8r_4E_conv(inp, inbytesleft, &no_read_sub, comb);
1501         if (x)
1502         {
1503             *page_chr = ESC "(N";
1504             return x;
1505         }
1506         x = yaz_marc8r_51_conv(inp, inbytesleft, &no_read_sub, comb);
1507         if (x)
1508         {
1509             *page_chr = ESC "(Q";
1510             return x;
1511         }
1512         x = yaz_marc8r_33_conv(inp, inbytesleft, &no_read_sub, comb);
1513         if (x)
1514         {
1515             *page_chr = ESC "(3";
1516             return x;
1517         }
1518         x = yaz_marc8r_34_conv(inp, inbytesleft, &no_read_sub, comb);
1519         if (x)
1520         {
1521             *page_chr = ESC "(4";
1522             return x;
1523         }
1524         x = yaz_marc8r_53_conv(inp, inbytesleft, &no_read_sub, comb);
1525         if (x)
1526         {
1527             *page_chr = ESC "(S";
1528             return x;
1529         }
1530         x = yaz_marc8r_31_conv(inp, inbytesleft, &no_read_sub, comb);
1531         if (x)
1532         {
1533             *page_chr = ESC "$1";
1534             return x;
1535         }
1536         cd->my_errno = YAZ_ICONV_EILSEQ;
1537         return x;
1538     }
1539 }
1540
1541 static size_t flush_combos(yaz_iconv_t cd,
1542                            char **outbuf, size_t *outbytesleft)
1543 {
1544     unsigned long y = cd->write_marc8_last;
1545     unsigned char byte;
1546     char out_buf[10];
1547     size_t i, out_no = 0;
1548
1549     if (!y)
1550         return 0;
1551
1552     byte = (unsigned char )((y>>16) & 0xff);
1553     if (byte)
1554         out_buf[out_no++] = byte;
1555     byte = (unsigned char)((y>>8) & 0xff);
1556     if (byte)
1557         out_buf[out_no++] = byte;
1558     byte = (unsigned char )(y & 0xff);
1559     if (byte)
1560         out_buf[out_no++] = byte;
1561
1562     if (out_no + cd->write_marc8_comb_no + 1 > *outbytesleft)
1563     {
1564         cd->my_errno = YAZ_ICONV_E2BIG;
1565         return (size_t) (-1);
1566     }
1567
1568     for (i = 0; i < cd->write_marc8_comb_no; i++)
1569     {
1570         /* all MARC-8 combined characters are simple bytes */
1571         byte = (unsigned char )(cd->write_marc8_comb_ch[i]);
1572         *(*outbuf)++ = byte;
1573         (*outbytesleft)--;
1574     }
1575     memcpy(*outbuf, out_buf, out_no);
1576     *outbuf += out_no;
1577     (*outbytesleft) -= out_no;
1578     if (cd->write_marc8_second_half_char)
1579     {
1580         *(*outbuf)++ = cd->write_marc8_second_half_char;
1581         (*outbytesleft)--;
1582     }        
1583
1584     cd->write_marc8_last = 0;
1585     cd->write_marc8_comb_no = 0;
1586     cd->write_marc8_second_half_char = 0;
1587     return 0;
1588 }
1589
1590 static size_t yaz_write_marc8_page_chr(yaz_iconv_t cd, 
1591                                        char **outbuf, size_t *outbytesleft,
1592                                        const char *page_chr)
1593 {
1594     const char **old_page_chr = &cd->write_marc8_g0;
1595
1596     /* are we going to a G1-set (such as such as ESC ")!E") */
1597     if (page_chr && page_chr[1] == ')')
1598         old_page_chr = &cd->write_marc8_g1;
1599
1600     if (!*old_page_chr || strcmp(page_chr, *old_page_chr))
1601     {
1602         size_t plen = 0;
1603         const char *page_out = page_chr;
1604         
1605         if (*outbytesleft < 8)
1606         {
1607             cd->my_errno = YAZ_ICONV_E2BIG;
1608             
1609             return (size_t) (-1);
1610         }
1611
1612         if (*old_page_chr)
1613         {
1614             if (!strcmp(*old_page_chr, ESC "p") 
1615                 || !strcmp(*old_page_chr, ESC "g")
1616                 || !strcmp(*old_page_chr, ESC "b"))
1617             {
1618                 page_out = ESC "s";
1619                 /* Technique 1 leave */
1620                 if (strcmp(page_chr, ESC "(B")) /* Not going ASCII page? */
1621                 {
1622                     /* Must leave script + enter new page */
1623                     plen = strlen(page_out);
1624                     memcpy(*outbuf, page_out, plen);
1625                     (*outbuf) += plen;
1626                     (*outbytesleft) -= plen;
1627                     page_out = ESC "(B";
1628                 }
1629             }
1630         }
1631         *old_page_chr = page_chr;
1632         plen = strlen(page_out);
1633         memcpy(*outbuf, page_out, plen);
1634         (*outbuf) += plen;
1635         (*outbytesleft) -= plen;
1636     }
1637     return 0;
1638 }
1639
1640
1641 static size_t yaz_write_marc8_2(yaz_iconv_t cd, unsigned long x,
1642                                 char **outbuf, size_t *outbytesleft)
1643 {
1644     int comb = 0;
1645     const char *page_chr = 0;
1646     unsigned long y = lookup_marc8(cd, x, &comb, &page_chr);
1647
1648     if (!y)
1649         return (size_t) (-1);
1650
1651     if (comb)
1652     {
1653         if (page_chr)
1654         {
1655             size_t r = yaz_write_marc8_page_chr(cd, outbuf, outbytesleft, page_chr);
1656             if (r)
1657                 return r;
1658         }
1659         if (x == 0x0361)
1660             cd->write_marc8_second_half_char = 0xEC;
1661         else if (x == 0x0360)
1662             cd->write_marc8_second_half_char = 0xFB;
1663
1664         if (cd->write_marc8_comb_no < 6)
1665             cd->write_marc8_comb_ch[cd->write_marc8_comb_no++] = y;
1666     }
1667     else
1668     {
1669         size_t r = flush_combos(cd, outbuf, outbytesleft);
1670         if (r)
1671             return r;
1672
1673         if (page_chr)
1674         {
1675             r = yaz_write_marc8_page_chr(cd, outbuf, outbytesleft, page_chr);
1676             if (r)
1677                 return r;
1678         }
1679         cd->write_marc8_last = y;
1680     }
1681     return 0;
1682 }
1683
1684 static size_t yaz_flush_marc8(yaz_iconv_t cd,
1685                               char **outbuf, size_t *outbytesleft)
1686 {
1687     size_t r = flush_combos(cd, outbuf, outbytesleft);
1688     if (r)
1689         return r;
1690     cd->write_marc8_g1 = 0;
1691     return yaz_write_marc8_page_chr(cd, outbuf, outbytesleft, ESC "(B");
1692 }
1693
1694 static size_t yaz_write_marc8(yaz_iconv_t cd, unsigned long x,
1695                               char **outbuf, size_t *outbytesleft)
1696 {
1697     int i;
1698     for (i = 0; latin1_comb[i].x1; i++)
1699     {
1700         if (x == latin1_comb[i].y)
1701         {
1702             size_t r ;
1703             /* save the output pointers .. */
1704             char *outbuf0 = *outbuf;
1705             size_t outbytesleft0 = *outbytesleft;
1706             int last_ch = cd->write_marc8_last;
1707
1708             r = yaz_write_marc8_2(cd, latin1_comb[i].x1,
1709                                   outbuf, outbytesleft);
1710             if (r)
1711                 return r;
1712             r = yaz_write_marc8_2(cd, latin1_comb[i].x2,
1713                                   outbuf, outbytesleft);
1714             if (r && cd->my_errno == YAZ_ICONV_E2BIG)
1715             {
1716                 /* not enough room. reset output to original values */
1717                 *outbuf = outbuf0;
1718                 *outbytesleft = outbytesleft0;
1719                 cd->write_marc8_last = last_ch;
1720             }
1721             return r;
1722         }
1723     }
1724     return yaz_write_marc8_2(cd, x, outbuf, outbytesleft);
1725 }
1726
1727
#if HAVE_WCHAR_H
/**
 * wchar_t write handler: stores x as one wchar_t in native
 * representation.
 *
 * \return 0 on success; (size_t)(-1) with my_errno = YAZ_ICONV_E2BIG
 *         when less than sizeof(wchar_t) bytes of room are left
 */
static size_t yaz_write_wchar_t(yaz_iconv_t cd, unsigned long x,
                                char **outbuf, size_t *outbytesleft)
{
    wchar_t wch;

    if (*outbytesleft < sizeof(wchar_t))
    {
        cd->my_errno = YAZ_ICONV_E2BIG;
        return (size_t)(-1);
    }
    wch = x;
    /* memcpy, since *outbuf need not be aligned for wchar_t */
    memcpy(*outbuf, &wch, sizeof(wch));
    *outbuf += sizeof(wch);
    (*outbytesleft) -= sizeof(wch);
    return 0;
}
#endif
1750
1751 int yaz_iconv_isbuiltin(yaz_iconv_t cd)
1752 {
1753     return cd->read_handle && cd->write_handle;
1754 }
1755
1756 yaz_iconv_t yaz_iconv_open (const char *tocode, const char *fromcode)
1757 {
1758     yaz_iconv_t cd = (yaz_iconv_t) xmalloc (sizeof(*cd));
1759
1760     cd->write_handle = 0;
1761     cd->read_handle = 0;
1762     cd->init_handle = 0;
1763     cd->flush_handle = 0;
1764     cd->my_errno = YAZ_ICONV_UNKNOWN;
1765
1766     /* a useful hack: if fromcode has leading @,
1767        the library not use YAZ's own conversions .. */
1768     if (fromcode[0] == '@')
1769         fromcode++;
1770     else
1771     {
1772         if (!yaz_matchstr(fromcode, "UTF8"))
1773         {
1774             cd->read_handle = yaz_read_UTF8;
1775             cd->init_handle = yaz_init_UTF8;
1776         }
1777         else if (!yaz_matchstr(fromcode, "ISO88591"))
1778             cd->read_handle = yaz_read_ISO8859_1;
1779         else if (!yaz_matchstr(fromcode, "UCS4"))
1780             cd->read_handle = yaz_read_UCS4;
1781         else if (!yaz_matchstr(fromcode, "UCS4LE"))
1782             cd->read_handle = yaz_read_UCS4LE;
1783         else if (!yaz_matchstr(fromcode, "MARC8"))
1784             cd->read_handle = yaz_read_marc8;
1785         else if (!yaz_matchstr(fromcode, "MARC8s"))
1786             cd->read_handle = yaz_read_marc8s;
1787         else if (!yaz_matchstr(fromcode, "advancegreek"))
1788             cd->read_handle = yaz_read_advancegreek;
1789         else if (!yaz_matchstr(fromcode, "iso54281984"))
1790             cd->read_handle = yaz_read_iso5428_1984;
1791         else if (!yaz_matchstr(fromcode, "iso5428:1984"))
1792             cd->read_handle = yaz_read_iso5428_1984;
1793 #if HAVE_WCHAR_H
1794         else if (!yaz_matchstr(fromcode, "WCHAR_T"))
1795             cd->read_handle = yaz_read_wchar_t;
1796 #endif
1797         
1798         if (!yaz_matchstr(tocode, "UTF8"))
1799             cd->write_handle = yaz_write_UTF8;
1800         else if (!yaz_matchstr(tocode, "ISO88591"))
1801         {
1802             cd->write_handle = yaz_write_ISO8859_1;
1803             cd->flush_handle = yaz_flush_ISO8859_1;
1804         }
1805         else if (!yaz_matchstr (tocode, "UCS4"))
1806             cd->write_handle = yaz_write_UCS4;
1807         else if (!yaz_matchstr(tocode, "UCS4LE"))
1808             cd->write_handle = yaz_write_UCS4LE;
1809         else if (!yaz_matchstr(tocode, "MARC8"))
1810         {
1811             cd->write_handle = yaz_write_marc8;
1812             cd->flush_handle = yaz_flush_marc8;
1813         }
1814         else if (!yaz_matchstr(tocode, "MARC8s"))
1815         {
1816             cd->write_handle = yaz_write_marc8;
1817             cd->flush_handle = yaz_flush_marc8;
1818         }
1819         else if (!yaz_matchstr(tocode, "advancegreek"))
1820         {
1821             cd->write_handle = yaz_write_advancegreek;
1822         }
1823         else if (!yaz_matchstr(tocode, "iso54281984"))
1824         {
1825             cd->write_handle = yaz_write_iso5428_1984;
1826         }
1827         else if (!yaz_matchstr(tocode, "iso5428:1984"))
1828         {
1829             cd->write_handle = yaz_write_iso5428_1984;
1830         }
1831 #if HAVE_WCHAR_H
1832         else if (!yaz_matchstr(tocode, "WCHAR_T"))
1833             cd->write_handle = yaz_write_wchar_t;
1834 #endif
1835     }
1836 #if HAVE_ICONV_H
1837     cd->iconv_cd = 0;
1838     if (!cd->read_handle || !cd->write_handle)
1839     {
1840         cd->iconv_cd = iconv_open (tocode, fromcode);
1841         if (cd->iconv_cd == (iconv_t) (-1))
1842         {
1843             xfree (cd);
1844             return 0;
1845         }
1846     }
1847 #else
1848     if (!cd->read_handle || !cd->write_handle)
1849     {
1850         xfree (cd);
1851         return 0;
1852     }
1853 #endif
1854     cd->init_flag = 1;
1855     return cd;
1856 }
1857
1858 size_t yaz_iconv(yaz_iconv_t cd, char **inbuf, size_t *inbytesleft,
1859                  char **outbuf, size_t *outbytesleft)
1860 {
1861     char *inbuf0 = 0;
1862     size_t r = 0;
1863
1864 #if HAVE_ICONV_H
1865     if (cd->iconv_cd)
1866     {
1867         size_t r =
1868             iconv(cd->iconv_cd, inbuf, inbytesleft, outbuf, outbytesleft);
1869         if (r == (size_t)(-1))
1870         {
1871             switch (yaz_errno())
1872             {
1873             case E2BIG:
1874                 cd->my_errno = YAZ_ICONV_E2BIG;
1875                 break;
1876             case EINVAL:
1877                 cd->my_errno = YAZ_ICONV_EINVAL;
1878                 break;
1879             case EILSEQ:
1880                 cd->my_errno = YAZ_ICONV_EILSEQ;
1881                 break;
1882             default:
1883                 cd->my_errno = YAZ_ICONV_UNKNOWN;
1884             }
1885         }
1886         return r;
1887     }
1888 #endif
1889
1890     if (inbuf)
1891         inbuf0 = *inbuf;
1892
1893     if (cd->init_flag)
1894     {
1895         cd->my_errno = YAZ_ICONV_UNKNOWN;
1896         cd->marc8_esc_mode = 'B';
1897         
1898         cd->comb_offset = cd->comb_size = 0;
1899         cd->compose_char = 0;
1900         
1901         cd->write_marc8_comb_no = 0;
1902         cd->write_marc8_second_half_char = 0;
1903         cd->write_marc8_last = 0;
1904         cd->write_marc8_g0 = ESC "(B";
1905         cd->write_marc8_g1 = 0;
1906         
1907         cd->unget_x = 0;
1908         cd->no_read_x = 0;
1909     }
1910
1911     if (cd->init_flag)
1912     {
1913         if (cd->init_handle && inbuf && *inbuf)
1914         {
1915             size_t no_read = 0;
1916             size_t r = (cd->init_handle)(cd, (unsigned char *) *inbuf,
1917                                          *inbytesleft, &no_read);
1918             if (r)
1919             {
1920                 if (cd->my_errno == YAZ_ICONV_EINVAL)
1921                     return r;
1922                 cd->init_flag = 0;
1923                 return r;
1924             }
1925             *inbytesleft -= no_read;
1926             *inbuf += no_read;
1927         }
1928     }
1929     cd->init_flag = 0;
1930
1931     if (!inbuf || !*inbuf)
1932     {
1933         if (outbuf && *outbuf)
1934         {
1935             if (cd->unget_x)
1936                 r = (*cd->write_handle)(cd, cd->unget_x, outbuf, outbytesleft);
1937             if (cd->flush_handle)
1938                 r = (*cd->flush_handle)(cd, outbuf, outbytesleft);
1939         }
1940         if (r == 0)
1941             cd->init_flag = 1;
1942         cd->unget_x = 0;
1943         return r;
1944     }
1945     while (1)
1946     {
1947         unsigned long x;
1948         size_t no_read;
1949
1950         if (cd->unget_x)
1951         {
1952             x = cd->unget_x;
1953             no_read = cd->no_read_x;
1954         }
1955         else
1956         {
1957             if (*inbytesleft == 0)
1958             {
1959                 r = *inbuf - inbuf0;
1960                 break;
1961             }
1962             x = (*cd->read_handle)(cd, (unsigned char *) *inbuf, *inbytesleft,
1963                                    &no_read);
1964             if (no_read == 0)
1965             {
1966                 r = (size_t)(-1);
1967                 break;
1968             }
1969         }
1970         if (x)
1971         {
1972             r = (*cd->write_handle)(cd, x, outbuf, outbytesleft);
1973             if (r)
1974             {
1975                 /* unable to write it. save it because read_handle cannot
1976                    rewind .. */
1977                 if (cd->my_errno == YAZ_ICONV_E2BIG)
1978                 {
1979                     cd->unget_x = x;
1980                     cd->no_read_x = no_read;
1981                     break;
1982                 }
1983             }
1984             cd->unget_x = 0;
1985         }
1986         *inbytesleft -= no_read;
1987         (*inbuf) += no_read;
1988     }
1989     return r;
1990 }
1991
1992 int yaz_iconv_error (yaz_iconv_t cd)
1993 {
1994     return cd->my_errno;
1995 }
1996
1997 int yaz_iconv_close (yaz_iconv_t cd)
1998 {
1999 #if HAVE_ICONV_H
2000     if (cd->iconv_cd)
2001         iconv_close (cd->iconv_cd);
2002 #endif
2003     xfree (cd);
2004     return 0;
2005 }
2006
2007 /*
2008  * Local variables:
2009  * c-basic-offset: 4
2010  * indent-tabs-mode: nil
2011  * End:
2012  * vim: shiftwidth=4 tabstop=8 expandtab
2013  */