MARC-8 ANSEL fix and better handling of incomplete sequences.
[yaz-moved-to-github.git] / src / siconv.c
1 /*
2  * Copyright (C) 1995-2008, Index Data ApS
3  * See the file LICENSE for details.
4  *
5  * $Id: siconv.c,v 1.50 2008-03-12 08:53:28 adam Exp $
6  */
7 /**
8  * \file siconv.c
9  * \brief Implements simple ICONV
10  *
11  * This implements an interface similar to that of iconv and
12  * is used by YAZ to interface with iconv (if present).
13  * For systems where iconv is not present, this layer
14  * provides a few important conversions: UTF-8, MARC-8, Latin-1.
15  *
16  * MARC-8 reference:
17  *  http://www.loc.gov/marc/specifications/speccharmarc8.html
18  */
19
20 #if HAVE_CONFIG_H
21 #include <config.h>
22 #endif
23
24 #include <assert.h>
25 #include <errno.h>
26 #include <string.h>
27 #include <ctype.h>
28 #if HAVE_WCHAR_H
29 #include <wchar.h>
30 #endif
31
32 #if HAVE_ICONV_H
33 #include <iconv.h>
34 #endif
35
36
37 #include <yaz/yaz-util.h>
38
39 unsigned long yaz_marc8_42_conv(unsigned char *inp, size_t inbytesleft,
40                                size_t *no_read, int *combining);
41 unsigned long yaz_marc8_45_conv(unsigned char *inp, size_t inbytesleft,
42                                size_t *no_read, int *combining);
43 unsigned long yaz_marc8_67_conv(unsigned char *inp, size_t inbytesleft,
44                                size_t *no_read, int *combining);
45 unsigned long yaz_marc8_62_conv(unsigned char *inp, size_t inbytesleft,
46                                size_t *no_read, int *combining);
47 unsigned long yaz_marc8_70_conv(unsigned char *inp, size_t inbytesleft,
48                                size_t *no_read, int *combining);
49 unsigned long yaz_marc8_32_conv(unsigned char *inp, size_t inbytesleft,
50                                size_t *no_read, int *combining);
51 unsigned long yaz_marc8_4E_conv(unsigned char *inp, size_t inbytesleft,
52                                size_t *no_read, int *combining);
53 unsigned long yaz_marc8_51_conv(unsigned char *inp, size_t inbytesleft,
54                                size_t *no_read, int *combining);
55 unsigned long yaz_marc8_33_conv(unsigned char *inp, size_t inbytesleft,
56                                size_t *no_read, int *combining);
57 unsigned long yaz_marc8_34_conv(unsigned char *inp, size_t inbytesleft,
58                                size_t *no_read, int *combining);
59 unsigned long yaz_marc8_53_conv(unsigned char *inp, size_t inbytesleft,
60                                size_t *no_read, int *combining);
61 unsigned long yaz_marc8_31_conv(unsigned char *inp, size_t inbytesleft,
62                                size_t *no_read, int *combining);
63
64
65 unsigned long yaz_marc8r_42_conv(unsigned char *inp, size_t inbytesleft,
66                                  size_t *no_read, int *combining);
67 unsigned long yaz_marc8r_45_conv(unsigned char *inp, size_t inbytesleft,
68                                  size_t *no_read, int *combining);
69 unsigned long yaz_marc8r_67_conv(unsigned char *inp, size_t inbytesleft,
70                                  size_t *no_read, int *combining);
71 unsigned long yaz_marc8r_62_conv(unsigned char *inp, size_t inbytesleft,
72                                  size_t *no_read, int *combining);
73 unsigned long yaz_marc8r_70_conv(unsigned char *inp, size_t inbytesleft,
74                                  size_t *no_read, int *combining);
75 unsigned long yaz_marc8r_32_conv(unsigned char *inp, size_t inbytesleft,
76                                  size_t *no_read, int *combining);
77 unsigned long yaz_marc8r_4E_conv(unsigned char *inp, size_t inbytesleft,
78                                  size_t *no_read, int *combining);
79 unsigned long yaz_marc8r_51_conv(unsigned char *inp, size_t inbytesleft,
80                                  size_t *no_read, int *combining);
81 unsigned long yaz_marc8r_33_conv(unsigned char *inp, size_t inbytesleft,
82                                  size_t *no_read, int *combining);
83 unsigned long yaz_marc8r_34_conv(unsigned char *inp, size_t inbytesleft,
84                                  size_t *no_read, int *combining);
85 unsigned long yaz_marc8r_53_conv(unsigned char *inp, size_t inbytesleft,
86                                  size_t *no_read, int *combining);
87 unsigned long yaz_marc8r_31_conv(unsigned char *inp, size_t inbytesleft,
88                                  size_t *no_read, int *combining);
89
90 #define ESC "\033"
91
92 struct yaz_iconv_struct {
93     int my_errno;
94     int init_flag;
95     size_t (*init_handle)(yaz_iconv_t cd, unsigned char *inbuf,
96                           size_t inbytesleft, size_t *no_read);
97     unsigned long (*read_handle)(yaz_iconv_t cd, unsigned char *inbuf,
98                                  size_t inbytesleft, size_t *no_read);
99     size_t (*write_handle)(yaz_iconv_t cd, unsigned long x,
100                            char **outbuf, size_t *outbytesleft);
101     size_t (*flush_handle)(yaz_iconv_t cd,
102                            char **outbuf, size_t *outbytesleft);
103     int g0_mode;
104     int g1_mode;
105
106     int comb_offset;
107     int comb_size;
108     unsigned long comb_x[8];
109     size_t comb_no_read[8];
110     size_t no_read_x;
111     unsigned long unget_x;
112 #if HAVE_ICONV_H
113     iconv_t iconv_cd;
114 #endif
115     unsigned long compose_char;
116
117     unsigned write_marc8_second_half_char;
118     unsigned long write_marc8_last;
119     const char *write_marc8_lpage;
120     const char *write_marc8_g0;
121     const char *write_marc8_g1;
122 };
123
/*
 * Composition table: x1 is a base letter, x2 a combining mark and y the
 * code of the corresponding precomposed Latin-1 letter. The list ends
 * with an all-zero entry.
 *
 * Codes without a (base, mark) decomposition are left out on purpose:
 * 0xc6/0xe6 (AE), 0xd7 (multiplication sign), 0xd8/0xf8 (O with stroke),
 * 0xde/0xfe (thorn), 0xdf (sharp s), 0xf0 (eth), 0xf7 (division sign).
 *
 * Marks: 0x0300 grave, 0x0301 acute, 0x0302 circumflex, 0x0303 tilde,
 *        0x0308 diaeresis, 0x030a ring above, 0x0327 cedilla.
 */
static struct {
    unsigned long x1, x2;
    unsigned y;
} latin1_comb[] = {
    /* capital A */
    { 'A', 0x0300, 0xc0},
    { 'A', 0x0301, 0xc1},
    { 'A', 0x0302, 0xc2},
    { 'A', 0x0303, 0xc3},
    { 'A', 0x0308, 0xc4},
    { 'A', 0x030a, 0xc5},
    /* capital C */
    { 'C', 0x0327, 0xc7},
    /* capital E */
    { 'E', 0x0300, 0xc8},
    { 'E', 0x0301, 0xc9},
    { 'E', 0x0302, 0xca},
    { 'E', 0x0308, 0xcb},
    /* capital I */
    { 'I', 0x0300, 0xcc},
    { 'I', 0x0301, 0xcd},
    { 'I', 0x0302, 0xce},
    { 'I', 0x0308, 0xcf},
    /* capital N */
    { 'N', 0x0303, 0xd1},
    /* capital O */
    { 'O', 0x0300, 0xd2},
    { 'O', 0x0301, 0xd3},
    { 'O', 0x0302, 0xd4},
    { 'O', 0x0303, 0xd5},
    { 'O', 0x0308, 0xd6},
    /* capital U */
    { 'U', 0x0300, 0xd9},
    { 'U', 0x0301, 0xda},
    { 'U', 0x0302, 0xdb},
    { 'U', 0x0308, 0xdc},
    /* capital Y */
    { 'Y', 0x0301, 0xdd},
    /* small a */
    { 'a', 0x0300, 0xe0},
    { 'a', 0x0301, 0xe1},
    { 'a', 0x0302, 0xe2},
    { 'a', 0x0303, 0xe3},
    { 'a', 0x0308, 0xe4},
    { 'a', 0x030a, 0xe5},
    /* small c */
    { 'c', 0x0327, 0xe7},
    /* small e */
    { 'e', 0x0300, 0xe8},
    { 'e', 0x0301, 0xe9},
    { 'e', 0x0302, 0xea},
    { 'e', 0x0308, 0xeb},
    /* small i */
    { 'i', 0x0300, 0xec},
    { 'i', 0x0301, 0xed},
    { 'i', 0x0302, 0xee},
    { 'i', 0x0308, 0xef},
    /* small n */
    { 'n', 0x0303, 0xf1},
    /* small o */
    { 'o', 0x0300, 0xf2},
    { 'o', 0x0301, 0xf3},
    { 'o', 0x0302, 0xf4},
    { 'o', 0x0303, 0xf5},
    { 'o', 0x0308, 0xf6},
    /* small u */
    { 'u', 0x0300, 0xf9},
    { 'u', 0x0301, 0xfa},
    { 'u', 0x0302, 0xfb},
    { 'u', 0x0308, 0xfc},
    /* small y */
    { 'y', 0x0301, 0xfd},
    { 'y', 0x0308, 0xff},

    /* terminator */
    { 0, 0, 0}
};
194
195 static size_t yaz_write_marc8_page_chr(yaz_iconv_t cd, 
196                                        char **outbuf, size_t *outbytesleft,
197                                        const char *page_chr);
198
199 static unsigned long yaz_read_ISO8859_1 (yaz_iconv_t cd, unsigned char *inp,
200                                          size_t inbytesleft, size_t *no_read)
201 {
202     unsigned long x = inp[0];
203     *no_read = 1;
204     return x;
205 }
206
207
208 static size_t yaz_init_UTF8 (yaz_iconv_t cd, unsigned char *inp,
209                              size_t inbytesleft, size_t *no_read)
210 {
211     if (inp[0] != 0xef)
212     {
213         *no_read = 0;
214         return 0;
215     }
216     if (inbytesleft < 3)
217     {
218         cd->my_errno = YAZ_ICONV_EINVAL;
219         return (size_t) -1;
220     }
221     if (inp[1] != 0xbb && inp[2] == 0xbf)
222         *no_read = 3;
223     else
224         *no_read = 0;
225     return 0;
226 }
227
228 unsigned long yaz_read_UTF8_char(unsigned char *inp,
229                                  size_t inbytesleft, size_t *no_read,
230                                  int *error)
231 {
232     unsigned long x = 0;
233
234     *no_read = 0; /* by default */
235     if (inp[0] <= 0x7f)
236     {
237         x = inp[0];
238         *no_read = 1;
239     }
240     else if (inp[0] <= 0xbf || inp[0] >= 0xfe)
241     {
242         *error = YAZ_ICONV_EILSEQ;
243     }
244     else if (inp[0] <= 0xdf && inbytesleft >= 2)
245     {
246         if ((inp[1] & 0xc0) == 0x80)
247         {
248             x = ((inp[0] & 0x1f) << 6) | (inp[1] & 0x3f);
249             if (x >= 0x80)
250                 *no_read = 2;
251             else
252                 *error = YAZ_ICONV_EILSEQ;
253         }
254         else
255             *error = YAZ_ICONV_EILSEQ;
256     }
257     else if (inp[0] <= 0xef && inbytesleft >= 3)
258     {
259         if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80)
260         {
261             x = ((inp[0] & 0x0f) << 12) | ((inp[1] & 0x3f) << 6) |
262                 (inp[2] & 0x3f);
263             if (x >= 0x800)
264                 *no_read = 3;
265             else
266                 *error = YAZ_ICONV_EILSEQ;
267         }
268         else
269             *error = YAZ_ICONV_EILSEQ;
270     }            
271     else if (inp[0] <= 0xf7 && inbytesleft >= 4)
272     {
273         if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80
274             && (inp[3] & 0xc0) == 0x80)
275         {
276             x = ((inp[0] & 0x07) << 18) | ((inp[1] & 0x3f) << 12) |
277                 ((inp[2] & 0x3f) << 6) | (inp[3] & 0x3f);
278             if (x >= 0x10000)
279                 *no_read = 4;
280             else
281                 *error = YAZ_ICONV_EILSEQ;
282         }
283         else
284             *error = YAZ_ICONV_EILSEQ;
285     }
286     else if (inp[0] <= 0xfb && inbytesleft >= 5)
287     {
288         if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80
289             && (inp[3] & 0xc0) == 0x80 && (inp[4] & 0xc0) == 0x80)
290         {
291             x = ((inp[0] & 0x03) << 24) | ((inp[1] & 0x3f) << 18) |
292                 ((inp[2] & 0x3f) << 12) | ((inp[3] & 0x3f) << 6) |
293                 (inp[4] & 0x3f);
294             if (x >= 0x200000)
295                 *no_read = 5;
296             else
297                 *error = YAZ_ICONV_EILSEQ;
298         }
299         else
300             *error = YAZ_ICONV_EILSEQ;
301     }
302     else if (inp[0] <= 0xfd && inbytesleft >= 6)
303     {
304         if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80
305             && (inp[3] & 0xc0) == 0x80 && (inp[4] & 0xc0) == 0x80
306             && (inp[5] & 0xc0) == 0x80)
307         {
308             x = ((inp[0] & 0x01) << 30) | ((inp[1] & 0x3f) << 24) |
309                 ((inp[2] & 0x3f) << 18) | ((inp[3] & 0x3f) << 12) |
310                 ((inp[4] & 0x3f) << 6) | (inp[5] & 0x3f);
311             if (x >= 0x4000000)
312                 *no_read = 6;
313             else
314                 *error = YAZ_ICONV_EILSEQ;
315         }
316         else
317             *error = YAZ_ICONV_EILSEQ;
318     }
319     else
320         *error = YAZ_ICONV_EINVAL;  /* incomplete sentence */
321
322     return x;
323 }
324
325 static unsigned long yaz_read_UTF8 (yaz_iconv_t cd, unsigned char *inp,
326                                     size_t inbytesleft, size_t *no_read)
327 {
328     return yaz_read_UTF8_char(inp, inbytesleft, no_read, &cd->my_errno);
329 }
330
331 static unsigned long yaz_read_UCS4 (yaz_iconv_t cd, unsigned char *inp,
332                                     size_t inbytesleft, size_t *no_read)
333 {
334     unsigned long x = 0;
335     
336     if (inbytesleft < 4)
337     {
338         cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
339         *no_read = 0;
340     }
341     else
342     {
343         x = (inp[0]<<24) | (inp[1]<<16) | (inp[2]<<8) | inp[3];
344         *no_read = 4;
345     }
346     return x;
347 }
348
349 static unsigned long yaz_read_UCS4LE (yaz_iconv_t cd, unsigned char *inp,
350                                       size_t inbytesleft, size_t *no_read)
351 {
352     unsigned long x = 0;
353     
354     if (inbytesleft < 4)
355     {
356         cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
357         *no_read = 0;
358     }
359     else
360     {
361         x = (inp[3]<<24) | (inp[2]<<16) | (inp[1]<<8) | inp[0];
362         *no_read = 4;
363     }
364     return x;
365 }
366
367 #if HAVE_WCHAR_H
368 static unsigned long yaz_read_wchar_t (yaz_iconv_t cd, unsigned char *inp,
369                                        size_t inbytesleft, size_t *no_read)
370 {
371     unsigned long x = 0;
372     
373     if (inbytesleft < sizeof(wchar_t))
374     {
375         cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
376         *no_read = 0;
377     }
378     else
379     {
380         wchar_t wch;
381         memcpy (&wch, inp, sizeof(wch));
382         x = wch;
383         *no_read = sizeof(wch);
384     }
385     return x;
386 }
387 #endif
388
389 static unsigned long yaz_read_iso5428_1984(yaz_iconv_t cd, unsigned char *inp,
390                                            size_t inbytesleft, size_t *no_read)
391 {
392     unsigned long x = 0;
393     int tonos = 0;
394     int dialitika = 0;
395
396     *no_read = 0;
397     while (inbytesleft > 0)
398     {
399         if (*inp == 0xa2)
400         {
401             tonos = 1;
402         }
403         else if (*inp == 0xa3)
404         {
405             dialitika = 1;
406         }
407         else
408             break;
409         inp++;
410         --inbytesleft;
411         (*no_read)++;
412     }    
413     if (inbytesleft == 0)
414     {
415         cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
416         *no_read = 0;
417         return 0;
418     }
419     switch (*inp) {
420     case 0xe1: /*  alpha small */
421             if (tonos) 
422                 x = 0x03ac;
423             else 
424                 x = 0x03b1;
425             break;
426     case 0xc1: /*  alpha capital */
427             if (tonos) 
428                 x = 0x0386;
429             else 
430                 x = 0x0391;
431             break;
432
433     case 0xe2: /*  Beta small */
434             x = 0x03b2;
435             break;
436     case 0xc2: /*  Beta capital */
437             x = 0x0392;
438             break;
439
440     case 0xe4: /*  Gamma small */
441             x = 0x03b3;
442             break;
443     case 0xc4: /*  Gamma capital */
444             x = 0x0393;
445             break;
446
447     case 0xe5: /*  Delta small */
448             x = 0x03b4;
449             break;
450     case 0xc5: /*  Delta capital */
451             x = 0x0394;
452             break;
453     case 0xe6: /*  epsilon small */
454             if (tonos) 
455                 x = 0x03ad;
456             else 
457                 x = 0x03b5;
458             break;
459     case 0xc6: /*  epsilon capital */
460             if (tonos) 
461                 x = 0x0388;
462             else 
463                 x = 0x0395;
464             break;
465     case 0xe9: /*  Zeta small */
466             x = 0x03b6;
467             break;
468     case 0xc9: /*  Zeta capital */
469             x = 0x0396;
470             break;
471     case 0xea: /*  Eta small */
472             if (tonos) 
473                 x = 0x03ae;
474             else 
475                 x = 0x03b7;
476             break;
477     case 0xca: /*  Eta capital */
478             if (tonos) 
479                 x = 0x0389;
480             else 
481                 x = 0x0397;
482             break;
483     case 0xeb: /*  Theta small */
484             x = 0x03b8;
485             break;
486     case 0xcb: /*  Theta capital */
487             x = 0x0398;
488             break;
489     case 0xec: /*  Iota small */
490             if (tonos) 
491                 if (dialitika) 
492                     x = 0x0390;
493                 else 
494                     x = 0x03af;
495             else 
496                 if (dialitika) 
497                     x = 0x03ca;
498                 else 
499                     x = 0x03b9;
500             break;
501     case 0xcc: /*  Iota capital */
502             if (tonos) 
503                 x = 0x038a;
504             else 
505                 if (dialitika) 
506                     x = 0x03aa;
507                 else 
508                     x = 0x0399;
509             break;
510     case 0xed: /*  Kappa small */
511             x = 0x03ba;
512             break;
513     case 0xcd: /*  Kappa capital */
514             x = 0x039a;
515             break;
516     case 0xee: /*  Lambda small */
517             x = 0x03bb;
518             break;
519     case 0xce: /*  Lambda capital */
520             x = 0x039b;
521             break;
522     case 0xef: /*  Mu small */
523             x = 0x03bc;
524             break;
525     case 0xcf: /*  Mu capital */
526             x = 0x039c;
527             break;
528     case 0xf0: /*  Nu small */
529             x = 0x03bd;
530             break;
531     case 0xd0: /*  Nu capital */
532             x = 0x039d;
533             break;
534     case 0xf1: /*  Xi small */
535             x = 0x03be;
536             break;
537     case 0xd1: /*  Xi capital */
538             x = 0x039e;
539             break;
540     case 0xf2: /*  Omicron small */
541             if (tonos) 
542                 x = 0x03cc;
543             else 
544                 x = 0x03bf;
545             break;
546     case 0xd2: /*  Omicron capital */
547             if (tonos) 
548                 x = 0x038c;
549             else 
550                 x = 0x039f;
551             break;
552     case 0xf3: /*  Pi small */
553             x = 0x03c0;
554             break;
555     case 0xd3: /*  Pi capital */
556             x = 0x03a0;
557             break;
558     case 0xf5: /*  Rho small */
559             x = 0x03c1;
560             break;
561     case 0xd5: /*  Rho capital */
562             x = 0x03a1;
563             break;
564     case 0xf7: /*  Sigma small (end of words) */
565             x = 0x03c2;
566             break;
567     case 0xf6: /*  Sigma small */
568             x = 0x03c3;
569             break;
570     case 0xd6: /*  Sigma capital */
571             x = 0x03a3;
572             break;
573     case 0xf8: /*  Tau small */
574             x = 0x03c4;
575             break;
576     case 0xd8: /*  Tau capital */
577             x = 0x03a4;
578             break;
579     case 0xf9: /*  Upsilon small */
580             if (tonos) 
581                 if (dialitika) 
582                     x = 0x03b0;
583                 else 
584                     x = 0x03cd;
585             else 
586                 if (dialitika) 
587                     x = 0x03cb;
588                 else 
589                     x = 0x03c5;
590             break;
591     case 0xd9: /*  Upsilon capital */
592             if (tonos) 
593                 x = 0x038e;
594             else 
595                 if (dialitika) 
596                     x = 0x03ab;
597                 else 
598                     x = 0x03a5;
599             break;
600     case 0xfa: /*  Phi small */
601             x = 0x03c6;
602             break;
603     case 0xda: /*  Phi capital */
604             x = 0x03a6;
605             break;
606     case 0xfb: /*  Chi small */
607             x = 0x03c7;
608             break;
609     case 0xdb: /*  Chi capital */
610             x = 0x03a7;
611             break;
612     case 0xfc: /*  Psi small */
613             x = 0x03c8;
614             break;
615     case 0xdc: /*  Psi capital */
616             x = 0x03a8;
617             break;
618     case 0xfd: /*  Omega small */
619             if (tonos) 
620                 x = 0x03ce;
621             else 
622                 x = 0x03c9;
623             break;
624     case 0xdd: /*  Omega capital */
625             if (tonos) 
626                 x = 0x038f;
627             else 
628                 x = 0x03a9;
629             break;
630     default:
631         x = *inp;
632         break;
633     }
634     (*no_read)++;
635     
636     return x;
637 }
638
639 static size_t yaz_write_iso5428_1984(yaz_iconv_t cd, unsigned long x,
640                                      char **outbuf, size_t *outbytesleft)
641 {
642     size_t k = 0;
643     unsigned char *out = (unsigned char*) *outbuf;
644     if (*outbytesleft < 3)
645     {
646         cd->my_errno = YAZ_ICONV_E2BIG;  /* not room for output */
647         return (size_t)(-1);
648     }
649     switch (x)
650     {
651     case 0x03ac : out[k++]=0xa2; out[k++]=0xe1; break;
652     case 0x03b1 : out[k++]=0xe1; break;
653     case 0x0386 : out[k++]=0xa2; out[k++]=0xc1; break;
654     case 0x0391 : out[k++]=0xc1; break;
655     case 0x03b2 : out[k++]=0xe2; break;
656     case 0x0392 : out[k++]=0xc2; break;
657     case 0x03b3 : out[k++]=0xe4; break;
658     case 0x0393 : out[k++]=0xc4; break;
659     case 0x03b4 : out[k++]=0xe5; break;
660     case 0x0394 : out[k++]=0xc5; break;
661     case 0x03ad : out[k++]=0xa2; out[k++]=0xe6; break;
662     case 0x03b5 : out[k++]=0xe6; break;
663     case 0x0388 : out[k++]=0xa2; out[k++]=0xc6; break;
664     case 0x0395 : out[k++]=0xc6; break;
665     case 0x03b6 : out[k++]=0xe9; break;
666     case 0x0396 : out[k++]=0xc9; break;
667     case 0x03ae : out[k++]=0xa2; out[k++]=0xea; break;
668     case 0x03b7 : out[k++]=0xea; break;
669     case 0x0389 : out[k++]=0xa2; out[k++]=0xca; break;
670     case 0x0397 : out[k++]=0xca; break;
671     case 0x03b8 : out[k++]=0xeb; break;
672     case 0x0398 : out[k++]=0xcb; break;
673     case 0x0390 : out[k++]=0xa2; out[k++]=0xa3; out[k++]=0xec; break;
674     case 0x03af : out[k++]=0xa2; out[k++]=0xec; break;
675     case 0x03ca : out[k++]=0xa3; out[k++]=0xec; break;
676     case 0x03b9 : out[k++]=0xec; break;
677     case 0x038a : out[k++]=0xa2; out[k++]=0xcc; break;
678     case 0x03aa : out[k++]=0xa3; out[k++]=0xcc; break;
679     case 0x0399 : out[k++]=0xcc; break;
680     case 0x03ba : out[k++]=0xed; break;
681     case 0x039a : out[k++]=0xcd; break;
682     case 0x03bb : out[k++]=0xee; break;
683     case 0x039b : out[k++]=0xce; break;
684     case 0x03bc : out[k++]=0xef; break;
685     case 0x039c : out[k++]=0xcf; break;
686     case 0x03bd : out[k++]=0xf0; break;
687     case 0x039d : out[k++]=0xd0; break;
688     case 0x03be : out[k++]=0xf1; break;
689     case 0x039e : out[k++]=0xd1; break;
690     case 0x03cc : out[k++]=0xa2; out[k++]=0xf2; break;
691     case 0x03bf : out[k++]=0xf2; break;
692     case 0x038c : out[k++]=0xa2; out[k++]=0xd2; break;
693     case 0x039f : out[k++]=0xd2; break;
694     case 0x03c0 : out[k++]=0xf3; break;
695     case 0x03a0 : out[k++]=0xd3; break;
696     case 0x03c1 : out[k++]=0xf5; break;
697     case 0x03a1 : out[k++]=0xd5; break;
698     case 0x03c2 : out[k++]=0xf7; break;
699     case 0x03c3 : out[k++]=0xf6; break;
700     case 0x03a3 : out[k++]=0xd6; break;
701     case 0x03c4 : out[k++]=0xf8; break;
702     case 0x03a4 : out[k++]=0xd8; break;
703     case 0x03b0 : out[k++]=0xa2; out[k++]=0xa3; out[k++]=0xf9; break;
704     case 0x03cd : out[k++]=0xa2; out[k++]=0xf9; break;
705     case 0x03cb : out[k++]=0xa3; out[k++]=0xf9; break;
706     case 0x03c5 : out[k++]=0xf9; break;
707     case 0x038e : out[k++]=0xa2; out[k++]=0xd9; break;
708     case 0x03ab : out[k++]=0xa3; out[k++]=0xd9; break;
709     case 0x03a5 : out[k++]=0xd9; break;
710     case 0x03c6 : out[k++]=0xfa; break;
711     case 0x03a6 : out[k++]=0xda; break;
712     case 0x03c7 : out[k++]=0xfb; break;
713     case 0x03a7 : out[k++]=0xdb; break;
714     case 0x03c8 : out[k++]=0xfc; break;
715     case 0x03a8 : out[k++]=0xdc; break;
716     case 0x03ce : out[k++]=0xa2; out[k++]=0xfd; break;
717     case 0x03c9 : out[k++]=0xfd; break;
718     case 0x038f : out[k++]=0xa2; out[k++]=0xdd; break;
719     case 0x03a9 : out[k++]=0xdd; break;
720     default:
721         if (x > 255)
722         {
723             cd->my_errno = YAZ_ICONV_EILSEQ;
724             return (size_t) -1;
725         }
726         out[k++] = x;
727         break;
728     }
729     *outbytesleft -= k;
730     (*outbuf) += k;
731     return 0;
732 }
733
734 static unsigned long yaz_read_advancegreek(yaz_iconv_t cd, unsigned char *inp,
735                                            size_t inbytesleft, size_t *no_read)
736 {
737     unsigned long x = 0;
738     int shift = 0;
739     int tonos = 0;
740     int dialitika = 0;
741
742     *no_read = 0;
743     while (inbytesleft > 0)
744     {
745         if (*inp == 0x9d)
746         {
747             tonos = 1;
748         }
749         else if (*inp == 0x9e)
750         {
751             dialitika = 1;
752         }
753         else if (*inp == 0x9f)
754         {
755             shift = 1;
756         }
757         else
758             break;
759         inp++;
760         --inbytesleft;
761         (*no_read)++;
762     }    
763     if (inbytesleft == 0)
764     {
765         cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
766         *no_read = 0;
767         return 0;
768     }
769     switch (*inp) {
770     case 0x81:
771         if (shift) 
772             if (tonos) 
773                 x = 0x0386;
774             else 
775                 x = 0x0391;
776         else 
777             if (tonos) 
778                 x = 0x03ac;
779             else 
780                 x = 0x03b1;
781         break;
782     case 0x82:
783         if (shift) 
784             x = 0x0392;
785         else 
786             x = 0x03b2;
787         
788         break;
789     case 0x83:
790         if (shift) 
791             x = 0x0393;
792         else 
793             x = 0x03b3;
794         break;
795     case 0x84:
796         if (shift) 
797             x = 0x0394;
798         else 
799             x = 0x03b4;
800         break;
801     case 0x85:
802         if (shift) 
803             if (tonos) 
804                 x = 0x0388;
805             else 
806                 x = 0x0395;
807         else 
808             if (tonos) 
809                 x = 0x03ad;
810             else 
811                 x = 0x03b5;
812         break;
813     case 0x86:
814         if (shift) 
815             x = 0x0396;
816         else 
817             x = 0x03b6;
818         break;
819     case 0x87:
820         if (shift) 
821             if (tonos) 
822                 x = 0x0389;
823             else 
824                 x = 0x0397;
825         else 
826             if (tonos) 
827                 x = 0x03ae;
828             else 
829                 x = 0x03b7;
830         break;
831     case 0x88:
832         if (shift) 
833             x = 0x0398;
834         else 
835             x = 0x03b8;
836         break;
837     case 0x89:
838         if (shift) 
839             if (tonos) 
840                 x = 0x038a;
841             else 
842                 if (dialitika) 
843                     x = 0x03aa;
844                 else 
845                     x = 0x0399;
846         else 
847             if (tonos) 
848                 if (dialitika) 
849                     x = 0x0390;
850                 else 
851                     x = 0x03af;
852         
853             else 
854                 if (dialitika) 
855                     x = 0x03ca;
856                 else 
857                     x = 0x03b9;
858         break;
859     case 0x8a:
860         if (shift) 
861             x = 0x039a;
862         else 
863             x = 0x03ba;
864         
865         break;
866     case 0x8b:
867         if (shift) 
868             x = 0x039b;
869         else 
870             x = 0x03bb;
871         break;
872     case 0x8c:
873         if (shift) 
874             x = 0x039c;
875         else 
876             x = 0x03bc;
877         
878         break;
879     case 0x8d:
880         if (shift) 
881             x = 0x039d;
882         else 
883             x = 0x03bd;
884         break;
885     case 0x8e:
886         if (shift) 
887             x = 0x039e;
888         else 
889             x = 0x03be;
890         break;
891     case 0x8f:
892         if (shift) 
893             if (tonos) 
894                 x = 0x038c;
895             else 
896                 x = 0x039f;
897         else 
898             if (tonos) 
899                 x = 0x03cc;
900             else 
901                 x = 0x03bf;
902         break;
903     case 0x90:
904         if (shift) 
905             x = 0x03a0;
906         else 
907             x = 0x03c0;
908         break;
909     case 0x91:
910         if (shift) 
911             x = 0x03a1;
912         else 
913             x = 0x03c1;
914         break;
915     case 0x92:
916         x = 0x03c2;
917         break;
918     case 0x93:
919         if (shift) 
920             x = 0x03a3;
921         else 
922             x = 0x03c3;
923         break;
924     case 0x94:
925         if (shift) 
926             x = 0x03a4;
927         else 
928             x = 0x03c4;
929         break;
930     case 0x95:
931         if (shift) 
932             if (tonos) 
933                 x = 0x038e;
934             else 
935                 if (dialitika) 
936                     x = 0x03ab;
937                 else 
938                     x = 0x03a5;
939         else 
940             if (tonos) 
941                 if (dialitika) 
942                     x = 0x03b0;
943                 else 
944                     x = 0x03cd;
945         
946             else 
947                 if (dialitika) 
948                     x = 0x03cb;
949                 else 
950                     x = 0x03c5;
951         break;
952     case 0x96:
953         if (shift) 
954             x = 0x03a6;
955         else 
956             x = 0x03c6;
957         break;
958     case 0x97:
959         if (shift) 
960             x = 0x03a7;
961         else 
962             x = 0x03c7;
963         break;
964     case 0x98:
965         if (shift) 
966             x = 0x03a8;
967         else 
968             x = 0x03c8;
969         
970         break;
971         
972     case 0x99:
973         if (shift) 
974             if (tonos) 
975                 x = 0x038f;
976             else 
977                 x = 0x03a9;
978         else 
979             if (tonos) 
980                 x = 0x03ce;
981             else 
982                 x = 0x03c9;
983         break;
984     default:
985         x = *inp;
986         break;
987     }
988     (*no_read)++;
989     
990     return x;
991 }
992
993 static size_t yaz_write_advancegreek(yaz_iconv_t cd, unsigned long x,
994                                      char **outbuf, size_t *outbytesleft)
995 {
996     size_t k = 0;
997     unsigned char *out = (unsigned char*) *outbuf;
998     if (*outbytesleft < 3)
999     {
1000         cd->my_errno = YAZ_ICONV_E2BIG;  /* not room for output */
1001         return (size_t)(-1);
1002     }
1003     switch (x)
1004     {
1005     case 0x03ac : out[k++]=0x9d; out[k++]=0x81; break;
1006     case 0x03ad : out[k++]=0x9d; out[k++]=0x85; break;
1007     case 0x03ae : out[k++]=0x9d; out[k++]=0x87; break;
1008     case 0x03af : out[k++]=0x9d; out[k++]=0x89; break;
1009     case 0x03cc : out[k++]=0x9d; out[k++]=0x8f; break;
1010     case 0x03cd : out[k++]=0x9d; out[k++]=0x95; break;
1011     case 0x03ce : out[k++]=0x9d; out[k++]=0x99; break;
1012     case 0x0390 : out[k++]=0x9d; out[k++]=0x9e; out[k++]=0x89; break;
1013     case 0x03b0 : out[k++]=0x9d; out[k++]=0x9e; out[k++]=0x95; break;
1014     case 0x0386 : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x81; break;
1015     case 0x0388 : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x85; break;
1016     case 0x0389 : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x87; break;
1017     case 0x038a : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x89; break;
1018     case 0x038c : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x8f; break;
1019     case 0x038e : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x95; break;
1020     case 0x038f : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x99; break;
1021     case 0x03ca : out[k++]=0x9e; out[k++]=0x89; break;
1022     case 0x03cb : out[k++]=0x9e; out[k++]=0x95; break;
1023     case 0x03aa : out[k++]=0x9e; out[k++]=0x9f; out[k++]=0x89; break;
1024     case 0x03ab : out[k++]=0x9e; out[k++]=0x9f; out[k++]=0x95; break;
1025     case 0x0391 : out[k++]=0x9f; out[k++]=0x81; break;
1026     case 0x0392 : out[k++]=0x9f; out[k++]=0x82; break;
1027     case 0x0393 : out[k++]=0x9f; out[k++]=0x83; break;
1028     case 0x0394 : out[k++]=0x9f; out[k++]=0x84; break;
1029     case 0x0395 : out[k++]=0x9f; out[k++]=0x85; break;
1030     case 0x0396 : out[k++]=0x9f; out[k++]=0x86; break;
1031     case 0x0397 : out[k++]=0x9f; out[k++]=0x87; break;
1032     case 0x0398 : out[k++]=0x9f; out[k++]=0x88; break;
1033     case 0x0399 : out[k++]=0x9f; out[k++]=0x89; break;
1034     case 0x039a : out[k++]=0x9f; out[k++]=0x8a; break;
1035     case 0x039b : out[k++]=0x9f; out[k++]=0x8b; break;
1036     case 0x039c : out[k++]=0x9f; out[k++]=0x8c; break;
1037     case 0x039d : out[k++]=0x9f; out[k++]=0x8d; break;
1038     case 0x039e : out[k++]=0x9f; out[k++]=0x8e; break;
1039     case 0x039f : out[k++]=0x9f; out[k++]=0x8f; break;
1040     case 0x03a0 : out[k++]=0x9f; out[k++]=0x90; break;
1041     case 0x03a1 : out[k++]=0x9f; out[k++]=0x91; break;
1042     case 0x03a3 : out[k++]=0x9f; out[k++]=0x93; break;
1043     case 0x03a4 : out[k++]=0x9f; out[k++]=0x94; break;
1044     case 0x03a5 : out[k++]=0x9f; out[k++]=0x95; break;
1045     case 0x03a6 : out[k++]=0x9f; out[k++]=0x96; break;
1046     case 0x03a7 : out[k++]=0x9f; out[k++]=0x97; break;
1047     case 0x03a8 : out[k++]=0x9f; out[k++]=0x98; break;
1048     case 0x03a9 : out[k++]=0x9f; out[k++]=0x99; break;
1049     case 0x03b1 : out[k++]=0x81; break;
1050     case 0x03b2 : out[k++]=0x82; break;
1051     case 0x03b3 : out[k++]=0x83; break;
1052     case 0x03b4 : out[k++]=0x84; break;
1053     case 0x03b5 : out[k++]=0x85; break;
1054     case 0x03b6 : out[k++]=0x86; break;
1055     case 0x03b7 : out[k++]=0x87; break;
1056     case 0x03b8 : out[k++]=0x88; break;
1057     case 0x03b9 : out[k++]=0x89; break;
1058     case 0x03ba : out[k++]=0x8a; break;
1059     case 0x03bb : out[k++]=0x8b; break;
1060     case 0x03bc : out[k++]=0x8c; break;
1061     case 0x03bd : out[k++]=0x8d; break;
1062     case 0x03be : out[k++]=0x8e; break;
1063     case 0x03bf : out[k++]=0x8f; break;
1064     case 0x03c0 : out[k++]=0x90; break;
1065     case 0x03c1 : out[k++]=0x91; break;
1066     case 0x03c2 : out[k++]=0x92; break;
1067     case 0x03c3 : out[k++]=0x93; break;
1068     case 0x03c4 : out[k++]=0x94; break;
1069     case 0x03c5 : out[k++]=0x95; break;
1070     case 0x03c6 : out[k++]=0x96; break;
1071     case 0x03c7 : out[k++]=0x96; break;
1072     case 0x03c8 : out[k++]=0x98; break;
1073     case 0x03c9 : out[k++]=0x99; break;
1074     default:
1075         if (x > 255)
1076         {
1077             cd->my_errno = YAZ_ICONV_EILSEQ;
1078             return (size_t) -1;
1079         }
1080         out[k++] = x;
1081         break;
1082     }
1083     *outbytesleft -= k;
1084     (*outbuf) += k;
1085     return 0;
1086 }
1087
1088
1089 static unsigned long yaz_read_marc8_comb (yaz_iconv_t cd, unsigned char *inp,
1090                                           size_t inbytesleft, size_t *no_read,
1091                                           int *comb);
1092
1093 static unsigned long yaz_read_marc8 (yaz_iconv_t cd, unsigned char *inp,
1094                                      size_t inbytesleft, size_t *no_read)
1095 {
1096     unsigned long x;
1097     if (cd->comb_offset < cd->comb_size)
1098     {
1099         *no_read = cd->comb_no_read[cd->comb_offset];
1100         x = cd->comb_x[cd->comb_offset];
1101
1102         /* special case for double-diacritic combining characters, 
1103            INVERTED BREVE and DOUBLE TILDE.
1104            We'll increment the no_read counter by 1, since we want to skip over
1105            the processing of the closing ligature character
1106         */
1107         /* this code is no longer necessary.. our handlers code in
1108            yaz_marc8_?_conv (generated by charconv.tcl) now returns
1109            0 and no_read=1 when a sequence does not match the input.
1110            The SECOND HALFs in codetables.xml produces a non-existant
1111            entry in the conversion trie.. Hence when met, the input byte is
1112            skipped as it should (in yaz_iconv)
1113         */
1114 #if 0
1115         if (x == 0x0361 || x == 0x0360)
1116             *no_read += 1;
1117 #endif
1118         cd->comb_offset++;
1119         return x;
1120     }
1121
1122     cd->comb_offset = 0;
1123     for (cd->comb_size = 0; cd->comb_size < 8; cd->comb_size++)
1124     {
1125         int comb = 0;
1126
1127         if (inbytesleft == 0 && cd->comb_size)
1128         {
1129             cd->my_errno = YAZ_ICONV_EINVAL;
1130             x = 0;
1131             *no_read = 0;
1132             break;
1133         }
1134         x = yaz_read_marc8_comb(cd, inp, inbytesleft, no_read, &comb);
1135         if (!comb || !x)
1136             break;
1137         cd->comb_x[cd->comb_size] = x;
1138         cd->comb_no_read[cd->comb_size] = *no_read;
1139         inp += *no_read;
1140         inbytesleft = inbytesleft - *no_read;
1141     }
1142     return x;
1143 }
1144
1145 static unsigned long yaz_read_marc8s(yaz_iconv_t cd, unsigned char *inp,
1146                                      size_t inbytesleft, size_t *no_read)
1147 {
1148     unsigned long x = yaz_read_marc8(cd, inp, inbytesleft, no_read);
1149     if (x && cd->comb_size == 1)
1150     {
1151         /* For MARC8s we try to get a Latin-1 page code out of it */
1152         int i;
1153         for (i = 0; latin1_comb[i].x1; i++)
1154             if (cd->comb_x[0] == latin1_comb[i].x2 && x == latin1_comb[i].x1)
1155             {
1156                 *no_read += cd->comb_no_read[0];
1157                 cd->comb_size = 0;
1158                 x = latin1_comb[i].y;
1159                 break;
1160             }
1161     }
1162     return x;
1163 }
1164
1165 static unsigned long yaz_read_marc8_comb(yaz_iconv_t cd, unsigned char *inp,
1166                                          size_t inbytesleft, size_t *no_read,
1167                                          int *comb)
1168 {
1169     *no_read = 0;
1170     while (inbytesleft > 0 && *inp == 27)
1171     {
1172         int *modep = &cd->g0_mode;
1173         size_t inbytesleft0 = inbytesleft;
1174
1175         inbytesleft--;
1176         inp++;
1177         if (inbytesleft == 0)
1178             goto incomplete;
1179         if (*inp == '$') /* set with multiple bytes */
1180         {
1181             inbytesleft--;
1182             inp++;
1183         }
1184         if (inbytesleft == 0)
1185             goto incomplete;
1186         if (*inp == '(' || *inp == ',')  /* G0 */
1187         {
1188             inbytesleft--;
1189             inp++;
1190         }
1191         else if (*inp == ')' || *inp == '-') /* G1 */
1192         {
1193             inbytesleft--;
1194             inp++;
1195             modep = &cd->g1_mode;
1196         }
1197         if (inbytesleft == 0)
1198             goto incomplete;
1199         if (*inp == '!') /* ANSEL is a special case */
1200         {
1201             inbytesleft--;
1202             inp++;
1203         }
1204         if (inbytesleft == 0)
1205             goto incomplete;
1206         *modep = *inp++; /* Final character */
1207         inbytesleft--;
1208
1209         (*no_read) += inbytesleft0 - inbytesleft;
1210     }
1211     if (inbytesleft == 0)
1212         return 0;
1213     else if (*inp == ' ')
1214     {
1215         *no_read += 1;
1216         return ' ';
1217     }
1218     else
1219     {
1220         unsigned long x;
1221         size_t no_read_sub = 0;
1222         int mode = *inp < 128 ? cd->g0_mode : cd->g1_mode;
1223         *comb = 0;
1224
1225         switch(mode)
1226         {
1227         case 'B':  /* Basic ASCII */
1228         case 's':  /* ASCII */
1229         case 'E':  /* ANSEL */
1230             x = yaz_marc8_42_conv(inp, inbytesleft, &no_read_sub, comb);
1231             if (!x)
1232             {
1233                 no_read_sub = 0;
1234                 x = yaz_marc8_45_conv(inp, inbytesleft, &no_read_sub, comb);
1235             }
1236             break;
1237         case 'g':  /* Greek */
1238             x = yaz_marc8_67_conv(inp, inbytesleft, &no_read_sub, comb);
1239             break;
1240         case 'b':  /* Subscripts */
1241             x = yaz_marc8_62_conv(inp, inbytesleft, &no_read_sub, comb);
1242             break;
1243         case 'p':  /* Superscripts */
1244             x = yaz_marc8_70_conv(inp, inbytesleft, &no_read_sub, comb);
1245             break;
1246         case '2':  /* Basic Hebrew */
1247             x = yaz_marc8_32_conv(inp, inbytesleft, &no_read_sub, comb);
1248             break;
1249         case 'N':  /* Basic Cyrillic */
1250             x = yaz_marc8_4E_conv(inp, inbytesleft, &no_read_sub, comb);
1251             break;
1252         case 'Q':  /* Extended Cyrillic */
1253             x = yaz_marc8_51_conv(inp, inbytesleft, &no_read_sub, comb);
1254             break;
1255         case '3':  /* Basic Arabic */
1256             x = yaz_marc8_33_conv(inp, inbytesleft, &no_read_sub, comb);
1257             break;
1258         case '4':  /* Extended Arabic */
1259             x = yaz_marc8_34_conv(inp, inbytesleft, &no_read_sub, comb);
1260             break;
1261         case 'S':  /* Greek */
1262             x = yaz_marc8_53_conv(inp, inbytesleft, &no_read_sub, comb);
1263             break;
1264         case '1':  /* Chinese, Japanese, Korean (EACC) */
1265             x = yaz_marc8_31_conv(inp, inbytesleft, &no_read_sub, comb);
1266             break;
1267         default:
1268             *no_read = 0;
1269             cd->my_errno = YAZ_ICONV_EILSEQ;
1270             return 0;
1271         }
1272         *no_read += no_read_sub;
1273         return x;
1274     }
1275 incomplete:
1276     *no_read = 0;
1277     cd->my_errno = YAZ_ICONV_EINVAL;
1278     return 0;
1279 }
1280
1281 static size_t yaz_write_UTF8(yaz_iconv_t cd, unsigned long x,
1282                              char **outbuf, size_t *outbytesleft)
1283 {
1284     return yaz_write_UTF8_char(x, outbuf, outbytesleft, &cd->my_errno);
1285 }
1286
1287 size_t yaz_write_UTF8_char(unsigned long x,
1288                            char **outbuf, size_t *outbytesleft,
1289                            int *error)
1290 {
1291     unsigned char *outp = (unsigned char *) *outbuf;
1292
1293     if (x <= 0x7f && *outbytesleft >= 1)
1294     {
1295         *outp++ = (unsigned char) x;
1296         (*outbytesleft)--;
1297     } 
1298     else if (x <= 0x7ff && *outbytesleft >= 2)
1299     {
1300         *outp++ = (unsigned char) ((x >> 6) | 0xc0);
1301         *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1302         (*outbytesleft) -= 2;
1303     }
1304     else if (x <= 0xffff && *outbytesleft >= 3)
1305     {
1306         *outp++ = (unsigned char) ((x >> 12) | 0xe0);
1307         *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
1308         *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1309         (*outbytesleft) -= 3;
1310     }
1311     else if (x <= 0x1fffff && *outbytesleft >= 4)
1312     {
1313         *outp++ = (unsigned char) ((x >> 18) | 0xf0);
1314         *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
1315         *outp++ = (unsigned char) (((x >> 6)  & 0x3f) | 0x80);
1316         *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1317         (*outbytesleft) -= 4;
1318     }
1319     else if (x <= 0x3ffffff && *outbytesleft >= 5)
1320     {
1321         *outp++ = (unsigned char) ((x >> 24) | 0xf8);
1322         *outp++ = (unsigned char) (((x >> 18) & 0x3f) | 0x80);
1323         *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
1324         *outp++ = (unsigned char) (((x >> 6)  & 0x3f) | 0x80);
1325         *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1326         (*outbytesleft) -= 5;
1327     }
1328     else if (*outbytesleft >= 6)
1329     {
1330         *outp++ = (unsigned char) ((x >> 30) | 0xfc);
1331         *outp++ = (unsigned char) (((x >> 24) & 0x3f) | 0x80);
1332         *outp++ = (unsigned char) (((x >> 18) & 0x3f) | 0x80);
1333         *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
1334         *outp++ = (unsigned char) (((x >> 6)  & 0x3f) | 0x80);
1335         *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1336         (*outbytesleft) -= 6;
1337     }
1338     else 
1339     {
1340         *error = YAZ_ICONV_E2BIG;  /* not room for output */
1341         return (size_t)(-1);
1342     }
1343     *outbuf = (char *) outp;
1344     return 0;
1345 }
1346
1347 static size_t yaz_write_ISO8859_1 (yaz_iconv_t cd, unsigned long x,
1348                                    char **outbuf, size_t *outbytesleft)
1349 {
1350     /* list of two char unicode sequence that, when combined, are
1351        equivalent to single unicode chars that can be represented in
1352        ISO-8859-1/Latin-1.
1353        Regular iconv on Linux at least does not seem to convert these,
1354        but since MARC-8 to UTF-8 generates these composed sequence
1355        we get a better chance of a successful MARC-8 -> ISO-8859-1
1356        conversion */
1357     unsigned char *outp = (unsigned char *) *outbuf;
1358
1359     if (cd->compose_char)
1360     {
1361         int i;
1362         for (i = 0; latin1_comb[i].x1; i++)
1363             if (cd->compose_char == latin1_comb[i].x1 && x == latin1_comb[i].x2)
1364             {
1365                 x = latin1_comb[i].y;
1366                 break;
1367             }
1368         if (*outbytesleft < 1)
1369         {  /* no room. Retain compose_char and bail out */
1370             cd->my_errno = YAZ_ICONV_E2BIG;
1371             return (size_t)(-1);
1372         }
1373         if (!latin1_comb[i].x1) 
1374         {   /* not found. Just write compose_char */
1375             *outp++ = (unsigned char) cd->compose_char;
1376             (*outbytesleft)--;
1377             *outbuf = (char *) outp;
1378         }
1379         /* compose_char used so reset it. x now holds current char */
1380         cd->compose_char = 0;
1381     }
1382
1383     if (x > 32 && x < 127 && cd->compose_char == 0)
1384     {
1385         cd->compose_char = x;
1386         return 0;
1387     }
1388     else if (x > 255 || x < 1)
1389     {
1390         cd->my_errno = YAZ_ICONV_EILSEQ;
1391         return (size_t) -1;
1392     }
1393     else if (*outbytesleft < 1)
1394     {
1395         cd->my_errno = YAZ_ICONV_E2BIG;
1396         return (size_t)(-1);
1397     }
1398     *outp++ = (unsigned char) x;
1399     (*outbytesleft)--;
1400     *outbuf = (char *) outp;
1401     return 0;
1402 }
1403
1404 static size_t yaz_flush_ISO8859_1(yaz_iconv_t cd,
1405                                   char **outbuf, size_t *outbytesleft)
1406 {
1407     if (cd->compose_char)
1408     {
1409         unsigned char *outp = (unsigned char *) *outbuf;
1410         if (*outbytesleft < 1)
1411         {
1412             cd->my_errno = YAZ_ICONV_E2BIG;
1413             return (size_t)(-1);
1414         }
1415         *outp++ = (unsigned char) cd->compose_char;
1416         (*outbytesleft)--;
1417         *outbuf = (char *) outp;
1418         cd->compose_char = 0;
1419     }
1420     return 0;
1421 }
1422
1423 static size_t yaz_write_UCS4 (yaz_iconv_t cd, unsigned long x,
1424                               char **outbuf, size_t *outbytesleft)
1425 {
1426     unsigned char *outp = (unsigned char *) *outbuf;
1427     if (*outbytesleft >= 4)
1428     {
1429         *outp++ = (unsigned char) (x>>24);
1430         *outp++ = (unsigned char) (x>>16);
1431         *outp++ = (unsigned char) (x>>8);
1432         *outp++ = (unsigned char) x;
1433         (*outbytesleft) -= 4;
1434     }
1435     else
1436     {
1437         cd->my_errno = YAZ_ICONV_E2BIG;
1438         return (size_t)(-1);
1439     }
1440     *outbuf = (char *) outp;
1441     return 0;
1442 }
1443
1444 static size_t yaz_write_UCS4LE (yaz_iconv_t cd, unsigned long x,
1445                                 char **outbuf, size_t *outbytesleft)
1446 {
1447     unsigned char *outp = (unsigned char *) *outbuf;
1448     if (*outbytesleft >= 4)
1449     {
1450         *outp++ = (unsigned char) x;
1451         *outp++ = (unsigned char) (x>>8);
1452         *outp++ = (unsigned char) (x>>16);
1453         *outp++ = (unsigned char) (x>>24);
1454         (*outbytesleft) -= 4;
1455     }
1456     else
1457     {
1458         cd->my_errno = YAZ_ICONV_E2BIG;
1459         return (size_t)(-1);
1460     }
1461     *outbuf = (char *) outp;
1462     return 0;
1463 }
1464
1465 static unsigned long lookup_marc8(yaz_iconv_t cd,
1466                                   unsigned long x, int *comb,
1467                                   const char **page_chr)
1468 {
1469     char utf8_buf[7];
1470     char *utf8_outbuf = utf8_buf;
1471     size_t utf8_outbytesleft = sizeof(utf8_buf)-1, r;
1472
1473     r = yaz_write_UTF8(cd, x, &utf8_outbuf, &utf8_outbytesleft);
1474     if (r == (size_t)(-1))
1475     {
1476         cd->my_errno = YAZ_ICONV_EILSEQ;
1477         return 0;
1478     }
1479     else
1480     {
1481         unsigned char *inp;
1482         size_t inbytesleft, no_read_sub = 0;
1483         unsigned long x;
1484
1485         *utf8_outbuf = '\0';        
1486         inp = (unsigned char *) utf8_buf;
1487         inbytesleft = strlen(utf8_buf);
1488
1489         x = yaz_marc8r_42_conv(inp, inbytesleft, &no_read_sub, comb);
1490         if (x)
1491         {
1492             *page_chr = ESC "(B";
1493             return x;
1494         }
1495         x = yaz_marc8r_45_conv(inp, inbytesleft, &no_read_sub, comb);
1496         if (x)
1497         {
1498             *page_chr = ESC "(B";
1499             return x;
1500         }
1501         x = yaz_marc8r_62_conv(inp, inbytesleft, &no_read_sub, comb);
1502         if (x)
1503         {
1504             *page_chr = ESC "b";
1505             return x;
1506         }
1507         x = yaz_marc8r_70_conv(inp, inbytesleft, &no_read_sub, comb);
1508         if (x)
1509         {
1510             *page_chr = ESC "p";
1511             return x;
1512         }
1513         x = yaz_marc8r_32_conv(inp, inbytesleft, &no_read_sub, comb);
1514         if (x)
1515         {
1516             *page_chr = ESC "(2";
1517             return x;
1518         }
1519         x = yaz_marc8r_4E_conv(inp, inbytesleft, &no_read_sub, comb);
1520         if (x)
1521         {
1522             *page_chr = ESC "(N";
1523             return x;
1524         }
1525         x = yaz_marc8r_51_conv(inp, inbytesleft, &no_read_sub, comb);
1526         if (x)
1527         {
1528             *page_chr = ESC "(Q";
1529             return x;
1530         }
1531         x = yaz_marc8r_33_conv(inp, inbytesleft, &no_read_sub, comb);
1532         if (x)
1533         {
1534             *page_chr = ESC "(3";
1535             return x;
1536         }
1537         x = yaz_marc8r_34_conv(inp, inbytesleft, &no_read_sub, comb);
1538         if (x)
1539         {
1540             *page_chr = ESC "(4";
1541             return x;
1542         }
1543         x = yaz_marc8r_53_conv(inp, inbytesleft, &no_read_sub, comb);
1544         if (x)
1545         {
1546             *page_chr = ESC "(S";
1547             return x;
1548         }
1549         x = yaz_marc8r_31_conv(inp, inbytesleft, &no_read_sub, comb);
1550         if (x)
1551         {
1552             *page_chr = ESC "$1";
1553             return x;
1554         }
1555         cd->my_errno = YAZ_ICONV_EILSEQ;
1556         return x;
1557     }
1558 }
1559
1560 static size_t flush_combos(yaz_iconv_t cd,
1561                            char **outbuf, size_t *outbytesleft)
1562 {
1563     unsigned long y = cd->write_marc8_last;
1564     unsigned char byte;
1565     char out_buf[4];
1566     size_t out_no = 0;
1567
1568     if (!y)
1569         return 0;
1570
1571     assert(cd->write_marc8_lpage);
1572     if (cd->write_marc8_lpage)
1573     {
1574         size_t r = yaz_write_marc8_page_chr(cd, outbuf, outbytesleft,
1575                                             cd->write_marc8_lpage);
1576         if (r)
1577             return r;
1578     }
1579
1580     byte = (unsigned char )((y>>16) & 0xff);
1581     if (byte)
1582         out_buf[out_no++] = byte;
1583     byte = (unsigned char)((y>>8) & 0xff);
1584     if (byte)
1585         out_buf[out_no++] = byte;
1586     byte = (unsigned char )(y & 0xff);
1587     if (byte)
1588         out_buf[out_no++] = byte;
1589
1590     if (out_no + 2 >= *outbytesleft)
1591     {
1592         cd->my_errno = YAZ_ICONV_E2BIG;
1593         return (size_t) (-1);
1594     }
1595
1596     memcpy(*outbuf, out_buf, out_no);
1597     *outbuf += out_no;
1598     (*outbytesleft) -= out_no;
1599     if (cd->write_marc8_second_half_char)
1600     {
1601         *(*outbuf)++ = cd->write_marc8_second_half_char;
1602         (*outbytesleft)--;
1603     }        
1604
1605     cd->write_marc8_last = 0;
1606     cd->write_marc8_lpage = 0;
1607     cd->write_marc8_second_half_char = 0;
1608     return 0;
1609 }
1610
1611 static size_t yaz_write_marc8_page_chr(yaz_iconv_t cd, 
1612                                        char **outbuf, size_t *outbytesleft,
1613                                        const char *page_chr)
1614 {
1615     const char **old_page_chr = &cd->write_marc8_g0;
1616
1617     /* are we going to a G1-set (such as such as ESC ")!E") */
1618     if (page_chr && page_chr[1] == ')')
1619         old_page_chr = &cd->write_marc8_g1;
1620
1621     if (!*old_page_chr || strcmp(page_chr, *old_page_chr))
1622     {
1623         size_t plen = 0;
1624         const char *page_out = page_chr;
1625         
1626         if (*outbytesleft < 8)
1627         {
1628             cd->my_errno = YAZ_ICONV_E2BIG;
1629             
1630             return (size_t) (-1);
1631         }
1632
1633         if (*old_page_chr)
1634         {
1635             if (!strcmp(*old_page_chr, ESC "p") 
1636                 || !strcmp(*old_page_chr, ESC "g")
1637                 || !strcmp(*old_page_chr, ESC "b"))
1638             {
1639                 page_out = ESC "s";
1640                 /* Technique 1 leave */
1641                 if (strcmp(page_chr, ESC "(B")) /* Not going ASCII page? */
1642                 {
1643                     /* Must leave script + enter new page */
1644                     plen = strlen(page_out);
1645                     memcpy(*outbuf, page_out, plen);
1646                     (*outbuf) += plen;
1647                     (*outbytesleft) -= plen;
1648                     page_out = ESC "(B";
1649                 }
1650             }
1651         }
1652         *old_page_chr = page_chr;
1653         plen = strlen(page_out);
1654         memcpy(*outbuf, page_out, plen);
1655         (*outbuf) += plen;
1656         (*outbytesleft) -= plen;
1657     }
1658     return 0;
1659 }
1660
1661
1662 static size_t yaz_write_marc8_2(yaz_iconv_t cd, unsigned long x,
1663                                 char **outbuf, size_t *outbytesleft)
1664 {
1665     int comb = 0;
1666     const char *page_chr = 0;
1667     unsigned long y = lookup_marc8(cd, x, &comb, &page_chr);
1668
1669     if (!y)
1670         return (size_t) (-1);
1671
1672     if (comb)
1673     {
1674         if (page_chr)
1675         {
1676             size_t r = yaz_write_marc8_page_chr(cd, outbuf, outbytesleft,
1677                                                 page_chr);
1678             if (r)
1679                 return r;
1680         }
1681         if (x == 0x0361)
1682             cd->write_marc8_second_half_char = 0xEC;
1683         else if (x == 0x0360)
1684             cd->write_marc8_second_half_char = 0xFB;
1685
1686         if (*outbytesleft <= 1)
1687         {
1688             cd->my_errno = YAZ_ICONV_E2BIG;
1689             return (size_t) (-1);
1690         }
1691         *(*outbuf)++ = y;
1692         (*outbytesleft)--;
1693     }
1694     else
1695     {
1696         size_t r = flush_combos(cd, outbuf, outbytesleft);
1697         if (r)
1698             return r;
1699
1700         cd->write_marc8_last = y;
1701         cd->write_marc8_lpage = page_chr;
1702     }
1703     return 0;
1704 }
1705
1706 static size_t yaz_flush_marc8(yaz_iconv_t cd,
1707                               char **outbuf, size_t *outbytesleft)
1708 {
1709     size_t r = flush_combos(cd, outbuf, outbytesleft);
1710     if (r)
1711         return r;
1712     cd->write_marc8_g1 = 0;
1713     return yaz_write_marc8_page_chr(cd, outbuf, outbytesleft, ESC "(B");
1714 }
1715
1716 static size_t yaz_write_marc8(yaz_iconv_t cd, unsigned long x,
1717                               char **outbuf, size_t *outbytesleft)
1718 {
1719     int i;
1720     for (i = 0; latin1_comb[i].x1; i++)
1721     {
1722         if (x == latin1_comb[i].y)
1723         {
1724             size_t r ;
1725             /* save the output pointers .. */
1726             char *outbuf0 = *outbuf;
1727             size_t outbytesleft0 = *outbytesleft;
1728             int last_ch = cd->write_marc8_last;
1729             const char *lpage = cd->write_marc8_lpage;
1730
1731             r = yaz_write_marc8_2(cd, latin1_comb[i].x1,
1732                                   outbuf, outbytesleft);
1733             if (r)
1734                 return r;
1735             r = yaz_write_marc8_2(cd, latin1_comb[i].x2,
1736                                   outbuf, outbytesleft);
1737             if (r && cd->my_errno == YAZ_ICONV_E2BIG)
1738             {
1739                 /* not enough room. reset output to original values */
1740                 *outbuf = outbuf0;
1741                 *outbytesleft = outbytesleft0;
1742                 cd->write_marc8_last = last_ch;
1743                 cd->write_marc8_lpage = lpage;
1744             }
1745             return r;
1746         }
1747     }
1748     return yaz_write_marc8_2(cd, x, outbuf, outbytesleft);
1749 }
1750
1751
1752 #if HAVE_WCHAR_H
1753 static size_t yaz_write_wchar_t(yaz_iconv_t cd, unsigned long x,
1754                                 char **outbuf, size_t *outbytesleft)
1755 {
1756     unsigned char *outp = (unsigned char *) *outbuf;
1757
1758     if (*outbytesleft >= sizeof(wchar_t))
1759     {
1760         wchar_t wch = x;
1761         memcpy(outp, &wch, sizeof(wch));
1762         outp += sizeof(wch);
1763         (*outbytesleft) -= sizeof(wch);
1764     }
1765     else
1766     {
1767         cd->my_errno = YAZ_ICONV_E2BIG;
1768         return (size_t)(-1);
1769     }
1770     *outbuf = (char *) outp;
1771     return 0;
1772 }
1773 #endif
1774
1775 int yaz_iconv_isbuiltin(yaz_iconv_t cd)
1776 {
1777     return cd->read_handle && cd->write_handle;
1778 }
1779
1780 yaz_iconv_t yaz_iconv_open (const char *tocode, const char *fromcode)
1781 {
1782     yaz_iconv_t cd = (yaz_iconv_t) xmalloc (sizeof(*cd));
1783
1784     cd->write_handle = 0;
1785     cd->read_handle = 0;
1786     cd->init_handle = 0;
1787     cd->flush_handle = 0;
1788     cd->my_errno = YAZ_ICONV_UNKNOWN;
1789
1790     /* a useful hack: if fromcode has leading @,
1791        the library not use YAZ's own conversions .. */
1792     if (fromcode[0] == '@')
1793         fromcode++;
1794     else
1795     {
1796         if (!yaz_matchstr(fromcode, "UTF8"))
1797         {
1798             cd->read_handle = yaz_read_UTF8;
1799             cd->init_handle = yaz_init_UTF8;
1800         }
1801         else if (!yaz_matchstr(fromcode, "ISO88591"))
1802             cd->read_handle = yaz_read_ISO8859_1;
1803         else if (!yaz_matchstr(fromcode, "UCS4"))
1804             cd->read_handle = yaz_read_UCS4;
1805         else if (!yaz_matchstr(fromcode, "UCS4LE"))
1806             cd->read_handle = yaz_read_UCS4LE;
1807         else if (!yaz_matchstr(fromcode, "MARC8"))
1808             cd->read_handle = yaz_read_marc8;
1809         else if (!yaz_matchstr(fromcode, "MARC8s"))
1810             cd->read_handle = yaz_read_marc8s;
1811         else if (!yaz_matchstr(fromcode, "advancegreek"))
1812             cd->read_handle = yaz_read_advancegreek;
1813         else if (!yaz_matchstr(fromcode, "iso54281984"))
1814             cd->read_handle = yaz_read_iso5428_1984;
1815         else if (!yaz_matchstr(fromcode, "iso5428:1984"))
1816             cd->read_handle = yaz_read_iso5428_1984;
1817 #if HAVE_WCHAR_H
1818         else if (!yaz_matchstr(fromcode, "WCHAR_T"))
1819             cd->read_handle = yaz_read_wchar_t;
1820 #endif
1821         
1822         if (!yaz_matchstr(tocode, "UTF8"))
1823             cd->write_handle = yaz_write_UTF8;
1824         else if (!yaz_matchstr(tocode, "ISO88591"))
1825         {
1826             cd->write_handle = yaz_write_ISO8859_1;
1827             cd->flush_handle = yaz_flush_ISO8859_1;
1828         }
1829         else if (!yaz_matchstr (tocode, "UCS4"))
1830             cd->write_handle = yaz_write_UCS4;
1831         else if (!yaz_matchstr(tocode, "UCS4LE"))
1832             cd->write_handle = yaz_write_UCS4LE;
1833         else if (!yaz_matchstr(tocode, "MARC8"))
1834         {
1835             cd->write_handle = yaz_write_marc8;
1836             cd->flush_handle = yaz_flush_marc8;
1837         }
1838         else if (!yaz_matchstr(tocode, "MARC8s"))
1839         {
1840             cd->write_handle = yaz_write_marc8;
1841             cd->flush_handle = yaz_flush_marc8;
1842         }
1843         else if (!yaz_matchstr(tocode, "advancegreek"))
1844         {
1845             cd->write_handle = yaz_write_advancegreek;
1846         }
1847         else if (!yaz_matchstr(tocode, "iso54281984"))
1848         {
1849             cd->write_handle = yaz_write_iso5428_1984;
1850         }
1851         else if (!yaz_matchstr(tocode, "iso5428:1984"))
1852         {
1853             cd->write_handle = yaz_write_iso5428_1984;
1854         }
1855 #if HAVE_WCHAR_H
1856         else if (!yaz_matchstr(tocode, "WCHAR_T"))
1857             cd->write_handle = yaz_write_wchar_t;
1858 #endif
1859     }
1860 #if HAVE_ICONV_H
1861     cd->iconv_cd = 0;
1862     if (!cd->read_handle || !cd->write_handle)
1863     {
1864         cd->iconv_cd = iconv_open (tocode, fromcode);
1865         if (cd->iconv_cd == (iconv_t) (-1))
1866         {
1867             xfree (cd);
1868             return 0;
1869         }
1870     }
1871 #else
1872     if (!cd->read_handle || !cd->write_handle)
1873     {
1874         xfree (cd);
1875         return 0;
1876     }
1877 #endif
1878     cd->init_flag = 1;
1879     return cd;
1880 }
1881
1882 size_t yaz_iconv(yaz_iconv_t cd, char **inbuf, size_t *inbytesleft,
1883                  char **outbuf, size_t *outbytesleft)
1884 {
1885     char *inbuf0 = 0;
1886     size_t r = 0;
1887
1888 #if HAVE_ICONV_H
1889     if (cd->iconv_cd)
1890     {
1891         size_t r =
1892             iconv(cd->iconv_cd, inbuf, inbytesleft, outbuf, outbytesleft);
1893         if (r == (size_t)(-1))
1894         {
1895             switch (yaz_errno())
1896             {
1897             case E2BIG:
1898                 cd->my_errno = YAZ_ICONV_E2BIG;
1899                 break;
1900             case EINVAL:
1901                 cd->my_errno = YAZ_ICONV_EINVAL;
1902                 break;
1903             case EILSEQ:
1904                 cd->my_errno = YAZ_ICONV_EILSEQ;
1905                 break;
1906             default:
1907                 cd->my_errno = YAZ_ICONV_UNKNOWN;
1908             }
1909         }
1910         return r;
1911     }
1912 #endif
1913
1914     if (inbuf)
1915         inbuf0 = *inbuf;
1916
1917     if (cd->init_flag)
1918     {
1919         cd->my_errno = YAZ_ICONV_UNKNOWN;
1920         cd->g0_mode = 'B';
1921         cd->g1_mode = 'B';
1922         
1923         cd->comb_offset = cd->comb_size = 0;
1924         cd->compose_char = 0;
1925         
1926         cd->write_marc8_second_half_char = 0;
1927         cd->write_marc8_last = 0;
1928         cd->write_marc8_lpage = 0;
1929         cd->write_marc8_g0 = ESC "(B";
1930         cd->write_marc8_g1 = 0;
1931         
1932         cd->unget_x = 0;
1933         cd->no_read_x = 0;
1934     }
1935
1936     if (cd->init_flag)
1937     {
1938         if (cd->init_handle && inbuf && *inbuf)
1939         {
1940             size_t no_read = 0;
1941             size_t r = (cd->init_handle)(cd, (unsigned char *) *inbuf,
1942                                          *inbytesleft, &no_read);
1943             if (r)
1944             {
1945                 if (cd->my_errno == YAZ_ICONV_EINVAL)
1946                     return r;
1947                 cd->init_flag = 0;
1948                 return r;
1949             }
1950             *inbytesleft -= no_read;
1951             *inbuf += no_read;
1952         }
1953     }
1954     cd->init_flag = 0;
1955
1956     if (!inbuf || !*inbuf)
1957     {
1958         if (outbuf && *outbuf)
1959         {
1960             if (cd->unget_x)
1961                 r = (*cd->write_handle)(cd, cd->unget_x, outbuf, outbytesleft);
1962             if (cd->flush_handle)
1963                 r = (*cd->flush_handle)(cd, outbuf, outbytesleft);
1964         }
1965         if (r == 0)
1966             cd->init_flag = 1;
1967         cd->unget_x = 0;
1968         return r;
1969     }
1970     while (1)
1971     {
1972         unsigned long x;
1973         size_t no_read;
1974
1975         if (cd->unget_x)
1976         {
1977             x = cd->unget_x;
1978             no_read = cd->no_read_x;
1979         }
1980         else
1981         {
1982             if (*inbytesleft == 0)
1983             {
1984                 r = *inbuf - inbuf0;
1985                 break;
1986             }
1987             x = (*cd->read_handle)(cd, (unsigned char *) *inbuf, *inbytesleft,
1988                                    &no_read);
1989             if (no_read == 0)
1990             {
1991                 r = (size_t)(-1);
1992                 break;
1993             }
1994         }
1995         if (x)
1996         {
1997             r = (*cd->write_handle)(cd, x, outbuf, outbytesleft);
1998             if (r)
1999             {
2000                 /* unable to write it. save it because read_handle cannot
2001                    rewind .. */
2002                 if (cd->my_errno == YAZ_ICONV_E2BIG)
2003                 {
2004                     cd->unget_x = x;
2005                     cd->no_read_x = no_read;
2006                     break;
2007                 }
2008             }
2009             cd->unget_x = 0;
2010         }
2011         *inbytesleft -= no_read;
2012         (*inbuf) += no_read;
2013     }
2014     return r;
2015 }
2016
2017 int yaz_iconv_error (yaz_iconv_t cd)
2018 {
2019     return cd->my_errno;
2020 }
2021
2022 int yaz_iconv_close (yaz_iconv_t cd)
2023 {
2024 #if HAVE_ICONV_H
2025     if (cd->iconv_cd)
2026         iconv_close (cd->iconv_cd);
2027 #endif
2028     xfree (cd);
2029     return 0;
2030 }
2031
2032 /*
2033  * Local variables:
2034  * c-basic-offset: 4
2035  * indent-tabs-mode: nil
2036  * End:
2037  * vim: shiftwidth=4 tabstop=8 expandtab
2038  */