Use oid_class rather than int for OID class.
[yaz-moved-to-github.git] / src / siconv.c
1 /*
2  * Copyright (C) 1995-2007, Index Data ApS
3  * See the file LICENSE for details.
4  *
5  * $Id: siconv.c,v 1.48 2007-10-15 20:45:05 adam Exp $
6  */
7 /**
8  * \file siconv.c
9  * \brief Implements simple ICONV
10  *
11  * This implements an interface similar to that of iconv and
12  * is used by YAZ to interface with iconv (if present).
13  * For systems where iconv is not present, this layer
14  * provides a few important conversions: UTF-8, MARC-8, Latin-1.
15  *
16  * MARC-8 reference:
17  *  http://www.loc.gov/marc/specifications/speccharmarc8.html
18  */
19
20 #if HAVE_CONFIG_H
21 #include <config.h>
22 #endif
23
24 #include <assert.h>
25 #include <errno.h>
26 #include <string.h>
27 #include <ctype.h>
28 #if HAVE_WCHAR_H
29 #include <wchar.h>
30 #endif
31
32 #if HAVE_ICONV_H
33 #include <iconv.h>
34 #endif
35
36
37 #include <yaz/yaz-util.h>
38
/*
 * External conversion routines, declared here and not defined in this part
 * of the file. All share one signature: input bytes and their count go in,
 * a code value is returned, and the number of bytes consumed is reported
 * through no_read; a flag is reported through combining.
 * NOTE(review): the hex suffix presumably names a MARC-8 character set
 * (see the MARC-8 reference in the file header) - confirm against the
 * generated tables.
 */
unsigned long yaz_marc8_42_conv(unsigned char *inp, size_t inbytesleft,
                               size_t *no_read, int *combining);
unsigned long yaz_marc8_45_conv(unsigned char *inp, size_t inbytesleft,
                               size_t *no_read, int *combining);
unsigned long yaz_marc8_67_conv(unsigned char *inp, size_t inbytesleft,
                               size_t *no_read, int *combining);
unsigned long yaz_marc8_62_conv(unsigned char *inp, size_t inbytesleft,
                               size_t *no_read, int *combining);
unsigned long yaz_marc8_70_conv(unsigned char *inp, size_t inbytesleft,
                               size_t *no_read, int *combining);
unsigned long yaz_marc8_32_conv(unsigned char *inp, size_t inbytesleft,
                               size_t *no_read, int *combining);
unsigned long yaz_marc8_4E_conv(unsigned char *inp, size_t inbytesleft,
                               size_t *no_read, int *combining);
unsigned long yaz_marc8_51_conv(unsigned char *inp, size_t inbytesleft,
                               size_t *no_read, int *combining);
unsigned long yaz_marc8_33_conv(unsigned char *inp, size_t inbytesleft,
                               size_t *no_read, int *combining);
unsigned long yaz_marc8_34_conv(unsigned char *inp, size_t inbytesleft,
                               size_t *no_read, int *combining);
unsigned long yaz_marc8_53_conv(unsigned char *inp, size_t inbytesleft,
                               size_t *no_read, int *combining);
unsigned long yaz_marc8_31_conv(unsigned char *inp, size_t inbytesleft,
                               size_t *no_read, int *combining);


/*
 * Same signature as the group above, with an "r" in the name.
 * NOTE(review): presumably the reverse-direction tables (used when writing
 * MARC-8) - confirm against the definitions.
 */
unsigned long yaz_marc8r_42_conv(unsigned char *inp, size_t inbytesleft,
                                 size_t *no_read, int *combining);
unsigned long yaz_marc8r_45_conv(unsigned char *inp, size_t inbytesleft,
                                 size_t *no_read, int *combining);
unsigned long yaz_marc8r_67_conv(unsigned char *inp, size_t inbytesleft,
                                 size_t *no_read, int *combining);
unsigned long yaz_marc8r_62_conv(unsigned char *inp, size_t inbytesleft,
                                 size_t *no_read, int *combining);
unsigned long yaz_marc8r_70_conv(unsigned char *inp, size_t inbytesleft,
                                 size_t *no_read, int *combining);
unsigned long yaz_marc8r_32_conv(unsigned char *inp, size_t inbytesleft,
                                 size_t *no_read, int *combining);
unsigned long yaz_marc8r_4E_conv(unsigned char *inp, size_t inbytesleft,
                                 size_t *no_read, int *combining);
unsigned long yaz_marc8r_51_conv(unsigned char *inp, size_t inbytesleft,
                                 size_t *no_read, int *combining);
unsigned long yaz_marc8r_33_conv(unsigned char *inp, size_t inbytesleft,
                                 size_t *no_read, int *combining);
unsigned long yaz_marc8r_34_conv(unsigned char *inp, size_t inbytesleft,
                                 size_t *no_read, int *combining);
unsigned long yaz_marc8r_53_conv(unsigned char *inp, size_t inbytesleft,
                                 size_t *no_read, int *combining);
unsigned long yaz_marc8r_31_conv(unsigned char *inp, size_t inbytesleft,
                                 size_t *no_read, int *combining);
89
90 #define ESC "\033"
91
/*
 * Conversion state. The handlers in this file reach it through a
 * yaz_iconv_t argument named cd (e.g. cd->my_errno), so yaz_iconv_t is
 * presumably a pointer to this struct - the typedef is not in this file.
 */
struct yaz_iconv_struct {
    /* Error code of the last failure; handlers store YAZ_ICONV_* values */
    int my_errno;
    /* NOTE(review): presumably tracks whether init_handle has run - confirm */
    int init_flag;
    /* Optional start-of-input hook (same signature as yaz_init_UTF8) */
    size_t (*init_handle)(yaz_iconv_t cd, unsigned char *inbuf,
                          size_t inbytesleft, size_t *no_read);
    /* Decoder: returns one code value and reports bytes consumed in no_read */
    unsigned long (*read_handle)(yaz_iconv_t cd, unsigned char *inbuf,
                                 size_t inbytesleft, size_t *no_read);
    /* Encoder: writes code value x to *outbuf and updates *outbytesleft */
    size_t (*write_handle)(yaz_iconv_t cd, unsigned long x,
                           char **outbuf, size_t *outbytesleft);
    /* NOTE(review): presumably emits pending encoder state - confirm */
    size_t (*flush_handle)(yaz_iconv_t cd,
                           char **outbuf, size_t *outbytesleft);
    /* NOTE(review): MARC-8 reader state; semantics not visible here */
    int marc8_esc_mode;

    /* NOTE(review): the comb_* fields look like a small queue of pending
       combining characters (8 slots) - confirm against the MARC-8 reader */
    int comb_offset;
    int comb_size;
    unsigned long comb_x[8];
    size_t comb_no_read[8];
    size_t no_read_x;
    unsigned long unget_x;
#if HAVE_ICONV_H
    /* Descriptor of the system iconv, only present when iconv.h exists */
    iconv_t iconv_cd;
#endif
    /* NOTE(review): presumably a character held back for composition */
    unsigned long compose_char;

    /* NOTE(review): state of the MARC-8 writer; semantics not visible here */
    unsigned write_marc8_second_half_char;
    unsigned long write_marc8_last;
    const char *write_marc8_lpage;
    const char *write_marc8_g0;
    const char *write_marc8_g1;
};
122
/*
 * Composition table for Latin-1. Per the row comments, each entry pairs a
 * base letter (x1) and a Unicode combining mark (x2) with the code of the
 * precomposed ISO-8859-1 character (y). The list ends with an all-zero
 * entry. Rows marked "omitted" are Latin-1 codes with no base+mark form.
 * NOTE(review): the consumer of this table is outside this part of the
 * file - presumably the Latin-1 writer.
 */
static struct {
    unsigned long x1, x2;
    unsigned y;
} latin1_comb[] = {
    { 'A', 0x0300, 0xc0}, /* LATIN CAPITAL LETTER A WITH GRAVE */
    { 'A', 0x0301, 0xc1}, /* LATIN CAPITAL LETTER A WITH ACUTE */
    { 'A', 0x0302, 0xc2}, /* LATIN CAPITAL LETTER A WITH CIRCUMFLEX */
    { 'A', 0x0303, 0xc3}, /* LATIN CAPITAL LETTER A WITH TILDE */
    { 'A', 0x0308, 0xc4}, /* LATIN CAPITAL LETTER A WITH DIAERESIS */
    { 'A', 0x030a, 0xc5}, /* LATIN CAPITAL LETTER A WITH RING ABOVE */
    /* omitted:    0xc6      LATIN CAPITAL LETTER AE */
    { 'C', 0x0327, 0xc7}, /* LATIN CAPITAL LETTER C WITH CEDILLA */
    { 'E', 0x0300, 0xc8}, /* LATIN CAPITAL LETTER E WITH GRAVE */
    { 'E', 0x0301, 0xc9}, /* LATIN CAPITAL LETTER E WITH ACUTE */
    { 'E', 0x0302, 0xca}, /* LATIN CAPITAL LETTER E WITH CIRCUMFLEX */
    { 'E', 0x0308, 0xcb}, /* LATIN CAPITAL LETTER E WITH DIAERESIS */
    { 'I', 0x0300, 0xcc}, /* LATIN CAPITAL LETTER I WITH GRAVE */
    { 'I', 0x0301, 0xcd}, /* LATIN CAPITAL LETTER I WITH ACUTE */
    { 'I', 0x0302, 0xce}, /* LATIN CAPITAL LETTER I WITH CIRCUMFLEX */
    { 'I', 0x0308, 0xcf}, /* LATIN CAPITAL LETTER I WITH DIAERESIS */
    { 'N', 0x0303, 0xd1}, /* LATIN CAPITAL LETTER N WITH TILDE */
    { 'O', 0x0300, 0xd2}, /* LATIN CAPITAL LETTER O WITH GRAVE */
    { 'O', 0x0301, 0xd3}, /* LATIN CAPITAL LETTER O WITH ACUTE */
    { 'O', 0x0302, 0xd4}, /* LATIN CAPITAL LETTER O WITH CIRCUMFLEX */
    { 'O', 0x0303, 0xd5}, /* LATIN CAPITAL LETTER O WITH TILDE */
    { 'O', 0x0308, 0xd6}, /* LATIN CAPITAL LETTER O WITH DIAERESIS */
    /* omitted:    0xd7      MULTIPLICATION SIGN */
    /* omitted:    0xd8      LATIN CAPITAL LETTER O WITH STROKE */
    { 'U', 0x0300, 0xd9}, /* LATIN CAPITAL LETTER U WITH GRAVE */
    { 'U', 0x0301, 0xda}, /* LATIN CAPITAL LETTER U WITH ACUTE */
    { 'U', 0x0302, 0xdb}, /* LATIN CAPITAL LETTER U WITH CIRCUMFLEX */
    { 'U', 0x0308, 0xdc}, /* LATIN CAPITAL LETTER U WITH DIAERESIS */
    { 'Y', 0x0301, 0xdd}, /* LATIN CAPITAL LETTER Y WITH ACUTE */
    /* omitted:    0xde      LATIN CAPITAL LETTER THORN */
    /* omitted:    0xdf      LATIN SMALL LETTER SHARP S */
    { 'a', 0x0300, 0xe0}, /* LATIN SMALL LETTER A WITH GRAVE */
    { 'a', 0x0301, 0xe1}, /* LATIN SMALL LETTER A WITH ACUTE */
    { 'a', 0x0302, 0xe2}, /* LATIN SMALL LETTER A WITH CIRCUMFLEX */
    { 'a', 0x0303, 0xe3}, /* LATIN SMALL LETTER A WITH TILDE */
    { 'a', 0x0308, 0xe4}, /* LATIN SMALL LETTER A WITH DIAERESIS */
    { 'a', 0x030a, 0xe5}, /* LATIN SMALL LETTER A WITH RING ABOVE */
    /* omitted:    0xe6      LATIN SMALL LETTER AE */
    { 'c', 0x0327, 0xe7}, /* LATIN SMALL LETTER C WITH CEDILLA */
    { 'e', 0x0300, 0xe8}, /* LATIN SMALL LETTER E WITH GRAVE */
    { 'e', 0x0301, 0xe9}, /* LATIN SMALL LETTER E WITH ACUTE */
    { 'e', 0x0302, 0xea}, /* LATIN SMALL LETTER E WITH CIRCUMFLEX */
    { 'e', 0x0308, 0xeb}, /* LATIN SMALL LETTER E WITH DIAERESIS */
    { 'i', 0x0300, 0xec}, /* LATIN SMALL LETTER I WITH GRAVE */
    { 'i', 0x0301, 0xed}, /* LATIN SMALL LETTER I WITH ACUTE */
    { 'i', 0x0302, 0xee}, /* LATIN SMALL LETTER I WITH CIRCUMFLEX */
    { 'i', 0x0308, 0xef}, /* LATIN SMALL LETTER I WITH DIAERESIS */
    /* omitted:    0xf0      LATIN SMALL LETTER ETH */
    { 'n', 0x0303, 0xf1}, /* LATIN SMALL LETTER N WITH TILDE */
    { 'o', 0x0300, 0xf2}, /* LATIN SMALL LETTER O WITH GRAVE */
    { 'o', 0x0301, 0xf3}, /* LATIN SMALL LETTER O WITH ACUTE */
    { 'o', 0x0302, 0xf4}, /* LATIN SMALL LETTER O WITH CIRCUMFLEX */
    { 'o', 0x0303, 0xf5}, /* LATIN SMALL LETTER O WITH TILDE */
    { 'o', 0x0308, 0xf6}, /* LATIN SMALL LETTER O WITH DIAERESIS */
    /* omitted:    0xf7      DIVISION SIGN */
    /* omitted:    0xf8      LATIN SMALL LETTER O WITH STROKE */
    { 'u', 0x0300, 0xf9}, /* LATIN SMALL LETTER U WITH GRAVE */
    { 'u', 0x0301, 0xfa}, /* LATIN SMALL LETTER U WITH ACUTE */
    { 'u', 0x0302, 0xfb}, /* LATIN SMALL LETTER U WITH CIRCUMFLEX */
    { 'u', 0x0308, 0xfc}, /* LATIN SMALL LETTER U WITH DIAERESIS */
    { 'y', 0x0301, 0xfd}, /* LATIN SMALL LETTER Y WITH ACUTE */
    /* omitted:    0xfe      LATIN SMALL LETTER THORN */
    { 'y', 0x0308, 0xff}, /* LATIN SMALL LETTER Y WITH DIAERESIS */
    
    { 0, 0, 0}
};
193
/* Forward declaration; the definition is not in this part of the file.
   NOTE(review): presumably emits the MARC-8 escape sequence selecting
   page_chr - confirm at the definition. */
static size_t yaz_write_marc8_page_chr(yaz_iconv_t cd, 
                                       char **outbuf, size_t *outbytesleft,
                                       const char *page_chr);
197
198 static unsigned long yaz_read_ISO8859_1 (yaz_iconv_t cd, unsigned char *inp,
199                                          size_t inbytesleft, size_t *no_read)
200 {
201     unsigned long x = inp[0];
202     *no_read = 1;
203     return x;
204 }
205
206
207 static size_t yaz_init_UTF8 (yaz_iconv_t cd, unsigned char *inp,
208                              size_t inbytesleft, size_t *no_read)
209 {
210     if (inp[0] != 0xef)
211     {
212         *no_read = 0;
213         return 0;
214     }
215     if (inbytesleft < 3)
216     {
217         cd->my_errno = YAZ_ICONV_EINVAL;
218         return (size_t) -1;
219     }
220     if (inp[1] != 0xbb && inp[2] == 0xbf)
221         *no_read = 3;
222     else
223         *no_read = 0;
224     return 0;
225 }
226
227 unsigned long yaz_read_UTF8_char(unsigned char *inp,
228                                  size_t inbytesleft, size_t *no_read,
229                                  int *error)
230 {
231     unsigned long x = 0;
232
233     *no_read = 0; /* by default */
234     if (inp[0] <= 0x7f)
235     {
236         x = inp[0];
237         *no_read = 1;
238     }
239     else if (inp[0] <= 0xbf || inp[0] >= 0xfe)
240     {
241         *error = YAZ_ICONV_EILSEQ;
242     }
243     else if (inp[0] <= 0xdf && inbytesleft >= 2)
244     {
245         if ((inp[1] & 0xc0) == 0x80)
246         {
247             x = ((inp[0] & 0x1f) << 6) | (inp[1] & 0x3f);
248             if (x >= 0x80)
249                 *no_read = 2;
250             else
251                 *error = YAZ_ICONV_EILSEQ;
252         }
253         else
254             *error = YAZ_ICONV_EILSEQ;
255     }
256     else if (inp[0] <= 0xef && inbytesleft >= 3)
257     {
258         if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80)
259         {
260             x = ((inp[0] & 0x0f) << 12) | ((inp[1] & 0x3f) << 6) |
261                 (inp[2] & 0x3f);
262             if (x >= 0x800)
263                 *no_read = 3;
264             else
265                 *error = YAZ_ICONV_EILSEQ;
266         }
267         else
268             *error = YAZ_ICONV_EILSEQ;
269     }            
270     else if (inp[0] <= 0xf7 && inbytesleft >= 4)
271     {
272         if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80
273             && (inp[3] & 0xc0) == 0x80)
274         {
275             x = ((inp[0] & 0x07) << 18) | ((inp[1] & 0x3f) << 12) |
276                 ((inp[2] & 0x3f) << 6) | (inp[3] & 0x3f);
277             if (x >= 0x10000)
278                 *no_read = 4;
279             else
280                 *error = YAZ_ICONV_EILSEQ;
281         }
282         else
283             *error = YAZ_ICONV_EILSEQ;
284     }
285     else if (inp[0] <= 0xfb && inbytesleft >= 5)
286     {
287         if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80
288             && (inp[3] & 0xc0) == 0x80 && (inp[4] & 0xc0) == 0x80)
289         {
290             x = ((inp[0] & 0x03) << 24) | ((inp[1] & 0x3f) << 18) |
291                 ((inp[2] & 0x3f) << 12) | ((inp[3] & 0x3f) << 6) |
292                 (inp[4] & 0x3f);
293             if (x >= 0x200000)
294                 *no_read = 5;
295             else
296                 *error = YAZ_ICONV_EILSEQ;
297         }
298         else
299             *error = YAZ_ICONV_EILSEQ;
300     }
301     else if (inp[0] <= 0xfd && inbytesleft >= 6)
302     {
303         if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80
304             && (inp[3] & 0xc0) == 0x80 && (inp[4] & 0xc0) == 0x80
305             && (inp[5] & 0xc0) == 0x80)
306         {
307             x = ((inp[0] & 0x01) << 30) | ((inp[1] & 0x3f) << 24) |
308                 ((inp[2] & 0x3f) << 18) | ((inp[3] & 0x3f) << 12) |
309                 ((inp[4] & 0x3f) << 6) | (inp[5] & 0x3f);
310             if (x >= 0x4000000)
311                 *no_read = 6;
312             else
313                 *error = YAZ_ICONV_EILSEQ;
314         }
315         else
316             *error = YAZ_ICONV_EILSEQ;
317     }
318     else
319         *error = YAZ_ICONV_EINVAL;  /* incomplete sentence */
320
321     return x;
322 }
323
324 static unsigned long yaz_read_UTF8 (yaz_iconv_t cd, unsigned char *inp,
325                                     size_t inbytesleft, size_t *no_read)
326 {
327     return yaz_read_UTF8_char(inp, inbytesleft, no_read, &cd->my_errno);
328 }
329
330 static unsigned long yaz_read_UCS4 (yaz_iconv_t cd, unsigned char *inp,
331                                     size_t inbytesleft, size_t *no_read)
332 {
333     unsigned long x = 0;
334     
335     if (inbytesleft < 4)
336     {
337         cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
338         *no_read = 0;
339     }
340     else
341     {
342         x = (inp[0]<<24) | (inp[1]<<16) | (inp[2]<<8) | inp[3];
343         *no_read = 4;
344     }
345     return x;
346 }
347
348 static unsigned long yaz_read_UCS4LE (yaz_iconv_t cd, unsigned char *inp,
349                                       size_t inbytesleft, size_t *no_read)
350 {
351     unsigned long x = 0;
352     
353     if (inbytesleft < 4)
354     {
355         cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
356         *no_read = 0;
357     }
358     else
359     {
360         x = (inp[3]<<24) | (inp[2]<<16) | (inp[1]<<8) | inp[0];
361         *no_read = 4;
362     }
363     return x;
364 }
365
366 #if HAVE_WCHAR_H
367 static unsigned long yaz_read_wchar_t (yaz_iconv_t cd, unsigned char *inp,
368                                        size_t inbytesleft, size_t *no_read)
369 {
370     unsigned long x = 0;
371     
372     if (inbytesleft < sizeof(wchar_t))
373     {
374         cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
375         *no_read = 0;
376     }
377     else
378     {
379         wchar_t wch;
380         memcpy (&wch, inp, sizeof(wch));
381         x = wch;
382         *no_read = sizeof(wch);
383     }
384     return x;
385 }
386 #endif
387
388 static unsigned long yaz_read_iso5428_1984(yaz_iconv_t cd, unsigned char *inp,
389                                            size_t inbytesleft, size_t *no_read)
390 {
391     unsigned long x = 0;
392     int tonos = 0;
393     int dialitika = 0;
394
395     *no_read = 0;
396     while (inbytesleft > 0)
397     {
398         if (*inp == 0xa2)
399         {
400             tonos = 1;
401         }
402         else if (*inp == 0xa3)
403         {
404             dialitika = 1;
405         }
406         else
407             break;
408         inp++;
409         --inbytesleft;
410         (*no_read)++;
411     }    
412     if (inbytesleft == 0)
413     {
414         cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
415         *no_read = 0;
416         return 0;
417     }
418     switch (*inp) {
419     case 0xe1: /*  alpha small */
420             if (tonos) 
421                 x = 0x03ac;
422             else 
423                 x = 0x03b1;
424             break;
425     case 0xc1: /*  alpha capital */
426             if (tonos) 
427                 x = 0x0386;
428             else 
429                 x = 0x0391;
430             break;
431
432     case 0xe2: /*  Beta small */
433             x = 0x03b2;
434             break;
435     case 0xc2: /*  Beta capital */
436             x = 0x0392;
437             break;
438
439     case 0xe4: /*  Gamma small */
440             x = 0x03b3;
441             break;
442     case 0xc4: /*  Gamma capital */
443             x = 0x0393;
444             break;
445
446     case 0xe5: /*  Delta small */
447             x = 0x03b4;
448             break;
449     case 0xc5: /*  Delta capital */
450             x = 0x0394;
451             break;
452     case 0xe6: /*  epsilon small */
453             if (tonos) 
454                 x = 0x03ad;
455             else 
456                 x = 0x03b5;
457             break;
458     case 0xc6: /*  epsilon capital */
459             if (tonos) 
460                 x = 0x0388;
461             else 
462                 x = 0x0395;
463             break;
464     case 0xe9: /*  Zeta small */
465             x = 0x03b6;
466             break;
467     case 0xc9: /*  Zeta capital */
468             x = 0x0396;
469             break;
470     case 0xea: /*  Eta small */
471             if (tonos) 
472                 x = 0x03ae;
473             else 
474                 x = 0x03b7;
475             break;
476     case 0xca: /*  Eta capital */
477             if (tonos) 
478                 x = 0x0389;
479             else 
480                 x = 0x0397;
481             break;
482     case 0xeb: /*  Theta small */
483             x = 0x03b8;
484             break;
485     case 0xcb: /*  Theta capital */
486             x = 0x0398;
487             break;
488     case 0xec: /*  Iota small */
489             if (tonos) 
490                 if (dialitika) 
491                     x = 0x0390;
492                 else 
493                     x = 0x03af;
494             else 
495                 if (dialitika) 
496                     x = 0x03ca;
497                 else 
498                     x = 0x03b9;
499             break;
500     case 0xcc: /*  Iota capital */
501             if (tonos) 
502                 x = 0x038a;
503             else 
504                 if (dialitika) 
505                     x = 0x03aa;
506                 else 
507                     x = 0x0399;
508             break;
509     case 0xed: /*  Kappa small */
510             x = 0x03ba;
511             break;
512     case 0xcd: /*  Kappa capital */
513             x = 0x039a;
514             break;
515     case 0xee: /*  Lambda small */
516             x = 0x03bb;
517             break;
518     case 0xce: /*  Lambda capital */
519             x = 0x039b;
520             break;
521     case 0xef: /*  Mu small */
522             x = 0x03bc;
523             break;
524     case 0xcf: /*  Mu capital */
525             x = 0x039c;
526             break;
527     case 0xf0: /*  Nu small */
528             x = 0x03bd;
529             break;
530     case 0xd0: /*  Nu capital */
531             x = 0x039d;
532             break;
533     case 0xf1: /*  Xi small */
534             x = 0x03be;
535             break;
536     case 0xd1: /*  Xi capital */
537             x = 0x039e;
538             break;
539     case 0xf2: /*  Omicron small */
540             if (tonos) 
541                 x = 0x03cc;
542             else 
543                 x = 0x03bf;
544             break;
545     case 0xd2: /*  Omicron capital */
546             if (tonos) 
547                 x = 0x038c;
548             else 
549                 x = 0x039f;
550             break;
551     case 0xf3: /*  Pi small */
552             x = 0x03c0;
553             break;
554     case 0xd3: /*  Pi capital */
555             x = 0x03a0;
556             break;
557     case 0xf5: /*  Rho small */
558             x = 0x03c1;
559             break;
560     case 0xd5: /*  Rho capital */
561             x = 0x03a1;
562             break;
563     case 0xf7: /*  Sigma small (end of words) */
564             x = 0x03c2;
565             break;
566     case 0xf6: /*  Sigma small */
567             x = 0x03c3;
568             break;
569     case 0xd6: /*  Sigma capital */
570             x = 0x03a3;
571             break;
572     case 0xf8: /*  Tau small */
573             x = 0x03c4;
574             break;
575     case 0xd8: /*  Tau capital */
576             x = 0x03a4;
577             break;
578     case 0xf9: /*  Upsilon small */
579             if (tonos) 
580                 if (dialitika) 
581                     x = 0x03b0;
582                 else 
583                     x = 0x03cd;
584             else 
585                 if (dialitika) 
586                     x = 0x03cb;
587                 else 
588                     x = 0x03c5;
589             break;
590     case 0xd9: /*  Upsilon capital */
591             if (tonos) 
592                 x = 0x038e;
593             else 
594                 if (dialitika) 
595                     x = 0x03ab;
596                 else 
597                     x = 0x03a5;
598             break;
599     case 0xfa: /*  Phi small */
600             x = 0x03c6;
601             break;
602     case 0xda: /*  Phi capital */
603             x = 0x03a6;
604             break;
605     case 0xfb: /*  Chi small */
606             x = 0x03c7;
607             break;
608     case 0xdb: /*  Chi capital */
609             x = 0x03a7;
610             break;
611     case 0xfc: /*  Psi small */
612             x = 0x03c8;
613             break;
614     case 0xdc: /*  Psi capital */
615             x = 0x03a8;
616             break;
617     case 0xfd: /*  Omega small */
618             if (tonos) 
619                 x = 0x03ce;
620             else 
621                 x = 0x03c9;
622             break;
623     case 0xdd: /*  Omega capital */
624             if (tonos) 
625                 x = 0x038f;
626             else 
627                 x = 0x03a9;
628             break;
629     default:
630         x = *inp;
631         break;
632     }
633     (*no_read)++;
634     
635     return x;
636 }
637
638 static size_t yaz_write_iso5428_1984(yaz_iconv_t cd, unsigned long x,
639                                      char **outbuf, size_t *outbytesleft)
640 {
641     size_t k = 0;
642     unsigned char *out = (unsigned char*) *outbuf;
643     if (*outbytesleft < 3)
644     {
645         cd->my_errno = YAZ_ICONV_E2BIG;  /* not room for output */
646         return (size_t)(-1);
647     }
648     switch (x)
649     {
650     case 0x03ac : out[k++]=0xa2; out[k++]=0xe1; break;
651     case 0x03b1 : out[k++]=0xe1; break;
652     case 0x0386 : out[k++]=0xa2; out[k++]=0xc1; break;
653     case 0x0391 : out[k++]=0xc1; break;
654     case 0x03b2 : out[k++]=0xe2; break;
655     case 0x0392 : out[k++]=0xc2; break;
656     case 0x03b3 : out[k++]=0xe4; break;
657     case 0x0393 : out[k++]=0xc4; break;
658     case 0x03b4 : out[k++]=0xe5; break;
659     case 0x0394 : out[k++]=0xc5; break;
660     case 0x03ad : out[k++]=0xa2; out[k++]=0xe6; break;
661     case 0x03b5 : out[k++]=0xe6; break;
662     case 0x0388 : out[k++]=0xa2; out[k++]=0xc6; break;
663     case 0x0395 : out[k++]=0xc6; break;
664     case 0x03b6 : out[k++]=0xe9; break;
665     case 0x0396 : out[k++]=0xc9; break;
666     case 0x03ae : out[k++]=0xa2; out[k++]=0xea; break;
667     case 0x03b7 : out[k++]=0xea; break;
668     case 0x0389 : out[k++]=0xa2; out[k++]=0xca; break;
669     case 0x0397 : out[k++]=0xca; break;
670     case 0x03b8 : out[k++]=0xeb; break;
671     case 0x0398 : out[k++]=0xcb; break;
672     case 0x0390 : out[k++]=0xa2; out[k++]=0xa3; out[k++]=0xec; break;
673     case 0x03af : out[k++]=0xa2; out[k++]=0xec; break;
674     case 0x03ca : out[k++]=0xa3; out[k++]=0xec; break;
675     case 0x03b9 : out[k++]=0xec; break;
676     case 0x038a : out[k++]=0xa2; out[k++]=0xcc; break;
677     case 0x03aa : out[k++]=0xa3; out[k++]=0xcc; break;
678     case 0x0399 : out[k++]=0xcc; break;
679     case 0x03ba : out[k++]=0xed; break;
680     case 0x039a : out[k++]=0xcd; break;
681     case 0x03bb : out[k++]=0xee; break;
682     case 0x039b : out[k++]=0xce; break;
683     case 0x03bc : out[k++]=0xef; break;
684     case 0x039c : out[k++]=0xcf; break;
685     case 0x03bd : out[k++]=0xf0; break;
686     case 0x039d : out[k++]=0xd0; break;
687     case 0x03be : out[k++]=0xf1; break;
688     case 0x039e : out[k++]=0xd1; break;
689     case 0x03cc : out[k++]=0xa2; out[k++]=0xf2; break;
690     case 0x03bf : out[k++]=0xf2; break;
691     case 0x038c : out[k++]=0xa2; out[k++]=0xd2; break;
692     case 0x039f : out[k++]=0xd2; break;
693     case 0x03c0 : out[k++]=0xf3; break;
694     case 0x03a0 : out[k++]=0xd3; break;
695     case 0x03c1 : out[k++]=0xf5; break;
696     case 0x03a1 : out[k++]=0xd5; break;
697     case 0x03c2 : out[k++]=0xf7; break;
698     case 0x03c3 : out[k++]=0xf6; break;
699     case 0x03a3 : out[k++]=0xd6; break;
700     case 0x03c4 : out[k++]=0xf8; break;
701     case 0x03a4 : out[k++]=0xd8; break;
702     case 0x03b0 : out[k++]=0xa2; out[k++]=0xa3; out[k++]=0xf9; break;
703     case 0x03cd : out[k++]=0xa2; out[k++]=0xf9; break;
704     case 0x03cb : out[k++]=0xa3; out[k++]=0xf9; break;
705     case 0x03c5 : out[k++]=0xf9; break;
706     case 0x038e : out[k++]=0xa2; out[k++]=0xd9; break;
707     case 0x03ab : out[k++]=0xa3; out[k++]=0xd9; break;
708     case 0x03a5 : out[k++]=0xd9; break;
709     case 0x03c6 : out[k++]=0xfa; break;
710     case 0x03a6 : out[k++]=0xda; break;
711     case 0x03c7 : out[k++]=0xfb; break;
712     case 0x03a7 : out[k++]=0xdb; break;
713     case 0x03c8 : out[k++]=0xfc; break;
714     case 0x03a8 : out[k++]=0xdc; break;
715     case 0x03ce : out[k++]=0xa2; out[k++]=0xfd; break;
716     case 0x03c9 : out[k++]=0xfd; break;
717     case 0x038f : out[k++]=0xa2; out[k++]=0xdd; break;
718     case 0x03a9 : out[k++]=0xdd; break;
719     default:
720         if (x > 255)
721         {
722             cd->my_errno = YAZ_ICONV_EILSEQ;
723             return (size_t) -1;
724         }
725         out[k++] = x;
726         break;
727     }
728     *outbytesleft -= k;
729     (*outbuf) += k;
730     return 0;
731 }
732
733 static unsigned long yaz_read_advancegreek(yaz_iconv_t cd, unsigned char *inp,
734                                            size_t inbytesleft, size_t *no_read)
735 {
736     unsigned long x = 0;
737     int shift = 0;
738     int tonos = 0;
739     int dialitika = 0;
740
741     *no_read = 0;
742     while (inbytesleft > 0)
743     {
744         if (*inp == 0x9d)
745         {
746             tonos = 1;
747         }
748         else if (*inp == 0x9e)
749         {
750             dialitika = 1;
751         }
752         else if (*inp == 0x9f)
753         {
754             shift = 1;
755         }
756         else
757             break;
758         inp++;
759         --inbytesleft;
760         (*no_read)++;
761     }    
762     if (inbytesleft == 0)
763     {
764         cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
765         *no_read = 0;
766         return 0;
767     }
768     switch (*inp) {
769     case 0x81:
770         if (shift) 
771             if (tonos) 
772                 x = 0x0386;
773             else 
774                 x = 0x0391;
775         else 
776             if (tonos) 
777                 x = 0x03ac;
778             else 
779                 x = 0x03b1;
780         break;
781     case 0x82:
782         if (shift) 
783             x = 0x0392;
784         else 
785             x = 0x03b2;
786         
787         break;
788     case 0x83:
789         if (shift) 
790             x = 0x0393;
791         else 
792             x = 0x03b3;
793         break;
794     case 0x84:
795         if (shift) 
796             x = 0x0394;
797         else 
798             x = 0x03b4;
799         break;
800     case 0x85:
801         if (shift) 
802             if (tonos) 
803                 x = 0x0388;
804             else 
805                 x = 0x0395;
806         else 
807             if (tonos) 
808                 x = 0x03ad;
809             else 
810                 x = 0x03b5;
811         break;
812     case 0x86:
813         if (shift) 
814             x = 0x0396;
815         else 
816             x = 0x03b6;
817         break;
818     case 0x87:
819         if (shift) 
820             if (tonos) 
821                 x = 0x0389;
822             else 
823                 x = 0x0397;
824         else 
825             if (tonos) 
826                 x = 0x03ae;
827             else 
828                 x = 0x03b7;
829         break;
830     case 0x88:
831         if (shift) 
832             x = 0x0398;
833         else 
834             x = 0x03b8;
835         break;
836     case 0x89:
837         if (shift) 
838             if (tonos) 
839                 x = 0x038a;
840             else 
841                 if (dialitika) 
842                     x = 0x03aa;
843                 else 
844                     x = 0x0399;
845         else 
846             if (tonos) 
847                 if (dialitika) 
848                     x = 0x0390;
849                 else 
850                     x = 0x03af;
851         
852             else 
853                 if (dialitika) 
854                     x = 0x03ca;
855                 else 
856                     x = 0x03b9;
857         break;
858     case 0x8a:
859         if (shift) 
860             x = 0x039a;
861         else 
862             x = 0x03ba;
863         
864         break;
865     case 0x8b:
866         if (shift) 
867             x = 0x039b;
868         else 
869             x = 0x03bb;
870         break;
871     case 0x8c:
872         if (shift) 
873             x = 0x039c;
874         else 
875             x = 0x03bc;
876         
877         break;
878     case 0x8d:
879         if (shift) 
880             x = 0x039d;
881         else 
882             x = 0x03bd;
883         break;
884     case 0x8e:
885         if (shift) 
886             x = 0x039e;
887         else 
888             x = 0x03be;
889         break;
890     case 0x8f:
891         if (shift) 
892             if (tonos) 
893                 x = 0x038c;
894             else 
895                 x = 0x039f;
896         else 
897             if (tonos) 
898                 x = 0x03cc;
899             else 
900                 x = 0x03bf;
901         break;
902     case 0x90:
903         if (shift) 
904             x = 0x03a0;
905         else 
906             x = 0x03c0;
907         break;
908     case 0x91:
909         if (shift) 
910             x = 0x03a1;
911         else 
912             x = 0x03c1;
913         break;
914     case 0x92:
915         x = 0x03c2;
916         break;
917     case 0x93:
918         if (shift) 
919             x = 0x03a3;
920         else 
921             x = 0x03c3;
922         break;
923     case 0x94:
924         if (shift) 
925             x = 0x03a4;
926         else 
927             x = 0x03c4;
928         break;
929     case 0x95:
930         if (shift) 
931             if (tonos) 
932                 x = 0x038e;
933             else 
934                 if (dialitika) 
935                     x = 0x03ab;
936                 else 
937                     x = 0x03a5;
938         else 
939             if (tonos) 
940                 if (dialitika) 
941                     x = 0x03b0;
942                 else 
943                     x = 0x03cd;
944         
945             else 
946                 if (dialitika) 
947                     x = 0x03cb;
948                 else 
949                     x = 0x03c5;
950         break;
951     case 0x96:
952         if (shift) 
953             x = 0x03a6;
954         else 
955             x = 0x03c6;
956         break;
957     case 0x97:
958         if (shift) 
959             x = 0x03a7;
960         else 
961             x = 0x03c7;
962         break;
963     case 0x98:
964         if (shift) 
965             x = 0x03a8;
966         else 
967             x = 0x03c8;
968         
969         break;
970         
971     case 0x99:
972         if (shift) 
973             if (tonos) 
974                 x = 0x038f;
975             else 
976                 x = 0x03a9;
977         else 
978             if (tonos) 
979                 x = 0x03ce;
980             else 
981                 x = 0x03c9;
982         break;
983     default:
984         x = *inp;
985         break;
986     }
987     (*no_read)++;
988     
989     return x;
990 }
991
992 static size_t yaz_write_advancegreek(yaz_iconv_t cd, unsigned long x,
993                                      char **outbuf, size_t *outbytesleft)
994 {
995     size_t k = 0;
996     unsigned char *out = (unsigned char*) *outbuf;
997     if (*outbytesleft < 3)
998     {
999         cd->my_errno = YAZ_ICONV_E2BIG;  /* not room for output */
1000         return (size_t)(-1);
1001     }
1002     switch (x)
1003     {
1004     case 0x03ac : out[k++]=0x9d; out[k++]=0x81; break;
1005     case 0x03ad : out[k++]=0x9d; out[k++]=0x85; break;
1006     case 0x03ae : out[k++]=0x9d; out[k++]=0x87; break;
1007     case 0x03af : out[k++]=0x9d; out[k++]=0x89; break;
1008     case 0x03cc : out[k++]=0x9d; out[k++]=0x8f; break;
1009     case 0x03cd : out[k++]=0x9d; out[k++]=0x95; break;
1010     case 0x03ce : out[k++]=0x9d; out[k++]=0x99; break;
1011     case 0x0390 : out[k++]=0x9d; out[k++]=0x9e; out[k++]=0x89; break;
1012     case 0x03b0 : out[k++]=0x9d; out[k++]=0x9e; out[k++]=0x95; break;
1013     case 0x0386 : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x81; break;
1014     case 0x0388 : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x85; break;
1015     case 0x0389 : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x87; break;
1016     case 0x038a : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x89; break;
1017     case 0x038c : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x8f; break;
1018     case 0x038e : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x95; break;
1019     case 0x038f : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x99; break;
1020     case 0x03ca : out[k++]=0x9e; out[k++]=0x89; break;
1021     case 0x03cb : out[k++]=0x9e; out[k++]=0x95; break;
1022     case 0x03aa : out[k++]=0x9e; out[k++]=0x9f; out[k++]=0x89; break;
1023     case 0x03ab : out[k++]=0x9e; out[k++]=0x9f; out[k++]=0x95; break;
1024     case 0x0391 : out[k++]=0x9f; out[k++]=0x81; break;
1025     case 0x0392 : out[k++]=0x9f; out[k++]=0x82; break;
1026     case 0x0393 : out[k++]=0x9f; out[k++]=0x83; break;
1027     case 0x0394 : out[k++]=0x9f; out[k++]=0x84; break;
1028     case 0x0395 : out[k++]=0x9f; out[k++]=0x85; break;
1029     case 0x0396 : out[k++]=0x9f; out[k++]=0x86; break;
1030     case 0x0397 : out[k++]=0x9f; out[k++]=0x87; break;
1031     case 0x0398 : out[k++]=0x9f; out[k++]=0x88; break;
1032     case 0x0399 : out[k++]=0x9f; out[k++]=0x89; break;
1033     case 0x039a : out[k++]=0x9f; out[k++]=0x8a; break;
1034     case 0x039b : out[k++]=0x9f; out[k++]=0x8b; break;
1035     case 0x039c : out[k++]=0x9f; out[k++]=0x8c; break;
1036     case 0x039d : out[k++]=0x9f; out[k++]=0x8d; break;
1037     case 0x039e : out[k++]=0x9f; out[k++]=0x8e; break;
1038     case 0x039f : out[k++]=0x9f; out[k++]=0x8f; break;
1039     case 0x03a0 : out[k++]=0x9f; out[k++]=0x90; break;
1040     case 0x03a1 : out[k++]=0x9f; out[k++]=0x91; break;
1041     case 0x03a3 : out[k++]=0x9f; out[k++]=0x93; break;
1042     case 0x03a4 : out[k++]=0x9f; out[k++]=0x94; break;
1043     case 0x03a5 : out[k++]=0x9f; out[k++]=0x95; break;
1044     case 0x03a6 : out[k++]=0x9f; out[k++]=0x96; break;
1045     case 0x03a7 : out[k++]=0x9f; out[k++]=0x97; break;
1046     case 0x03a8 : out[k++]=0x9f; out[k++]=0x98; break;
1047     case 0x03a9 : out[k++]=0x9f; out[k++]=0x99; break;
1048     case 0x03b1 : out[k++]=0x81; break;
1049     case 0x03b2 : out[k++]=0x82; break;
1050     case 0x03b3 : out[k++]=0x83; break;
1051     case 0x03b4 : out[k++]=0x84; break;
1052     case 0x03b5 : out[k++]=0x85; break;
1053     case 0x03b6 : out[k++]=0x86; break;
1054     case 0x03b7 : out[k++]=0x87; break;
1055     case 0x03b8 : out[k++]=0x88; break;
1056     case 0x03b9 : out[k++]=0x89; break;
1057     case 0x03ba : out[k++]=0x8a; break;
1058     case 0x03bb : out[k++]=0x8b; break;
1059     case 0x03bc : out[k++]=0x8c; break;
1060     case 0x03bd : out[k++]=0x8d; break;
1061     case 0x03be : out[k++]=0x8e; break;
1062     case 0x03bf : out[k++]=0x8f; break;
1063     case 0x03c0 : out[k++]=0x90; break;
1064     case 0x03c1 : out[k++]=0x91; break;
1065     case 0x03c2 : out[k++]=0x92; break;
1066     case 0x03c3 : out[k++]=0x93; break;
1067     case 0x03c4 : out[k++]=0x94; break;
1068     case 0x03c5 : out[k++]=0x95; break;
1069     case 0x03c6 : out[k++]=0x96; break;
1070     case 0x03c7 : out[k++]=0x96; break;
1071     case 0x03c8 : out[k++]=0x98; break;
1072     case 0x03c9 : out[k++]=0x99; break;
1073     default:
1074         if (x > 255)
1075         {
1076             cd->my_errno = YAZ_ICONV_EILSEQ;
1077             return (size_t) -1;
1078         }
1079         out[k++] = x;
1080         break;
1081     }
1082     *outbytesleft -= k;
1083     (*outbuf) += k;
1084     return 0;
1085 }
1086
1087
1088 static unsigned long yaz_read_marc8_comb (yaz_iconv_t cd, unsigned char *inp,
1089                                           size_t inbytesleft, size_t *no_read,
1090                                           int *comb);
1091
1092 static unsigned long yaz_read_marc8 (yaz_iconv_t cd, unsigned char *inp,
1093                                      size_t inbytesleft, size_t *no_read)
1094 {
1095     unsigned long x;
1096     if (cd->comb_offset < cd->comb_size)
1097     {
1098         *no_read = cd->comb_no_read[cd->comb_offset];
1099         x = cd->comb_x[cd->comb_offset];
1100
1101         /* special case for double-diacritic combining characters, 
1102            INVERTED BREVE and DOUBLE TILDE.
1103            We'll increment the no_read counter by 1, since we want to skip over
1104            the processing of the closing ligature character
1105         */
1106         /* this code is no longer necessary.. our handlers code in
1107            yaz_marc8_?_conv (generated by charconv.tcl) now returns
1108            0 and no_read=1 when a sequence does not match the input.
1109            The SECOND HALFs in codetables.xml produces a non-existant
1110            entry in the conversion trie.. Hence when met, the input byte is
1111            skipped as it should (in yaz_iconv)
1112         */
1113 #if 0
1114         if (x == 0x0361 || x == 0x0360)
1115             *no_read += 1;
1116 #endif
1117         cd->comb_offset++;
1118         return x;
1119     }
1120
1121     cd->comb_offset = 0;
1122     for (cd->comb_size = 0; cd->comb_size < 8; cd->comb_size++)
1123     {
1124         int comb = 0;
1125
1126         if (inbytesleft == 0 && cd->comb_size)
1127         {
1128             cd->my_errno = YAZ_ICONV_EINVAL;
1129             x = 0;
1130             *no_read = 0;
1131             break;
1132         }
1133         x = yaz_read_marc8_comb(cd, inp, inbytesleft, no_read, &comb);
1134         if (!comb || !x)
1135             break;
1136         cd->comb_x[cd->comb_size] = x;
1137         cd->comb_no_read[cd->comb_size] = *no_read;
1138         inp += *no_read;
1139         inbytesleft = inbytesleft - *no_read;
1140     }
1141     return x;
1142 }
1143
1144 static unsigned long yaz_read_marc8s(yaz_iconv_t cd, unsigned char *inp,
1145                                      size_t inbytesleft, size_t *no_read)
1146 {
1147     unsigned long x = yaz_read_marc8(cd, inp, inbytesleft, no_read);
1148     if (x && cd->comb_size == 1)
1149     {
1150         /* For MARC8s we try to get a Latin-1 page code out of it */
1151         int i;
1152         for (i = 0; latin1_comb[i].x1; i++)
1153             if (cd->comb_x[0] == latin1_comb[i].x2 && x == latin1_comb[i].x1)
1154             {
1155                 *no_read += cd->comb_no_read[0];
1156                 cd->comb_size = 0;
1157                 x = latin1_comb[i].y;
1158                 break;
1159             }
1160     }
1161     return x;
1162 }
1163
1164 static unsigned long yaz_read_marc8_comb(yaz_iconv_t cd, unsigned char *inp,
1165                                          size_t inbytesleft, size_t *no_read,
1166                                          int *comb)
1167 {
1168     *no_read = 0;
1169     while(inbytesleft >= 1 && inp[0] == 27)
1170     {
1171         size_t inbytesleft0 = inbytesleft;
1172         inp++;
1173         inbytesleft--;
1174         while(inbytesleft > 0 && strchr("(,$!)-", *inp))
1175         {
1176             inbytesleft--;
1177             inp++;
1178         }
1179         if (inbytesleft <= 0)
1180         {
1181             *no_read = 0;
1182             cd->my_errno = YAZ_ICONV_EINVAL;
1183             return 0;
1184         }
1185         cd->marc8_esc_mode = *inp++;
1186         inbytesleft--;
1187         (*no_read) += inbytesleft0 - inbytesleft;
1188     }
1189     if (inbytesleft <= 0)
1190         return 0;
1191     else if (*inp == ' ')
1192     {
1193         *no_read += 1;
1194         return ' ';
1195     }
1196     else
1197     {
1198         unsigned long x;
1199         size_t no_read_sub = 0;
1200         *comb = 0;
1201
1202         switch(cd->marc8_esc_mode)
1203         {
1204         case 'B':  /* Basic ASCII */
1205         case 's':  /* ASCII */
1206         case 'E':  /* ANSEL */
1207             x = yaz_marc8_42_conv(inp, inbytesleft, &no_read_sub, comb);
1208             if (!x)
1209             {
1210                 no_read_sub = 0;
1211                 x = yaz_marc8_45_conv(inp, inbytesleft, &no_read_sub, comb);
1212             }
1213             break;
1214         case 'g':  /* Greek */
1215             x = yaz_marc8_67_conv(inp, inbytesleft, &no_read_sub, comb);
1216             break;
1217         case 'b':  /* Subscripts */
1218             x = yaz_marc8_62_conv(inp, inbytesleft, &no_read_sub, comb);
1219             break;
1220         case 'p':  /* Superscripts */
1221             x = yaz_marc8_70_conv(inp, inbytesleft, &no_read_sub, comb);
1222             break;
1223         case '2':  /* Basic Hebrew */
1224             x = yaz_marc8_32_conv(inp, inbytesleft, &no_read_sub, comb);
1225             break;
1226         case 'N':  /* Basic Cyrillic */
1227             x = yaz_marc8_4E_conv(inp, inbytesleft, &no_read_sub, comb);
1228             break;
1229         case 'Q':  /* Extended Cyrillic */
1230             x = yaz_marc8_51_conv(inp, inbytesleft, &no_read_sub, comb);
1231             break;
1232         case '3':  /* Basic Arabic */
1233             x = yaz_marc8_33_conv(inp, inbytesleft, &no_read_sub, comb);
1234             break;
1235         case '4':  /* Extended Arabic */
1236             x = yaz_marc8_34_conv(inp, inbytesleft, &no_read_sub, comb);
1237             break;
1238         case 'S':  /* Greek */
1239             x = yaz_marc8_53_conv(inp, inbytesleft, &no_read_sub, comb);
1240             break;
1241         case '1':  /* Chinese, Japanese, Korean (EACC) */
1242             x = yaz_marc8_31_conv(inp, inbytesleft, &no_read_sub, comb);
1243             break;
1244         default:
1245             *no_read = 0;
1246             cd->my_errno = YAZ_ICONV_EILSEQ;
1247             return 0;
1248         }
1249         *no_read += no_read_sub;
1250         return x;
1251     }
1252 }
1253
1254 static size_t yaz_write_UTF8(yaz_iconv_t cd, unsigned long x,
1255                              char **outbuf, size_t *outbytesleft)
1256 {
1257     return yaz_write_UTF8_char(x, outbuf, outbytesleft, &cd->my_errno);
1258 }
1259
1260 size_t yaz_write_UTF8_char(unsigned long x,
1261                            char **outbuf, size_t *outbytesleft,
1262                            int *error)
1263 {
1264     unsigned char *outp = (unsigned char *) *outbuf;
1265
1266     if (x <= 0x7f && *outbytesleft >= 1)
1267     {
1268         *outp++ = (unsigned char) x;
1269         (*outbytesleft)--;
1270     } 
1271     else if (x <= 0x7ff && *outbytesleft >= 2)
1272     {
1273         *outp++ = (unsigned char) ((x >> 6) | 0xc0);
1274         *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1275         (*outbytesleft) -= 2;
1276     }
1277     else if (x <= 0xffff && *outbytesleft >= 3)
1278     {
1279         *outp++ = (unsigned char) ((x >> 12) | 0xe0);
1280         *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
1281         *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1282         (*outbytesleft) -= 3;
1283     }
1284     else if (x <= 0x1fffff && *outbytesleft >= 4)
1285     {
1286         *outp++ = (unsigned char) ((x >> 18) | 0xf0);
1287         *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
1288         *outp++ = (unsigned char) (((x >> 6)  & 0x3f) | 0x80);
1289         *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1290         (*outbytesleft) -= 4;
1291     }
1292     else if (x <= 0x3ffffff && *outbytesleft >= 5)
1293     {
1294         *outp++ = (unsigned char) ((x >> 24) | 0xf8);
1295         *outp++ = (unsigned char) (((x >> 18) & 0x3f) | 0x80);
1296         *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
1297         *outp++ = (unsigned char) (((x >> 6)  & 0x3f) | 0x80);
1298         *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1299         (*outbytesleft) -= 5;
1300     }
1301     else if (*outbytesleft >= 6)
1302     {
1303         *outp++ = (unsigned char) ((x >> 30) | 0xfc);
1304         *outp++ = (unsigned char) (((x >> 24) & 0x3f) | 0x80);
1305         *outp++ = (unsigned char) (((x >> 18) & 0x3f) | 0x80);
1306         *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
1307         *outp++ = (unsigned char) (((x >> 6)  & 0x3f) | 0x80);
1308         *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1309         (*outbytesleft) -= 6;
1310     }
1311     else 
1312     {
1313         *error = YAZ_ICONV_E2BIG;  /* not room for output */
1314         return (size_t)(-1);
1315     }
1316     *outbuf = (char *) outp;
1317     return 0;
1318 }
1319
1320 static size_t yaz_write_ISO8859_1 (yaz_iconv_t cd, unsigned long x,
1321                                    char **outbuf, size_t *outbytesleft)
1322 {
1323     /* list of two char unicode sequence that, when combined, are
1324        equivalent to single unicode chars that can be represented in
1325        ISO-8859-1/Latin-1.
1326        Regular iconv on Linux at least does not seem to convert these,
1327        but since MARC-8 to UTF-8 generates these composed sequence
1328        we get a better chance of a successful MARC-8 -> ISO-8859-1
1329        conversion */
1330     unsigned char *outp = (unsigned char *) *outbuf;
1331
1332     if (cd->compose_char)
1333     {
1334         int i;
1335         for (i = 0; latin1_comb[i].x1; i++)
1336             if (cd->compose_char == latin1_comb[i].x1 && x == latin1_comb[i].x2)
1337             {
1338                 x = latin1_comb[i].y;
1339                 break;
1340             }
1341         if (*outbytesleft < 1)
1342         {  /* no room. Retain compose_char and bail out */
1343             cd->my_errno = YAZ_ICONV_E2BIG;
1344             return (size_t)(-1);
1345         }
1346         if (!latin1_comb[i].x1) 
1347         {   /* not found. Just write compose_char */
1348             *outp++ = (unsigned char) cd->compose_char;
1349             (*outbytesleft)--;
1350             *outbuf = (char *) outp;
1351         }
1352         /* compose_char used so reset it. x now holds current char */
1353         cd->compose_char = 0;
1354     }
1355
1356     if (x > 32 && x < 127 && cd->compose_char == 0)
1357     {
1358         cd->compose_char = x;
1359         return 0;
1360     }
1361     else if (x > 255 || x < 1)
1362     {
1363         cd->my_errno = YAZ_ICONV_EILSEQ;
1364         return (size_t) -1;
1365     }
1366     else if (*outbytesleft < 1)
1367     {
1368         cd->my_errno = YAZ_ICONV_E2BIG;
1369         return (size_t)(-1);
1370     }
1371     *outp++ = (unsigned char) x;
1372     (*outbytesleft)--;
1373     *outbuf = (char *) outp;
1374     return 0;
1375 }
1376
1377 static size_t yaz_flush_ISO8859_1(yaz_iconv_t cd,
1378                                   char **outbuf, size_t *outbytesleft)
1379 {
1380     if (cd->compose_char)
1381     {
1382         unsigned char *outp = (unsigned char *) *outbuf;
1383         if (*outbytesleft < 1)
1384         {
1385             cd->my_errno = YAZ_ICONV_E2BIG;
1386             return (size_t)(-1);
1387         }
1388         *outp++ = (unsigned char) cd->compose_char;
1389         (*outbytesleft)--;
1390         *outbuf = (char *) outp;
1391         cd->compose_char = 0;
1392     }
1393     return 0;
1394 }
1395
1396 static size_t yaz_write_UCS4 (yaz_iconv_t cd, unsigned long x,
1397                               char **outbuf, size_t *outbytesleft)
1398 {
1399     unsigned char *outp = (unsigned char *) *outbuf;
1400     if (*outbytesleft >= 4)
1401     {
1402         *outp++ = (unsigned char) (x>>24);
1403         *outp++ = (unsigned char) (x>>16);
1404         *outp++ = (unsigned char) (x>>8);
1405         *outp++ = (unsigned char) x;
1406         (*outbytesleft) -= 4;
1407     }
1408     else
1409     {
1410         cd->my_errno = YAZ_ICONV_E2BIG;
1411         return (size_t)(-1);
1412     }
1413     *outbuf = (char *) outp;
1414     return 0;
1415 }
1416
1417 static size_t yaz_write_UCS4LE (yaz_iconv_t cd, unsigned long x,
1418                                 char **outbuf, size_t *outbytesleft)
1419 {
1420     unsigned char *outp = (unsigned char *) *outbuf;
1421     if (*outbytesleft >= 4)
1422     {
1423         *outp++ = (unsigned char) x;
1424         *outp++ = (unsigned char) (x>>8);
1425         *outp++ = (unsigned char) (x>>16);
1426         *outp++ = (unsigned char) (x>>24);
1427         (*outbytesleft) -= 4;
1428     }
1429     else
1430     {
1431         cd->my_errno = YAZ_ICONV_E2BIG;
1432         return (size_t)(-1);
1433     }
1434     *outbuf = (char *) outp;
1435     return 0;
1436 }
1437
1438 static unsigned long lookup_marc8(yaz_iconv_t cd,
1439                                   unsigned long x, int *comb,
1440                                   const char **page_chr)
1441 {
1442     char utf8_buf[7];
1443     char *utf8_outbuf = utf8_buf;
1444     size_t utf8_outbytesleft = sizeof(utf8_buf)-1, r;
1445
1446     r = yaz_write_UTF8(cd, x, &utf8_outbuf, &utf8_outbytesleft);
1447     if (r == (size_t)(-1))
1448     {
1449         cd->my_errno = YAZ_ICONV_EILSEQ;
1450         return 0;
1451     }
1452     else
1453     {
1454         unsigned char *inp;
1455         size_t inbytesleft, no_read_sub = 0;
1456         unsigned long x;
1457
1458         *utf8_outbuf = '\0';        
1459         inp = (unsigned char *) utf8_buf;
1460         inbytesleft = strlen(utf8_buf);
1461
1462         x = yaz_marc8r_42_conv(inp, inbytesleft, &no_read_sub, comb);
1463         if (x)
1464         {
1465             *page_chr = ESC "(B";
1466             return x;
1467         }
1468         x = yaz_marc8r_45_conv(inp, inbytesleft, &no_read_sub, comb);
1469         if (x)
1470         {
1471             *page_chr = ESC "(B";
1472             return x;
1473         }
1474         x = yaz_marc8r_67_conv(inp, inbytesleft, &no_read_sub, comb);
1475         if (x)
1476         {
1477             *page_chr = ESC "g";
1478             return x;
1479         }
1480         x = yaz_marc8r_62_conv(inp, inbytesleft, &no_read_sub, comb);
1481         if (x)
1482         {
1483             *page_chr = ESC "b";
1484             return x;
1485         }
1486         x = yaz_marc8r_70_conv(inp, inbytesleft, &no_read_sub, comb);
1487         if (x)
1488         {
1489             *page_chr = ESC "p";
1490             return x;
1491         }
1492         x = yaz_marc8r_32_conv(inp, inbytesleft, &no_read_sub, comb);
1493         if (x)
1494         {
1495             *page_chr = ESC "(2";
1496             return x;
1497         }
1498         x = yaz_marc8r_4E_conv(inp, inbytesleft, &no_read_sub, comb);
1499         if (x)
1500         {
1501             *page_chr = ESC "(N";
1502             return x;
1503         }
1504         x = yaz_marc8r_51_conv(inp, inbytesleft, &no_read_sub, comb);
1505         if (x)
1506         {
1507             *page_chr = ESC "(Q";
1508             return x;
1509         }
1510         x = yaz_marc8r_33_conv(inp, inbytesleft, &no_read_sub, comb);
1511         if (x)
1512         {
1513             *page_chr = ESC "(3";
1514             return x;
1515         }
1516         x = yaz_marc8r_34_conv(inp, inbytesleft, &no_read_sub, comb);
1517         if (x)
1518         {
1519             *page_chr = ESC "(4";
1520             return x;
1521         }
1522         x = yaz_marc8r_53_conv(inp, inbytesleft, &no_read_sub, comb);
1523         if (x)
1524         {
1525             *page_chr = ESC "(S";
1526             return x;
1527         }
1528         x = yaz_marc8r_31_conv(inp, inbytesleft, &no_read_sub, comb);
1529         if (x)
1530         {
1531             *page_chr = ESC "$1";
1532             return x;
1533         }
1534         cd->my_errno = YAZ_ICONV_EILSEQ;
1535         return x;
1536     }
1537 }
1538
1539 static size_t flush_combos(yaz_iconv_t cd,
1540                            char **outbuf, size_t *outbytesleft)
1541 {
1542     unsigned long y = cd->write_marc8_last;
1543     unsigned char byte;
1544     char out_buf[4];
1545     size_t out_no = 0;
1546
1547     if (!y)
1548         return 0;
1549
1550     assert(cd->write_marc8_lpage);
1551     if (cd->write_marc8_lpage)
1552     {
1553         size_t r = yaz_write_marc8_page_chr(cd, outbuf, outbytesleft,
1554                                             cd->write_marc8_lpage);
1555         if (r)
1556             return r;
1557     }
1558
1559     byte = (unsigned char )((y>>16) & 0xff);
1560     if (byte)
1561         out_buf[out_no++] = byte;
1562     byte = (unsigned char)((y>>8) & 0xff);
1563     if (byte)
1564         out_buf[out_no++] = byte;
1565     byte = (unsigned char )(y & 0xff);
1566     if (byte)
1567         out_buf[out_no++] = byte;
1568
1569     if (out_no + 2 >= *outbytesleft)
1570     {
1571         cd->my_errno = YAZ_ICONV_E2BIG;
1572         return (size_t) (-1);
1573     }
1574
1575     memcpy(*outbuf, out_buf, out_no);
1576     *outbuf += out_no;
1577     (*outbytesleft) -= out_no;
1578     if (cd->write_marc8_second_half_char)
1579     {
1580         *(*outbuf)++ = cd->write_marc8_second_half_char;
1581         (*outbytesleft)--;
1582     }        
1583
1584     cd->write_marc8_last = 0;
1585     cd->write_marc8_lpage = 0;
1586     cd->write_marc8_second_half_char = 0;
1587     return 0;
1588 }
1589
1590 static size_t yaz_write_marc8_page_chr(yaz_iconv_t cd, 
1591                                        char **outbuf, size_t *outbytesleft,
1592                                        const char *page_chr)
1593 {
1594     const char **old_page_chr = &cd->write_marc8_g0;
1595
1596     /* are we going to a G1-set (such as such as ESC ")!E") */
1597     if (page_chr && page_chr[1] == ')')
1598         old_page_chr = &cd->write_marc8_g1;
1599
1600     if (!*old_page_chr || strcmp(page_chr, *old_page_chr))
1601     {
1602         size_t plen = 0;
1603         const char *page_out = page_chr;
1604         
1605         if (*outbytesleft < 8)
1606         {
1607             cd->my_errno = YAZ_ICONV_E2BIG;
1608             
1609             return (size_t) (-1);
1610         }
1611
1612         if (*old_page_chr)
1613         {
1614             if (!strcmp(*old_page_chr, ESC "p") 
1615                 || !strcmp(*old_page_chr, ESC "g")
1616                 || !strcmp(*old_page_chr, ESC "b"))
1617             {
1618                 page_out = ESC "s";
1619                 /* Technique 1 leave */
1620                 if (strcmp(page_chr, ESC "(B")) /* Not going ASCII page? */
1621                 {
1622                     /* Must leave script + enter new page */
1623                     plen = strlen(page_out);
1624                     memcpy(*outbuf, page_out, plen);
1625                     (*outbuf) += plen;
1626                     (*outbytesleft) -= plen;
1627                     page_out = ESC "(B";
1628                 }
1629             }
1630         }
1631         *old_page_chr = page_chr;
1632         plen = strlen(page_out);
1633         memcpy(*outbuf, page_out, plen);
1634         (*outbuf) += plen;
1635         (*outbytesleft) -= plen;
1636     }
1637     return 0;
1638 }
1639
1640
1641 static size_t yaz_write_marc8_2(yaz_iconv_t cd, unsigned long x,
1642                                 char **outbuf, size_t *outbytesleft)
1643 {
1644     int comb = 0;
1645     const char *page_chr = 0;
1646     unsigned long y = lookup_marc8(cd, x, &comb, &page_chr);
1647
1648     if (!y)
1649         return (size_t) (-1);
1650
1651     if (comb)
1652     {
1653         if (page_chr)
1654         {
1655             size_t r = yaz_write_marc8_page_chr(cd, outbuf, outbytesleft,
1656                                                 page_chr);
1657             if (r)
1658                 return r;
1659         }
1660         if (x == 0x0361)
1661             cd->write_marc8_second_half_char = 0xEC;
1662         else if (x == 0x0360)
1663             cd->write_marc8_second_half_char = 0xFB;
1664
1665         if (*outbytesleft <= 1)
1666         {
1667             cd->my_errno = YAZ_ICONV_E2BIG;
1668             return (size_t) (-1);
1669         }
1670         *(*outbuf)++ = y;
1671         (*outbytesleft)--;
1672     }
1673     else
1674     {
1675         size_t r = flush_combos(cd, outbuf, outbytesleft);
1676         if (r)
1677             return r;
1678
1679         cd->write_marc8_last = y;
1680         cd->write_marc8_lpage = page_chr;
1681     }
1682     return 0;
1683 }
1684
1685 static size_t yaz_flush_marc8(yaz_iconv_t cd,
1686                               char **outbuf, size_t *outbytesleft)
1687 {
1688     size_t r = flush_combos(cd, outbuf, outbytesleft);
1689     if (r)
1690         return r;
1691     cd->write_marc8_g1 = 0;
1692     return yaz_write_marc8_page_chr(cd, outbuf, outbytesleft, ESC "(B");
1693 }
1694
1695 static size_t yaz_write_marc8(yaz_iconv_t cd, unsigned long x,
1696                               char **outbuf, size_t *outbytesleft)
1697 {
1698     int i;
1699     for (i = 0; latin1_comb[i].x1; i++)
1700     {
1701         if (x == latin1_comb[i].y)
1702         {
1703             size_t r ;
1704             /* save the output pointers .. */
1705             char *outbuf0 = *outbuf;
1706             size_t outbytesleft0 = *outbytesleft;
1707             int last_ch = cd->write_marc8_last;
1708             const char *lpage = cd->write_marc8_lpage;
1709
1710             r = yaz_write_marc8_2(cd, latin1_comb[i].x1,
1711                                   outbuf, outbytesleft);
1712             if (r)
1713                 return r;
1714             r = yaz_write_marc8_2(cd, latin1_comb[i].x2,
1715                                   outbuf, outbytesleft);
1716             if (r && cd->my_errno == YAZ_ICONV_E2BIG)
1717             {
1718                 /* not enough room. reset output to original values */
1719                 *outbuf = outbuf0;
1720                 *outbytesleft = outbytesleft0;
1721                 cd->write_marc8_last = last_ch;
1722                 cd->write_marc8_lpage = lpage;
1723             }
1724             return r;
1725         }
1726     }
1727     return yaz_write_marc8_2(cd, x, outbuf, outbytesleft);
1728 }
1729
1730
1731 #if HAVE_WCHAR_H
1732 static size_t yaz_write_wchar_t(yaz_iconv_t cd, unsigned long x,
1733                                 char **outbuf, size_t *outbytesleft)
1734 {
1735     unsigned char *outp = (unsigned char *) *outbuf;
1736
1737     if (*outbytesleft >= sizeof(wchar_t))
1738     {
1739         wchar_t wch = x;
1740         memcpy(outp, &wch, sizeof(wch));
1741         outp += sizeof(wch);
1742         (*outbytesleft) -= sizeof(wch);
1743     }
1744     else
1745     {
1746         cd->my_errno = YAZ_ICONV_E2BIG;
1747         return (size_t)(-1);
1748     }
1749     *outbuf = (char *) outp;
1750     return 0;
1751 }
1752 #endif
1753
1754 int yaz_iconv_isbuiltin(yaz_iconv_t cd)
1755 {
1756     return cd->read_handle && cd->write_handle;
1757 }
1758
1759 yaz_iconv_t yaz_iconv_open (const char *tocode, const char *fromcode)
1760 {
1761     yaz_iconv_t cd = (yaz_iconv_t) xmalloc (sizeof(*cd));
1762
1763     cd->write_handle = 0;
1764     cd->read_handle = 0;
1765     cd->init_handle = 0;
1766     cd->flush_handle = 0;
1767     cd->my_errno = YAZ_ICONV_UNKNOWN;
1768
1769     /* a useful hack: if fromcode has leading @,
1770        the library not use YAZ's own conversions .. */
1771     if (fromcode[0] == '@')
1772         fromcode++;
1773     else
1774     {
1775         if (!yaz_matchstr(fromcode, "UTF8"))
1776         {
1777             cd->read_handle = yaz_read_UTF8;
1778             cd->init_handle = yaz_init_UTF8;
1779         }
1780         else if (!yaz_matchstr(fromcode, "ISO88591"))
1781             cd->read_handle = yaz_read_ISO8859_1;
1782         else if (!yaz_matchstr(fromcode, "UCS4"))
1783             cd->read_handle = yaz_read_UCS4;
1784         else if (!yaz_matchstr(fromcode, "UCS4LE"))
1785             cd->read_handle = yaz_read_UCS4LE;
1786         else if (!yaz_matchstr(fromcode, "MARC8"))
1787             cd->read_handle = yaz_read_marc8;
1788         else if (!yaz_matchstr(fromcode, "MARC8s"))
1789             cd->read_handle = yaz_read_marc8s;
1790         else if (!yaz_matchstr(fromcode, "advancegreek"))
1791             cd->read_handle = yaz_read_advancegreek;
1792         else if (!yaz_matchstr(fromcode, "iso54281984"))
1793             cd->read_handle = yaz_read_iso5428_1984;
1794         else if (!yaz_matchstr(fromcode, "iso5428:1984"))
1795             cd->read_handle = yaz_read_iso5428_1984;
1796 #if HAVE_WCHAR_H
1797         else if (!yaz_matchstr(fromcode, "WCHAR_T"))
1798             cd->read_handle = yaz_read_wchar_t;
1799 #endif
1800         
1801         if (!yaz_matchstr(tocode, "UTF8"))
1802             cd->write_handle = yaz_write_UTF8;
1803         else if (!yaz_matchstr(tocode, "ISO88591"))
1804         {
1805             cd->write_handle = yaz_write_ISO8859_1;
1806             cd->flush_handle = yaz_flush_ISO8859_1;
1807         }
1808         else if (!yaz_matchstr (tocode, "UCS4"))
1809             cd->write_handle = yaz_write_UCS4;
1810         else if (!yaz_matchstr(tocode, "UCS4LE"))
1811             cd->write_handle = yaz_write_UCS4LE;
1812         else if (!yaz_matchstr(tocode, "MARC8"))
1813         {
1814             cd->write_handle = yaz_write_marc8;
1815             cd->flush_handle = yaz_flush_marc8;
1816         }
1817         else if (!yaz_matchstr(tocode, "MARC8s"))
1818         {
1819             cd->write_handle = yaz_write_marc8;
1820             cd->flush_handle = yaz_flush_marc8;
1821         }
1822         else if (!yaz_matchstr(tocode, "advancegreek"))
1823         {
1824             cd->write_handle = yaz_write_advancegreek;
1825         }
1826         else if (!yaz_matchstr(tocode, "iso54281984"))
1827         {
1828             cd->write_handle = yaz_write_iso5428_1984;
1829         }
1830         else if (!yaz_matchstr(tocode, "iso5428:1984"))
1831         {
1832             cd->write_handle = yaz_write_iso5428_1984;
1833         }
1834 #if HAVE_WCHAR_H
1835         else if (!yaz_matchstr(tocode, "WCHAR_T"))
1836             cd->write_handle = yaz_write_wchar_t;
1837 #endif
1838     }
1839 #if HAVE_ICONV_H
1840     cd->iconv_cd = 0;
1841     if (!cd->read_handle || !cd->write_handle)
1842     {
1843         cd->iconv_cd = iconv_open (tocode, fromcode);
1844         if (cd->iconv_cd == (iconv_t) (-1))
1845         {
1846             xfree (cd);
1847             return 0;
1848         }
1849     }
1850 #else
1851     if (!cd->read_handle || !cd->write_handle)
1852     {
1853         xfree (cd);
1854         return 0;
1855     }
1856 #endif
1857     cd->init_flag = 1;
1858     return cd;
1859 }
1860
1861 size_t yaz_iconv(yaz_iconv_t cd, char **inbuf, size_t *inbytesleft,
1862                  char **outbuf, size_t *outbytesleft)
1863 {
1864     char *inbuf0 = 0;
1865     size_t r = 0;
1866
1867 #if HAVE_ICONV_H
1868     if (cd->iconv_cd)
1869     {
1870         size_t r =
1871             iconv(cd->iconv_cd, inbuf, inbytesleft, outbuf, outbytesleft);
1872         if (r == (size_t)(-1))
1873         {
1874             switch (yaz_errno())
1875             {
1876             case E2BIG:
1877                 cd->my_errno = YAZ_ICONV_E2BIG;
1878                 break;
1879             case EINVAL:
1880                 cd->my_errno = YAZ_ICONV_EINVAL;
1881                 break;
1882             case EILSEQ:
1883                 cd->my_errno = YAZ_ICONV_EILSEQ;
1884                 break;
1885             default:
1886                 cd->my_errno = YAZ_ICONV_UNKNOWN;
1887             }
1888         }
1889         return r;
1890     }
1891 #endif
1892
1893     if (inbuf)
1894         inbuf0 = *inbuf;
1895
1896     if (cd->init_flag)
1897     {
1898         cd->my_errno = YAZ_ICONV_UNKNOWN;
1899         cd->marc8_esc_mode = 'B';
1900         
1901         cd->comb_offset = cd->comb_size = 0;
1902         cd->compose_char = 0;
1903         
1904         cd->write_marc8_second_half_char = 0;
1905         cd->write_marc8_last = 0;
1906         cd->write_marc8_lpage = 0;
1907         cd->write_marc8_g0 = ESC "(B";
1908         cd->write_marc8_g1 = 0;
1909         
1910         cd->unget_x = 0;
1911         cd->no_read_x = 0;
1912     }
1913
1914     if (cd->init_flag)
1915     {
1916         if (cd->init_handle && inbuf && *inbuf)
1917         {
1918             size_t no_read = 0;
1919             size_t r = (cd->init_handle)(cd, (unsigned char *) *inbuf,
1920                                          *inbytesleft, &no_read);
1921             if (r)
1922             {
1923                 if (cd->my_errno == YAZ_ICONV_EINVAL)
1924                     return r;
1925                 cd->init_flag = 0;
1926                 return r;
1927             }
1928             *inbytesleft -= no_read;
1929             *inbuf += no_read;
1930         }
1931     }
1932     cd->init_flag = 0;
1933
1934     if (!inbuf || !*inbuf)
1935     {
1936         if (outbuf && *outbuf)
1937         {
1938             if (cd->unget_x)
1939                 r = (*cd->write_handle)(cd, cd->unget_x, outbuf, outbytesleft);
1940             if (cd->flush_handle)
1941                 r = (*cd->flush_handle)(cd, outbuf, outbytesleft);
1942         }
1943         if (r == 0)
1944             cd->init_flag = 1;
1945         cd->unget_x = 0;
1946         return r;
1947     }
1948     while (1)
1949     {
1950         unsigned long x;
1951         size_t no_read;
1952
1953         if (cd->unget_x)
1954         {
1955             x = cd->unget_x;
1956             no_read = cd->no_read_x;
1957         }
1958         else
1959         {
1960             if (*inbytesleft == 0)
1961             {
1962                 r = *inbuf - inbuf0;
1963                 break;
1964             }
1965             x = (*cd->read_handle)(cd, (unsigned char *) *inbuf, *inbytesleft,
1966                                    &no_read);
1967             if (no_read == 0)
1968             {
1969                 r = (size_t)(-1);
1970                 break;
1971             }
1972         }
1973         if (x)
1974         {
1975             r = (*cd->write_handle)(cd, x, outbuf, outbytesleft);
1976             if (r)
1977             {
1978                 /* unable to write it. save it because read_handle cannot
1979                    rewind .. */
1980                 if (cd->my_errno == YAZ_ICONV_E2BIG)
1981                 {
1982                     cd->unget_x = x;
1983                     cd->no_read_x = no_read;
1984                     break;
1985                 }
1986             }
1987             cd->unget_x = 0;
1988         }
1989         *inbytesleft -= no_read;
1990         (*inbuf) += no_read;
1991     }
1992     return r;
1993 }
1994
1995 int yaz_iconv_error (yaz_iconv_t cd)
1996 {
1997     return cd->my_errno;
1998 }
1999
2000 int yaz_iconv_close (yaz_iconv_t cd)
2001 {
2002 #if HAVE_ICONV_H
2003     if (cd->iconv_cd)
2004         iconv_close (cd->iconv_cd);
2005 #endif
2006     xfree (cd);
2007     return 0;
2008 }
2009
2010 /*
2011  * Local variables:
2012  * c-basic-offset: 4
2013  * indent-tabs-mode: nil
2014  * End:
2015  * vim: shiftwidth=4 tabstop=8 expandtab
2016  */