Treat space (20X) as present in all MARC-8 charsets
[yaz-moved-to-github.git] / src / siconv.c
1 /*
2  * Copyright (C) 1995-2007, Index Data ApS
3  * See the file LICENSE for details.
4  *
5  * $Id: siconv.c,v 1.44 2007-09-22 18:49:55 adam Exp $
6  */
7 /**
8  * \file siconv.c
9  * \brief Implements simple ICONV
10  *
11  * This implements an interface similar to that of iconv and
12  * is used by YAZ to interface with iconv (if present).
13  * For systems where iconv is not present, this layer
14  * provides a few important conversions: UTF-8, MARC-8, Latin-1.
15  *
16  * MARC-8 reference:
17  *  http://www.loc.gov/marc/specifications/speccharmarc8.html
18  */
19
20 #if HAVE_CONFIG_H
21 #include <config.h>
22 #endif
23
24 #include <assert.h>
25 #include <errno.h>
26 #include <string.h>
27 #include <ctype.h>
28 #if HAVE_WCHAR_H
29 #include <wchar.h>
30 #endif
31
32 #if HAVE_ICONV_H
33 #include <iconv.h>
34 #endif
35
36
37 #include <yaz/yaz-util.h>
38
/*
 * Forward declarations of the MARC-8 table-lookup routines.
 *
 * All of them share one signature: they are handed a pointer to the input,
 * the number of bytes available, and two out-parameters (*no_read and
 * *combining), and return an unsigned long.
 *
 * NOTE(review): the hexadecimal suffix (42, 45, 67, ...) presumably is the
 * final byte of the MARC-8 escape sequence that selects the character set;
 * confirm against the MARC-8 specification referenced in the file header.
 */
unsigned long
yaz_marc8_42_conv(unsigned char *inp, size_t inbytesleft,
                  size_t *no_read, int *combining);
unsigned long
yaz_marc8_45_conv(unsigned char *inp, size_t inbytesleft,
                  size_t *no_read, int *combining);
unsigned long
yaz_marc8_67_conv(unsigned char *inp, size_t inbytesleft,
                  size_t *no_read, int *combining);
unsigned long
yaz_marc8_62_conv(unsigned char *inp, size_t inbytesleft,
                  size_t *no_read, int *combining);
unsigned long
yaz_marc8_70_conv(unsigned char *inp, size_t inbytesleft,
                  size_t *no_read, int *combining);
unsigned long
yaz_marc8_32_conv(unsigned char *inp, size_t inbytesleft,
                  size_t *no_read, int *combining);
unsigned long
yaz_marc8_4E_conv(unsigned char *inp, size_t inbytesleft,
                  size_t *no_read, int *combining);
unsigned long
yaz_marc8_51_conv(unsigned char *inp, size_t inbytesleft,
                  size_t *no_read, int *combining);
unsigned long
yaz_marc8_33_conv(unsigned char *inp, size_t inbytesleft,
                  size_t *no_read, int *combining);
unsigned long
yaz_marc8_34_conv(unsigned char *inp, size_t inbytesleft,
                  size_t *no_read, int *combining);
unsigned long
yaz_marc8_53_conv(unsigned char *inp, size_t inbytesleft,
                  size_t *no_read, int *combining);
unsigned long
yaz_marc8_31_conv(unsigned char *inp, size_t inbytesleft,
                  size_t *no_read, int *combining);


/*
 * The "marc8r" family mirrors the list above, one routine per suffix.
 * NOTE(review): presumably the reverse direction of the tables above -
 * confirm against the definitions.
 */
unsigned long
yaz_marc8r_42_conv(unsigned char *inp, size_t inbytesleft,
                   size_t *no_read, int *combining);
unsigned long
yaz_marc8r_45_conv(unsigned char *inp, size_t inbytesleft,
                   size_t *no_read, int *combining);
unsigned long
yaz_marc8r_67_conv(unsigned char *inp, size_t inbytesleft,
                   size_t *no_read, int *combining);
unsigned long
yaz_marc8r_62_conv(unsigned char *inp, size_t inbytesleft,
                   size_t *no_read, int *combining);
unsigned long
yaz_marc8r_70_conv(unsigned char *inp, size_t inbytesleft,
                   size_t *no_read, int *combining);
unsigned long
yaz_marc8r_32_conv(unsigned char *inp, size_t inbytesleft,
                   size_t *no_read, int *combining);
unsigned long
yaz_marc8r_4E_conv(unsigned char *inp, size_t inbytesleft,
                   size_t *no_read, int *combining);
unsigned long
yaz_marc8r_51_conv(unsigned char *inp, size_t inbytesleft,
                   size_t *no_read, int *combining);
unsigned long
yaz_marc8r_33_conv(unsigned char *inp, size_t inbytesleft,
                   size_t *no_read, int *combining);
unsigned long
yaz_marc8r_34_conv(unsigned char *inp, size_t inbytesleft,
                   size_t *no_read, int *combining);
unsigned long
yaz_marc8r_53_conv(unsigned char *inp, size_t inbytesleft,
                   size_t *no_read, int *combining);
unsigned long
yaz_marc8r_31_conv(unsigned char *inp, size_t inbytesleft,
                   size_t *no_read, int *combining);
89
90 struct yaz_iconv_struct {
91     int my_errno;
92     int init_flag;
93     size_t (*init_handle)(yaz_iconv_t cd, unsigned char *inbuf,
94                           size_t inbytesleft, size_t *no_read);
95     unsigned long (*read_handle)(yaz_iconv_t cd, unsigned char *inbuf,
96                                  size_t inbytesleft, size_t *no_read);
97     size_t (*write_handle)(yaz_iconv_t cd, unsigned long x,
98                            char **outbuf, size_t *outbytesleft);
99     size_t (*flush_handle)(yaz_iconv_t cd,
100                            char **outbuf, size_t *outbytesleft);
101     int marc8_esc_mode;
102
103     int comb_offset;
104     int comb_size;
105     unsigned long comb_x[8];
106     size_t comb_no_read[8];
107     size_t no_read_x;
108     unsigned long unget_x;
109 #if HAVE_ICONV_H
110     iconv_t iconv_cd;
111 #endif
112     unsigned long compose_char;
113
114     unsigned long write_marc8_comb_ch[8];
115     size_t write_marc8_comb_no;
116     unsigned write_marc8_second_half_char;
117     unsigned long write_marc8_last;
118     const char *write_marc8_page_chr;
119 };
120
/*
 * Base letter + combining mark pairs that have a precomposed
 * ISO-8859-1 character.  The Latin-1 characters in the 0xc0-0xff range
 * that are not listed (AE, multiplication sign, O with stroke, thorn,
 * sharp s, eth, division sign) are left out on purpose.  The table ends
 * with an all-zero entry.
 */
static struct {
    unsigned long x1;   /* base letter */
    unsigned long x2;   /* combining character */
    unsigned y;         /* resulting Latin-1 character */
} latin1_comb[] = {
    /* A with grave, acute, circumflex, tilde, diaeresis, ring above */
    { 'A', 0x0300, 0xc0},
    { 'A', 0x0301, 0xc1},
    { 'A', 0x0302, 0xc2},
    { 'A', 0x0303, 0xc3},
    { 'A', 0x0308, 0xc4},
    { 'A', 0x030a, 0xc5},
    /* 0xc6 (LATIN CAPITAL LETTER AE) not needed */
    /* C with cedilla */
    { 'C', 0x0327, 0xc7},
    /* E with grave, acute, circumflex, diaeresis */
    { 'E', 0x0300, 0xc8},
    { 'E', 0x0301, 0xc9},
    { 'E', 0x0302, 0xca},
    { 'E', 0x0308, 0xcb},
    /* I with grave, acute, circumflex, diaeresis */
    { 'I', 0x0300, 0xcc},
    { 'I', 0x0301, 0xcd},
    { 'I', 0x0302, 0xce},
    { 'I', 0x0308, 0xcf},
    /* N with tilde */
    { 'N', 0x0303, 0xd1},
    /* O with grave, acute, circumflex, tilde, diaeresis */
    { 'O', 0x0300, 0xd2},
    { 'O', 0x0301, 0xd3},
    { 'O', 0x0302, 0xd4},
    { 'O', 0x0303, 0xd5},
    { 'O', 0x0308, 0xd6},
    /* 0xd7 (MULTIPLICATION SIGN) and 0xd8 (O WITH STROKE) omitted */
    /* U with grave, acute, circumflex, diaeresis */
    { 'U', 0x0300, 0xd9},
    { 'U', 0x0301, 0xda},
    { 'U', 0x0302, 0xdb},
    { 'U', 0x0308, 0xdc},
    /* Y with acute */
    { 'Y', 0x0301, 0xdd},
    /* 0xde (CAPITAL THORN) and 0xdf (SMALL SHARP S) omitted */

    /* a with grave, acute, circumflex, tilde, diaeresis, ring above */
    { 'a', 0x0300, 0xe0},
    { 'a', 0x0301, 0xe1},
    { 'a', 0x0302, 0xe2},
    { 'a', 0x0303, 0xe3},
    { 'a', 0x0308, 0xe4},
    { 'a', 0x030a, 0xe5},
    /* 0xe6 (LATIN SMALL LETTER AE) omitted */
    /* c with cedilla */
    { 'c', 0x0327, 0xe7},
    /* e with grave, acute, circumflex, diaeresis */
    { 'e', 0x0300, 0xe8},
    { 'e', 0x0301, 0xe9},
    { 'e', 0x0302, 0xea},
    { 'e', 0x0308, 0xeb},
    /* i with grave, acute, circumflex, diaeresis */
    { 'i', 0x0300, 0xec},
    { 'i', 0x0301, 0xed},
    { 'i', 0x0302, 0xee},
    { 'i', 0x0308, 0xef},
    /* 0xf0 (LATIN SMALL LETTER ETH) omitted */
    /* n with tilde */
    { 'n', 0x0303, 0xf1},
    /* o with grave, acute, circumflex, tilde, diaeresis */
    { 'o', 0x0300, 0xf2},
    { 'o', 0x0301, 0xf3},
    { 'o', 0x0302, 0xf4},
    { 'o', 0x0303, 0xf5},
    { 'o', 0x0308, 0xf6},
    /* 0xf7 (DIVISION SIGN) and 0xf8 (o WITH STROKE) omitted */
    /* u with grave, acute, circumflex, diaeresis */
    { 'u', 0x0300, 0xf9},
    { 'u', 0x0301, 0xfa},
    { 'u', 0x0302, 0xfb},
    { 'u', 0x0308, 0xfc},
    /* y with acute; 0xfe (SMALL THORN) omitted; y with diaeresis */
    { 'y', 0x0301, 0xfd},
    { 'y', 0x0308, 0xff},

    /* end-of-table marker */
    { 0, 0, 0}
};
191
192 static unsigned long yaz_read_ISO8859_1 (yaz_iconv_t cd, unsigned char *inp,
193                                          size_t inbytesleft, size_t *no_read)
194 {
195     unsigned long x = inp[0];
196     *no_read = 1;
197     return x;
198 }
199
200
201 static size_t yaz_init_UTF8 (yaz_iconv_t cd, unsigned char *inp,
202                              size_t inbytesleft, size_t *no_read)
203 {
204     if (inp[0] != 0xef)
205     {
206         *no_read = 0;
207         return 0;
208     }
209     if (inbytesleft < 3)
210     {
211         cd->my_errno = YAZ_ICONV_EINVAL;
212         return (size_t) -1;
213     }
214     if (inp[1] != 0xbb && inp[2] == 0xbf)
215         *no_read = 3;
216     else
217         *no_read = 0;
218     return 0;
219 }
220
221 unsigned long yaz_read_UTF8_char(unsigned char *inp,
222                                  size_t inbytesleft, size_t *no_read,
223                                  int *error)
224 {
225     unsigned long x = 0;
226
227     *no_read = 0; /* by default */
228     if (inp[0] <= 0x7f)
229     {
230         x = inp[0];
231         *no_read = 1;
232     }
233     else if (inp[0] <= 0xbf || inp[0] >= 0xfe)
234     {
235         *error = YAZ_ICONV_EILSEQ;
236     }
237     else if (inp[0] <= 0xdf && inbytesleft >= 2)
238     {
239         if ((inp[1] & 0xc0) == 0x80)
240         {
241             x = ((inp[0] & 0x1f) << 6) | (inp[1] & 0x3f);
242             if (x >= 0x80)
243                 *no_read = 2;
244             else
245                 *error = YAZ_ICONV_EILSEQ;
246         }
247         else
248             *error = YAZ_ICONV_EILSEQ;
249     }
250     else if (inp[0] <= 0xef && inbytesleft >= 3)
251     {
252         if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80)
253         {
254             x = ((inp[0] & 0x0f) << 12) | ((inp[1] & 0x3f) << 6) |
255                 (inp[2] & 0x3f);
256             if (x >= 0x800)
257                 *no_read = 3;
258             else
259                 *error = YAZ_ICONV_EILSEQ;
260         }
261         else
262             *error = YAZ_ICONV_EILSEQ;
263     }            
264     else if (inp[0] <= 0xf7 && inbytesleft >= 4)
265     {
266         if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80
267             && (inp[3] & 0xc0) == 0x80)
268         {
269             x = ((inp[0] & 0x07) << 18) | ((inp[1] & 0x3f) << 12) |
270                 ((inp[2] & 0x3f) << 6) | (inp[3] & 0x3f);
271             if (x >= 0x10000)
272                 *no_read = 4;
273             else
274                 *error = YAZ_ICONV_EILSEQ;
275         }
276         else
277             *error = YAZ_ICONV_EILSEQ;
278     }
279     else if (inp[0] <= 0xfb && inbytesleft >= 5)
280     {
281         if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80
282             && (inp[3] & 0xc0) == 0x80 && (inp[4] & 0xc0) == 0x80)
283         {
284             x = ((inp[0] & 0x03) << 24) | ((inp[1] & 0x3f) << 18) |
285                 ((inp[2] & 0x3f) << 12) | ((inp[3] & 0x3f) << 6) |
286                 (inp[4] & 0x3f);
287             if (x >= 0x200000)
288                 *no_read = 5;
289             else
290                 *error = YAZ_ICONV_EILSEQ;
291         }
292         else
293             *error = YAZ_ICONV_EILSEQ;
294     }
295     else if (inp[0] <= 0xfd && inbytesleft >= 6)
296     {
297         if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80
298             && (inp[3] & 0xc0) == 0x80 && (inp[4] & 0xc0) == 0x80
299             && (inp[5] & 0xc0) == 0x80)
300         {
301             x = ((inp[0] & 0x01) << 30) | ((inp[1] & 0x3f) << 24) |
302                 ((inp[2] & 0x3f) << 18) | ((inp[3] & 0x3f) << 12) |
303                 ((inp[4] & 0x3f) << 6) | (inp[5] & 0x3f);
304             if (x >= 0x4000000)
305                 *no_read = 6;
306             else
307                 *error = YAZ_ICONV_EILSEQ;
308         }
309         else
310             *error = YAZ_ICONV_EILSEQ;
311     }
312     else
313         *error = YAZ_ICONV_EINVAL;  /* incomplete sentence */
314
315     return x;
316 }
317
318 static unsigned long yaz_read_UTF8 (yaz_iconv_t cd, unsigned char *inp,
319                                     size_t inbytesleft, size_t *no_read)
320 {
321     return yaz_read_UTF8_char(inp, inbytesleft, no_read, &cd->my_errno);
322 }
323
324 static unsigned long yaz_read_UCS4 (yaz_iconv_t cd, unsigned char *inp,
325                                     size_t inbytesleft, size_t *no_read)
326 {
327     unsigned long x = 0;
328     
329     if (inbytesleft < 4)
330     {
331         cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
332         *no_read = 0;
333     }
334     else
335     {
336         x = (inp[0]<<24) | (inp[1]<<16) | (inp[2]<<8) | inp[3];
337         *no_read = 4;
338     }
339     return x;
340 }
341
342 static unsigned long yaz_read_UCS4LE (yaz_iconv_t cd, unsigned char *inp,
343                                       size_t inbytesleft, size_t *no_read)
344 {
345     unsigned long x = 0;
346     
347     if (inbytesleft < 4)
348     {
349         cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
350         *no_read = 0;
351     }
352     else
353     {
354         x = (inp[3]<<24) | (inp[2]<<16) | (inp[1]<<8) | inp[0];
355         *no_read = 4;
356     }
357     return x;
358 }
359
360 #if HAVE_WCHAR_H
361 static unsigned long yaz_read_wchar_t (yaz_iconv_t cd, unsigned char *inp,
362                                        size_t inbytesleft, size_t *no_read)
363 {
364     unsigned long x = 0;
365     
366     if (inbytesleft < sizeof(wchar_t))
367     {
368         cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
369         *no_read = 0;
370     }
371     else
372     {
373         wchar_t wch;
374         memcpy (&wch, inp, sizeof(wch));
375         x = wch;
376         *no_read = sizeof(wch);
377     }
378     return x;
379 }
380 #endif
381
382 static unsigned long yaz_read_iso5428_1984(yaz_iconv_t cd, unsigned char *inp,
383                                            size_t inbytesleft, size_t *no_read)
384 {
385     unsigned long x = 0;
386     int tonos = 0;
387     int dialitika = 0;
388
389     *no_read = 0;
390     while (inbytesleft > 0)
391     {
392         if (*inp == 0xa2)
393         {
394             tonos = 1;
395         }
396         else if (*inp == 0xa3)
397         {
398             dialitika = 1;
399         }
400         else
401             break;
402         inp++;
403         --inbytesleft;
404         (*no_read)++;
405     }    
406     if (inbytesleft == 0)
407     {
408         cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
409         *no_read = 0;
410         return 0;
411     }
412     switch (*inp) {
413     case 0xe1: /*  alpha small */
414             if (tonos) 
415                 x = 0x03ac;
416             else 
417                 x = 0x03b1;
418             break;
419     case 0xc1: /*  alpha capital */
420             if (tonos) 
421                 x = 0x0386;
422             else 
423                 x = 0x0391;
424             break;
425
426     case 0xe2: /*  Beta small */
427             x = 0x03b2;
428             break;
429     case 0xc2: /*  Beta capital */
430             x = 0x0392;
431             break;
432
433     case 0xe4: /*  Gamma small */
434             x = 0x03b3;
435             break;
436     case 0xc4: /*  Gamma capital */
437             x = 0x0393;
438             break;
439
440     case 0xe5: /*  Delta small */
441             x = 0x03b4;
442             break;
443     case 0xc5: /*  Delta capital */
444             x = 0x0394;
445             break;
446     case 0xe6: /*  epsilon small */
447             if (tonos) 
448                 x = 0x03ad;
449             else 
450                 x = 0x03b5;
451             break;
452     case 0xc6: /*  epsilon capital */
453             if (tonos) 
454                 x = 0x0388;
455             else 
456                 x = 0x0395;
457             break;
458     case 0xe9: /*  Zeta small */
459             x = 0x03b6;
460             break;
461     case 0xc9: /*  Zeta capital */
462             x = 0x0396;
463             break;
464     case 0xea: /*  Eta small */
465             if (tonos) 
466                 x = 0x03ae;
467             else 
468                 x = 0x03b7;
469             break;
470     case 0xca: /*  Eta capital */
471             if (tonos) 
472                 x = 0x0389;
473             else 
474                 x = 0x0397;
475             break;
476     case 0xeb: /*  Theta small */
477             x = 0x03b8;
478             break;
479     case 0xcb: /*  Theta capital */
480             x = 0x0398;
481             break;
482     case 0xec: /*  Iota small */
483             if (tonos) 
484                 if (dialitika) 
485                     x = 0x0390;
486                 else 
487                     x = 0x03af;
488             else 
489                 if (dialitika) 
490                     x = 0x03ca;
491                 else 
492                     x = 0x03b9;
493             break;
494     case 0xcc: /*  Iota capital */
495             if (tonos) 
496                 x = 0x038a;
497             else 
498                 if (dialitika) 
499                     x = 0x03aa;
500                 else 
501                     x = 0x0399;
502             break;
503     case 0xed: /*  Kappa small */
504             x = 0x03ba;
505             break;
506     case 0xcd: /*  Kappa capital */
507             x = 0x039a;
508             break;
509     case 0xee: /*  Lambda small */
510             x = 0x03bb;
511             break;
512     case 0xce: /*  Lambda capital */
513             x = 0x039b;
514             break;
515     case 0xef: /*  Mu small */
516             x = 0x03bc;
517             break;
518     case 0xcf: /*  Mu capital */
519             x = 0x039c;
520             break;
521     case 0xf0: /*  Nu small */
522             x = 0x03bd;
523             break;
524     case 0xd0: /*  Nu capital */
525             x = 0x039d;
526             break;
527     case 0xf1: /*  Xi small */
528             x = 0x03be;
529             break;
530     case 0xd1: /*  Xi capital */
531             x = 0x039e;
532             break;
533     case 0xf2: /*  Omicron small */
534             if (tonos) 
535                 x = 0x03cc;
536             else 
537                 x = 0x03bf;
538             break;
539     case 0xd2: /*  Omicron capital */
540             if (tonos) 
541                 x = 0x038c;
542             else 
543                 x = 0x039f;
544             break;
545     case 0xf3: /*  Pi small */
546             x = 0x03c0;
547             break;
548     case 0xd3: /*  Pi capital */
549             x = 0x03a0;
550             break;
551     case 0xf5: /*  Rho small */
552             x = 0x03c1;
553             break;
554     case 0xd5: /*  Rho capital */
555             x = 0x03a1;
556             break;
557     case 0xf7: /*  Sigma small (end of words) */
558             x = 0x03c2;
559             break;
560     case 0xf6: /*  Sigma small */
561             x = 0x03c3;
562             break;
563     case 0xd6: /*  Sigma capital */
564             x = 0x03a3;
565             break;
566     case 0xf8: /*  Tau small */
567             x = 0x03c4;
568             break;
569     case 0xd8: /*  Tau capital */
570             x = 0x03a4;
571             break;
572     case 0xf9: /*  Upsilon small */
573             if (tonos) 
574                 if (dialitika) 
575                     x = 0x03b0;
576                 else 
577                     x = 0x03cd;
578             else 
579                 if (dialitika) 
580                     x = 0x03cb;
581                 else 
582                     x = 0x03c5;
583             break;
584     case 0xd9: /*  Upsilon capital */
585             if (tonos) 
586                 x = 0x038e;
587             else 
588                 if (dialitika) 
589                     x = 0x03ab;
590                 else 
591                     x = 0x03a5;
592             break;
593     case 0xfa: /*  Phi small */
594             x = 0x03c6;
595             break;
596     case 0xda: /*  Phi capital */
597             x = 0x03a6;
598             break;
599     case 0xfb: /*  Chi small */
600             x = 0x03c7;
601             break;
602     case 0xdb: /*  Chi capital */
603             x = 0x03a7;
604             break;
605     case 0xfc: /*  Psi small */
606             x = 0x03c8;
607             break;
608     case 0xdc: /*  Psi capital */
609             x = 0x03a8;
610             break;
611     case 0xfd: /*  Omega small */
612             if (tonos) 
613                 x = 0x03ce;
614             else 
615                 x = 0x03c9;
616             break;
617     case 0xdd: /*  Omega capital */
618             if (tonos) 
619                 x = 0x038f;
620             else 
621                 x = 0x03a9;
622             break;
623     default:
624         x = *inp;
625         break;
626     }
627     (*no_read)++;
628     
629     return x;
630 }
631
632 static size_t yaz_write_iso5428_1984(yaz_iconv_t cd, unsigned long x,
633                                      char **outbuf, size_t *outbytesleft)
634 {
635     size_t k = 0;
636     unsigned char *out = (unsigned char*) *outbuf;
637     if (*outbytesleft < 3)
638     {
639         cd->my_errno = YAZ_ICONV_E2BIG;  /* not room for output */
640         return (size_t)(-1);
641     }
642     switch (x)
643     {
644     case 0x03ac : out[k++]=0xa2; out[k++]=0xe1; break;
645     case 0x03b1 : out[k++]=0xe1; break;
646     case 0x0386 : out[k++]=0xa2; out[k++]=0xc1; break;
647     case 0x0391 : out[k++]=0xc1; break;
648     case 0x03b2 : out[k++]=0xe2; break;
649     case 0x0392 : out[k++]=0xc2; break;
650     case 0x03b3 : out[k++]=0xe4; break;
651     case 0x0393 : out[k++]=0xc4; break;
652     case 0x03b4 : out[k++]=0xe5; break;
653     case 0x0394 : out[k++]=0xc5; break;
654     case 0x03ad : out[k++]=0xa2; out[k++]=0xe6; break;
655     case 0x03b5 : out[k++]=0xe6; break;
656     case 0x0388 : out[k++]=0xa2; out[k++]=0xc6; break;
657     case 0x0395 : out[k++]=0xc6; break;
658     case 0x03b6 : out[k++]=0xe9; break;
659     case 0x0396 : out[k++]=0xc9; break;
660     case 0x03ae : out[k++]=0xa2; out[k++]=0xea; break;
661     case 0x03b7 : out[k++]=0xea; break;
662     case 0x0389 : out[k++]=0xa2; out[k++]=0xca; break;
663     case 0x0397 : out[k++]=0xca; break;
664     case 0x03b8 : out[k++]=0xeb; break;
665     case 0x0398 : out[k++]=0xcb; break;
666     case 0x0390 : out[k++]=0xa2; out[k++]=0xa3; out[k++]=0xec; break;
667     case 0x03af : out[k++]=0xa2; out[k++]=0xec; break;
668     case 0x03ca : out[k++]=0xa3; out[k++]=0xec; break;
669     case 0x03b9 : out[k++]=0xec; break;
670     case 0x038a : out[k++]=0xa2; out[k++]=0xcc; break;
671     case 0x03aa : out[k++]=0xa3; out[k++]=0xcc; break;
672     case 0x0399 : out[k++]=0xcc; break;
673     case 0x03ba : out[k++]=0xed; break;
674     case 0x039a : out[k++]=0xcd; break;
675     case 0x03bb : out[k++]=0xee; break;
676     case 0x039b : out[k++]=0xce; break;
677     case 0x03bc : out[k++]=0xef; break;
678     case 0x039c : out[k++]=0xcf; break;
679     case 0x03bd : out[k++]=0xf0; break;
680     case 0x039d : out[k++]=0xd0; break;
681     case 0x03be : out[k++]=0xf1; break;
682     case 0x039e : out[k++]=0xd1; break;
683     case 0x03cc : out[k++]=0xa2; out[k++]=0xf2; break;
684     case 0x03bf : out[k++]=0xf2; break;
685     case 0x038c : out[k++]=0xa2; out[k++]=0xd2; break;
686     case 0x039f : out[k++]=0xd2; break;
687     case 0x03c0 : out[k++]=0xf3; break;
688     case 0x03a0 : out[k++]=0xd3; break;
689     case 0x03c1 : out[k++]=0xf5; break;
690     case 0x03a1 : out[k++]=0xd5; break;
691     case 0x03c2 : out[k++]=0xf7; break;
692     case 0x03c3 : out[k++]=0xf6; break;
693     case 0x03a3 : out[k++]=0xd6; break;
694     case 0x03c4 : out[k++]=0xf8; break;
695     case 0x03a4 : out[k++]=0xd8; break;
696     case 0x03b0 : out[k++]=0xa2; out[k++]=0xa3; out[k++]=0xf9; break;
697     case 0x03cd : out[k++]=0xa2; out[k++]=0xf9; break;
698     case 0x03cb : out[k++]=0xa3; out[k++]=0xf9; break;
699     case 0x03c5 : out[k++]=0xf9; break;
700     case 0x038e : out[k++]=0xa2; out[k++]=0xd9; break;
701     case 0x03ab : out[k++]=0xa3; out[k++]=0xd9; break;
702     case 0x03a5 : out[k++]=0xd9; break;
703     case 0x03c6 : out[k++]=0xfa; break;
704     case 0x03a6 : out[k++]=0xda; break;
705     case 0x03c7 : out[k++]=0xfb; break;
706     case 0x03a7 : out[k++]=0xdb; break;
707     case 0x03c8 : out[k++]=0xfc; break;
708     case 0x03a8 : out[k++]=0xdc; break;
709     case 0x03ce : out[k++]=0xa2; out[k++]=0xfd; break;
710     case 0x03c9 : out[k++]=0xfd; break;
711     case 0x038f : out[k++]=0xa2; out[k++]=0xdd; break;
712     case 0x03a9 : out[k++]=0xdd; break;
713     default:
714         if (x > 255)
715         {
716             cd->my_errno = YAZ_ICONV_EILSEQ;
717             return (size_t) -1;
718         }
719         out[k++] = x;
720         break;
721     }
722     *outbytesleft -= k;
723     (*outbuf) += k;
724     return 0;
725 }
726
727 static unsigned long yaz_read_advancegreek(yaz_iconv_t cd, unsigned char *inp,
728                                            size_t inbytesleft, size_t *no_read)
729 {
730     unsigned long x = 0;
731     int shift = 0;
732     int tonos = 0;
733     int dialitika = 0;
734
735     *no_read = 0;
736     while (inbytesleft > 0)
737     {
738         if (*inp == 0x9d)
739         {
740             tonos = 1;
741         }
742         else if (*inp == 0x9e)
743         {
744             dialitika = 1;
745         }
746         else if (*inp == 0x9f)
747         {
748             shift = 1;
749         }
750         else
751             break;
752         inp++;
753         --inbytesleft;
754         (*no_read)++;
755     }    
756     if (inbytesleft == 0)
757     {
758         cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
759         *no_read = 0;
760         return 0;
761     }
762     switch (*inp) {
763     case 0x81:
764         if (shift) 
765             if (tonos) 
766                 x = 0x0386;
767             else 
768                 x = 0x0391;
769         else 
770             if (tonos) 
771                 x = 0x03ac;
772             else 
773                 x = 0x03b1;
774         break;
775     case 0x82:
776         if (shift) 
777             x = 0x0392;
778         else 
779             x = 0x03b2;
780         
781         break;
782     case 0x83:
783         if (shift) 
784             x = 0x0393;
785         else 
786             x = 0x03b3;
787         break;
788     case 0x84:
789         if (shift) 
790             x = 0x0394;
791         else 
792             x = 0x03b4;
793         break;
794     case 0x85:
795         if (shift) 
796             if (tonos) 
797                 x = 0x0388;
798             else 
799                 x = 0x0395;
800         else 
801             if (tonos) 
802                 x = 0x03ad;
803             else 
804                 x = 0x03b5;
805         break;
806     case 0x86:
807         if (shift) 
808             x = 0x0396;
809         else 
810             x = 0x03b6;
811         break;
812     case 0x87:
813         if (shift) 
814             if (tonos) 
815                 x = 0x0389;
816             else 
817                 x = 0x0397;
818         else 
819             if (tonos) 
820                 x = 0x03ae;
821             else 
822                 x = 0x03b7;
823         break;
824     case 0x88:
825         if (shift) 
826             x = 0x0398;
827         else 
828             x = 0x03b8;
829         break;
830     case 0x89:
831         if (shift) 
832             if (tonos) 
833                 x = 0x038a;
834             else 
835                 if (dialitika) 
836                     x = 0x03aa;
837                 else 
838                     x = 0x0399;
839         else 
840             if (tonos) 
841                 if (dialitika) 
842                     x = 0x0390;
843                 else 
844                     x = 0x03af;
845         
846             else 
847                 if (dialitika) 
848                     x = 0x03ca;
849                 else 
850                     x = 0x03b9;
851         break;
852     case 0x8a:
853         if (shift) 
854             x = 0x039a;
855         else 
856             x = 0x03ba;
857         
858         break;
859     case 0x8b:
860         if (shift) 
861             x = 0x039b;
862         else 
863             x = 0x03bb;
864         break;
865     case 0x8c:
866         if (shift) 
867             x = 0x039c;
868         else 
869             x = 0x03bc;
870         
871         break;
872     case 0x8d:
873         if (shift) 
874             x = 0x039d;
875         else 
876             x = 0x03bd;
877         break;
878     case 0x8e:
879         if (shift) 
880             x = 0x039e;
881         else 
882             x = 0x03be;
883         break;
884     case 0x8f:
885         if (shift) 
886             if (tonos) 
887                 x = 0x038c;
888             else 
889                 x = 0x039f;
890         else 
891             if (tonos) 
892                 x = 0x03cc;
893             else 
894                 x = 0x03bf;
895         break;
896     case 0x90:
897         if (shift) 
898             x = 0x03a0;
899         else 
900             x = 0x03c0;
901         break;
902     case 0x91:
903         if (shift) 
904             x = 0x03a1;
905         else 
906             x = 0x03c1;
907         break;
908     case 0x92:
909         x = 0x03c2;
910         break;
911     case 0x93:
912         if (shift) 
913             x = 0x03a3;
914         else 
915             x = 0x03c3;
916         break;
917     case 0x94:
918         if (shift) 
919             x = 0x03a4;
920         else 
921             x = 0x03c4;
922         break;
923     case 0x95:
924         if (shift) 
925             if (tonos) 
926                 x = 0x038e;
927             else 
928                 if (dialitika) 
929                     x = 0x03ab;
930                 else 
931                     x = 0x03a5;
932         else 
933             if (tonos) 
934                 if (dialitika) 
935                     x = 0x03b0;
936                 else 
937                     x = 0x03cd;
938         
939             else 
940                 if (dialitika) 
941                     x = 0x03cb;
942                 else 
943                     x = 0x03c5;
944         break;
945     case 0x96:
946         if (shift) 
947             x = 0x03a6;
948         else 
949             x = 0x03c6;
950         break;
951     case 0x97:
952         if (shift) 
953             x = 0x03a7;
954         else 
955             x = 0x03c7;
956         break;
957     case 0x98:
958         if (shift) 
959             x = 0x03a8;
960         else 
961             x = 0x03c8;
962         
963         break;
964         
965     case 0x99:
966         if (shift) 
967             if (tonos) 
968                 x = 0x038f;
969             else 
970                 x = 0x03a9;
971         else 
972             if (tonos) 
973                 x = 0x03ce;
974             else 
975                 x = 0x03c9;
976         break;
977     default:
978         x = *inp;
979         break;
980     }
981     (*no_read)++;
982     
983     return x;
984 }
985
986 static size_t yaz_write_advancegreek(yaz_iconv_t cd, unsigned long x,
987                                      char **outbuf, size_t *outbytesleft)
988 {
989     size_t k = 0;
990     unsigned char *out = (unsigned char*) *outbuf;
991     if (*outbytesleft < 3)
992     {
993         cd->my_errno = YAZ_ICONV_E2BIG;  /* not room for output */
994         return (size_t)(-1);
995     }
996     switch (x)
997     {
998     case 0x03ac : out[k++]=0x9d; out[k++]=0x81; break;
999     case 0x03ad : out[k++]=0x9d; out[k++]=0x85; break;
1000     case 0x03ae : out[k++]=0x9d; out[k++]=0x87; break;
1001     case 0x03af : out[k++]=0x9d; out[k++]=0x89; break;
1002     case 0x03cc : out[k++]=0x9d; out[k++]=0x8f; break;
1003     case 0x03cd : out[k++]=0x9d; out[k++]=0x95; break;
1004     case 0x03ce : out[k++]=0x9d; out[k++]=0x99; break;
1005     case 0x0390 : out[k++]=0x9d; out[k++]=0x9e; out[k++]=0x89; break;
1006     case 0x03b0 : out[k++]=0x9d; out[k++]=0x9e; out[k++]=0x95; break;
1007     case 0x0386 : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x81; break;
1008     case 0x0388 : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x85; break;
1009     case 0x0389 : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x87; break;
1010     case 0x038a : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x89; break;
1011     case 0x038c : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x8f; break;
1012     case 0x038e : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x95; break;
1013     case 0x038f : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x99; break;
1014     case 0x03ca : out[k++]=0x9e; out[k++]=0x89; break;
1015     case 0x03cb : out[k++]=0x9e; out[k++]=0x95; break;
1016     case 0x03aa : out[k++]=0x9e; out[k++]=0x9f; out[k++]=0x89; break;
1017     case 0x03ab : out[k++]=0x9e; out[k++]=0x9f; out[k++]=0x95; break;
1018     case 0x0391 : out[k++]=0x9f; out[k++]=0x81; break;
1019     case 0x0392 : out[k++]=0x9f; out[k++]=0x82; break;
1020     case 0x0393 : out[k++]=0x9f; out[k++]=0x83; break;
1021     case 0x0394 : out[k++]=0x9f; out[k++]=0x84; break;
1022     case 0x0395 : out[k++]=0x9f; out[k++]=0x85; break;
1023     case 0x0396 : out[k++]=0x9f; out[k++]=0x86; break;
1024     case 0x0397 : out[k++]=0x9f; out[k++]=0x87; break;
1025     case 0x0398 : out[k++]=0x9f; out[k++]=0x88; break;
1026     case 0x0399 : out[k++]=0x9f; out[k++]=0x89; break;
1027     case 0x039a : out[k++]=0x9f; out[k++]=0x8a; break;
1028     case 0x039b : out[k++]=0x9f; out[k++]=0x8b; break;
1029     case 0x039c : out[k++]=0x9f; out[k++]=0x8c; break;
1030     case 0x039d : out[k++]=0x9f; out[k++]=0x8d; break;
1031     case 0x039e : out[k++]=0x9f; out[k++]=0x8e; break;
1032     case 0x039f : out[k++]=0x9f; out[k++]=0x8f; break;
1033     case 0x03a0 : out[k++]=0x9f; out[k++]=0x90; break;
1034     case 0x03a1 : out[k++]=0x9f; out[k++]=0x91; break;
1035     case 0x03a3 : out[k++]=0x9f; out[k++]=0x93; break;
1036     case 0x03a4 : out[k++]=0x9f; out[k++]=0x94; break;
1037     case 0x03a5 : out[k++]=0x9f; out[k++]=0x95; break;
1038     case 0x03a6 : out[k++]=0x9f; out[k++]=0x96; break;
1039     case 0x03a7 : out[k++]=0x9f; out[k++]=0x97; break;
1040     case 0x03a8 : out[k++]=0x9f; out[k++]=0x98; break;
1041     case 0x03a9 : out[k++]=0x9f; out[k++]=0x99; break;
1042     case 0x03b1 : out[k++]=0x81; break;
1043     case 0x03b2 : out[k++]=0x82; break;
1044     case 0x03b3 : out[k++]=0x83; break;
1045     case 0x03b4 : out[k++]=0x84; break;
1046     case 0x03b5 : out[k++]=0x85; break;
1047     case 0x03b6 : out[k++]=0x86; break;
1048     case 0x03b7 : out[k++]=0x87; break;
1049     case 0x03b8 : out[k++]=0x88; break;
1050     case 0x03b9 : out[k++]=0x89; break;
1051     case 0x03ba : out[k++]=0x8a; break;
1052     case 0x03bb : out[k++]=0x8b; break;
1053     case 0x03bc : out[k++]=0x8c; break;
1054     case 0x03bd : out[k++]=0x8d; break;
1055     case 0x03be : out[k++]=0x8e; break;
1056     case 0x03bf : out[k++]=0x8f; break;
1057     case 0x03c0 : out[k++]=0x90; break;
1058     case 0x03c1 : out[k++]=0x91; break;
1059     case 0x03c2 : out[k++]=0x92; break;
1060     case 0x03c3 : out[k++]=0x93; break;
1061     case 0x03c4 : out[k++]=0x94; break;
1062     case 0x03c5 : out[k++]=0x95; break;
1063     case 0x03c6 : out[k++]=0x96; break;
1064     case 0x03c7 : out[k++]=0x96; break;
1065     case 0x03c8 : out[k++]=0x98; break;
1066     case 0x03c9 : out[k++]=0x99; break;
1067     default:
1068         if (x > 255)
1069         {
1070             cd->my_errno = YAZ_ICONV_EILSEQ;
1071             return (size_t) -1;
1072         }
1073         out[k++] = x;
1074         break;
1075     }
1076     *outbytesleft -= k;
1077     (*outbuf) += k;
1078     return 0;
1079 }
1080
1081
1082 static unsigned long yaz_read_marc8_comb (yaz_iconv_t cd, unsigned char *inp,
1083                                           size_t inbytesleft, size_t *no_read,
1084                                           int *comb);
1085
1086 static unsigned long yaz_read_marc8 (yaz_iconv_t cd, unsigned char *inp,
1087                                      size_t inbytesleft, size_t *no_read)
1088 {
1089     unsigned long x;
1090     if (cd->comb_offset < cd->comb_size)
1091     {
1092         *no_read = cd->comb_no_read[cd->comb_offset];
1093         x = cd->comb_x[cd->comb_offset];
1094
1095         /* special case for double-diacritic combining characters, 
1096            INVERTED BREVE and DOUBLE TILDE.
1097            We'll increment the no_read counter by 1, since we want to skip over
1098            the processing of the closing ligature character
1099         */
1100         /* this code is no longer necessary.. our handlers code in
1101            yaz_marc8_?_conv (generated by charconv.tcl) now returns
1102            0 and no_read=1 when a sequence does not match the input.
1103            The SECOND HALFs in codetables.xml produces a non-existant
1104            entry in the conversion trie.. Hence when met, the input byte is
1105            skipped as it should (in yaz_iconv)
1106         */
1107 #if 0
1108         if (x == 0x0361 || x == 0x0360)
1109             *no_read += 1;
1110 #endif
1111         cd->comb_offset++;
1112         return x;
1113     }
1114
1115     cd->comb_offset = 0;
1116     for (cd->comb_size = 0; cd->comb_size < 8; cd->comb_size++)
1117     {
1118         int comb = 0;
1119
1120         if (inbytesleft == 0 && cd->comb_size)
1121         {
1122             cd->my_errno = YAZ_ICONV_EINVAL;
1123             x = 0;
1124             *no_read = 0;
1125             break;
1126         }
1127         x = yaz_read_marc8_comb(cd, inp, inbytesleft, no_read, &comb);
1128         if (!comb || !x)
1129             break;
1130         cd->comb_x[cd->comb_size] = x;
1131         cd->comb_no_read[cd->comb_size] = *no_read;
1132         inp += *no_read;
1133         inbytesleft = inbytesleft - *no_read;
1134     }
1135     return x;
1136 }
1137
1138 static unsigned long yaz_read_marc8s(yaz_iconv_t cd, unsigned char *inp,
1139                                      size_t inbytesleft, size_t *no_read)
1140 {
1141     unsigned long x = yaz_read_marc8(cd, inp, inbytesleft, no_read);
1142     if (x && cd->comb_size == 1)
1143     {
1144         /* For MARC8s we try to get a Latin-1 page code out of it */
1145         int i;
1146         for (i = 0; latin1_comb[i].x1; i++)
1147             if (cd->comb_x[0] == latin1_comb[i].x2 && x == latin1_comb[i].x1)
1148             {
1149                 *no_read += cd->comb_no_read[0];
1150                 cd->comb_size = 0;
1151                 x = latin1_comb[i].y;
1152                 break;
1153             }
1154     }
1155     return x;
1156 }
1157
1158 static unsigned long yaz_read_marc8_comb(yaz_iconv_t cd, unsigned char *inp,
1159                                          size_t inbytesleft, size_t *no_read,
1160                                          int *comb)
1161 {
1162     *no_read = 0;
1163     while(inbytesleft >= 1 && inp[0] == 27)
1164     {
1165         size_t inbytesleft0 = inbytesleft;
1166         inp++;
1167         inbytesleft--;
1168         while(inbytesleft > 0 && strchr("(,$!)-", *inp))
1169         {
1170             inbytesleft--;
1171             inp++;
1172         }
1173         if (inbytesleft <= 0)
1174         {
1175             *no_read = 0;
1176             cd->my_errno = YAZ_ICONV_EINVAL;
1177             return 0;
1178         }
1179         cd->marc8_esc_mode = *inp++;
1180         inbytesleft--;
1181         (*no_read) += inbytesleft0 - inbytesleft;
1182     }
1183     if (inbytesleft <= 0)
1184         return 0;
1185     else if (*inp == ' ')
1186     {
1187         *no_read += 1;
1188         return ' ';
1189     }
1190     else
1191     {
1192         unsigned long x;
1193         size_t no_read_sub = 0;
1194         *comb = 0;
1195
1196         switch(cd->marc8_esc_mode)
1197         {
1198         case 'B':  /* Basic ASCII */
1199         case 's':  /* ASCII */
1200         case 'E':  /* ANSEL */
1201             x = yaz_marc8_42_conv(inp, inbytesleft, &no_read_sub, comb);
1202             if (!x)
1203             {
1204                 no_read_sub = 0;
1205                 x = yaz_marc8_45_conv(inp, inbytesleft, &no_read_sub, comb);
1206             }
1207             break;
1208         case 'g':  /* Greek */
1209             x = yaz_marc8_67_conv(inp, inbytesleft, &no_read_sub, comb);
1210             break;
1211         case 'b':  /* Subscripts */
1212             x = yaz_marc8_62_conv(inp, inbytesleft, &no_read_sub, comb);
1213             break;
1214         case 'p':  /* Superscripts */
1215             x = yaz_marc8_70_conv(inp, inbytesleft, &no_read_sub, comb);
1216             break;
1217         case '2':  /* Basic Hebrew */
1218             x = yaz_marc8_32_conv(inp, inbytesleft, &no_read_sub, comb);
1219             break;
1220         case 'N':  /* Basic Cyrillic */
1221             x = yaz_marc8_4E_conv(inp, inbytesleft, &no_read_sub, comb);
1222             break;
1223         case 'Q':  /* Extended Cyrillic */
1224             x = yaz_marc8_51_conv(inp, inbytesleft, &no_read_sub, comb);
1225             break;
1226         case '3':  /* Basic Arabic */
1227             x = yaz_marc8_33_conv(inp, inbytesleft, &no_read_sub, comb);
1228             break;
1229         case '4':  /* Extended Arabic */
1230             x = yaz_marc8_34_conv(inp, inbytesleft, &no_read_sub, comb);
1231             break;
1232         case 'S':  /* Greek */
1233             x = yaz_marc8_53_conv(inp, inbytesleft, &no_read_sub, comb);
1234             break;
1235         case '1':  /* Chinese, Japanese, Korean (EACC) */
1236             x = yaz_marc8_31_conv(inp, inbytesleft, &no_read_sub, comb);
1237             break;
1238         default:
1239             *no_read = 0;
1240             cd->my_errno = YAZ_ICONV_EILSEQ;
1241             return 0;
1242         }
1243         *no_read += no_read_sub;
1244         return x;
1245     }
1246 }
1247
1248 static size_t yaz_write_UTF8(yaz_iconv_t cd, unsigned long x,
1249                              char **outbuf, size_t *outbytesleft)
1250 {
1251     return yaz_write_UTF8_char(x, outbuf, outbytesleft, &cd->my_errno);
1252 }
1253
1254 size_t yaz_write_UTF8_char(unsigned long x,
1255                            char **outbuf, size_t *outbytesleft,
1256                            int *error)
1257 {
1258     unsigned char *outp = (unsigned char *) *outbuf;
1259
1260     if (x <= 0x7f && *outbytesleft >= 1)
1261     {
1262         *outp++ = (unsigned char) x;
1263         (*outbytesleft)--;
1264     } 
1265     else if (x <= 0x7ff && *outbytesleft >= 2)
1266     {
1267         *outp++ = (unsigned char) ((x >> 6) | 0xc0);
1268         *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1269         (*outbytesleft) -= 2;
1270     }
1271     else if (x <= 0xffff && *outbytesleft >= 3)
1272     {
1273         *outp++ = (unsigned char) ((x >> 12) | 0xe0);
1274         *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
1275         *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1276         (*outbytesleft) -= 3;
1277     }
1278     else if (x <= 0x1fffff && *outbytesleft >= 4)
1279     {
1280         *outp++ = (unsigned char) ((x >> 18) | 0xf0);
1281         *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
1282         *outp++ = (unsigned char) (((x >> 6)  & 0x3f) | 0x80);
1283         *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1284         (*outbytesleft) -= 4;
1285     }
1286     else if (x <= 0x3ffffff && *outbytesleft >= 5)
1287     {
1288         *outp++ = (unsigned char) ((x >> 24) | 0xf8);
1289         *outp++ = (unsigned char) (((x >> 18) & 0x3f) | 0x80);
1290         *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
1291         *outp++ = (unsigned char) (((x >> 6)  & 0x3f) | 0x80);
1292         *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1293         (*outbytesleft) -= 5;
1294     }
1295     else if (*outbytesleft >= 6)
1296     {
1297         *outp++ = (unsigned char) ((x >> 30) | 0xfc);
1298         *outp++ = (unsigned char) (((x >> 24) & 0x3f) | 0x80);
1299         *outp++ = (unsigned char) (((x >> 18) & 0x3f) | 0x80);
1300         *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
1301         *outp++ = (unsigned char) (((x >> 6)  & 0x3f) | 0x80);
1302         *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1303         (*outbytesleft) -= 6;
1304     }
1305     else 
1306     {
1307         *error = YAZ_ICONV_E2BIG;  /* not room for output */
1308         return (size_t)(-1);
1309     }
1310     *outbuf = (char *) outp;
1311     return 0;
1312 }
1313
1314 static size_t yaz_write_ISO8859_1 (yaz_iconv_t cd, unsigned long x,
1315                                    char **outbuf, size_t *outbytesleft)
1316 {
1317     /* list of two char unicode sequence that, when combined, are
1318        equivalent to single unicode chars that can be represented in
1319        ISO-8859-1/Latin-1.
1320        Regular iconv on Linux at least does not seem to convert these,
1321        but since MARC-8 to UTF-8 generates these composed sequence
1322        we get a better chance of a successful MARC-8 -> ISO-8859-1
1323        conversion */
1324     unsigned char *outp = (unsigned char *) *outbuf;
1325
1326     if (cd->compose_char)
1327     {
1328         int i;
1329         for (i = 0; latin1_comb[i].x1; i++)
1330             if (cd->compose_char == latin1_comb[i].x1 && x == latin1_comb[i].x2)
1331             {
1332                 x = latin1_comb[i].y;
1333                 break;
1334             }
1335         if (*outbytesleft < 1)
1336         {  /* no room. Retain compose_char and bail out */
1337             cd->my_errno = YAZ_ICONV_E2BIG;
1338             return (size_t)(-1);
1339         }
1340         if (!latin1_comb[i].x1) 
1341         {   /* not found. Just write compose_char */
1342             *outp++ = (unsigned char) cd->compose_char;
1343             (*outbytesleft)--;
1344             *outbuf = (char *) outp;
1345         }
1346         /* compose_char used so reset it. x now holds current char */
1347         cd->compose_char = 0;
1348     }
1349
1350     if (x > 32 && x < 127 && cd->compose_char == 0)
1351     {
1352         cd->compose_char = x;
1353         return 0;
1354     }
1355     else if (x > 255 || x < 1)
1356     {
1357         cd->my_errno = YAZ_ICONV_EILSEQ;
1358         return (size_t) -1;
1359     }
1360     else if (*outbytesleft < 1)
1361     {
1362         cd->my_errno = YAZ_ICONV_E2BIG;
1363         return (size_t)(-1);
1364     }
1365     *outp++ = (unsigned char) x;
1366     (*outbytesleft)--;
1367     *outbuf = (char *) outp;
1368     return 0;
1369 }
1370
1371 static size_t yaz_flush_ISO8859_1(yaz_iconv_t cd,
1372                                   char **outbuf, size_t *outbytesleft)
1373 {
1374     if (cd->compose_char)
1375     {
1376         unsigned char *outp = (unsigned char *) *outbuf;
1377         if (*outbytesleft < 1)
1378         {
1379             cd->my_errno = YAZ_ICONV_E2BIG;
1380             return (size_t)(-1);
1381         }
1382         *outp++ = (unsigned char) cd->compose_char;
1383         (*outbytesleft)--;
1384         *outbuf = (char *) outp;
1385         cd->compose_char = 0;
1386     }
1387     return 0;
1388 }
1389
1390 static size_t yaz_write_UCS4 (yaz_iconv_t cd, unsigned long x,
1391                               char **outbuf, size_t *outbytesleft)
1392 {
1393     unsigned char *outp = (unsigned char *) *outbuf;
1394     if (*outbytesleft >= 4)
1395     {
1396         *outp++ = (unsigned char) (x>>24);
1397         *outp++ = (unsigned char) (x>>16);
1398         *outp++ = (unsigned char) (x>>8);
1399         *outp++ = (unsigned char) x;
1400         (*outbytesleft) -= 4;
1401     }
1402     else
1403     {
1404         cd->my_errno = YAZ_ICONV_E2BIG;
1405         return (size_t)(-1);
1406     }
1407     *outbuf = (char *) outp;
1408     return 0;
1409 }
1410
1411 static size_t yaz_write_UCS4LE (yaz_iconv_t cd, unsigned long x,
1412                                 char **outbuf, size_t *outbytesleft)
1413 {
1414     unsigned char *outp = (unsigned char *) *outbuf;
1415     if (*outbytesleft >= 4)
1416     {
1417         *outp++ = (unsigned char) x;
1418         *outp++ = (unsigned char) (x>>8);
1419         *outp++ = (unsigned char) (x>>16);
1420         *outp++ = (unsigned char) (x>>24);
1421         (*outbytesleft) -= 4;
1422     }
1423     else
1424     {
1425         cd->my_errno = YAZ_ICONV_E2BIG;
1426         return (size_t)(-1);
1427     }
1428     *outbuf = (char *) outp;
1429     return 0;
1430 }
1431
1432 static unsigned long lookup_marc8(yaz_iconv_t cd,
1433                                   unsigned long x, int *comb,
1434                                   const char **page_chr)
1435 {
1436     char utf8_buf[7];
1437     char *utf8_outbuf = utf8_buf;
1438     size_t utf8_outbytesleft = sizeof(utf8_buf)-1, r;
1439
1440     r = yaz_write_UTF8(cd, x, &utf8_outbuf, &utf8_outbytesleft);
1441     if (r == (size_t)(-1))
1442     {
1443         cd->my_errno = YAZ_ICONV_EILSEQ;
1444         return 0;
1445     }
1446     else
1447     {
1448         unsigned char *inp;
1449         size_t inbytesleft, no_read_sub = 0;
1450         unsigned long x;
1451
1452         *utf8_outbuf = '\0';        
1453         inp = (unsigned char *) utf8_buf;
1454         inbytesleft = strlen(utf8_buf);
1455
1456         if (x == ' ')
1457             return x;
1458         
1459         x = yaz_marc8r_42_conv(inp, inbytesleft, &no_read_sub, comb);
1460         if (x)
1461         {
1462             *page_chr = "\033(B";
1463             return x;
1464         }
1465         x = yaz_marc8r_45_conv(inp, inbytesleft, &no_read_sub, comb);
1466         if (x)
1467         {
1468             *page_chr = "\033(B";
1469             return x;
1470         }
1471         x = yaz_marc8r_67_conv(inp, inbytesleft, &no_read_sub, comb);
1472         if (x)
1473         {
1474             *page_chr = "\033g";
1475             return x;
1476         }
1477         x = yaz_marc8r_62_conv(inp, inbytesleft, &no_read_sub, comb);
1478         if (x)
1479         {
1480             *page_chr = "\033b";
1481             return x;
1482         }
1483         x = yaz_marc8r_70_conv(inp, inbytesleft, &no_read_sub, comb);
1484         if (x)
1485         {
1486             *page_chr = "\033p";
1487             return x;
1488         }
1489         x = yaz_marc8r_32_conv(inp, inbytesleft, &no_read_sub, comb);
1490         if (x)
1491         {
1492             *page_chr = "\033(2";
1493             return x;
1494         }
1495         x = yaz_marc8r_4E_conv(inp, inbytesleft, &no_read_sub, comb);
1496         if (x)
1497         {
1498             *page_chr = "\033(N";
1499             return x;
1500         }
1501         x = yaz_marc8r_51_conv(inp, inbytesleft, &no_read_sub, comb);
1502         if (x)
1503         {
1504             *page_chr = "\033(Q";
1505             return x;
1506         }
1507         x = yaz_marc8r_33_conv(inp, inbytesleft, &no_read_sub, comb);
1508         if (x)
1509         {
1510             *page_chr = "\033(3";
1511             return x;
1512         }
1513         x = yaz_marc8r_34_conv(inp, inbytesleft, &no_read_sub, comb);
1514         if (x)
1515         {
1516             *page_chr = "\033(4";
1517             return x;
1518         }
1519         x = yaz_marc8r_53_conv(inp, inbytesleft, &no_read_sub, comb);
1520         if (x)
1521         {
1522             *page_chr = "\033(S";
1523             return x;
1524         }
1525         x = yaz_marc8r_31_conv(inp, inbytesleft, &no_read_sub, comb);
1526         if (x)
1527         {
1528             *page_chr = "\033$1";
1529             return x;
1530         }
1531         cd->my_errno = YAZ_ICONV_EILSEQ;
1532         return x;
1533     }
1534 }
1535
1536 static size_t flush_combos(yaz_iconv_t cd,
1537                            char **outbuf, size_t *outbytesleft)
1538 {
1539     unsigned long y = cd->write_marc8_last;
1540     unsigned char byte;
1541     char out_buf[10];
1542     size_t i, out_no = 0;
1543
1544     if (!y)
1545         return 0;
1546
1547     byte = (unsigned char )((y>>16) & 0xff);
1548     if (byte)
1549         out_buf[out_no++] = byte;
1550     byte = (unsigned char)((y>>8) & 0xff);
1551     if (byte)
1552         out_buf[out_no++] = byte;
1553     byte = (unsigned char )(y & 0xff);
1554     if (byte)
1555         out_buf[out_no++] = byte;
1556
1557     if (out_no + cd->write_marc8_comb_no + 1 > *outbytesleft)
1558     {
1559         cd->my_errno = YAZ_ICONV_E2BIG;
1560         return (size_t) (-1);
1561     }
1562
1563     for (i = 0; i < cd->write_marc8_comb_no; i++)
1564     {
1565         /* all MARC-8 combined characters are simple bytes */
1566         byte = (unsigned char )(cd->write_marc8_comb_ch[i]);
1567         *(*outbuf)++ = byte;
1568         (*outbytesleft)--;
1569     }
1570     memcpy(*outbuf, out_buf, out_no);
1571     *outbuf += out_no;
1572     (*outbytesleft) -= out_no;
1573     if (cd->write_marc8_second_half_char)
1574     {
1575         *(*outbuf)++ = cd->write_marc8_second_half_char;
1576         (*outbytesleft)--;
1577     }        
1578
1579     cd->write_marc8_last = 0;
1580     cd->write_marc8_comb_no = 0;
1581     cd->write_marc8_second_half_char = 0;
1582     return 0;
1583 }
1584
1585 static size_t yaz_write_marc8_page_chr(yaz_iconv_t cd, 
1586                                        char **outbuf, size_t *outbytesleft,
1587                                        const char *page_chr)
1588 {
1589     const char *old_page_chr = cd->write_marc8_page_chr;
1590     if (strcmp(page_chr, old_page_chr))
1591     {
1592         size_t plen = 0;
1593         const char *page_out = page_chr;
1594         
1595         if (*outbytesleft < 8)
1596         {
1597             cd->my_errno = YAZ_ICONV_E2BIG;
1598             
1599             return (size_t) (-1);
1600         }
1601         cd->write_marc8_page_chr = page_chr;
1602         
1603         if (!strcmp(old_page_chr, "\033p") 
1604             || !strcmp(old_page_chr, "\033g")
1605             || !strcmp(old_page_chr, "\033b"))
1606         {
1607             /* Technique 1 leave */
1608             page_out = "\033s";
1609             if (strcmp(page_chr, "\033(B")) /* Not going ASCII page? */
1610             {
1611                 /* Must leave script + enter new page */
1612                 plen = strlen(page_out);
1613                 memcpy(*outbuf, page_out, plen);
1614                 (*outbuf) += plen;
1615                 (*outbytesleft) -= plen;
1616                 page_out = page_chr;
1617             }
1618         }
1619         plen = strlen(page_out);
1620         memcpy(*outbuf, page_out, plen);
1621         (*outbuf) += plen;
1622         (*outbytesleft) -= plen;
1623     }
1624     return 0;
1625 }
1626
1627
1628 static size_t yaz_write_marc8_2(yaz_iconv_t cd, unsigned long x,
1629                                 char **outbuf, size_t *outbytesleft)
1630 {
1631     int comb = 0;
1632     const char *page_chr = 0;
1633     unsigned long y = lookup_marc8(cd, x, &comb, &page_chr);
1634
1635     if (!y)
1636         return (size_t) (-1);
1637
1638     if (comb)
1639     {
1640         if (x == 0x0361)
1641             cd->write_marc8_second_half_char = 0xEC;
1642         else if (x == 0x0360)
1643             cd->write_marc8_second_half_char = 0xFB;
1644
1645         if (cd->write_marc8_comb_no < 6)
1646             cd->write_marc8_comb_ch[cd->write_marc8_comb_no++] = y;
1647     }
1648     else
1649     {
1650         size_t r = flush_combos(cd, outbuf, outbytesleft);
1651         if (r)
1652             return r;
1653
1654         if (page_chr)
1655         {
1656             r = yaz_write_marc8_page_chr(cd, outbuf, outbytesleft, page_chr);
1657             if (r)
1658                 return r;
1659         }
1660         cd->write_marc8_last = y;
1661     }
1662     return 0;
1663 }
1664
1665 static size_t yaz_flush_marc8(yaz_iconv_t cd,
1666                               char **outbuf, size_t *outbytesleft)
1667 {
1668     size_t r = flush_combos(cd, outbuf, outbytesleft);
1669     if (r)
1670         return r;
1671     return yaz_write_marc8_page_chr(cd, outbuf, outbytesleft, "\033(B");
1672 }
1673
1674 static size_t yaz_write_marc8(yaz_iconv_t cd, unsigned long x,
1675                               char **outbuf, size_t *outbytesleft)
1676 {
1677     int i;
1678     for (i = 0; latin1_comb[i].x1; i++)
1679     {
1680         if (x == latin1_comb[i].y)
1681         {
1682             size_t r ;
1683             /* save the output pointers .. */
1684             char *outbuf0 = *outbuf;
1685             size_t outbytesleft0 = *outbytesleft;
1686             int last_ch = cd->write_marc8_last;
1687
1688             r = yaz_write_marc8_2(cd, latin1_comb[i].x1,
1689                                   outbuf, outbytesleft);
1690             if (r)
1691                 return r;
1692             r = yaz_write_marc8_2(cd, latin1_comb[i].x2,
1693                                   outbuf, outbytesleft);
1694             if (r && cd->my_errno == YAZ_ICONV_E2BIG)
1695             {
1696                 /* not enough room. reset output to original values */
1697                 *outbuf = outbuf0;
1698                 *outbytesleft = outbytesleft0;
1699                 cd->write_marc8_last = last_ch;
1700             }
1701             return r;
1702         }
1703     }
1704     return yaz_write_marc8_2(cd, x, outbuf, outbytesleft);
1705 }
1706
1707
#if HAVE_WCHAR_H
/**
 * \brief Write handler for WCHAR_T: stores x as one wchar_t in the
 *        machine's own representation.
 *
 * \return 0 on success; (size_t)(-1) with cd->my_errno = YAZ_ICONV_E2BIG
 *         when fewer than sizeof(wchar_t) bytes are free.
 */
static size_t yaz_write_wchar_t(yaz_iconv_t cd, unsigned long x,
                                char **outbuf, size_t *outbytesleft)
{
    wchar_t wide;

    if (*outbytesleft < sizeof(wchar_t))
    {
        cd->my_errno = YAZ_ICONV_E2BIG;
        return (size_t)(-1);
    }
    wide = x;
    /* memcpy, since the output position need not be aligned */
    memcpy(*outbuf, &wide, sizeof(wide));
    *outbuf += sizeof(wide);
    (*outbytesleft) -= sizeof(wide);
    return 0;
}
#endif
1730
1731 int yaz_iconv_isbuiltin(yaz_iconv_t cd)
1732 {
1733     return cd->read_handle && cd->write_handle;
1734 }
1735
1736 yaz_iconv_t yaz_iconv_open (const char *tocode, const char *fromcode)
1737 {
1738     yaz_iconv_t cd = (yaz_iconv_t) xmalloc (sizeof(*cd));
1739
1740     cd->write_handle = 0;
1741     cd->read_handle = 0;
1742     cd->init_handle = 0;
1743     cd->flush_handle = 0;
1744     cd->my_errno = YAZ_ICONV_UNKNOWN;
1745
1746     /* a useful hack: if fromcode has leading @,
1747        the library not use YAZ's own conversions .. */
1748     if (fromcode[0] == '@')
1749         fromcode++;
1750     else
1751     {
1752         if (!yaz_matchstr(fromcode, "UTF8"))
1753         {
1754             cd->read_handle = yaz_read_UTF8;
1755             cd->init_handle = yaz_init_UTF8;
1756         }
1757         else if (!yaz_matchstr(fromcode, "ISO88591"))
1758             cd->read_handle = yaz_read_ISO8859_1;
1759         else if (!yaz_matchstr(fromcode, "UCS4"))
1760             cd->read_handle = yaz_read_UCS4;
1761         else if (!yaz_matchstr(fromcode, "UCS4LE"))
1762             cd->read_handle = yaz_read_UCS4LE;
1763         else if (!yaz_matchstr(fromcode, "MARC8"))
1764             cd->read_handle = yaz_read_marc8;
1765         else if (!yaz_matchstr(fromcode, "MARC8s"))
1766             cd->read_handle = yaz_read_marc8s;
1767         else if (!yaz_matchstr(fromcode, "advancegreek"))
1768             cd->read_handle = yaz_read_advancegreek;
1769         else if (!yaz_matchstr(fromcode, "iso54281984"))
1770             cd->read_handle = yaz_read_iso5428_1984;
1771         else if (!yaz_matchstr(fromcode, "iso5428:1984"))
1772             cd->read_handle = yaz_read_iso5428_1984;
1773 #if HAVE_WCHAR_H
1774         else if (!yaz_matchstr(fromcode, "WCHAR_T"))
1775             cd->read_handle = yaz_read_wchar_t;
1776 #endif
1777         
1778         if (!yaz_matchstr(tocode, "UTF8"))
1779             cd->write_handle = yaz_write_UTF8;
1780         else if (!yaz_matchstr(tocode, "ISO88591"))
1781         {
1782             cd->write_handle = yaz_write_ISO8859_1;
1783             cd->flush_handle = yaz_flush_ISO8859_1;
1784         }
1785         else if (!yaz_matchstr (tocode, "UCS4"))
1786             cd->write_handle = yaz_write_UCS4;
1787         else if (!yaz_matchstr(tocode, "UCS4LE"))
1788             cd->write_handle = yaz_write_UCS4LE;
1789         else if (!yaz_matchstr(tocode, "MARC8"))
1790         {
1791             cd->write_handle = yaz_write_marc8;
1792             cd->flush_handle = yaz_flush_marc8;
1793         }
1794         else if (!yaz_matchstr(tocode, "MARC8s"))
1795         {
1796             cd->write_handle = yaz_write_marc8;
1797             cd->flush_handle = yaz_flush_marc8;
1798         }
1799         else if (!yaz_matchstr(tocode, "advancegreek"))
1800         {
1801             cd->write_handle = yaz_write_advancegreek;
1802         }
1803         else if (!yaz_matchstr(tocode, "iso54281984"))
1804         {
1805             cd->write_handle = yaz_write_iso5428_1984;
1806         }
1807         else if (!yaz_matchstr(tocode, "iso5428:1984"))
1808         {
1809             cd->write_handle = yaz_write_iso5428_1984;
1810         }
1811 #if HAVE_WCHAR_H
1812         else if (!yaz_matchstr(tocode, "WCHAR_T"))
1813             cd->write_handle = yaz_write_wchar_t;
1814 #endif
1815     }
1816 #if HAVE_ICONV_H
1817     cd->iconv_cd = 0;
1818     if (!cd->read_handle || !cd->write_handle)
1819     {
1820         cd->iconv_cd = iconv_open (tocode, fromcode);
1821         if (cd->iconv_cd == (iconv_t) (-1))
1822         {
1823             xfree (cd);
1824             return 0;
1825         }
1826     }
1827 #else
1828     if (!cd->read_handle || !cd->write_handle)
1829     {
1830         xfree (cd);
1831         return 0;
1832     }
1833 #endif
1834     cd->init_flag = 1;
1835     return cd;
1836 }
1837
1838 size_t yaz_iconv(yaz_iconv_t cd, char **inbuf, size_t *inbytesleft,
1839                  char **outbuf, size_t *outbytesleft)
1840 {
1841     char *inbuf0 = 0;
1842     size_t r = 0;
1843
1844 #if HAVE_ICONV_H
1845     if (cd->iconv_cd)
1846     {
1847         size_t r =
1848             iconv(cd->iconv_cd, inbuf, inbytesleft, outbuf, outbytesleft);
1849         if (r == (size_t)(-1))
1850         {
1851             switch (yaz_errno())
1852             {
1853             case E2BIG:
1854                 cd->my_errno = YAZ_ICONV_E2BIG;
1855                 break;
1856             case EINVAL:
1857                 cd->my_errno = YAZ_ICONV_EINVAL;
1858                 break;
1859             case EILSEQ:
1860                 cd->my_errno = YAZ_ICONV_EILSEQ;
1861                 break;
1862             default:
1863                 cd->my_errno = YAZ_ICONV_UNKNOWN;
1864             }
1865         }
1866         return r;
1867     }
1868 #endif
1869
1870     if (inbuf)
1871         inbuf0 = *inbuf;
1872
1873     if (cd->init_flag)
1874     {
1875         cd->my_errno = YAZ_ICONV_UNKNOWN;
1876         cd->marc8_esc_mode = 'B';
1877         
1878         cd->comb_offset = cd->comb_size = 0;
1879         cd->compose_char = 0;
1880         
1881         cd->write_marc8_comb_no = 0;
1882         cd->write_marc8_second_half_char = 0;
1883         cd->write_marc8_last = 0;
1884         cd->write_marc8_page_chr = "\033(B";
1885         
1886         cd->unget_x = 0;
1887         cd->no_read_x = 0;
1888     }
1889
1890     if (cd->init_flag)
1891     {
1892         if (cd->init_handle && inbuf && *inbuf)
1893         {
1894             size_t no_read = 0;
1895             size_t r = (cd->init_handle)(cd, (unsigned char *) *inbuf,
1896                                          *inbytesleft, &no_read);
1897             if (r)
1898             {
1899                 if (cd->my_errno == YAZ_ICONV_EINVAL)
1900                     return r;
1901                 cd->init_flag = 0;
1902                 return r;
1903             }
1904             *inbytesleft -= no_read;
1905             *inbuf += no_read;
1906         }
1907     }
1908     cd->init_flag = 0;
1909
1910     if (!inbuf || !*inbuf)
1911     {
1912         if (outbuf && *outbuf)
1913         {
1914             if (cd->unget_x)
1915                 r = (*cd->write_handle)(cd, cd->unget_x, outbuf, outbytesleft);
1916             if (cd->flush_handle)
1917                 r = (*cd->flush_handle)(cd, outbuf, outbytesleft);
1918         }
1919         if (r == 0)
1920             cd->init_flag = 1;
1921         cd->unget_x = 0;
1922         return r;
1923     }
1924     while (1)
1925     {
1926         unsigned long x;
1927         size_t no_read;
1928
1929         if (cd->unget_x)
1930         {
1931             x = cd->unget_x;
1932             no_read = cd->no_read_x;
1933         }
1934         else
1935         {
1936             if (*inbytesleft == 0)
1937             {
1938                 r = *inbuf - inbuf0;
1939                 break;
1940             }
1941             x = (*cd->read_handle)(cd, (unsigned char *) *inbuf, *inbytesleft,
1942                                    &no_read);
1943             if (no_read == 0)
1944             {
1945                 r = (size_t)(-1);
1946                 break;
1947             }
1948         }
1949         if (x)
1950         {
1951             r = (*cd->write_handle)(cd, x, outbuf, outbytesleft);
1952             if (r)
1953             {
1954                 /* unable to write it. save it because read_handle cannot
1955                    rewind .. */
1956                 if (cd->my_errno == YAZ_ICONV_E2BIG)
1957                 {
1958                     cd->unget_x = x;
1959                     cd->no_read_x = no_read;
1960                     break;
1961                 }
1962             }
1963             cd->unget_x = 0;
1964         }
1965         *inbytesleft -= no_read;
1966         (*inbuf) += no_read;
1967     }
1968     return r;
1969 }
1970
1971 int yaz_iconv_error (yaz_iconv_t cd)
1972 {
1973     return cd->my_errno;
1974 }
1975
1976 int yaz_iconv_close (yaz_iconv_t cd)
1977 {
1978 #if HAVE_ICONV_H
1979     if (cd->iconv_cd)
1980         iconv_close (cd->iconv_cd);
1981 #endif
1982     xfree (cd);
1983     return 0;
1984 }
1985
1986 /*
1987  * Local variables:
1988  * c-basic-offset: 4
1989  * indent-tabs-mode: nil
1990  * End:
1991  * vim: shiftwidth=4 tabstop=8 expandtab
1992  */