Fixed bug #2120: Different greek symbols, UTF-8 to MARC-8.
[yaz-moved-to-github.git] / src / siconv.c
1 /*
2  * Copyright (C) 1995-2007, Index Data ApS
3  * See the file LICENSE for details.
4  *
5  * $Id: siconv.c,v 1.50 2008-03-12 08:53:28 adam Exp $
6  */
7 /**
8  * \file siconv.c
9  * \brief Implements simple ICONV
10  *
11  * This implements an interface similar to that of iconv and
12  * is used by YAZ to interface with iconv (if present).
13  * For systems where iconv is not present, this layer
14  * provides a few important conversions: UTF-8, MARC-8, Latin-1.
15  *
16  * MARC-8 reference:
17  *  http://www.loc.gov/marc/specifications/speccharmarc8.html
18  */
19
20 #if HAVE_CONFIG_H
21 #include <config.h>
22 #endif
23
24 #include <assert.h>
25 #include <errno.h>
26 #include <string.h>
27 #include <ctype.h>
28 #if HAVE_WCHAR_H
29 #include <wchar.h>
30 #endif
31
32 #if HAVE_ICONV_H
33 #include <iconv.h>
34 #endif
35
36
37 #include <yaz/yaz-util.h>
38
/*
 * Prototypes for conversion routines that are defined outside this part
 * of the file. All share one signature: they take an input buffer and
 * its length, report the number of bytes consumed through no_read,
 * report something about combining characters through combining, and
 * return an unsigned long.
 * NOTE(review): the hex suffix (42, 45, 67, ...) presumably names the
 * MARC-8 character set handled by each routine - confirm against the
 * MARC-8 specification referenced in the file header.
 */
unsigned long yaz_marc8_42_conv(unsigned char *inp, size_t inbytesleft,
                               size_t *no_read, int *combining);
unsigned long yaz_marc8_45_conv(unsigned char *inp, size_t inbytesleft,
                               size_t *no_read, int *combining);
unsigned long yaz_marc8_67_conv(unsigned char *inp, size_t inbytesleft,
                               size_t *no_read, int *combining);
unsigned long yaz_marc8_62_conv(unsigned char *inp, size_t inbytesleft,
                               size_t *no_read, int *combining);
unsigned long yaz_marc8_70_conv(unsigned char *inp, size_t inbytesleft,
                               size_t *no_read, int *combining);
unsigned long yaz_marc8_32_conv(unsigned char *inp, size_t inbytesleft,
                               size_t *no_read, int *combining);
unsigned long yaz_marc8_4E_conv(unsigned char *inp, size_t inbytesleft,
                               size_t *no_read, int *combining);
unsigned long yaz_marc8_51_conv(unsigned char *inp, size_t inbytesleft,
                               size_t *no_read, int *combining);
unsigned long yaz_marc8_33_conv(unsigned char *inp, size_t inbytesleft,
                               size_t *no_read, int *combining);
unsigned long yaz_marc8_34_conv(unsigned char *inp, size_t inbytesleft,
                               size_t *no_read, int *combining);
unsigned long yaz_marc8_53_conv(unsigned char *inp, size_t inbytesleft,
                               size_t *no_read, int *combining);
unsigned long yaz_marc8_31_conv(unsigned char *inp, size_t inbytesleft,
                               size_t *no_read, int *combining);


/* NOTE(review): the "marc8r" variants are presumably the reverse
   direction of the routines above - confirm against their definitions. */
unsigned long yaz_marc8r_42_conv(unsigned char *inp, size_t inbytesleft,
                                 size_t *no_read, int *combining);
unsigned long yaz_marc8r_45_conv(unsigned char *inp, size_t inbytesleft,
                                 size_t *no_read, int *combining);
unsigned long yaz_marc8r_67_conv(unsigned char *inp, size_t inbytesleft,
                                 size_t *no_read, int *combining);
unsigned long yaz_marc8r_62_conv(unsigned char *inp, size_t inbytesleft,
                                 size_t *no_read, int *combining);
unsigned long yaz_marc8r_70_conv(unsigned char *inp, size_t inbytesleft,
                                 size_t *no_read, int *combining);
unsigned long yaz_marc8r_32_conv(unsigned char *inp, size_t inbytesleft,
                                 size_t *no_read, int *combining);
unsigned long yaz_marc8r_4E_conv(unsigned char *inp, size_t inbytesleft,
                                 size_t *no_read, int *combining);
unsigned long yaz_marc8r_51_conv(unsigned char *inp, size_t inbytesleft,
                                 size_t *no_read, int *combining);
unsigned long yaz_marc8r_33_conv(unsigned char *inp, size_t inbytesleft,
                                 size_t *no_read, int *combining);
unsigned long yaz_marc8r_34_conv(unsigned char *inp, size_t inbytesleft,
                                 size_t *no_read, int *combining);
unsigned long yaz_marc8r_53_conv(unsigned char *inp, size_t inbytesleft,
                                 size_t *no_read, int *combining);
unsigned long yaz_marc8r_31_conv(unsigned char *inp, size_t inbytesleft,
                                 size_t *no_read, int *combining);
89
90 #define ESC "\033"
91
/**
 * \brief State of one conversion handle (the object behind yaz_iconv_t).
 *
 * Holds the error code, the handler functions for the chosen source and
 * target encodings, and scratch state used by individual handlers.
 */
struct yaz_iconv_struct {
    /* last error; handlers in this file store YAZ_ICONV_* codes here */
    int my_errno;
    int init_flag; /* NOTE(review): presumably "init_handle still pending" - confirm */
    /* input start hook; same signature as yaz_init_UTF8 */
    size_t (*init_handle)(yaz_iconv_t cd, unsigned char *inbuf,
                          size_t inbytesleft, size_t *no_read);
    /* decoder; same signature as the yaz_read_* functions: returns one
       character value and reports the bytes consumed through no_read */
    unsigned long (*read_handle)(yaz_iconv_t cd, unsigned char *inbuf,
                                 size_t inbytesleft, size_t *no_read);
    /* encoder; same signature as the yaz_write_* functions: writes the
       character x to *outbuf and updates *outbuf / *outbytesleft */
    size_t (*write_handle)(yaz_iconv_t cd, unsigned long x,
                           char **outbuf, size_t *outbytesleft);
    /* NOTE(review): presumably emits output still held back by
       write_handle - confirm against the flush implementations */
    size_t (*flush_handle)(yaz_iconv_t cd,
                           char **outbuf, size_t *outbytesleft);
    /* NOTE(review): presumably the active MARC-8 G0/G1 sets - confirm */
    int g0_mode;
    int g1_mode;

    /* NOTE(review): comb_* look like a small queue (at most 8 entries) of
       buffered characters and their input lengths - confirm with the
       MARC-8 reader */
    int comb_offset;
    int comb_size;
    unsigned long comb_x[8];
    size_t comb_no_read[8];
    size_t no_read_x;
    unsigned long unget_x;
#if HAVE_ICONV_H
    /* descriptor of the system iconv; only present when iconv.h exists */
    iconv_t iconv_cd;
#endif
    unsigned long compose_char;

    /* NOTE(review): the write_marc8_* members are presumably state of the
       MARC-8 writer - their use is not visible in this part of the file */
    unsigned write_marc8_second_half_char;
    unsigned long write_marc8_last;
    const char *write_marc8_lpage;
    const char *write_marc8_g0;
    const char *write_marc8_g1;
};
123
/*
 * Composition table for Latin-1: a base letter x1 followed by the Unicode
 * combining mark x2 corresponds to the single precomposed Latin-1 code y.
 * Latin-1 codes that are not a letter plus one of these marks are left
 * out (see the "omitted" notes). The table ends with an all-zero entry.
 */
static struct {
    unsigned long x1, x2;
    unsigned y;
} latin1_comb[] = {
    { 'A', 0x0300, 0xc0}, /* LATIN CAPITAL LETTER A WITH GRAVE */
    { 'A', 0x0301, 0xc1}, /* LATIN CAPITAL LETTER A WITH ACUTE */
    { 'A', 0x0302, 0xc2}, /* LATIN CAPITAL LETTER A WITH CIRCUMFLEX */
    { 'A', 0x0303, 0xc3}, /* LATIN CAPITAL LETTER A WITH TILDE */
    { 'A', 0x0308, 0xc4}, /* LATIN CAPITAL LETTER A WITH DIAERESIS */
    { 'A', 0x030a, 0xc5}, /* LATIN CAPITAL LETTER A WITH RING ABOVE */
    /* omitted:    0xc6      LATIN CAPITAL LETTER AE */
    { 'C', 0x0327, 0xc7}, /* LATIN CAPITAL LETTER C WITH CEDILLA */
    { 'E', 0x0300, 0xc8}, /* LATIN CAPITAL LETTER E WITH GRAVE */
    { 'E', 0x0301, 0xc9}, /* LATIN CAPITAL LETTER E WITH ACUTE */
    { 'E', 0x0302, 0xca}, /* LATIN CAPITAL LETTER E WITH CIRCUMFLEX */
    { 'E', 0x0308, 0xcb}, /* LATIN CAPITAL LETTER E WITH DIAERESIS */
    { 'I', 0x0300, 0xcc}, /* LATIN CAPITAL LETTER I WITH GRAVE */
    { 'I', 0x0301, 0xcd}, /* LATIN CAPITAL LETTER I WITH ACUTE */
    { 'I', 0x0302, 0xce}, /* LATIN CAPITAL LETTER I WITH CIRCUMFLEX */
    { 'I', 0x0308, 0xcf}, /* LATIN CAPITAL LETTER I WITH DIAERESIS */
    { 'N', 0x0303, 0xd1}, /* LATIN CAPITAL LETTER N WITH TILDE */
    { 'O', 0x0300, 0xd2}, /* LATIN CAPITAL LETTER O WITH GRAVE */
    { 'O', 0x0301, 0xd3}, /* LATIN CAPITAL LETTER O WITH ACUTE */
    { 'O', 0x0302, 0xd4}, /* LATIN CAPITAL LETTER O WITH CIRCUMFLEX */
    { 'O', 0x0303, 0xd5}, /* LATIN CAPITAL LETTER O WITH TILDE */
    { 'O', 0x0308, 0xd6}, /* LATIN CAPITAL LETTER O WITH DIAERESIS */
    /* omitted:    0xd7      MULTIPLICATION SIGN */
    /* omitted:    0xd8      LATIN CAPITAL LETTER O WITH STROKE */
    { 'U', 0x0300, 0xd9}, /* LATIN CAPITAL LETTER U WITH GRAVE */
    { 'U', 0x0301, 0xda}, /* LATIN CAPITAL LETTER U WITH ACUTE */
    { 'U', 0x0302, 0xdb}, /* LATIN CAPITAL LETTER U WITH CIRCUMFLEX */
    { 'U', 0x0308, 0xdc}, /* LATIN CAPITAL LETTER U WITH DIAERESIS */
    { 'Y', 0x0301, 0xdd}, /* LATIN CAPITAL LETTER Y WITH ACUTE */
    /* omitted:    0xde      LATIN CAPITAL LETTER THORN */
    /* omitted:    0xdf      LATIN SMALL LETTER SHARP S */
    { 'a', 0x0300, 0xe0}, /* LATIN SMALL LETTER A WITH GRAVE */
    { 'a', 0x0301, 0xe1}, /* LATIN SMALL LETTER A WITH ACUTE */
    { 'a', 0x0302, 0xe2}, /* LATIN SMALL LETTER A WITH CIRCUMFLEX */
    { 'a', 0x0303, 0xe3}, /* LATIN SMALL LETTER A WITH TILDE */
    { 'a', 0x0308, 0xe4}, /* LATIN SMALL LETTER A WITH DIAERESIS */
    { 'a', 0x030a, 0xe5}, /* LATIN SMALL LETTER A WITH RING ABOVE */
    /* omitted:    0xe6      LATIN SMALL LETTER AE */
    { 'c', 0x0327, 0xe7}, /* LATIN SMALL LETTER C WITH CEDILLA */
    { 'e', 0x0300, 0xe8}, /* LATIN SMALL LETTER E WITH GRAVE */
    { 'e', 0x0301, 0xe9}, /* LATIN SMALL LETTER E WITH ACUTE */
    { 'e', 0x0302, 0xea}, /* LATIN SMALL LETTER E WITH CIRCUMFLEX */
    { 'e', 0x0308, 0xeb}, /* LATIN SMALL LETTER E WITH DIAERESIS */
    { 'i', 0x0300, 0xec}, /* LATIN SMALL LETTER I WITH GRAVE */
    { 'i', 0x0301, 0xed}, /* LATIN SMALL LETTER I WITH ACUTE */
    { 'i', 0x0302, 0xee}, /* LATIN SMALL LETTER I WITH CIRCUMFLEX */
    { 'i', 0x0308, 0xef}, /* LATIN SMALL LETTER I WITH DIAERESIS */
    /* omitted:    0xf0      LATIN SMALL LETTER ETH */
    { 'n', 0x0303, 0xf1}, /* LATIN SMALL LETTER N WITH TILDE */
    { 'o', 0x0300, 0xf2}, /* LATIN SMALL LETTER O WITH GRAVE */
    { 'o', 0x0301, 0xf3}, /* LATIN SMALL LETTER O WITH ACUTE */
    { 'o', 0x0302, 0xf4}, /* LATIN SMALL LETTER O WITH CIRCUMFLEX */
    { 'o', 0x0303, 0xf5}, /* LATIN SMALL LETTER O WITH TILDE */
    { 'o', 0x0308, 0xf6}, /* LATIN SMALL LETTER O WITH DIAERESIS */
    /* omitted:    0xf7      DIVISION SIGN */
    /* omitted:    0xf8      LATIN SMALL LETTER O WITH STROKE */
    { 'u', 0x0300, 0xf9}, /* LATIN SMALL LETTER U WITH GRAVE */
    { 'u', 0x0301, 0xfa}, /* LATIN SMALL LETTER U WITH ACUTE */
    { 'u', 0x0302, 0xfb}, /* LATIN SMALL LETTER U WITH CIRCUMFLEX */
    { 'u', 0x0308, 0xfc}, /* LATIN SMALL LETTER U WITH DIAERESIS */
    { 'y', 0x0301, 0xfd}, /* LATIN SMALL LETTER Y WITH ACUTE */
    /* omitted:    0xfe      LATIN SMALL LETTER THORN */
    { 'y', 0x0308, 0xff}, /* LATIN SMALL LETTER Y WITH DIAERESIS */
    
    { 0, 0, 0} /* end-of-table marker */
};
194
/* Forward declaration only; the definition is not in this part of the
   file. NOTE(review): presumably emits the MARC-8 escape sequence for
   page_chr - confirm against the definition. */
static size_t yaz_write_marc8_page_chr(yaz_iconv_t cd, 
                                       char **outbuf, size_t *outbytesleft,
                                       const char *page_chr);
198
199 static unsigned long yaz_read_ISO8859_1 (yaz_iconv_t cd, unsigned char *inp,
200                                          size_t inbytesleft, size_t *no_read)
201 {
202     unsigned long x = inp[0];
203     *no_read = 1;
204     return x;
205 }
206
207
208 static size_t yaz_init_UTF8 (yaz_iconv_t cd, unsigned char *inp,
209                              size_t inbytesleft, size_t *no_read)
210 {
211     if (inp[0] != 0xef)
212     {
213         *no_read = 0;
214         return 0;
215     }
216     if (inbytesleft < 3)
217     {
218         cd->my_errno = YAZ_ICONV_EINVAL;
219         return (size_t) -1;
220     }
221     if (inp[1] != 0xbb && inp[2] == 0xbf)
222         *no_read = 3;
223     else
224         *no_read = 0;
225     return 0;
226 }
227
228 unsigned long yaz_read_UTF8_char(unsigned char *inp,
229                                  size_t inbytesleft, size_t *no_read,
230                                  int *error)
231 {
232     unsigned long x = 0;
233
234     *no_read = 0; /* by default */
235     if (inp[0] <= 0x7f)
236     {
237         x = inp[0];
238         *no_read = 1;
239     }
240     else if (inp[0] <= 0xbf || inp[0] >= 0xfe)
241     {
242         *error = YAZ_ICONV_EILSEQ;
243     }
244     else if (inp[0] <= 0xdf && inbytesleft >= 2)
245     {
246         if ((inp[1] & 0xc0) == 0x80)
247         {
248             x = ((inp[0] & 0x1f) << 6) | (inp[1] & 0x3f);
249             if (x >= 0x80)
250                 *no_read = 2;
251             else
252                 *error = YAZ_ICONV_EILSEQ;
253         }
254         else
255             *error = YAZ_ICONV_EILSEQ;
256     }
257     else if (inp[0] <= 0xef && inbytesleft >= 3)
258     {
259         if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80)
260         {
261             x = ((inp[0] & 0x0f) << 12) | ((inp[1] & 0x3f) << 6) |
262                 (inp[2] & 0x3f);
263             if (x >= 0x800)
264                 *no_read = 3;
265             else
266                 *error = YAZ_ICONV_EILSEQ;
267         }
268         else
269             *error = YAZ_ICONV_EILSEQ;
270     }            
271     else if (inp[0] <= 0xf7 && inbytesleft >= 4)
272     {
273         if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80
274             && (inp[3] & 0xc0) == 0x80)
275         {
276             x = ((inp[0] & 0x07) << 18) | ((inp[1] & 0x3f) << 12) |
277                 ((inp[2] & 0x3f) << 6) | (inp[3] & 0x3f);
278             if (x >= 0x10000)
279                 *no_read = 4;
280             else
281                 *error = YAZ_ICONV_EILSEQ;
282         }
283         else
284             *error = YAZ_ICONV_EILSEQ;
285     }
286     else if (inp[0] <= 0xfb && inbytesleft >= 5)
287     {
288         if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80
289             && (inp[3] & 0xc0) == 0x80 && (inp[4] & 0xc0) == 0x80)
290         {
291             x = ((inp[0] & 0x03) << 24) | ((inp[1] & 0x3f) << 18) |
292                 ((inp[2] & 0x3f) << 12) | ((inp[3] & 0x3f) << 6) |
293                 (inp[4] & 0x3f);
294             if (x >= 0x200000)
295                 *no_read = 5;
296             else
297                 *error = YAZ_ICONV_EILSEQ;
298         }
299         else
300             *error = YAZ_ICONV_EILSEQ;
301     }
302     else if (inp[0] <= 0xfd && inbytesleft >= 6)
303     {
304         if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80
305             && (inp[3] & 0xc0) == 0x80 && (inp[4] & 0xc0) == 0x80
306             && (inp[5] & 0xc0) == 0x80)
307         {
308             x = ((inp[0] & 0x01) << 30) | ((inp[1] & 0x3f) << 24) |
309                 ((inp[2] & 0x3f) << 18) | ((inp[3] & 0x3f) << 12) |
310                 ((inp[4] & 0x3f) << 6) | (inp[5] & 0x3f);
311             if (x >= 0x4000000)
312                 *no_read = 6;
313             else
314                 *error = YAZ_ICONV_EILSEQ;
315         }
316         else
317             *error = YAZ_ICONV_EILSEQ;
318     }
319     else
320         *error = YAZ_ICONV_EINVAL;  /* incomplete sentence */
321
322     return x;
323 }
324
325 static unsigned long yaz_read_UTF8 (yaz_iconv_t cd, unsigned char *inp,
326                                     size_t inbytesleft, size_t *no_read)
327 {
328     return yaz_read_UTF8_char(inp, inbytesleft, no_read, &cd->my_errno);
329 }
330
331 static unsigned long yaz_read_UCS4 (yaz_iconv_t cd, unsigned char *inp,
332                                     size_t inbytesleft, size_t *no_read)
333 {
334     unsigned long x = 0;
335     
336     if (inbytesleft < 4)
337     {
338         cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
339         *no_read = 0;
340     }
341     else
342     {
343         x = (inp[0]<<24) | (inp[1]<<16) | (inp[2]<<8) | inp[3];
344         *no_read = 4;
345     }
346     return x;
347 }
348
349 static unsigned long yaz_read_UCS4LE (yaz_iconv_t cd, unsigned char *inp,
350                                       size_t inbytesleft, size_t *no_read)
351 {
352     unsigned long x = 0;
353     
354     if (inbytesleft < 4)
355     {
356         cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
357         *no_read = 0;
358     }
359     else
360     {
361         x = (inp[3]<<24) | (inp[2]<<16) | (inp[1]<<8) | inp[0];
362         *no_read = 4;
363     }
364     return x;
365 }
366
367 #if HAVE_WCHAR_H
368 static unsigned long yaz_read_wchar_t (yaz_iconv_t cd, unsigned char *inp,
369                                        size_t inbytesleft, size_t *no_read)
370 {
371     unsigned long x = 0;
372     
373     if (inbytesleft < sizeof(wchar_t))
374     {
375         cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
376         *no_read = 0;
377     }
378     else
379     {
380         wchar_t wch;
381         memcpy (&wch, inp, sizeof(wch));
382         x = wch;
383         *no_read = sizeof(wch);
384     }
385     return x;
386 }
387 #endif
388
389 static unsigned long yaz_read_iso5428_1984(yaz_iconv_t cd, unsigned char *inp,
390                                            size_t inbytesleft, size_t *no_read)
391 {
392     unsigned long x = 0;
393     int tonos = 0;
394     int dialitika = 0;
395
396     *no_read = 0;
397     while (inbytesleft > 0)
398     {
399         if (*inp == 0xa2)
400         {
401             tonos = 1;
402         }
403         else if (*inp == 0xa3)
404         {
405             dialitika = 1;
406         }
407         else
408             break;
409         inp++;
410         --inbytesleft;
411         (*no_read)++;
412     }    
413     if (inbytesleft == 0)
414     {
415         cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
416         *no_read = 0;
417         return 0;
418     }
419     switch (*inp) {
420     case 0xe1: /*  alpha small */
421             if (tonos) 
422                 x = 0x03ac;
423             else 
424                 x = 0x03b1;
425             break;
426     case 0xc1: /*  alpha capital */
427             if (tonos) 
428                 x = 0x0386;
429             else 
430                 x = 0x0391;
431             break;
432
433     case 0xe2: /*  Beta small */
434             x = 0x03b2;
435             break;
436     case 0xc2: /*  Beta capital */
437             x = 0x0392;
438             break;
439
440     case 0xe4: /*  Gamma small */
441             x = 0x03b3;
442             break;
443     case 0xc4: /*  Gamma capital */
444             x = 0x0393;
445             break;
446
447     case 0xe5: /*  Delta small */
448             x = 0x03b4;
449             break;
450     case 0xc5: /*  Delta capital */
451             x = 0x0394;
452             break;
453     case 0xe6: /*  epsilon small */
454             if (tonos) 
455                 x = 0x03ad;
456             else 
457                 x = 0x03b5;
458             break;
459     case 0xc6: /*  epsilon capital */
460             if (tonos) 
461                 x = 0x0388;
462             else 
463                 x = 0x0395;
464             break;
465     case 0xe9: /*  Zeta small */
466             x = 0x03b6;
467             break;
468     case 0xc9: /*  Zeta capital */
469             x = 0x0396;
470             break;
471     case 0xea: /*  Eta small */
472             if (tonos) 
473                 x = 0x03ae;
474             else 
475                 x = 0x03b7;
476             break;
477     case 0xca: /*  Eta capital */
478             if (tonos) 
479                 x = 0x0389;
480             else 
481                 x = 0x0397;
482             break;
483     case 0xeb: /*  Theta small */
484             x = 0x03b8;
485             break;
486     case 0xcb: /*  Theta capital */
487             x = 0x0398;
488             break;
489     case 0xec: /*  Iota small */
490             if (tonos) 
491                 if (dialitika) 
492                     x = 0x0390;
493                 else 
494                     x = 0x03af;
495             else 
496                 if (dialitika) 
497                     x = 0x03ca;
498                 else 
499                     x = 0x03b9;
500             break;
501     case 0xcc: /*  Iota capital */
502             if (tonos) 
503                 x = 0x038a;
504             else 
505                 if (dialitika) 
506                     x = 0x03aa;
507                 else 
508                     x = 0x0399;
509             break;
510     case 0xed: /*  Kappa small */
511             x = 0x03ba;
512             break;
513     case 0xcd: /*  Kappa capital */
514             x = 0x039a;
515             break;
516     case 0xee: /*  Lambda small */
517             x = 0x03bb;
518             break;
519     case 0xce: /*  Lambda capital */
520             x = 0x039b;
521             break;
522     case 0xef: /*  Mu small */
523             x = 0x03bc;
524             break;
525     case 0xcf: /*  Mu capital */
526             x = 0x039c;
527             break;
528     case 0xf0: /*  Nu small */
529             x = 0x03bd;
530             break;
531     case 0xd0: /*  Nu capital */
532             x = 0x039d;
533             break;
534     case 0xf1: /*  Xi small */
535             x = 0x03be;
536             break;
537     case 0xd1: /*  Xi capital */
538             x = 0x039e;
539             break;
540     case 0xf2: /*  Omicron small */
541             if (tonos) 
542                 x = 0x03cc;
543             else 
544                 x = 0x03bf;
545             break;
546     case 0xd2: /*  Omicron capital */
547             if (tonos) 
548                 x = 0x038c;
549             else 
550                 x = 0x039f;
551             break;
552     case 0xf3: /*  Pi small */
553             x = 0x03c0;
554             break;
555     case 0xd3: /*  Pi capital */
556             x = 0x03a0;
557             break;
558     case 0xf5: /*  Rho small */
559             x = 0x03c1;
560             break;
561     case 0xd5: /*  Rho capital */
562             x = 0x03a1;
563             break;
564     case 0xf7: /*  Sigma small (end of words) */
565             x = 0x03c2;
566             break;
567     case 0xf6: /*  Sigma small */
568             x = 0x03c3;
569             break;
570     case 0xd6: /*  Sigma capital */
571             x = 0x03a3;
572             break;
573     case 0xf8: /*  Tau small */
574             x = 0x03c4;
575             break;
576     case 0xd8: /*  Tau capital */
577             x = 0x03a4;
578             break;
579     case 0xf9: /*  Upsilon small */
580             if (tonos) 
581                 if (dialitika) 
582                     x = 0x03b0;
583                 else 
584                     x = 0x03cd;
585             else 
586                 if (dialitika) 
587                     x = 0x03cb;
588                 else 
589                     x = 0x03c5;
590             break;
591     case 0xd9: /*  Upsilon capital */
592             if (tonos) 
593                 x = 0x038e;
594             else 
595                 if (dialitika) 
596                     x = 0x03ab;
597                 else 
598                     x = 0x03a5;
599             break;
600     case 0xfa: /*  Phi small */
601             x = 0x03c6;
602             break;
603     case 0xda: /*  Phi capital */
604             x = 0x03a6;
605             break;
606     case 0xfb: /*  Chi small */
607             x = 0x03c7;
608             break;
609     case 0xdb: /*  Chi capital */
610             x = 0x03a7;
611             break;
612     case 0xfc: /*  Psi small */
613             x = 0x03c8;
614             break;
615     case 0xdc: /*  Psi capital */
616             x = 0x03a8;
617             break;
618     case 0xfd: /*  Omega small */
619             if (tonos) 
620                 x = 0x03ce;
621             else 
622                 x = 0x03c9;
623             break;
624     case 0xdd: /*  Omega capital */
625             if (tonos) 
626                 x = 0x038f;
627             else 
628                 x = 0x03a9;
629             break;
630     default:
631         x = *inp;
632         break;
633     }
634     (*no_read)++;
635     
636     return x;
637 }
638
639 static size_t yaz_write_iso5428_1984(yaz_iconv_t cd, unsigned long x,
640                                      char **outbuf, size_t *outbytesleft)
641 {
642     size_t k = 0;
643     unsigned char *out = (unsigned char*) *outbuf;
644     if (*outbytesleft < 3)
645     {
646         cd->my_errno = YAZ_ICONV_E2BIG;  /* not room for output */
647         return (size_t)(-1);
648     }
649     switch (x)
650     {
651     case 0x03ac : out[k++]=0xa2; out[k++]=0xe1; break;
652     case 0x03b1 : out[k++]=0xe1; break;
653     case 0x0386 : out[k++]=0xa2; out[k++]=0xc1; break;
654     case 0x0391 : out[k++]=0xc1; break;
655     case 0x03b2 : out[k++]=0xe2; break;
656     case 0x0392 : out[k++]=0xc2; break;
657     case 0x03b3 : out[k++]=0xe4; break;
658     case 0x0393 : out[k++]=0xc4; break;
659     case 0x03b4 : out[k++]=0xe5; break;
660     case 0x0394 : out[k++]=0xc5; break;
661     case 0x03ad : out[k++]=0xa2; out[k++]=0xe6; break;
662     case 0x03b5 : out[k++]=0xe6; break;
663     case 0x0388 : out[k++]=0xa2; out[k++]=0xc6; break;
664     case 0x0395 : out[k++]=0xc6; break;
665     case 0x03b6 : out[k++]=0xe9; break;
666     case 0x0396 : out[k++]=0xc9; break;
667     case 0x03ae : out[k++]=0xa2; out[k++]=0xea; break;
668     case 0x03b7 : out[k++]=0xea; break;
669     case 0x0389 : out[k++]=0xa2; out[k++]=0xca; break;
670     case 0x0397 : out[k++]=0xca; break;
671     case 0x03b8 : out[k++]=0xeb; break;
672     case 0x0398 : out[k++]=0xcb; break;
673     case 0x0390 : out[k++]=0xa2; out[k++]=0xa3; out[k++]=0xec; break;
674     case 0x03af : out[k++]=0xa2; out[k++]=0xec; break;
675     case 0x03ca : out[k++]=0xa3; out[k++]=0xec; break;
676     case 0x03b9 : out[k++]=0xec; break;
677     case 0x038a : out[k++]=0xa2; out[k++]=0xcc; break;
678     case 0x03aa : out[k++]=0xa3; out[k++]=0xcc; break;
679     case 0x0399 : out[k++]=0xcc; break;
680     case 0x03ba : out[k++]=0xed; break;
681     case 0x039a : out[k++]=0xcd; break;
682     case 0x03bb : out[k++]=0xee; break;
683     case 0x039b : out[k++]=0xce; break;
684     case 0x03bc : out[k++]=0xef; break;
685     case 0x039c : out[k++]=0xcf; break;
686     case 0x03bd : out[k++]=0xf0; break;
687     case 0x039d : out[k++]=0xd0; break;
688     case 0x03be : out[k++]=0xf1; break;
689     case 0x039e : out[k++]=0xd1; break;
690     case 0x03cc : out[k++]=0xa2; out[k++]=0xf2; break;
691     case 0x03bf : out[k++]=0xf2; break;
692     case 0x038c : out[k++]=0xa2; out[k++]=0xd2; break;
693     case 0x039f : out[k++]=0xd2; break;
694     case 0x03c0 : out[k++]=0xf3; break;
695     case 0x03a0 : out[k++]=0xd3; break;
696     case 0x03c1 : out[k++]=0xf5; break;
697     case 0x03a1 : out[k++]=0xd5; break;
698     case 0x03c2 : out[k++]=0xf7; break;
699     case 0x03c3 : out[k++]=0xf6; break;
700     case 0x03a3 : out[k++]=0xd6; break;
701     case 0x03c4 : out[k++]=0xf8; break;
702     case 0x03a4 : out[k++]=0xd8; break;
703     case 0x03b0 : out[k++]=0xa2; out[k++]=0xa3; out[k++]=0xf9; break;
704     case 0x03cd : out[k++]=0xa2; out[k++]=0xf9; break;
705     case 0x03cb : out[k++]=0xa3; out[k++]=0xf9; break;
706     case 0x03c5 : out[k++]=0xf9; break;
707     case 0x038e : out[k++]=0xa2; out[k++]=0xd9; break;
708     case 0x03ab : out[k++]=0xa3; out[k++]=0xd9; break;
709     case 0x03a5 : out[k++]=0xd9; break;
710     case 0x03c6 : out[k++]=0xfa; break;
711     case 0x03a6 : out[k++]=0xda; break;
712     case 0x03c7 : out[k++]=0xfb; break;
713     case 0x03a7 : out[k++]=0xdb; break;
714     case 0x03c8 : out[k++]=0xfc; break;
715     case 0x03a8 : out[k++]=0xdc; break;
716     case 0x03ce : out[k++]=0xa2; out[k++]=0xfd; break;
717     case 0x03c9 : out[k++]=0xfd; break;
718     case 0x038f : out[k++]=0xa2; out[k++]=0xdd; break;
719     case 0x03a9 : out[k++]=0xdd; break;
720     default:
721         if (x > 255)
722         {
723             cd->my_errno = YAZ_ICONV_EILSEQ;
724             return (size_t) -1;
725         }
726         out[k++] = x;
727         break;
728     }
729     *outbytesleft -= k;
730     (*outbuf) += k;
731     return 0;
732 }
733
734 static unsigned long yaz_read_advancegreek(yaz_iconv_t cd, unsigned char *inp,
735                                            size_t inbytesleft, size_t *no_read)
736 {
737     unsigned long x = 0;
738     int shift = 0;
739     int tonos = 0;
740     int dialitika = 0;
741
742     *no_read = 0;
743     while (inbytesleft > 0)
744     {
745         if (*inp == 0x9d)
746         {
747             tonos = 1;
748         }
749         else if (*inp == 0x9e)
750         {
751             dialitika = 1;
752         }
753         else if (*inp == 0x9f)
754         {
755             shift = 1;
756         }
757         else
758             break;
759         inp++;
760         --inbytesleft;
761         (*no_read)++;
762     }    
763     if (inbytesleft == 0)
764     {
765         cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
766         *no_read = 0;
767         return 0;
768     }
769     switch (*inp) {
770     case 0x81:
771         if (shift) 
772             if (tonos) 
773                 x = 0x0386;
774             else 
775                 x = 0x0391;
776         else 
777             if (tonos) 
778                 x = 0x03ac;
779             else 
780                 x = 0x03b1;
781         break;
782     case 0x82:
783         if (shift) 
784             x = 0x0392;
785         else 
786             x = 0x03b2;
787         
788         break;
789     case 0x83:
790         if (shift) 
791             x = 0x0393;
792         else 
793             x = 0x03b3;
794         break;
795     case 0x84:
796         if (shift) 
797             x = 0x0394;
798         else 
799             x = 0x03b4;
800         break;
801     case 0x85:
802         if (shift) 
803             if (tonos) 
804                 x = 0x0388;
805             else 
806                 x = 0x0395;
807         else 
808             if (tonos) 
809                 x = 0x03ad;
810             else 
811                 x = 0x03b5;
812         break;
813     case 0x86:
814         if (shift) 
815             x = 0x0396;
816         else 
817             x = 0x03b6;
818         break;
819     case 0x87:
820         if (shift) 
821             if (tonos) 
822                 x = 0x0389;
823             else 
824                 x = 0x0397;
825         else 
826             if (tonos) 
827                 x = 0x03ae;
828             else 
829                 x = 0x03b7;
830         break;
831     case 0x88:
832         if (shift) 
833             x = 0x0398;
834         else 
835             x = 0x03b8;
836         break;
837     case 0x89:
838         if (shift) 
839             if (tonos) 
840                 x = 0x038a;
841             else 
842                 if (dialitika) 
843                     x = 0x03aa;
844                 else 
845                     x = 0x0399;
846         else 
847             if (tonos) 
848                 if (dialitika) 
849                     x = 0x0390;
850                 else 
851                     x = 0x03af;
852         
853             else 
854                 if (dialitika) 
855                     x = 0x03ca;
856                 else 
857                     x = 0x03b9;
858         break;
859     case 0x8a:
860         if (shift) 
861             x = 0x039a;
862         else 
863             x = 0x03ba;
864         
865         break;
866     case 0x8b:
867         if (shift) 
868             x = 0x039b;
869         else 
870             x = 0x03bb;
871         break;
872     case 0x8c:
873         if (shift) 
874             x = 0x039c;
875         else 
876             x = 0x03bc;
877         
878         break;
879     case 0x8d:
880         if (shift) 
881             x = 0x039d;
882         else 
883             x = 0x03bd;
884         break;
885     case 0x8e:
886         if (shift) 
887             x = 0x039e;
888         else 
889             x = 0x03be;
890         break;
891     case 0x8f:
892         if (shift) 
893             if (tonos) 
894                 x = 0x038c;
895             else 
896                 x = 0x039f;
897         else 
898             if (tonos) 
899                 x = 0x03cc;
900             else 
901                 x = 0x03bf;
902         break;
903     case 0x90:
904         if (shift) 
905             x = 0x03a0;
906         else 
907             x = 0x03c0;
908         break;
909     case 0x91:
910         if (shift) 
911             x = 0x03a1;
912         else 
913             x = 0x03c1;
914         break;
915     case 0x92:
916         x = 0x03c2;
917         break;
918     case 0x93:
919         if (shift) 
920             x = 0x03a3;
921         else 
922             x = 0x03c3;
923         break;
924     case 0x94:
925         if (shift) 
926             x = 0x03a4;
927         else 
928             x = 0x03c4;
929         break;
930     case 0x95:
931         if (shift) 
932             if (tonos) 
933                 x = 0x038e;
934             else 
935                 if (dialitika) 
936                     x = 0x03ab;
937                 else 
938                     x = 0x03a5;
939         else 
940             if (tonos) 
941                 if (dialitika) 
942                     x = 0x03b0;
943                 else 
944                     x = 0x03cd;
945         
946             else 
947                 if (dialitika) 
948                     x = 0x03cb;
949                 else 
950                     x = 0x03c5;
951         break;
952     case 0x96:
953         if (shift) 
954             x = 0x03a6;
955         else 
956             x = 0x03c6;
957         break;
958     case 0x97:
959         if (shift) 
960             x = 0x03a7;
961         else 
962             x = 0x03c7;
963         break;
964     case 0x98:
965         if (shift) 
966             x = 0x03a8;
967         else 
968             x = 0x03c8;
969         
970         break;
971         
972     case 0x99:
973         if (shift) 
974             if (tonos) 
975                 x = 0x038f;
976             else 
977                 x = 0x03a9;
978         else 
979             if (tonos) 
980                 x = 0x03ce;
981             else 
982                 x = 0x03c9;
983         break;
984     default:
985         x = *inp;
986         break;
987     }
988     (*no_read)++;
989     
990     return x;
991 }
992
993 static size_t yaz_write_advancegreek(yaz_iconv_t cd, unsigned long x,
994                                      char **outbuf, size_t *outbytesleft)
995 {
996     size_t k = 0;
997     unsigned char *out = (unsigned char*) *outbuf;
998     if (*outbytesleft < 3)
999     {
1000         cd->my_errno = YAZ_ICONV_E2BIG;  /* not room for output */
1001         return (size_t)(-1);
1002     }
1003     switch (x)
1004     {
1005     case 0x03ac : out[k++]=0x9d; out[k++]=0x81; break;
1006     case 0x03ad : out[k++]=0x9d; out[k++]=0x85; break;
1007     case 0x03ae : out[k++]=0x9d; out[k++]=0x87; break;
1008     case 0x03af : out[k++]=0x9d; out[k++]=0x89; break;
1009     case 0x03cc : out[k++]=0x9d; out[k++]=0x8f; break;
1010     case 0x03cd : out[k++]=0x9d; out[k++]=0x95; break;
1011     case 0x03ce : out[k++]=0x9d; out[k++]=0x99; break;
1012     case 0x0390 : out[k++]=0x9d; out[k++]=0x9e; out[k++]=0x89; break;
1013     case 0x03b0 : out[k++]=0x9d; out[k++]=0x9e; out[k++]=0x95; break;
1014     case 0x0386 : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x81; break;
1015     case 0x0388 : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x85; break;
1016     case 0x0389 : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x87; break;
1017     case 0x038a : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x89; break;
1018     case 0x038c : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x8f; break;
1019     case 0x038e : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x95; break;
1020     case 0x038f : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x99; break;
1021     case 0x03ca : out[k++]=0x9e; out[k++]=0x89; break;
1022     case 0x03cb : out[k++]=0x9e; out[k++]=0x95; break;
1023     case 0x03aa : out[k++]=0x9e; out[k++]=0x9f; out[k++]=0x89; break;
1024     case 0x03ab : out[k++]=0x9e; out[k++]=0x9f; out[k++]=0x95; break;
1025     case 0x0391 : out[k++]=0x9f; out[k++]=0x81; break;
1026     case 0x0392 : out[k++]=0x9f; out[k++]=0x82; break;
1027     case 0x0393 : out[k++]=0x9f; out[k++]=0x83; break;
1028     case 0x0394 : out[k++]=0x9f; out[k++]=0x84; break;
1029     case 0x0395 : out[k++]=0x9f; out[k++]=0x85; break;
1030     case 0x0396 : out[k++]=0x9f; out[k++]=0x86; break;
1031     case 0x0397 : out[k++]=0x9f; out[k++]=0x87; break;
1032     case 0x0398 : out[k++]=0x9f; out[k++]=0x88; break;
1033     case 0x0399 : out[k++]=0x9f; out[k++]=0x89; break;
1034     case 0x039a : out[k++]=0x9f; out[k++]=0x8a; break;
1035     case 0x039b : out[k++]=0x9f; out[k++]=0x8b; break;
1036     case 0x039c : out[k++]=0x9f; out[k++]=0x8c; break;
1037     case 0x039d : out[k++]=0x9f; out[k++]=0x8d; break;
1038     case 0x039e : out[k++]=0x9f; out[k++]=0x8e; break;
1039     case 0x039f : out[k++]=0x9f; out[k++]=0x8f; break;
1040     case 0x03a0 : out[k++]=0x9f; out[k++]=0x90; break;
1041     case 0x03a1 : out[k++]=0x9f; out[k++]=0x91; break;
1042     case 0x03a3 : out[k++]=0x9f; out[k++]=0x93; break;
1043     case 0x03a4 : out[k++]=0x9f; out[k++]=0x94; break;
1044     case 0x03a5 : out[k++]=0x9f; out[k++]=0x95; break;
1045     case 0x03a6 : out[k++]=0x9f; out[k++]=0x96; break;
1046     case 0x03a7 : out[k++]=0x9f; out[k++]=0x97; break;
1047     case 0x03a8 : out[k++]=0x9f; out[k++]=0x98; break;
1048     case 0x03a9 : out[k++]=0x9f; out[k++]=0x99; break;
1049     case 0x03b1 : out[k++]=0x81; break;
1050     case 0x03b2 : out[k++]=0x82; break;
1051     case 0x03b3 : out[k++]=0x83; break;
1052     case 0x03b4 : out[k++]=0x84; break;
1053     case 0x03b5 : out[k++]=0x85; break;
1054     case 0x03b6 : out[k++]=0x86; break;
1055     case 0x03b7 : out[k++]=0x87; break;
1056     case 0x03b8 : out[k++]=0x88; break;
1057     case 0x03b9 : out[k++]=0x89; break;
1058     case 0x03ba : out[k++]=0x8a; break;
1059     case 0x03bb : out[k++]=0x8b; break;
1060     case 0x03bc : out[k++]=0x8c; break;
1061     case 0x03bd : out[k++]=0x8d; break;
1062     case 0x03be : out[k++]=0x8e; break;
1063     case 0x03bf : out[k++]=0x8f; break;
1064     case 0x03c0 : out[k++]=0x90; break;
1065     case 0x03c1 : out[k++]=0x91; break;
1066     case 0x03c2 : out[k++]=0x92; break;
1067     case 0x03c3 : out[k++]=0x93; break;
1068     case 0x03c4 : out[k++]=0x94; break;
1069     case 0x03c5 : out[k++]=0x95; break;
1070     case 0x03c6 : out[k++]=0x96; break;
1071     case 0x03c7 : out[k++]=0x96; break;
1072     case 0x03c8 : out[k++]=0x98; break;
1073     case 0x03c9 : out[k++]=0x99; break;
1074     default:
1075         if (x > 255)
1076         {
1077             cd->my_errno = YAZ_ICONV_EILSEQ;
1078             return (size_t) -1;
1079         }
1080         out[k++] = x;
1081         break;
1082     }
1083     *outbytesleft -= k;
1084     (*outbuf) += k;
1085     return 0;
1086 }
1087
1088
1089 static unsigned long yaz_read_marc8_comb (yaz_iconv_t cd, unsigned char *inp,
1090                                           size_t inbytesleft, size_t *no_read,
1091                                           int *comb);
1092
1093 static unsigned long yaz_read_marc8 (yaz_iconv_t cd, unsigned char *inp,
1094                                      size_t inbytesleft, size_t *no_read)
1095 {
1096     unsigned long x;
1097     if (cd->comb_offset < cd->comb_size)
1098     {
1099         *no_read = cd->comb_no_read[cd->comb_offset];
1100         x = cd->comb_x[cd->comb_offset];
1101
1102         /* special case for double-diacritic combining characters, 
1103            INVERTED BREVE and DOUBLE TILDE.
1104            We'll increment the no_read counter by 1, since we want to skip over
1105            the processing of the closing ligature character
1106         */
1107         /* this code is no longer necessary.. our handlers code in
1108            yaz_marc8_?_conv (generated by charconv.tcl) now returns
1109            0 and no_read=1 when a sequence does not match the input.
1110            The SECOND HALFs in codetables.xml produces a non-existant
1111            entry in the conversion trie.. Hence when met, the input byte is
1112            skipped as it should (in yaz_iconv)
1113         */
1114 #if 0
1115         if (x == 0x0361 || x == 0x0360)
1116             *no_read += 1;
1117 #endif
1118         cd->comb_offset++;
1119         return x;
1120     }
1121
1122     cd->comb_offset = 0;
1123     for (cd->comb_size = 0; cd->comb_size < 8; cd->comb_size++)
1124     {
1125         int comb = 0;
1126
1127         if (inbytesleft == 0 && cd->comb_size)
1128         {
1129             cd->my_errno = YAZ_ICONV_EINVAL;
1130             x = 0;
1131             *no_read = 0;
1132             break;
1133         }
1134         x = yaz_read_marc8_comb(cd, inp, inbytesleft, no_read, &comb);
1135         if (!comb || !x)
1136             break;
1137         cd->comb_x[cd->comb_size] = x;
1138         cd->comb_no_read[cd->comb_size] = *no_read;
1139         inp += *no_read;
1140         inbytesleft = inbytesleft - *no_read;
1141     }
1142     return x;
1143 }
1144
1145 static unsigned long yaz_read_marc8s(yaz_iconv_t cd, unsigned char *inp,
1146                                      size_t inbytesleft, size_t *no_read)
1147 {
1148     unsigned long x = yaz_read_marc8(cd, inp, inbytesleft, no_read);
1149     if (x && cd->comb_size == 1)
1150     {
1151         /* For MARC8s we try to get a Latin-1 page code out of it */
1152         int i;
1153         for (i = 0; latin1_comb[i].x1; i++)
1154             if (cd->comb_x[0] == latin1_comb[i].x2 && x == latin1_comb[i].x1)
1155             {
1156                 *no_read += cd->comb_no_read[0];
1157                 cd->comb_size = 0;
1158                 x = latin1_comb[i].y;
1159                 break;
1160             }
1161     }
1162     return x;
1163 }
1164
1165 static unsigned long yaz_read_marc8_comb(yaz_iconv_t cd, unsigned char *inp,
1166                                          size_t inbytesleft, size_t *no_read,
1167                                          int *comb)
1168 {
1169     *no_read = 0;
1170     while(inbytesleft >= 1 && inp[0] == 27)
1171     {
1172         int ch;
1173         size_t inbytesleft0 = inbytesleft;
1174         inp++;
1175         inbytesleft--;
1176         if (inbytesleft > 0 && *inp == '$')
1177         {
1178             inbytesleft--;
1179             inp++;
1180         }
1181         if (inbytesleft <= 1)
1182         {
1183             *no_read = 0;
1184             cd->my_errno = YAZ_ICONV_EINVAL;
1185             return 0;
1186         }
1187         inbytesleft--;
1188         ch = *inp++;
1189         if (inbytesleft > 0 && (ch == '(' || ch == ','))
1190         {
1191             inbytesleft--;
1192             cd->g0_mode = *inp++;
1193         }
1194         else if (inbytesleft > 0 && (ch == ')' || ch == '-'))
1195         {
1196             inbytesleft--;
1197             cd->g1_mode = *inp++;
1198         }
1199         else
1200             cd->g0_mode = ch;
1201
1202         (*no_read) += inbytesleft0 - inbytesleft;
1203     }
1204     if (inbytesleft <= 0)
1205         return 0;
1206     else if (*inp == ' ')
1207     {
1208         *no_read += 1;
1209         return ' ';
1210     }
1211     else
1212     {
1213         unsigned long x;
1214         size_t no_read_sub = 0;
1215         int mode = *inp < 128 ? cd->g0_mode : cd->g1_mode;
1216         *comb = 0;
1217
1218         switch(mode)
1219         {
1220         case 'B':  /* Basic ASCII */
1221         case 's':  /* ASCII */
1222         case 'E':  /* ANSEL */
1223             x = yaz_marc8_42_conv(inp, inbytesleft, &no_read_sub, comb);
1224             if (!x)
1225             {
1226                 no_read_sub = 0;
1227                 x = yaz_marc8_45_conv(inp, inbytesleft, &no_read_sub, comb);
1228             }
1229             break;
1230         case 'g':  /* Greek */
1231             x = yaz_marc8_67_conv(inp, inbytesleft, &no_read_sub, comb);
1232             break;
1233         case 'b':  /* Subscripts */
1234             x = yaz_marc8_62_conv(inp, inbytesleft, &no_read_sub, comb);
1235             break;
1236         case 'p':  /* Superscripts */
1237             x = yaz_marc8_70_conv(inp, inbytesleft, &no_read_sub, comb);
1238             break;
1239         case '2':  /* Basic Hebrew */
1240             x = yaz_marc8_32_conv(inp, inbytesleft, &no_read_sub, comb);
1241             break;
1242         case 'N':  /* Basic Cyrillic */
1243             x = yaz_marc8_4E_conv(inp, inbytesleft, &no_read_sub, comb);
1244             break;
1245         case 'Q':  /* Extended Cyrillic */
1246             x = yaz_marc8_51_conv(inp, inbytesleft, &no_read_sub, comb);
1247             break;
1248         case '3':  /* Basic Arabic */
1249             x = yaz_marc8_33_conv(inp, inbytesleft, &no_read_sub, comb);
1250             break;
1251         case '4':  /* Extended Arabic */
1252             x = yaz_marc8_34_conv(inp, inbytesleft, &no_read_sub, comb);
1253             break;
1254         case 'S':  /* Greek */
1255             x = yaz_marc8_53_conv(inp, inbytesleft, &no_read_sub, comb);
1256             break;
1257         case '1':  /* Chinese, Japanese, Korean (EACC) */
1258             x = yaz_marc8_31_conv(inp, inbytesleft, &no_read_sub, comb);
1259             break;
1260         default:
1261             *no_read = 0;
1262             cd->my_errno = YAZ_ICONV_EILSEQ;
1263             return 0;
1264         }
1265         *no_read += no_read_sub;
1266         return x;
1267     }
1268 }
1269
1270 static size_t yaz_write_UTF8(yaz_iconv_t cd, unsigned long x,
1271                              char **outbuf, size_t *outbytesleft)
1272 {
1273     return yaz_write_UTF8_char(x, outbuf, outbytesleft, &cd->my_errno);
1274 }
1275
1276 size_t yaz_write_UTF8_char(unsigned long x,
1277                            char **outbuf, size_t *outbytesleft,
1278                            int *error)
1279 {
1280     unsigned char *outp = (unsigned char *) *outbuf;
1281
1282     if (x <= 0x7f && *outbytesleft >= 1)
1283     {
1284         *outp++ = (unsigned char) x;
1285         (*outbytesleft)--;
1286     } 
1287     else if (x <= 0x7ff && *outbytesleft >= 2)
1288     {
1289         *outp++ = (unsigned char) ((x >> 6) | 0xc0);
1290         *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1291         (*outbytesleft) -= 2;
1292     }
1293     else if (x <= 0xffff && *outbytesleft >= 3)
1294     {
1295         *outp++ = (unsigned char) ((x >> 12) | 0xe0);
1296         *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
1297         *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1298         (*outbytesleft) -= 3;
1299     }
1300     else if (x <= 0x1fffff && *outbytesleft >= 4)
1301     {
1302         *outp++ = (unsigned char) ((x >> 18) | 0xf0);
1303         *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
1304         *outp++ = (unsigned char) (((x >> 6)  & 0x3f) | 0x80);
1305         *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1306         (*outbytesleft) -= 4;
1307     }
1308     else if (x <= 0x3ffffff && *outbytesleft >= 5)
1309     {
1310         *outp++ = (unsigned char) ((x >> 24) | 0xf8);
1311         *outp++ = (unsigned char) (((x >> 18) & 0x3f) | 0x80);
1312         *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
1313         *outp++ = (unsigned char) (((x >> 6)  & 0x3f) | 0x80);
1314         *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1315         (*outbytesleft) -= 5;
1316     }
1317     else if (*outbytesleft >= 6)
1318     {
1319         *outp++ = (unsigned char) ((x >> 30) | 0xfc);
1320         *outp++ = (unsigned char) (((x >> 24) & 0x3f) | 0x80);
1321         *outp++ = (unsigned char) (((x >> 18) & 0x3f) | 0x80);
1322         *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
1323         *outp++ = (unsigned char) (((x >> 6)  & 0x3f) | 0x80);
1324         *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1325         (*outbytesleft) -= 6;
1326     }
1327     else 
1328     {
1329         *error = YAZ_ICONV_E2BIG;  /* not room for output */
1330         return (size_t)(-1);
1331     }
1332     *outbuf = (char *) outp;
1333     return 0;
1334 }
1335
1336 static size_t yaz_write_ISO8859_1 (yaz_iconv_t cd, unsigned long x,
1337                                    char **outbuf, size_t *outbytesleft)
1338 {
1339     /* list of two char unicode sequence that, when combined, are
1340        equivalent to single unicode chars that can be represented in
1341        ISO-8859-1/Latin-1.
1342        Regular iconv on Linux at least does not seem to convert these,
1343        but since MARC-8 to UTF-8 generates these composed sequence
1344        we get a better chance of a successful MARC-8 -> ISO-8859-1
1345        conversion */
1346     unsigned char *outp = (unsigned char *) *outbuf;
1347
1348     if (cd->compose_char)
1349     {
1350         int i;
1351         for (i = 0; latin1_comb[i].x1; i++)
1352             if (cd->compose_char == latin1_comb[i].x1 && x == latin1_comb[i].x2)
1353             {
1354                 x = latin1_comb[i].y;
1355                 break;
1356             }
1357         if (*outbytesleft < 1)
1358         {  /* no room. Retain compose_char and bail out */
1359             cd->my_errno = YAZ_ICONV_E2BIG;
1360             return (size_t)(-1);
1361         }
1362         if (!latin1_comb[i].x1) 
1363         {   /* not found. Just write compose_char */
1364             *outp++ = (unsigned char) cd->compose_char;
1365             (*outbytesleft)--;
1366             *outbuf = (char *) outp;
1367         }
1368         /* compose_char used so reset it. x now holds current char */
1369         cd->compose_char = 0;
1370     }
1371
1372     if (x > 32 && x < 127 && cd->compose_char == 0)
1373     {
1374         cd->compose_char = x;
1375         return 0;
1376     }
1377     else if (x > 255 || x < 1)
1378     {
1379         cd->my_errno = YAZ_ICONV_EILSEQ;
1380         return (size_t) -1;
1381     }
1382     else if (*outbytesleft < 1)
1383     {
1384         cd->my_errno = YAZ_ICONV_E2BIG;
1385         return (size_t)(-1);
1386     }
1387     *outp++ = (unsigned char) x;
1388     (*outbytesleft)--;
1389     *outbuf = (char *) outp;
1390     return 0;
1391 }
1392
1393 static size_t yaz_flush_ISO8859_1(yaz_iconv_t cd,
1394                                   char **outbuf, size_t *outbytesleft)
1395 {
1396     if (cd->compose_char)
1397     {
1398         unsigned char *outp = (unsigned char *) *outbuf;
1399         if (*outbytesleft < 1)
1400         {
1401             cd->my_errno = YAZ_ICONV_E2BIG;
1402             return (size_t)(-1);
1403         }
1404         *outp++ = (unsigned char) cd->compose_char;
1405         (*outbytesleft)--;
1406         *outbuf = (char *) outp;
1407         cd->compose_char = 0;
1408     }
1409     return 0;
1410 }
1411
1412 static size_t yaz_write_UCS4 (yaz_iconv_t cd, unsigned long x,
1413                               char **outbuf, size_t *outbytesleft)
1414 {
1415     unsigned char *outp = (unsigned char *) *outbuf;
1416     if (*outbytesleft >= 4)
1417     {
1418         *outp++ = (unsigned char) (x>>24);
1419         *outp++ = (unsigned char) (x>>16);
1420         *outp++ = (unsigned char) (x>>8);
1421         *outp++ = (unsigned char) x;
1422         (*outbytesleft) -= 4;
1423     }
1424     else
1425     {
1426         cd->my_errno = YAZ_ICONV_E2BIG;
1427         return (size_t)(-1);
1428     }
1429     *outbuf = (char *) outp;
1430     return 0;
1431 }
1432
1433 static size_t yaz_write_UCS4LE (yaz_iconv_t cd, unsigned long x,
1434                                 char **outbuf, size_t *outbytesleft)
1435 {
1436     unsigned char *outp = (unsigned char *) *outbuf;
1437     if (*outbytesleft >= 4)
1438     {
1439         *outp++ = (unsigned char) x;
1440         *outp++ = (unsigned char) (x>>8);
1441         *outp++ = (unsigned char) (x>>16);
1442         *outp++ = (unsigned char) (x>>24);
1443         (*outbytesleft) -= 4;
1444     }
1445     else
1446     {
1447         cd->my_errno = YAZ_ICONV_E2BIG;
1448         return (size_t)(-1);
1449     }
1450     *outbuf = (char *) outp;
1451     return 0;
1452 }
1453
1454 static unsigned long lookup_marc8(yaz_iconv_t cd,
1455                                   unsigned long x, int *comb,
1456                                   const char **page_chr)
1457 {
1458     char utf8_buf[7];
1459     char *utf8_outbuf = utf8_buf;
1460     size_t utf8_outbytesleft = sizeof(utf8_buf)-1, r;
1461
1462     r = yaz_write_UTF8(cd, x, &utf8_outbuf, &utf8_outbytesleft);
1463     if (r == (size_t)(-1))
1464     {
1465         cd->my_errno = YAZ_ICONV_EILSEQ;
1466         return 0;
1467     }
1468     else
1469     {
1470         unsigned char *inp;
1471         size_t inbytesleft, no_read_sub = 0;
1472         unsigned long x;
1473
1474         *utf8_outbuf = '\0';        
1475         inp = (unsigned char *) utf8_buf;
1476         inbytesleft = strlen(utf8_buf);
1477
1478         x = yaz_marc8r_42_conv(inp, inbytesleft, &no_read_sub, comb);
1479         if (x)
1480         {
1481             *page_chr = ESC "(B";
1482             return x;
1483         }
1484         x = yaz_marc8r_45_conv(inp, inbytesleft, &no_read_sub, comb);
1485         if (x)
1486         {
1487             *page_chr = ESC "(B";
1488             return x;
1489         }
1490         x = yaz_marc8r_62_conv(inp, inbytesleft, &no_read_sub, comb);
1491         if (x)
1492         {
1493             *page_chr = ESC "b";
1494             return x;
1495         }
1496         x = yaz_marc8r_70_conv(inp, inbytesleft, &no_read_sub, comb);
1497         if (x)
1498         {
1499             *page_chr = ESC "p";
1500             return x;
1501         }
1502         x = yaz_marc8r_32_conv(inp, inbytesleft, &no_read_sub, comb);
1503         if (x)
1504         {
1505             *page_chr = ESC "(2";
1506             return x;
1507         }
1508         x = yaz_marc8r_4E_conv(inp, inbytesleft, &no_read_sub, comb);
1509         if (x)
1510         {
1511             *page_chr = ESC "(N";
1512             return x;
1513         }
1514         x = yaz_marc8r_51_conv(inp, inbytesleft, &no_read_sub, comb);
1515         if (x)
1516         {
1517             *page_chr = ESC "(Q";
1518             return x;
1519         }
1520         x = yaz_marc8r_33_conv(inp, inbytesleft, &no_read_sub, comb);
1521         if (x)
1522         {
1523             *page_chr = ESC "(3";
1524             return x;
1525         }
1526         x = yaz_marc8r_34_conv(inp, inbytesleft, &no_read_sub, comb);
1527         if (x)
1528         {
1529             *page_chr = ESC "(4";
1530             return x;
1531         }
1532         x = yaz_marc8r_53_conv(inp, inbytesleft, &no_read_sub, comb);
1533         if (x)
1534         {
1535             *page_chr = ESC "(S";
1536             return x;
1537         }
1538         x = yaz_marc8r_31_conv(inp, inbytesleft, &no_read_sub, comb);
1539         if (x)
1540         {
1541             *page_chr = ESC "$1";
1542             return x;
1543         }
1544         cd->my_errno = YAZ_ICONV_EILSEQ;
1545         return x;
1546     }
1547 }
1548
1549 static size_t flush_combos(yaz_iconv_t cd,
1550                            char **outbuf, size_t *outbytesleft)
1551 {
1552     unsigned long y = cd->write_marc8_last;
1553     unsigned char byte;
1554     char out_buf[4];
1555     size_t out_no = 0;
1556
1557     if (!y)
1558         return 0;
1559
1560     assert(cd->write_marc8_lpage);
1561     if (cd->write_marc8_lpage)
1562     {
1563         size_t r = yaz_write_marc8_page_chr(cd, outbuf, outbytesleft,
1564                                             cd->write_marc8_lpage);
1565         if (r)
1566             return r;
1567     }
1568
1569     byte = (unsigned char )((y>>16) & 0xff);
1570     if (byte)
1571         out_buf[out_no++] = byte;
1572     byte = (unsigned char)((y>>8) & 0xff);
1573     if (byte)
1574         out_buf[out_no++] = byte;
1575     byte = (unsigned char )(y & 0xff);
1576     if (byte)
1577         out_buf[out_no++] = byte;
1578
1579     if (out_no + 2 >= *outbytesleft)
1580     {
1581         cd->my_errno = YAZ_ICONV_E2BIG;
1582         return (size_t) (-1);
1583     }
1584
1585     memcpy(*outbuf, out_buf, out_no);
1586     *outbuf += out_no;
1587     (*outbytesleft) -= out_no;
1588     if (cd->write_marc8_second_half_char)
1589     {
1590         *(*outbuf)++ = cd->write_marc8_second_half_char;
1591         (*outbytesleft)--;
1592     }        
1593
1594     cd->write_marc8_last = 0;
1595     cd->write_marc8_lpage = 0;
1596     cd->write_marc8_second_half_char = 0;
1597     return 0;
1598 }
1599
1600 static size_t yaz_write_marc8_page_chr(yaz_iconv_t cd, 
1601                                        char **outbuf, size_t *outbytesleft,
1602                                        const char *page_chr)
1603 {
1604     const char **old_page_chr = &cd->write_marc8_g0;
1605
1606     /* are we going to a G1-set (such as such as ESC ")!E") */
1607     if (page_chr && page_chr[1] == ')')
1608         old_page_chr = &cd->write_marc8_g1;
1609
1610     if (!*old_page_chr || strcmp(page_chr, *old_page_chr))
1611     {
1612         size_t plen = 0;
1613         const char *page_out = page_chr;
1614         
1615         if (*outbytesleft < 8)
1616         {
1617             cd->my_errno = YAZ_ICONV_E2BIG;
1618             
1619             return (size_t) (-1);
1620         }
1621
1622         if (*old_page_chr)
1623         {
1624             if (!strcmp(*old_page_chr, ESC "p") 
1625                 || !strcmp(*old_page_chr, ESC "g")
1626                 || !strcmp(*old_page_chr, ESC "b"))
1627             {
1628                 page_out = ESC "s";
1629                 /* Technique 1 leave */
1630                 if (strcmp(page_chr, ESC "(B")) /* Not going ASCII page? */
1631                 {
1632                     /* Must leave script + enter new page */
1633                     plen = strlen(page_out);
1634                     memcpy(*outbuf, page_out, plen);
1635                     (*outbuf) += plen;
1636                     (*outbytesleft) -= plen;
1637                     page_out = ESC "(B";
1638                 }
1639             }
1640         }
1641         *old_page_chr = page_chr;
1642         plen = strlen(page_out);
1643         memcpy(*outbuf, page_out, plen);
1644         (*outbuf) += plen;
1645         (*outbytesleft) -= plen;
1646     }
1647     return 0;
1648 }
1649
1650
1651 static size_t yaz_write_marc8_2(yaz_iconv_t cd, unsigned long x,
1652                                 char **outbuf, size_t *outbytesleft)
1653 {
1654     int comb = 0;
1655     const char *page_chr = 0;
1656     unsigned long y = lookup_marc8(cd, x, &comb, &page_chr);
1657
1658     if (!y)
1659         return (size_t) (-1);
1660
1661     if (comb)
1662     {
1663         if (page_chr)
1664         {
1665             size_t r = yaz_write_marc8_page_chr(cd, outbuf, outbytesleft,
1666                                                 page_chr);
1667             if (r)
1668                 return r;
1669         }
1670         if (x == 0x0361)
1671             cd->write_marc8_second_half_char = 0xEC;
1672         else if (x == 0x0360)
1673             cd->write_marc8_second_half_char = 0xFB;
1674
1675         if (*outbytesleft <= 1)
1676         {
1677             cd->my_errno = YAZ_ICONV_E2BIG;
1678             return (size_t) (-1);
1679         }
1680         *(*outbuf)++ = y;
1681         (*outbytesleft)--;
1682     }
1683     else
1684     {
1685         size_t r = flush_combos(cd, outbuf, outbytesleft);
1686         if (r)
1687             return r;
1688
1689         cd->write_marc8_last = y;
1690         cd->write_marc8_lpage = page_chr;
1691     }
1692     return 0;
1693 }
1694
1695 static size_t yaz_flush_marc8(yaz_iconv_t cd,
1696                               char **outbuf, size_t *outbytesleft)
1697 {
1698     size_t r = flush_combos(cd, outbuf, outbytesleft);
1699     if (r)
1700         return r;
1701     cd->write_marc8_g1 = 0;
1702     return yaz_write_marc8_page_chr(cd, outbuf, outbytesleft, ESC "(B");
1703 }
1704
1705 static size_t yaz_write_marc8(yaz_iconv_t cd, unsigned long x,
1706                               char **outbuf, size_t *outbytesleft)
1707 {
1708     int i;
1709     for (i = 0; latin1_comb[i].x1; i++)
1710     {
1711         if (x == latin1_comb[i].y)
1712         {
1713             size_t r ;
1714             /* save the output pointers .. */
1715             char *outbuf0 = *outbuf;
1716             size_t outbytesleft0 = *outbytesleft;
1717             int last_ch = cd->write_marc8_last;
1718             const char *lpage = cd->write_marc8_lpage;
1719
1720             r = yaz_write_marc8_2(cd, latin1_comb[i].x1,
1721                                   outbuf, outbytesleft);
1722             if (r)
1723                 return r;
1724             r = yaz_write_marc8_2(cd, latin1_comb[i].x2,
1725                                   outbuf, outbytesleft);
1726             if (r && cd->my_errno == YAZ_ICONV_E2BIG)
1727             {
1728                 /* not enough room. reset output to original values */
1729                 *outbuf = outbuf0;
1730                 *outbytesleft = outbytesleft0;
1731                 cd->write_marc8_last = last_ch;
1732                 cd->write_marc8_lpage = lpage;
1733             }
1734             return r;
1735         }
1736     }
1737     return yaz_write_marc8_2(cd, x, outbuf, outbytesleft);
1738 }
1739
1740
#if HAVE_WCHAR_H
/* Write handler for wchar_t: stores x, converted to wchar_t, in the
 * machine's own representation.  Returns 0 on success, (size_t)(-1)
 * with my_errno = YAZ_ICONV_E2BIG when fewer than sizeof(wchar_t)
 * bytes are left. */
static size_t yaz_write_wchar_t(yaz_iconv_t cd, unsigned long x,
                                char **outbuf, size_t *outbytesleft)
{
    wchar_t wch;

    if (*outbytesleft < sizeof(wchar_t))
    {
        cd->my_errno = YAZ_ICONV_E2BIG;
        return (size_t)(-1);
    }
    wch = (wchar_t) x;
    /* copied rather than stored through a cast pointer */
    memcpy(*outbuf, &wch, sizeof(wch));
    *outbuf += sizeof(wch);
    (*outbytesleft) -= sizeof(wch);
    return 0;
}
#endif
1763
1764 int yaz_iconv_isbuiltin(yaz_iconv_t cd)
1765 {
1766     return cd->read_handle && cd->write_handle;
1767 }
1768
1769 yaz_iconv_t yaz_iconv_open (const char *tocode, const char *fromcode)
1770 {
1771     yaz_iconv_t cd = (yaz_iconv_t) xmalloc (sizeof(*cd));
1772
1773     cd->write_handle = 0;
1774     cd->read_handle = 0;
1775     cd->init_handle = 0;
1776     cd->flush_handle = 0;
1777     cd->my_errno = YAZ_ICONV_UNKNOWN;
1778
1779     /* a useful hack: if fromcode has leading @,
1780        the library not use YAZ's own conversions .. */
1781     if (fromcode[0] == '@')
1782         fromcode++;
1783     else
1784     {
1785         if (!yaz_matchstr(fromcode, "UTF8"))
1786         {
1787             cd->read_handle = yaz_read_UTF8;
1788             cd->init_handle = yaz_init_UTF8;
1789         }
1790         else if (!yaz_matchstr(fromcode, "ISO88591"))
1791             cd->read_handle = yaz_read_ISO8859_1;
1792         else if (!yaz_matchstr(fromcode, "UCS4"))
1793             cd->read_handle = yaz_read_UCS4;
1794         else if (!yaz_matchstr(fromcode, "UCS4LE"))
1795             cd->read_handle = yaz_read_UCS4LE;
1796         else if (!yaz_matchstr(fromcode, "MARC8"))
1797             cd->read_handle = yaz_read_marc8;
1798         else if (!yaz_matchstr(fromcode, "MARC8s"))
1799             cd->read_handle = yaz_read_marc8s;
1800         else if (!yaz_matchstr(fromcode, "advancegreek"))
1801             cd->read_handle = yaz_read_advancegreek;
1802         else if (!yaz_matchstr(fromcode, "iso54281984"))
1803             cd->read_handle = yaz_read_iso5428_1984;
1804         else if (!yaz_matchstr(fromcode, "iso5428:1984"))
1805             cd->read_handle = yaz_read_iso5428_1984;
1806 #if HAVE_WCHAR_H
1807         else if (!yaz_matchstr(fromcode, "WCHAR_T"))
1808             cd->read_handle = yaz_read_wchar_t;
1809 #endif
1810         
1811         if (!yaz_matchstr(tocode, "UTF8"))
1812             cd->write_handle = yaz_write_UTF8;
1813         else if (!yaz_matchstr(tocode, "ISO88591"))
1814         {
1815             cd->write_handle = yaz_write_ISO8859_1;
1816             cd->flush_handle = yaz_flush_ISO8859_1;
1817         }
1818         else if (!yaz_matchstr (tocode, "UCS4"))
1819             cd->write_handle = yaz_write_UCS4;
1820         else if (!yaz_matchstr(tocode, "UCS4LE"))
1821             cd->write_handle = yaz_write_UCS4LE;
1822         else if (!yaz_matchstr(tocode, "MARC8"))
1823         {
1824             cd->write_handle = yaz_write_marc8;
1825             cd->flush_handle = yaz_flush_marc8;
1826         }
1827         else if (!yaz_matchstr(tocode, "MARC8s"))
1828         {
1829             cd->write_handle = yaz_write_marc8;
1830             cd->flush_handle = yaz_flush_marc8;
1831         }
1832         else if (!yaz_matchstr(tocode, "advancegreek"))
1833         {
1834             cd->write_handle = yaz_write_advancegreek;
1835         }
1836         else if (!yaz_matchstr(tocode, "iso54281984"))
1837         {
1838             cd->write_handle = yaz_write_iso5428_1984;
1839         }
1840         else if (!yaz_matchstr(tocode, "iso5428:1984"))
1841         {
1842             cd->write_handle = yaz_write_iso5428_1984;
1843         }
1844 #if HAVE_WCHAR_H
1845         else if (!yaz_matchstr(tocode, "WCHAR_T"))
1846             cd->write_handle = yaz_write_wchar_t;
1847 #endif
1848     }
1849 #if HAVE_ICONV_H
1850     cd->iconv_cd = 0;
1851     if (!cd->read_handle || !cd->write_handle)
1852     {
1853         cd->iconv_cd = iconv_open (tocode, fromcode);
1854         if (cd->iconv_cd == (iconv_t) (-1))
1855         {
1856             xfree (cd);
1857             return 0;
1858         }
1859     }
1860 #else
1861     if (!cd->read_handle || !cd->write_handle)
1862     {
1863         xfree (cd);
1864         return 0;
1865     }
1866 #endif
1867     cd->init_flag = 1;
1868     return cd;
1869 }
1870
1871 size_t yaz_iconv(yaz_iconv_t cd, char **inbuf, size_t *inbytesleft,
1872                  char **outbuf, size_t *outbytesleft)
1873 {
1874     char *inbuf0 = 0;
1875     size_t r = 0;
1876
1877 #if HAVE_ICONV_H
1878     if (cd->iconv_cd)
1879     {
1880         size_t r =
1881             iconv(cd->iconv_cd, inbuf, inbytesleft, outbuf, outbytesleft);
1882         if (r == (size_t)(-1))
1883         {
1884             switch (yaz_errno())
1885             {
1886             case E2BIG:
1887                 cd->my_errno = YAZ_ICONV_E2BIG;
1888                 break;
1889             case EINVAL:
1890                 cd->my_errno = YAZ_ICONV_EINVAL;
1891                 break;
1892             case EILSEQ:
1893                 cd->my_errno = YAZ_ICONV_EILSEQ;
1894                 break;
1895             default:
1896                 cd->my_errno = YAZ_ICONV_UNKNOWN;
1897             }
1898         }
1899         return r;
1900     }
1901 #endif
1902
1903     if (inbuf)
1904         inbuf0 = *inbuf;
1905
1906     if (cd->init_flag)
1907     {
1908         cd->my_errno = YAZ_ICONV_UNKNOWN;
1909         cd->g0_mode = 'B';
1910         cd->g1_mode = 'B';
1911         
1912         cd->comb_offset = cd->comb_size = 0;
1913         cd->compose_char = 0;
1914         
1915         cd->write_marc8_second_half_char = 0;
1916         cd->write_marc8_last = 0;
1917         cd->write_marc8_lpage = 0;
1918         cd->write_marc8_g0 = ESC "(B";
1919         cd->write_marc8_g1 = 0;
1920         
1921         cd->unget_x = 0;
1922         cd->no_read_x = 0;
1923     }
1924
1925     if (cd->init_flag)
1926     {
1927         if (cd->init_handle && inbuf && *inbuf)
1928         {
1929             size_t no_read = 0;
1930             size_t r = (cd->init_handle)(cd, (unsigned char *) *inbuf,
1931                                          *inbytesleft, &no_read);
1932             if (r)
1933             {
1934                 if (cd->my_errno == YAZ_ICONV_EINVAL)
1935                     return r;
1936                 cd->init_flag = 0;
1937                 return r;
1938             }
1939             *inbytesleft -= no_read;
1940             *inbuf += no_read;
1941         }
1942     }
1943     cd->init_flag = 0;
1944
1945     if (!inbuf || !*inbuf)
1946     {
1947         if (outbuf && *outbuf)
1948         {
1949             if (cd->unget_x)
1950                 r = (*cd->write_handle)(cd, cd->unget_x, outbuf, outbytesleft);
1951             if (cd->flush_handle)
1952                 r = (*cd->flush_handle)(cd, outbuf, outbytesleft);
1953         }
1954         if (r == 0)
1955             cd->init_flag = 1;
1956         cd->unget_x = 0;
1957         return r;
1958     }
1959     while (1)
1960     {
1961         unsigned long x;
1962         size_t no_read;
1963
1964         if (cd->unget_x)
1965         {
1966             x = cd->unget_x;
1967             no_read = cd->no_read_x;
1968         }
1969         else
1970         {
1971             if (*inbytesleft == 0)
1972             {
1973                 r = *inbuf - inbuf0;
1974                 break;
1975             }
1976             x = (*cd->read_handle)(cd, (unsigned char *) *inbuf, *inbytesleft,
1977                                    &no_read);
1978             if (no_read == 0)
1979             {
1980                 r = (size_t)(-1);
1981                 break;
1982             }
1983         }
1984         if (x)
1985         {
1986             r = (*cd->write_handle)(cd, x, outbuf, outbytesleft);
1987             if (r)
1988             {
1989                 /* unable to write it. save it because read_handle cannot
1990                    rewind .. */
1991                 if (cd->my_errno == YAZ_ICONV_E2BIG)
1992                 {
1993                     cd->unget_x = x;
1994                     cd->no_read_x = no_read;
1995                     break;
1996                 }
1997             }
1998             cd->unget_x = 0;
1999         }
2000         *inbytesleft -= no_read;
2001         (*inbuf) += no_read;
2002     }
2003     return r;
2004 }
2005
2006 int yaz_iconv_error (yaz_iconv_t cd)
2007 {
2008     return cd->my_errno;
2009 }
2010
2011 int yaz_iconv_close (yaz_iconv_t cd)
2012 {
2013 #if HAVE_ICONV_H
2014     if (cd->iconv_cd)
2015         iconv_close (cd->iconv_cd);
2016 #endif
2017     xfree (cd);
2018     return 0;
2019 }
2020
2021 /*
2022  * Local variables:
2023  * c-basic-offset: 4
2024  * indent-tabs-mode: nil
2025  * End:
2026  * vim: shiftwidth=4 tabstop=8 expandtab
2027  */