Moved more members of public struct odr (ODR*) to struct Odr_private.
[yaz-moved-to-github.git] / src / siconv.c
1 /*
2  * Copyright (C) 1995-2007, Index Data ApS
3  * See the file LICENSE for details.
4  *
5  * $Id: siconv.c,v 1.36 2007-03-17 00:10:40 adam Exp $
6  */
7 /**
8  * \file siconv.c
9  * \brief Implements simple ICONV
10  *
11  * This implements an interface similar to that of iconv and
12  * is used by YAZ to interface with iconv (if present).
13  * For systems where iconv is not present, this layer
14  * provides a few important conversions: UTF-8, MARC-8, Latin-1.
15  *
16  * MARC-8 reference:
17  *  http://www.loc.gov/marc/specifications/speccharmarc8.html
18  */
19
20 #if HAVE_CONFIG_H
21 #include <config.h>
22 #endif
23
24 #include <assert.h>
25 #include <errno.h>
26 #include <string.h>
27 #include <ctype.h>
28 #if HAVE_WCHAR_H
29 #include <wchar.h>
30 #endif
31
32 #if HAVE_ICONV_H
33 #include <iconv.h>
34 #endif
35
36
37 #include <yaz/yaz-util.h>
38
39 unsigned long yaz_marc8_1_conv(unsigned char *inp, size_t inbytesleft,
40                                size_t *no_read, int *combining);
41 unsigned long yaz_marc8_2_conv(unsigned char *inp, size_t inbytesleft,
42                                size_t *no_read, int *combining);
43 unsigned long yaz_marc8_3_conv(unsigned char *inp, size_t inbytesleft,
44                                size_t *no_read, int *combining);
45 unsigned long yaz_marc8_4_conv(unsigned char *inp, size_t inbytesleft,
46                                size_t *no_read, int *combining);
47 unsigned long yaz_marc8_5_conv(unsigned char *inp, size_t inbytesleft,
48                                size_t *no_read, int *combining);
49 unsigned long yaz_marc8_6_conv(unsigned char *inp, size_t inbytesleft,
50                                size_t *no_read, int *combining);
51 unsigned long yaz_marc8_7_conv(unsigned char *inp, size_t inbytesleft,
52                                size_t *no_read, int *combining);
53 unsigned long yaz_marc8_8_conv(unsigned char *inp, size_t inbytesleft,
54                                size_t *no_read, int *combining);
55 unsigned long yaz_marc8_9_conv(unsigned char *inp, size_t inbytesleft,
56                                size_t *no_read, int *combining);
57
58
59 unsigned long yaz_marc8r_1_conv(unsigned char *inp, size_t inbytesleft,
60                                 size_t *no_read, int *combining);
61 unsigned long yaz_marc8r_2_conv(unsigned char *inp, size_t inbytesleft,
62                                 size_t *no_read, int *combining);
63 unsigned long yaz_marc8r_3_conv(unsigned char *inp, size_t inbytesleft,
64                                 size_t *no_read, int *combining);
65 unsigned long yaz_marc8r_4_conv(unsigned char *inp, size_t inbytesleft,
66                                 size_t *no_read, int *combining);
67 unsigned long yaz_marc8r_5_conv(unsigned char *inp, size_t inbytesleft,
68                                 size_t *no_read, int *combining);
69 unsigned long yaz_marc8r_6_conv(unsigned char *inp, size_t inbytesleft,
70                                 size_t *no_read, int *combining);
71 unsigned long yaz_marc8r_7_conv(unsigned char *inp, size_t inbytesleft,
72                                 size_t *no_read, int *combining);
73 unsigned long yaz_marc8r_8_conv(unsigned char *inp, size_t inbytesleft,
74                                 size_t *no_read, int *combining);
75 unsigned long yaz_marc8r_9_conv(unsigned char *inp, size_t inbytesleft,
76                                 size_t *no_read, int *combining);
77
/**
 * State carried by one conversion handle (yaz_iconv_t).
 *
 * The reader/writer callbacks selected for a conversion are stored here
 * together with the scratch state they need between calls.
 */
struct yaz_iconv_struct {
    /* Last error; readers and writers in this file store YAZ_ICONV_* here. */
    int my_errno;
    /* NOTE(review): presumably tracks whether init_handle has run - confirm. */
    int init_flag;
    /* Optional input prologue handler (e.g. yaz_init_UTF8); reports the
       number of leading bytes to skip through *no_read. */
    size_t (*init_handle)(yaz_iconv_t cd, unsigned char *inbuf,
                          size_t inbytesleft, size_t *no_read);
    /* Decodes one character from inbuf; returns it and stores the number
       of bytes consumed in *no_read (0 on error in the readers here). */
    unsigned long (*read_handle)(yaz_iconv_t cd, unsigned char *inbuf,
                                 size_t inbytesleft, size_t *no_read);
    /* Encodes character x at *outbuf, advancing *outbuf and decreasing
       *outbytesleft; the writers here return 0 or (size_t) -1. */
    size_t (*write_handle)(yaz_iconv_t cd, unsigned long x,
                           char **outbuf, size_t *outbytesleft,
                           int last);
    /* NOTE(review): presumably emits pending output at end of input. */
    size_t (*flush_handle)(yaz_iconv_t cd,
                           char **outbuf, size_t *outbytesleft);
    /* Final byte of the latest MARC-8 escape sequence; it selects the
       conversion table in yaz_read_marc8_comb. */
    int marc8_esc_mode;

    /* Combining characters buffered by yaz_read_marc8: comb_size entries
       are stored, comb_offset is the next one to hand out. */
    int comb_offset;
    int comb_size;
    unsigned long comb_x[8];     /* buffered combining characters */
    size_t comb_no_read[8];      /* input bytes consumed by each of them */
    /* NOTE(review): no_read_x / unget_x are not used in the visible part of
       this file; presumably a one-character push-back - confirm. */
    size_t no_read_x;
    unsigned long unget_x;
#if HAVE_ICONV_H
    /* Handle of the system iconv, available when <iconv.h> exists. */
    iconv_t iconv_cd;
#endif
    /* NOTE(review): presumably a base character awaiting composition in the
       ISO-8859-1 writer - confirm against yaz_write_ISO8859_1. */
    unsigned long compose_char;

    /* NOTE(review): state for the MARC-8 writer, which is outside the
       visible part of this file. */
    unsigned long write_marc8_comb_ch[8];
    size_t write_marc8_comb_no;
    unsigned write_marc8_second_half_char;
    unsigned long write_marc8_last;
    const char *write_marc8_page_chr;
};
109
/*
 * Composition table for Latin-1: a base character x1 followed by the
 * Unicode combining character x2 is equivalent to the single ISO-8859-1
 * character y. The table is terminated by an all-zero entry; users loop
 * until x1 is 0.
 */
static struct {
    unsigned long x1, x2;
    unsigned y;
} latin1_comb[] = {
    { 'A', 0x0300, 0xc0}, /* LATIN CAPITAL LETTER A WITH GRAVE */
    { 'A', 0x0301, 0xc1}, /* LATIN CAPITAL LETTER A WITH ACUTE */
    { 'A', 0x0302, 0xc2}, /* LATIN CAPITAL LETTER A WITH CIRCUMFLEX */
    { 'A', 0x0303, 0xc3}, /* LATIN CAPITAL LETTER A WITH TILDE */
    { 'A', 0x0308, 0xc4}, /* LATIN CAPITAL LETTER A WITH DIAERESIS */
    { 'A', 0x030a, 0xc5}, /* LATIN CAPITAL LETTER A WITH RING ABOVE */
    /* omitted:    0xc6      LATIN CAPITAL LETTER AE */
    { 'C', 0x0327, 0xc7}, /* LATIN CAPITAL LETTER C WITH CEDILLA */
    { 'E', 0x0300, 0xc8}, /* LATIN CAPITAL LETTER E WITH GRAVE */
    { 'E', 0x0301, 0xc9}, /* LATIN CAPITAL LETTER E WITH ACUTE */
    { 'E', 0x0302, 0xca}, /* LATIN CAPITAL LETTER E WITH CIRCUMFLEX */
    { 'E', 0x0308, 0xcb}, /* LATIN CAPITAL LETTER E WITH DIAERESIS */
    { 'I', 0x0300, 0xcc}, /* LATIN CAPITAL LETTER I WITH GRAVE */
    { 'I', 0x0301, 0xcd}, /* LATIN CAPITAL LETTER I WITH ACUTE */
    { 'I', 0x0302, 0xce}, /* LATIN CAPITAL LETTER I WITH CIRCUMFLEX */
    { 'I', 0x0308, 0xcf}, /* LATIN CAPITAL LETTER I WITH DIAERESIS */
    { 'N', 0x0303, 0xd1}, /* LATIN CAPITAL LETTER N WITH TILDE */
    { 'O', 0x0300, 0xd2}, /* LATIN CAPITAL LETTER O WITH GRAVE */
    { 'O', 0x0301, 0xd3}, /* LATIN CAPITAL LETTER O WITH ACUTE */
    { 'O', 0x0302, 0xd4}, /* LATIN CAPITAL LETTER O WITH CIRCUMFLEX */
    { 'O', 0x0303, 0xd5}, /* LATIN CAPITAL LETTER O WITH TILDE */
    { 'O', 0x0308, 0xd6}, /* LATIN CAPITAL LETTER O WITH DIAERESIS */
    /* omitted:    0xd7      MULTIPLICATION SIGN */
    /* omitted:    0xd8      LATIN CAPITAL LETTER O WITH STROKE */
    { 'U', 0x0300, 0xd9}, /* LATIN CAPITAL LETTER U WITH GRAVE */
    { 'U', 0x0301, 0xda}, /* LATIN CAPITAL LETTER U WITH ACUTE */
    { 'U', 0x0302, 0xdb}, /* LATIN CAPITAL LETTER U WITH CIRCUMFLEX */
    { 'U', 0x0308, 0xdc}, /* LATIN CAPITAL LETTER U WITH DIAERESIS */
    { 'Y', 0x0301, 0xdd}, /* LATIN CAPITAL LETTER Y WITH ACUTE */
    /* omitted:    0xde      LATIN CAPITAL LETTER THORN */
    /* omitted:    0xdf      LATIN SMALL LETTER SHARP S */
    { 'a', 0x0300, 0xe0}, /* LATIN SMALL LETTER A WITH GRAVE */
    { 'a', 0x0301, 0xe1}, /* LATIN SMALL LETTER A WITH ACUTE */
    { 'a', 0x0302, 0xe2}, /* LATIN SMALL LETTER A WITH CIRCUMFLEX */
    { 'a', 0x0303, 0xe3}, /* LATIN SMALL LETTER A WITH TILDE */
    { 'a', 0x0308, 0xe4}, /* LATIN SMALL LETTER A WITH DIAERESIS */
    { 'a', 0x030a, 0xe5}, /* LATIN SMALL LETTER A WITH RING ABOVE */
    /* omitted:    0xe6      LATIN SMALL LETTER AE */
    { 'c', 0x0327, 0xe7}, /* LATIN SMALL LETTER C WITH CEDILLA */
    { 'e', 0x0300, 0xe8}, /* LATIN SMALL LETTER E WITH GRAVE */
    { 'e', 0x0301, 0xe9}, /* LATIN SMALL LETTER E WITH ACUTE */
    { 'e', 0x0302, 0xea}, /* LATIN SMALL LETTER E WITH CIRCUMFLEX */
    { 'e', 0x0308, 0xeb}, /* LATIN SMALL LETTER E WITH DIAERESIS */
    { 'i', 0x0300, 0xec}, /* LATIN SMALL LETTER I WITH GRAVE */
    { 'i', 0x0301, 0xed}, /* LATIN SMALL LETTER I WITH ACUTE */
    { 'i', 0x0302, 0xee}, /* LATIN SMALL LETTER I WITH CIRCUMFLEX */
    { 'i', 0x0308, 0xef}, /* LATIN SMALL LETTER I WITH DIAERESIS */
    /* omitted:    0xf0      LATIN SMALL LETTER ETH */
    { 'n', 0x0303, 0xf1}, /* LATIN SMALL LETTER N WITH TILDE */
    { 'o', 0x0300, 0xf2}, /* LATIN SMALL LETTER O WITH GRAVE */
    { 'o', 0x0301, 0xf3}, /* LATIN SMALL LETTER O WITH ACUTE */
    { 'o', 0x0302, 0xf4}, /* LATIN SMALL LETTER O WITH CIRCUMFLEX */
    { 'o', 0x0303, 0xf5}, /* LATIN SMALL LETTER O WITH TILDE */
    { 'o', 0x0308, 0xf6}, /* LATIN SMALL LETTER O WITH DIAERESIS */
    /* omitted:    0xf7      DIVISION SIGN */
    /* omitted:    0xf8      LATIN SMALL LETTER O WITH STROKE */
    { 'u', 0x0300, 0xf9}, /* LATIN SMALL LETTER U WITH GRAVE */
    { 'u', 0x0301, 0xfa}, /* LATIN SMALL LETTER U WITH ACUTE */
    { 'u', 0x0302, 0xfb}, /* LATIN SMALL LETTER U WITH CIRCUMFLEX */
    { 'u', 0x0308, 0xfc}, /* LATIN SMALL LETTER U WITH DIAERESIS */
    { 'y', 0x0301, 0xfd}, /* LATIN SMALL LETTER Y WITH ACUTE */
    /* omitted:    0xfe      LATIN SMALL LETTER THORN */
    { 'y', 0x0308, 0xff}, /* LATIN SMALL LETTER Y WITH DIAERESIS */

    { 0, 0, 0}            /* end-of-table sentinel */
};
180
181 static unsigned long yaz_read_ISO8859_1 (yaz_iconv_t cd, unsigned char *inp,
182                                          size_t inbytesleft, size_t *no_read)
183 {
184     unsigned long x = inp[0];
185     *no_read = 1;
186     return x;
187 }
188
189
190 static size_t yaz_init_UTF8 (yaz_iconv_t cd, unsigned char *inp,
191                              size_t inbytesleft, size_t *no_read)
192 {
193     if (inp[0] != 0xef)
194     {
195         *no_read = 0;
196         return 0;
197     }
198     if (inbytesleft < 3)
199     {
200         cd->my_errno = YAZ_ICONV_EINVAL;
201         return (size_t) -1;
202     }
203     if (inp[1] != 0xbb && inp[2] == 0xbf)
204         *no_read = 3;
205     else
206         *no_read = 0;
207     return 0;
208 }
209
210 unsigned long yaz_read_UTF8_char(unsigned char *inp,
211                                  size_t inbytesleft, size_t *no_read,
212                                  int *error)
213 {
214     unsigned long x = 0;
215
216     if (inp[0] <= 0x7f)
217     {
218         x = inp[0];
219         *no_read = 1;
220     }
221     else if (inp[0] <= 0xbf || inp[0] >= 0xfe)
222     {
223         *no_read = 0;
224         *error = YAZ_ICONV_EILSEQ;
225     }
226     else if (inp[0] <= 0xdf && inbytesleft >= 2)
227     {
228         x = ((inp[0] & 0x1f) << 6) | (inp[1] & 0x3f);
229         if (x >= 0x80)
230             *no_read = 2;
231         else
232         {
233             *no_read = 0;
234             *error = YAZ_ICONV_EILSEQ;
235         }
236     }
237     else if (inp[0] <= 0xef && inbytesleft >= 3)
238     {
239         x = ((inp[0] & 0x0f) << 12) | ((inp[1] & 0x3f) << 6) |
240             (inp[2] & 0x3f);
241         if (x >= 0x800)
242             *no_read = 3;
243         else
244         {
245             *no_read = 0;
246             *error = YAZ_ICONV_EILSEQ;
247         }
248     }
249     else if (inp[0] <= 0xf7 && inbytesleft >= 4)
250     {
251         x =  ((inp[0] & 0x07) << 18) | ((inp[1] & 0x3f) << 12) |
252             ((inp[2] & 0x3f) << 6) | (inp[3] & 0x3f);
253         if (x >= 0x10000)
254             *no_read = 4;
255         else
256         {
257             *no_read = 0;
258             *error = YAZ_ICONV_EILSEQ;
259         }
260     }
261     else if (inp[0] <= 0xfb && inbytesleft >= 5)
262     {
263         x =  ((inp[0] & 0x03) << 24) | ((inp[1] & 0x3f) << 18) |
264             ((inp[2] & 0x3f) << 12) | ((inp[3] & 0x3f) << 6) |
265             (inp[4] & 0x3f);
266         if (x >= 0x200000)
267             *no_read = 5;
268         else
269         {
270             *no_read = 0;
271             *error = YAZ_ICONV_EILSEQ;
272         }
273     }
274     else if (inp[0] <= 0xfd && inbytesleft >= 6)
275     {
276         x =  ((inp[0] & 0x01) << 30) | ((inp[1] & 0x3f) << 24) |
277             ((inp[2] & 0x3f) << 18) | ((inp[3] & 0x3f) << 12) |
278             ((inp[4] & 0x3f) << 6) | (inp[5] & 0x3f);
279         if (x >= 0x4000000)
280             *no_read = 6;
281         else
282         {
283             *no_read = 0;
284             *error = YAZ_ICONV_EILSEQ;
285         }
286     }
287     else
288     {
289         *no_read = 0;
290         *error = YAZ_ICONV_EINVAL;
291     }
292     return x;
293 }
294
295 static unsigned long yaz_read_UTF8 (yaz_iconv_t cd, unsigned char *inp,
296                                     size_t inbytesleft, size_t *no_read)
297 {
298     return yaz_read_UTF8_char(inp, inbytesleft, no_read, &cd->my_errno);
299 }
300
301 static unsigned long yaz_read_UCS4 (yaz_iconv_t cd, unsigned char *inp,
302                                     size_t inbytesleft, size_t *no_read)
303 {
304     unsigned long x = 0;
305     
306     if (inbytesleft < 4)
307     {
308         cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
309         *no_read = 0;
310     }
311     else
312     {
313         x = (inp[0]<<24) | (inp[1]<<16) | (inp[2]<<8) | inp[3];
314         *no_read = 4;
315     }
316     return x;
317 }
318
319 static unsigned long yaz_read_UCS4LE (yaz_iconv_t cd, unsigned char *inp,
320                                       size_t inbytesleft, size_t *no_read)
321 {
322     unsigned long x = 0;
323     
324     if (inbytesleft < 4)
325     {
326         cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
327         *no_read = 0;
328     }
329     else
330     {
331         x = (inp[3]<<24) | (inp[2]<<16) | (inp[1]<<8) | inp[0];
332         *no_read = 4;
333     }
334     return x;
335 }
336
337 #if HAVE_WCHAR_H
338 static unsigned long yaz_read_wchar_t (yaz_iconv_t cd, unsigned char *inp,
339                                        size_t inbytesleft, size_t *no_read)
340 {
341     unsigned long x = 0;
342     
343     if (inbytesleft < sizeof(wchar_t))
344     {
345         cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
346         *no_read = 0;
347     }
348     else
349     {
350         wchar_t wch;
351         memcpy (&wch, inp, sizeof(wch));
352         x = wch;
353         *no_read = sizeof(wch);
354     }
355     return x;
356 }
357 #endif
358
359 static unsigned long yaz_read_advancegreek(yaz_iconv_t cd, unsigned char *inp,
360                                            size_t inbytesleft, size_t *no_read)
361 {
362     unsigned long x = 0;
363     int shift = 0;
364     int tonos = 0;
365     int dialitika = 0;
366
367     *no_read = 0;
368     while (inbytesleft > 0)
369     {
370         if (*inp == 0x9d)
371         {
372             tonos = 1;
373         }
374         else if (*inp == 0x9e)
375         {
376             dialitika = 1;
377         }
378         else if (*inp == 0x9f)
379         {
380             shift = 1;
381         }
382         else
383             break;
384         inp++;
385         --inbytesleft;
386         (*no_read)++;
387     }    
388     if (inbytesleft == 0)
389     {
390         cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
391         *no_read = 0;
392         return 0;
393     }
394     switch (*inp) {
395     case 0x81:
396         if (shift) 
397             if (tonos) 
398                 x = 0x0386;
399             else 
400                 x = 0x0391;
401         else 
402             if (tonos) 
403                 x = 0x03ac;
404             else 
405                 x = 0x03b1;
406         break;
407     case 0x82:
408         if (shift) 
409             x = 0x0392;
410         else 
411             x = 0x03b2;
412         
413         break;
414     case 0x83:
415         if (shift) 
416             x = 0x0393;
417         else 
418             x = 0x03b3;
419         break;
420     case 0x84:
421         if (shift) 
422             x = 0x0394;
423         else 
424             x = 0x03b4;
425         break;
426     case 0x85:
427         if (shift) 
428             if (tonos) 
429                 x = 0x0388;
430             else 
431                 x = 0x0395;
432         else 
433             if (tonos) 
434                 x = 0x03ad;
435             else 
436                 x = 0x03b5;
437         break;
438     case 0x86:
439         if (shift) 
440             x = 0x0396;
441         else 
442             x = 0x03b6;
443         break;
444     case 0x87:
445         if (shift) 
446             if (tonos) 
447                 x = 0x0389;
448             else 
449                 x = 0x0397;
450         else 
451             if (tonos) 
452                 x = 0x03ae;
453             else 
454                 x = 0x03b7;
455         break;
456     case 0x88:
457         if (shift) 
458             x = 0x0398;
459         else 
460             x = 0x03b8;
461         break;
462     case 0x89:
463         if (shift) 
464             if (tonos) 
465                 x = 0x038a;
466             else 
467                 if (dialitika) 
468                     x = 0x9e;
469                 else 
470                     x = 0x0399;
471         else 
472             if (tonos) 
473                 if (dialitika) 
474                     x = 0x0390;
475                 else 
476                     x = 0x03af;
477         
478             else 
479                 if (dialitika) 
480                     x = 0x03ca;
481                 else 
482                     x = 0x03b9;
483         break;
484     case 0x8a:
485         if (shift) 
486             x = 0x039a;
487         else 
488             x = 0x03ba;
489         
490         break;
491     case 0x8b:
492         if (shift) 
493             x = 0x039b;
494         else 
495             x = 0x03bb;
496         break;
497     case 0x8c:
498         if (shift) 
499             x = 0x039c;
500         else 
501             x = 0x03bc;
502         
503         break;
504     case 0x8d:
505         if (shift) 
506             x = 0x039d;
507         else 
508             x = 0x03bd;
509         break;
510     case 0x8e:
511         if (shift) 
512             x = 0x039e;
513         else 
514             x = 0x03be;
515         break;
516     case 0x8f:
517         if (shift) 
518             if (tonos) 
519                 x = 0x038c;
520             else 
521                 x = 0x039f;
522         else 
523             if (tonos) 
524                 x = 0x03cc;
525             else 
526                 x = 0x03bf;
527         break;
528     case 0x90:
529         if (shift) 
530             x = 0x03a0;
531         else 
532             x = 0x03c0;
533         break;
534     case 0x91:
535         if (shift) 
536             x = 0x03a1;
537         else 
538             x = 0x03c1;
539         break;
540     case 0x92:
541         x = 0x03c2;
542         break;
543     case 0x93:
544         if (shift) 
545             x = 0x03a3;
546         else 
547             x = 0x03c3;
548         break;
549     case 0x94:
550         if (shift) 
551             x = 0x03a4;
552         else 
553             x = 0x03c4;
554         break;
555     case 0x95:
556         if (shift) 
557             if (tonos) 
558                 x = 0x038e;
559             else 
560                 if (dialitika) 
561                     x = 0x03ab;
562                 else 
563                     x = 0x03a5;
564         else 
565             if (tonos) 
566                 if (dialitika) 
567                     x = 0x03b0;
568                 else 
569                     x = 0x03cd;
570         
571             else 
572                 if (dialitika) 
573                     x = 0x03cb;
574                 else 
575                     x = 0x03c5;
576         break;
577     case 0x96:
578         if (shift) 
579             x = 0x03a6;
580         else 
581             x = 0x03c6;
582         break;
583     case 0x97:
584         if (shift) 
585             x = 0x03a7;
586         else 
587             x = 0x03c7;
588         break;
589     case 0x98:
590         if (shift) 
591             x = 0x03a8;
592         else 
593             x = 0x03c8;
594         
595         break;
596         
597     case 0x99:
598         if (shift) 
599             if (tonos) 
600                 x = 0x038f;
601             else 
602                 x = 0x03a9;
603         else 
604             if (tonos) 
605                 x = 0x03ce;
606             else 
607                 x = 0x03c9;
608         break;
609     default:
610         x = *inp;
611         break;
612     }
613     (*no_read)++;
614     
615     return x;
616 }
617
618 static size_t yaz_write_advancegreek(yaz_iconv_t cd, unsigned long x,
619                                      char **outbuf, size_t *outbytesleft,
620                                      int last)
621 {
622     size_t k = 0;
623     unsigned char *out = (unsigned char*) *outbuf;
624     if (*outbytesleft < 3)
625     {
626         cd->my_errno = YAZ_ICONV_E2BIG;  /* not room for output */
627         return (size_t)(-1);
628     }
629     switch (x)
630     {
631     case 0x03ac : out[k++]=0x9d; out[k++]=0x81; break;
632     case 0x03ad : out[k++]=0x9d; out[k++]=0x85; break;
633     case 0x03ae : out[k++]=0x9d; out[k++]=0x87; break;
634     case 0x03af : out[k++]=0x9d; out[k++]=0x89; break;
635     case 0x03cc : out[k++]=0x9d; out[k++]=0x8f; break;
636     case 0x03cd : out[k++]=0x9d; out[k++]=0x95; break;
637     case 0x03ce : out[k++]=0x9d; out[k++]=0x99; break;
638     case 0x0390 : out[k++]=0x9d; out[k++]=0x9e; out[k++]=0x89; break;
639     case 0x03b0 : out[k++]=0x9d; out[k++]=0x9e; out[k++]=0x95; break;
640     case 0x0386 : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x81; break;
641     case 0x0388 : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x85; break;
642     case 0x0389 : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x87; break;
643     case 0x038a : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x89; break;
644     case 0x038c : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x8f; break;
645     case 0x038e : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x95; break;
646     case 0x038f : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x99; break;
647     case 0x03ca : out[k++]=0x9e; out[k++]=0x89; break;
648     case 0x03cb : out[k++]=0x9e; out[k++]=0x95; break;
649     case 0x03aa : out[k++]=0x9e; out[k++]=0x9f; out[k++]=0x89; break;
650     case 0x03ab : out[k++]=0x9e; out[k++]=0x9f; out[k++]=0x95; break;
651     case 0x0391 : out[k++]=0x9f; out[k++]=0x81; break;
652     case 0x0392 : out[k++]=0x9f; out[k++]=0x82; break;
653     case 0x0393 : out[k++]=0x9f; out[k++]=0x83; break;
654     case 0x0394 : out[k++]=0x9f; out[k++]=0x84; break;
655     case 0x0395 : out[k++]=0x9f; out[k++]=0x85; break;
656     case 0x0396 : out[k++]=0x9f; out[k++]=0x86; break;
657     case 0x0397 : out[k++]=0x9f; out[k++]=0x87; break;
658     case 0x0398 : out[k++]=0x9f; out[k++]=0x88; break;
659     case 0x0399 : out[k++]=0x9f; out[k++]=0x89; break;
660     case 0x039a : out[k++]=0x9f; out[k++]=0x8a; break;
661     case 0x039b : out[k++]=0x9f; out[k++]=0x8b; break;
662     case 0x039c : out[k++]=0x9f; out[k++]=0x8c; break;
663     case 0x039d : out[k++]=0x9f; out[k++]=0x8d; break;
664     case 0x039e : out[k++]=0x9f; out[k++]=0x8e; break;
665     case 0x039f : out[k++]=0x9f; out[k++]=0x8f; break;
666     case 0x03a0 : out[k++]=0x9f; out[k++]=0x90; break;
667     case 0x03a1 : out[k++]=0x9f; out[k++]=0x91; break;
668     case 0x03a3 : out[k++]=0x9f; out[k++]=0x93; break;
669     case 0x03a4 : out[k++]=0x9f; out[k++]=0x94; break;
670     case 0x03a5 : out[k++]=0x9f; out[k++]=0x95; break;
671     case 0x03a6 : out[k++]=0x9f; out[k++]=0x96; break;
672     case 0x03a7 : out[k++]=0x9f; out[k++]=0x97; break;
673     case 0x03a8 : out[k++]=0x9f; out[k++]=0x98; break;
674     case 0x03a9 : out[k++]=0x9f; out[k++]=0x99; break;
675     case 0x03b1 : out[k++]=0x81; break;
676     case 0x03b2 : out[k++]=0x82; break;
677     case 0x03b3 : out[k++]=0x83; break;
678     case 0x03b4 : out[k++]=0x84; break;
679     case 0x03b5 : out[k++]=0x85; break;
680     case 0x03b6 : out[k++]=0x86; break;
681     case 0x03b7 : out[k++]=0x87; break;
682     case 0x03b8 : out[k++]=0x88; break;
683     case 0x03b9 : out[k++]=0x89; break;
684     case 0x03ba : out[k++]=0x8a; break;
685     case 0x03bb : out[k++]=0x8b; break;
686     case 0x03bc : out[k++]=0x8c; break;
687     case 0x03bd : out[k++]=0x8d; break;
688     case 0x03be : out[k++]=0x8e; break;
689     case 0x03bf : out[k++]=0x8f; break;
690     case 0x03c0 : out[k++]=0x90; break;
691     case 0x03c1 : out[k++]=0x91; break;
692     case 0x03c2 : out[k++]=0x92; break;
693     case 0x03c3 : out[k++]=0x93; break;
694     case 0x03c4 : out[k++]=0x94; break;
695     case 0x03c5 : out[k++]=0x95; break;
696     case 0x03c6 : out[k++]=0x96; break;
697     case 0x03c7 : out[k++]=0x96; break;
698     case 0x03c8 : out[k++]=0x98; break;
699     case 0x03c9 : out[k++]=0x99; break;
700     default:
701         if (x > 255)
702         {
703             cd->my_errno = YAZ_ICONV_EILSEQ;
704             return (size_t) -1;
705         }
706         out[k++] = x;
707         break;
708     }
709     *outbytesleft -= k;
710     (*outbuf) += k;
711     return 0;
712 }
713
714
715 static unsigned long yaz_read_marc8_comb (yaz_iconv_t cd, unsigned char *inp,
716                                           size_t inbytesleft, size_t *no_read,
717                                           int *comb);
718
719 static unsigned long yaz_read_marc8 (yaz_iconv_t cd, unsigned char *inp,
720                                      size_t inbytesleft, size_t *no_read)
721 {
722     unsigned long x;
723     if (cd->comb_offset < cd->comb_size)
724     {
725         *no_read = cd->comb_no_read[cd->comb_offset];
726         x = cd->comb_x[cd->comb_offset];
727
728         /* special case for double-diacritic combining characters, 
729            INVERTED BREVE and DOUBLE TILDE.
730            We'll increment the no_read counter by 1, since we want to skip over
731            the processing of the closing ligature character
732         */
733         /* this code is no longer necessary.. our handlers code in
734            yaz_marc8_?_conv (generated by charconv.tcl) now returns
735            0 and no_read=1 when a sequence does not match the input.
736            The SECOND HALFs in codetables.xml produces a non-existant
737            entry in the conversion trie.. Hence when met, the input byte is
738            skipped as it should (in yaz_iconv)
739         */
740 #if 0
741         if (x == 0x0361 || x == 0x0360)
742             *no_read += 1;
743 #endif
744         cd->comb_offset++;
745         return x;
746     }
747
748     cd->comb_offset = 0;
749     for (cd->comb_size = 0; cd->comb_size < 8; cd->comb_size++)
750     {
751         int comb = 0;
752
753         if (inbytesleft == 0 && cd->comb_size)
754         {
755             cd->my_errno = YAZ_ICONV_EINVAL;
756             x = 0;
757             *no_read = 0;
758             break;
759         }
760         x = yaz_read_marc8_comb(cd, inp, inbytesleft, no_read, &comb);
761         if (!comb || !x)
762             break;
763         cd->comb_x[cd->comb_size] = x;
764         cd->comb_no_read[cd->comb_size] = *no_read;
765         inp += *no_read;
766         inbytesleft = inbytesleft - *no_read;
767     }
768     return x;
769 }
770
771 static unsigned long yaz_read_marc8s(yaz_iconv_t cd, unsigned char *inp,
772                                      size_t inbytesleft, size_t *no_read)
773 {
774     unsigned long x = yaz_read_marc8(cd, inp, inbytesleft, no_read);
775     if (x && cd->comb_size == 1)
776     {
777         /* For MARC8s we try to get a Latin-1 page code out of it */
778         int i;
779         for (i = 0; latin1_comb[i].x1; i++)
780             if (cd->comb_x[0] == latin1_comb[i].x2 && x == latin1_comb[i].x1)
781             {
782                 *no_read += cd->comb_no_read[0];
783                 cd->comb_size = 0;
784                 x = latin1_comb[i].y;
785                 break;
786             }
787     }
788     return x;
789 }
790
791 static unsigned long yaz_read_marc8_comb(yaz_iconv_t cd, unsigned char *inp,
792                                          size_t inbytesleft, size_t *no_read,
793                                          int *comb)
794 {
795     *no_read = 0;
796     while(inbytesleft >= 1 && inp[0] == 27)
797     {
798         size_t inbytesleft0 = inbytesleft;
799         inp++;
800         inbytesleft--;
801         while(inbytesleft > 0 && strchr("(,$!)-", *inp))
802         {
803             inbytesleft--;
804             inp++;
805         }
806         if (inbytesleft <= 0)
807         {
808             *no_read = 0;
809             cd->my_errno = YAZ_ICONV_EINVAL;
810             return 0;
811         }
812         cd->marc8_esc_mode = *inp++;
813         inbytesleft--;
814         (*no_read) += inbytesleft0 - inbytesleft;
815     }
816     if (inbytesleft <= 0)
817         return 0;
818     else
819     {
820         unsigned long x;
821         size_t no_read_sub = 0;
822         *comb = 0;
823
824         switch(cd->marc8_esc_mode)
825         {
826         case 'B':  /* Basic ASCII */
827         case 'E':  /* ANSEL */
828         case 's':  /* ASCII */
829             x = yaz_marc8_1_conv(inp, inbytesleft, &no_read_sub, comb);
830             break;
831         case 'g':  /* Greek */
832             x = yaz_marc8_2_conv(inp, inbytesleft, &no_read_sub, comb);
833             break;
834         case 'b':  /* Subscripts */
835             x = yaz_marc8_3_conv(inp, inbytesleft, &no_read_sub, comb);
836             break;
837         case 'p':  /* Superscripts */
838             x = yaz_marc8_4_conv(inp, inbytesleft, &no_read_sub, comb);
839             break;
840         case '2':  /* Basic Hebrew */
841             x = yaz_marc8_5_conv(inp, inbytesleft, &no_read_sub, comb);
842             break;
843         case 'N':  /* Basic Cyrillic */
844         case 'Q':  /* Extended Cyrillic */
845             x = yaz_marc8_6_conv(inp, inbytesleft, &no_read_sub, comb);
846             break;
847         case '3':  /* Basic Arabic */
848         case '4':  /* Extended Arabic */
849             x = yaz_marc8_7_conv(inp, inbytesleft, &no_read_sub, comb);
850             break;
851         case 'S':  /* Greek */
852             x = yaz_marc8_8_conv(inp, inbytesleft, &no_read_sub, comb);
853             break;
854         case '1':  /* Chinese, Japanese, Korean (EACC) */
855             x = yaz_marc8_9_conv(inp, inbytesleft, &no_read_sub, comb);
856             break;
857         default:
858             *no_read = 0;
859             cd->my_errno = YAZ_ICONV_EILSEQ;
860             return 0;
861         }
862         *no_read += no_read_sub;
863         return x;
864     }
865 }
866
867 static size_t yaz_write_UTF8(yaz_iconv_t cd, unsigned long x,
868                              char **outbuf, size_t *outbytesleft,
869                              int last)
870 {
871     return yaz_write_UTF8_char(x, outbuf, outbytesleft, &cd->my_errno);
872 }
873
874 size_t yaz_write_UTF8_char(unsigned long x,
875                            char **outbuf, size_t *outbytesleft,
876                            int *error)
877 {
878     unsigned char *outp = (unsigned char *) *outbuf;
879
880     if (x <= 0x7f && *outbytesleft >= 1)
881     {
882         *outp++ = (unsigned char) x;
883         (*outbytesleft)--;
884     } 
885     else if (x <= 0x7ff && *outbytesleft >= 2)
886     {
887         *outp++ = (unsigned char) ((x >> 6) | 0xc0);
888         *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
889         (*outbytesleft) -= 2;
890     }
891     else if (x <= 0xffff && *outbytesleft >= 3)
892     {
893         *outp++ = (unsigned char) ((x >> 12) | 0xe0);
894         *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
895         *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
896         (*outbytesleft) -= 3;
897     }
898     else if (x <= 0x1fffff && *outbytesleft >= 4)
899     {
900         *outp++ = (unsigned char) ((x >> 18) | 0xf0);
901         *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
902         *outp++ = (unsigned char) (((x >> 6)  & 0x3f) | 0x80);
903         *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
904         (*outbytesleft) -= 4;
905     }
906     else if (x <= 0x3ffffff && *outbytesleft >= 5)
907     {
908         *outp++ = (unsigned char) ((x >> 24) | 0xf8);
909         *outp++ = (unsigned char) (((x >> 18) & 0x3f) | 0x80);
910         *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
911         *outp++ = (unsigned char) (((x >> 6)  & 0x3f) | 0x80);
912         *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
913         (*outbytesleft) -= 5;
914     }
915     else if (*outbytesleft >= 6)
916     {
917         *outp++ = (unsigned char) ((x >> 30) | 0xfc);
918         *outp++ = (unsigned char) (((x >> 24) & 0x3f) | 0x80);
919         *outp++ = (unsigned char) (((x >> 18) & 0x3f) | 0x80);
920         *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
921         *outp++ = (unsigned char) (((x >> 6)  & 0x3f) | 0x80);
922         *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
923         (*outbytesleft) -= 6;
924     }
925     else 
926     {
927         *error = YAZ_ICONV_E2BIG;  /* not room for output */
928         return (size_t)(-1);
929     }
930     *outbuf = (char *) outp;
931     return 0;
932 }
933
934
935 static size_t yaz_write_ISO8859_1 (yaz_iconv_t cd, unsigned long x,
936                                    char **outbuf, size_t *outbytesleft,
937                                    int last)
938 {
939     /* list of two char unicode sequence that, when combined, are
940        equivalent to single unicode chars that can be represented in
941        ISO-8859-1/Latin-1.
942        Regular iconv on Linux at least does not seem to convert these,
943        but since MARC-8 to UTF-8 generates these composed sequence
944        we get a better chance of a successful MARC-8 -> ISO-8859-1
945        conversion */
946     unsigned char *outp = (unsigned char *) *outbuf;
947
948     if (cd->compose_char)
949     {
950         int i;
951         for (i = 0; latin1_comb[i].x1; i++)
952             if (cd->compose_char == latin1_comb[i].x1 && x == latin1_comb[i].x2)
953             {
954                 x = latin1_comb[i].y;
955                 break;
956             }
957         if (*outbytesleft < 1)
958         {  /* no room. Retain compose_char and bail out */
959             cd->my_errno = YAZ_ICONV_E2BIG;
960             return (size_t)(-1);
961         }
962         if (!latin1_comb[i].x1) 
963         {   /* not found. Just write compose_char */
964             *outp++ = (unsigned char) cd->compose_char;
965             (*outbytesleft)--;
966             *outbuf = (char *) outp;
967         }
968         /* compose_char used so reset it. x now holds current char */
969         cd->compose_char = 0;
970     }
971
972     if (!last && x > 32 && x < 127 && cd->compose_char == 0)
973     {
974         cd->compose_char = x;
975         return 0;
976     }
977     else if (x > 255 || x < 1)
978     {
979         cd->my_errno = YAZ_ICONV_EILSEQ;
980         return (size_t) -1;
981     }
982     else if (*outbytesleft < 1)
983     {
984         cd->my_errno = YAZ_ICONV_E2BIG;
985         return (size_t)(-1);
986     }
987     *outp++ = (unsigned char) x;
988     (*outbytesleft)--;
989     *outbuf = (char *) outp;
990     return 0;
991 }
992
993
994 static size_t yaz_write_UCS4 (yaz_iconv_t cd, unsigned long x,
995                               char **outbuf, size_t *outbytesleft,
996                               int last)
997 {
998     unsigned char *outp = (unsigned char *) *outbuf;
999     if (*outbytesleft >= 4)
1000     {
1001         *outp++ = (unsigned char) (x>>24);
1002         *outp++ = (unsigned char) (x>>16);
1003         *outp++ = (unsigned char) (x>>8);
1004         *outp++ = (unsigned char) x;
1005         (*outbytesleft) -= 4;
1006     }
1007     else
1008     {
1009         cd->my_errno = YAZ_ICONV_E2BIG;
1010         return (size_t)(-1);
1011     }
1012     *outbuf = (char *) outp;
1013     return 0;
1014 }
1015
1016 static size_t yaz_write_UCS4LE (yaz_iconv_t cd, unsigned long x,
1017                                 char **outbuf, size_t *outbytesleft,
1018                                 int last)
1019 {
1020     unsigned char *outp = (unsigned char *) *outbuf;
1021     if (*outbytesleft >= 4)
1022     {
1023         *outp++ = (unsigned char) x;
1024         *outp++ = (unsigned char) (x>>8);
1025         *outp++ = (unsigned char) (x>>16);
1026         *outp++ = (unsigned char) (x>>24);
1027         (*outbytesleft) -= 4;
1028     }
1029     else
1030     {
1031         cd->my_errno = YAZ_ICONV_E2BIG;
1032         return (size_t)(-1);
1033     }
1034     *outbuf = (char *) outp;
1035     return 0;
1036 }
1037
1038 static unsigned long lookup_marc8(yaz_iconv_t cd,
1039                                   unsigned long x, int *comb,
1040                                   const char **page_chr)
1041 {
1042     char utf8_buf[7];
1043     char *utf8_outbuf = utf8_buf;
1044     size_t utf8_outbytesleft = sizeof(utf8_buf)-1, r;
1045
1046     r = yaz_write_UTF8(cd, x, &utf8_outbuf, &utf8_outbytesleft, 0);
1047     if (r == (size_t)(-1))
1048     {
1049         cd->my_errno = YAZ_ICONV_EILSEQ;
1050         return 0;
1051     }
1052     else
1053     {
1054         unsigned char *inp;
1055         size_t inbytesleft, no_read_sub = 0;
1056         unsigned long x;
1057
1058         *utf8_outbuf = '\0';        
1059         inp = (unsigned char *) utf8_buf;
1060         inbytesleft = strlen(utf8_buf);
1061         
1062         x = yaz_marc8r_1_conv(inp, inbytesleft, &no_read_sub, comb);
1063         if (x)
1064         {
1065             *page_chr = "\033(B";
1066             return x;
1067         }
1068         x = yaz_marc8r_2_conv(inp, inbytesleft, &no_read_sub, comb);
1069         if (x)
1070         {
1071             *page_chr = "\033g";
1072             return x;
1073         }
1074         x = yaz_marc8r_3_conv(inp, inbytesleft, &no_read_sub, comb);
1075         if (x)
1076         {
1077             *page_chr = "\033b";
1078             return x;
1079         }
1080         x = yaz_marc8r_4_conv(inp, inbytesleft, &no_read_sub, comb);
1081         if (x)
1082         {
1083             *page_chr = "\033p";
1084             return x;
1085         }
1086         x = yaz_marc8r_5_conv(inp, inbytesleft, &no_read_sub, comb);
1087         if (x)
1088         {
1089             *page_chr = "\033(2";
1090             return x;
1091         }
1092         x = yaz_marc8r_6_conv(inp, inbytesleft, &no_read_sub, comb);
1093         if (x)
1094         {
1095             *page_chr = "\033(N";
1096             return x;
1097         }
1098         x = yaz_marc8r_7_conv(inp, inbytesleft, &no_read_sub, comb);
1099         if (x)
1100         {
1101             *page_chr = "\033(3";
1102             return x;
1103         }
1104         x = yaz_marc8r_8_conv(inp, inbytesleft, &no_read_sub, comb);
1105         if (x)
1106         {
1107             *page_chr = "\033(S";
1108             return x;
1109         }
1110         x = yaz_marc8r_9_conv(inp, inbytesleft, &no_read_sub, comb);
1111         if (x)
1112         {
1113             *page_chr = "\033$1";
1114             return x;
1115         }
1116         cd->my_errno = YAZ_ICONV_EILSEQ;
1117         return x;
1118     }
1119 }
1120
1121 static size_t flush_combos(yaz_iconv_t cd,
1122                            char **outbuf, size_t *outbytesleft)
1123 {
1124     unsigned long y = cd->write_marc8_last;
1125     unsigned char byte;
1126     char out_buf[10];
1127     size_t i, out_no = 0;
1128
1129     if (!y)
1130         return 0;
1131
1132     byte = (unsigned char )((y>>16) & 0xff);
1133     if (byte)
1134         out_buf[out_no++] = byte;
1135     byte = (unsigned char)((y>>8) & 0xff);
1136     if (byte)
1137         out_buf[out_no++] = byte;
1138     byte = (unsigned char )(y & 0xff);
1139     if (byte)
1140         out_buf[out_no++] = byte;
1141
1142     if (out_no + cd->write_marc8_comb_no + 1 > *outbytesleft)
1143     {
1144         cd->my_errno = YAZ_ICONV_E2BIG;
1145         return (size_t) (-1);
1146     }
1147
1148     for (i = 0; i < cd->write_marc8_comb_no; i++)
1149     {
1150         /* all MARC-8 combined characters are simple bytes */
1151         byte = (unsigned char )(cd->write_marc8_comb_ch[i]);
1152         *(*outbuf)++ = byte;
1153         (*outbytesleft)--;
1154     }
1155     memcpy(*outbuf, out_buf, out_no);
1156     *outbuf += out_no;
1157     (*outbytesleft) -= out_no;
1158     if (cd->write_marc8_second_half_char)
1159     {
1160         *(*outbuf)++ = cd->write_marc8_second_half_char;
1161         (*outbytesleft)--;
1162     }        
1163
1164     cd->write_marc8_last = 0;
1165     cd->write_marc8_comb_no = 0;
1166     cd->write_marc8_second_half_char = 0;
1167     return 0;
1168 }
1169
1170 static size_t yaz_write_marc8_page_chr(yaz_iconv_t cd, 
1171                                        char **outbuf, size_t *outbytesleft,
1172                                        const char *page_chr)
1173 {
1174     const char *old_page_chr = cd->write_marc8_page_chr;
1175     if (strcmp(page_chr, old_page_chr))
1176     {
1177         size_t plen = 0;
1178         const char *page_out = page_chr;
1179         
1180         if (*outbytesleft < 8)
1181         {
1182             cd->my_errno = YAZ_ICONV_E2BIG;
1183             
1184             return (size_t) (-1);
1185         }
1186         cd->write_marc8_page_chr = page_chr;
1187         
1188         if (!strcmp(old_page_chr, "\033p") 
1189             || !strcmp(old_page_chr, "\033g")
1190             || !strcmp(old_page_chr, "\033b"))
1191         {
1192             /* Technique 1 leave */
1193             page_out = "\033s";
1194             if (strcmp(page_chr, "\033(B")) /* Not going ASCII page? */
1195             {
1196                 /* Must leave script + enter new page */
1197                 plen = strlen(page_out);
1198                 memcpy(*outbuf, page_out, plen);
1199                 (*outbuf) += plen;
1200                 (*outbytesleft) -= plen;
1201                 page_out = page_chr;
1202             }
1203         }
1204         plen = strlen(page_out);
1205         memcpy(*outbuf, page_out, plen);
1206         (*outbuf) += plen;
1207         (*outbytesleft) -= plen;
1208     }
1209     return 0;
1210 }
1211
1212
1213 static size_t yaz_write_marc8_2(yaz_iconv_t cd, unsigned long x,
1214                                 char **outbuf, size_t *outbytesleft,
1215                                 int last)
1216 {
1217     int comb = 0;
1218     const char *page_chr = 0;
1219     unsigned long y = lookup_marc8(cd, x, &comb, &page_chr);
1220
1221     if (!y)
1222         return (size_t) (-1);
1223
1224     if (comb)
1225     {
1226         if (x == 0x0361)
1227             cd->write_marc8_second_half_char = 0xEC;
1228         else if (x == 0x0360)
1229             cd->write_marc8_second_half_char = 0xFB;
1230
1231         if (cd->write_marc8_comb_no < 6)
1232             cd->write_marc8_comb_ch[cd->write_marc8_comb_no++] = y;
1233     }
1234     else
1235     {
1236         size_t r = flush_combos(cd, outbuf, outbytesleft);
1237         if (r)
1238             return r;
1239
1240         r = yaz_write_marc8_page_chr(cd, outbuf, outbytesleft, page_chr);
1241         if (r)
1242             return r;
1243         cd->write_marc8_last = y;
1244     }
1245     if (last)
1246     {
1247         size_t r = flush_combos(cd, outbuf, outbytesleft);
1248         if (r)
1249         {
1250             if (comb)
1251                 cd->write_marc8_comb_no--;
1252             else
1253                 cd->write_marc8_last = 0;
1254             return r;
1255         }
1256     }
1257     return 0;
1258 }
1259
1260 static size_t yaz_flush_marc8(yaz_iconv_t cd,
1261                               char **outbuf, size_t *outbytesleft)
1262 {
1263     size_t r = flush_combos(cd, outbuf, outbytesleft);
1264     if (r)
1265         return r;
1266     return yaz_write_marc8_page_chr(cd, outbuf, outbytesleft, "\033(B");
1267 }
1268
1269 static size_t yaz_write_marc8(yaz_iconv_t cd, unsigned long x,
1270                               char **outbuf, size_t *outbytesleft,
1271                               int last)
1272 {
1273     int i;
1274     for (i = 0; latin1_comb[i].x1; i++)
1275     {
1276         if (x == latin1_comb[i].y)
1277         {
1278             size_t r ;
1279             /* save the output pointers .. */
1280             char *outbuf0 = *outbuf;
1281             size_t outbytesleft0 = *outbytesleft;
1282             int last_ch = cd->write_marc8_last;
1283
1284             r = yaz_write_marc8_2(cd, latin1_comb[i].x1,
1285                                   outbuf, outbytesleft, 0);
1286             if (r)
1287                 return r;
1288             r = yaz_write_marc8_2(cd, latin1_comb[i].x2,
1289                                   outbuf, outbytesleft, last);
1290             if (r && cd->my_errno == YAZ_ICONV_E2BIG)
1291             {
1292                 /* not enough room. reset output to original values */
1293                 *outbuf = outbuf0;
1294                 *outbytesleft = outbytesleft0;
1295                 cd->write_marc8_last = last_ch;
1296             }
1297             return r;
1298         }
1299     }
1300     return yaz_write_marc8_2(cd, x, outbuf, outbytesleft, last);
1301 }
1302
1303
#if HAVE_WCHAR_H
/** \brief write_handle that emits wchar_t values in host byte layout.
 *
 * Converts \a x to wchar_t and copies its object representation to the
 * output. The \a last flag is not used.
 * \returns 0 on success; (size_t)(-1) with cd->my_errno set to
 *  YAZ_ICONV_E2BIG when fewer than sizeof(wchar_t) bytes are free
 */
static size_t yaz_write_wchar_t (yaz_iconv_t cd, unsigned long x,
                                 char **outbuf, size_t *outbytesleft,
                                 int last)
{
    wchar_t wide;

    if (*outbytesleft < sizeof(wchar_t))
    {
        cd->my_errno = YAZ_ICONV_E2BIG;
        return (size_t)(-1);
    }
    wide = x;
    /* memcpy: the output position need not be aligned for wchar_t */
    memcpy(*outbuf, &wide, sizeof(wide));
    *outbuf += sizeof(wide);
    (*outbytesleft) -= sizeof(wide);
    return 0;
}
#endif
1327
1328 int yaz_iconv_isbuiltin(yaz_iconv_t cd)
1329 {
1330     return cd->read_handle && cd->write_handle;
1331 }
1332
1333 yaz_iconv_t yaz_iconv_open (const char *tocode, const char *fromcode)
1334 {
1335     yaz_iconv_t cd = (yaz_iconv_t) xmalloc (sizeof(*cd));
1336
1337     cd->write_handle = 0;
1338     cd->read_handle = 0;
1339     cd->init_handle = 0;
1340     cd->flush_handle = 0;
1341     cd->my_errno = YAZ_ICONV_UNKNOWN;
1342
1343     /* a useful hack: if fromcode has leading @,
1344        the library not use YAZ's own conversions .. */
1345     if (fromcode[0] == '@')
1346         fromcode++;
1347     else
1348     {
1349         if (!yaz_matchstr(fromcode, "UTF8"))
1350         {
1351             cd->read_handle = yaz_read_UTF8;
1352             cd->init_handle = yaz_init_UTF8;
1353         }
1354         else if (!yaz_matchstr(fromcode, "ISO88591"))
1355             cd->read_handle = yaz_read_ISO8859_1;
1356         else if (!yaz_matchstr(fromcode, "UCS4"))
1357             cd->read_handle = yaz_read_UCS4;
1358         else if (!yaz_matchstr(fromcode, "UCS4LE"))
1359             cd->read_handle = yaz_read_UCS4LE;
1360         else if (!yaz_matchstr(fromcode, "MARC8"))
1361             cd->read_handle = yaz_read_marc8;
1362         else if (!yaz_matchstr(fromcode, "MARC8s"))
1363             cd->read_handle = yaz_read_marc8s;
1364         else if (!yaz_matchstr(fromcode, "advancegreek"))
1365             cd->read_handle = yaz_read_advancegreek;
1366 #if HAVE_WCHAR_H
1367         else if (!yaz_matchstr(fromcode, "WCHAR_T"))
1368             cd->read_handle = yaz_read_wchar_t;
1369 #endif
1370         
1371         if (!yaz_matchstr(tocode, "UTF8"))
1372             cd->write_handle = yaz_write_UTF8;
1373         else if (!yaz_matchstr(tocode, "ISO88591"))
1374             cd->write_handle = yaz_write_ISO8859_1;
1375         else if (!yaz_matchstr (tocode, "UCS4"))
1376             cd->write_handle = yaz_write_UCS4;
1377         else if (!yaz_matchstr(tocode, "UCS4LE"))
1378             cd->write_handle = yaz_write_UCS4LE;
1379         else if (!yaz_matchstr(tocode, "MARC8"))
1380         {
1381             cd->write_handle = yaz_write_marc8;
1382             cd->flush_handle = yaz_flush_marc8;
1383         }
1384         else if (!yaz_matchstr(tocode, "MARC8s"))
1385         {
1386             cd->write_handle = yaz_write_marc8;
1387             cd->flush_handle = yaz_flush_marc8;
1388         }
1389         else if (!yaz_matchstr(tocode, "advancegreek"))
1390         {
1391             cd->write_handle = yaz_write_advancegreek;
1392         }
1393 #if HAVE_WCHAR_H
1394         else if (!yaz_matchstr(tocode, "WCHAR_T"))
1395             cd->write_handle = yaz_write_wchar_t;
1396 #endif
1397     }
1398 #if HAVE_ICONV_H
1399     cd->iconv_cd = 0;
1400     if (!cd->read_handle || !cd->write_handle)
1401     {
1402         cd->iconv_cd = iconv_open (tocode, fromcode);
1403         if (cd->iconv_cd == (iconv_t) (-1))
1404         {
1405             xfree (cd);
1406             return 0;
1407         }
1408     }
1409 #else
1410     if (!cd->read_handle || !cd->write_handle)
1411     {
1412         xfree (cd);
1413         return 0;
1414     }
1415 #endif
1416     cd->init_flag = 1;
1417     return cd;
1418 }
1419
1420 size_t yaz_iconv(yaz_iconv_t cd, char **inbuf, size_t *inbytesleft,
1421                  char **outbuf, size_t *outbytesleft)
1422 {
1423     char *inbuf0 = 0;
1424     size_t r = 0;
1425
1426 #if HAVE_ICONV_H
1427     if (cd->iconv_cd)
1428     {
1429         size_t r =
1430             iconv(cd->iconv_cd, inbuf, inbytesleft, outbuf, outbytesleft);
1431         if (r == (size_t)(-1))
1432         {
1433             switch (yaz_errno())
1434             {
1435             case E2BIG:
1436                 cd->my_errno = YAZ_ICONV_E2BIG;
1437                 break;
1438             case EINVAL:
1439                 cd->my_errno = YAZ_ICONV_EINVAL;
1440                 break;
1441             case EILSEQ:
1442                 cd->my_errno = YAZ_ICONV_EILSEQ;
1443                 break;
1444             default:
1445                 cd->my_errno = YAZ_ICONV_UNKNOWN;
1446             }
1447         }
1448         return r;
1449     }
1450 #endif
1451
1452     if (inbuf)
1453         inbuf0 = *inbuf;
1454
1455     if (cd->init_flag)
1456     {
1457         cd->my_errno = YAZ_ICONV_UNKNOWN;
1458         cd->marc8_esc_mode = 'B';
1459         
1460         cd->comb_offset = cd->comb_size = 0;
1461         cd->compose_char = 0;
1462         
1463         cd->write_marc8_comb_no = 0;
1464         cd->write_marc8_second_half_char = 0;
1465         cd->write_marc8_last = 0;
1466         cd->write_marc8_page_chr = "\033(B";
1467         
1468         cd->unget_x = 0;
1469         cd->no_read_x = 0;
1470     }
1471
1472     if (cd->init_flag)
1473     {
1474         if (cd->init_handle && inbuf && *inbuf)
1475         {
1476             size_t no_read = 0;
1477             size_t r = (cd->init_handle)(cd, (unsigned char *) *inbuf,
1478                                          *inbytesleft, &no_read);
1479             if (r)
1480             {
1481                 if (cd->my_errno == YAZ_ICONV_EINVAL)
1482                     return r;
1483                 cd->init_flag = 0;
1484                 return r;
1485             }
1486             *inbytesleft -= no_read;
1487             *inbuf += no_read;
1488         }
1489     }
1490     cd->init_flag = 0;
1491
1492     while (1)
1493     {
1494         unsigned long x;
1495         size_t no_read;
1496
1497         if (cd->unget_x)
1498         {
1499             x = cd->unget_x;
1500             no_read = cd->no_read_x;
1501         }
1502         else if (inbuf && *inbuf)
1503         {
1504             if (*inbytesleft == 0)
1505             {
1506                 r = *inbuf - inbuf0;
1507                 break;
1508             }
1509             x = (cd->read_handle)(cd, (unsigned char *) *inbuf, *inbytesleft,
1510                                   &no_read);
1511             if (no_read == 0)
1512             {
1513                 r = (size_t)(-1);
1514                 break;
1515             }
1516         }
1517         else
1518         {
1519             r = 0;
1520             if (cd->flush_handle && outbuf && *outbuf)
1521                 r = (*cd->flush_handle)(cd, outbuf, outbytesleft);
1522             if (r == 0)
1523                 cd->init_flag = 1;
1524             break;
1525         }
1526         if (x)
1527         {
1528             r = (cd->write_handle)(cd, x, outbuf, outbytesleft,
1529                                    (*inbytesleft - no_read) == 0 ? 1 : 0);
1530             if (r)
1531             {
1532                 /* unable to write it. save it because read_handle cannot
1533                    rewind .. */
1534                 if (cd->my_errno == YAZ_ICONV_E2BIG)
1535                 {
1536                     cd->unget_x = x;
1537                     cd->no_read_x = no_read;
1538                     break;
1539                 }
1540             }
1541             cd->unget_x = 0;
1542         }
1543         *inbytesleft -= no_read;
1544         (*inbuf) += no_read;
1545     }
1546     return r;
1547 }
1548
1549 int yaz_iconv_error (yaz_iconv_t cd)
1550 {
1551     return cd->my_errno;
1552 }
1553
1554 int yaz_iconv_close (yaz_iconv_t cd)
1555 {
1556 #if HAVE_ICONV_H
1557     if (cd->iconv_cd)
1558         iconv_close (cd->iconv_cd);
1559 #endif
1560     xfree (cd);
1561     return 0;
1562 }
1563
1564 /*
1565  * Local variables:
1566  * c-basic-offset: 4
1567  * indent-tabs-mode: nil
1568  * End:
1569  * vim: shiftwidth=4 tabstop=8 expandtab
1570  */
1571