Added cs_get_SSL. yaz-client-ssl prints peer info
[yaz-moved-to-github.git] / src / siconv.c
1 /*
2  * Copyright (c) 1997-2004, Index Data
3  * See the file LICENSE for details.
4  *
5  * $Id: siconv.c,v 1.5 2004-03-16 13:12:43 adam Exp $
6  */
7
8 /* mini iconv and wrapper for system iconv library (if present) */
9
10 #if HAVE_CONFIG_H
11 #include <config.h>
12 #endif
13
14 #include <errno.h>
15 #include <string.h>
16 #include <ctype.h>
17 #if HAVE_WCHAR_H
18 #include <wchar.h>
19 #endif
20
21 #if HAVE_ICONV_H
22 #include <iconv.h>
23 #endif
24
25 #include <yaz/yaz-util.h>
26
27 unsigned long yaz_marc8_conv (unsigned char *inp, size_t inbytesleft,
28                               size_t *no_read);
29 unsigned long yaz_marc8_2_conv (unsigned char *inp, size_t inbytesleft,
30                                 size_t *no_read);
31 unsigned long yaz_marc8_3_conv (unsigned char *inp, size_t inbytesleft,
32                                 size_t *no_read);
33 unsigned long yaz_marc8_4_conv (unsigned char *inp, size_t inbytesleft,
34                                 size_t *no_read);
35 unsigned long yaz_marc8_5_conv (unsigned char *inp, size_t inbytesleft,
36                                 size_t *no_read);
37 unsigned long yaz_marc8_6_conv (unsigned char *inp, size_t inbytesleft,
38                                 size_t *no_read);
39 unsigned long yaz_marc8_7_conv (unsigned char *inp, size_t inbytesleft,
40                                 size_t *no_read);
41 unsigned long yaz_marc8_8_conv (unsigned char *inp, size_t inbytesleft,
42                                 size_t *no_read);
43 unsigned long yaz_marc8_9_conv (unsigned char *inp, size_t inbytesleft,
44                                 size_t *no_read);
45     
46 struct yaz_iconv_struct {
47     int my_errno;
48     int init_flag;
49     size_t (*init_handle)(yaz_iconv_t cd, unsigned char *inbuf,
50                           size_t inbytesleft, size_t *no_read);
51     unsigned long (*read_handle)(yaz_iconv_t cd, unsigned char *inbuf,
52                                  size_t inbytesleft, size_t *no_read);
53     size_t (*write_handle)(yaz_iconv_t cd, unsigned long x,
54                            char **outbuf, size_t *outbytesleft);
55     int marc8_esc_mode;
56 #if HAVE_ICONV_H
57     iconv_t iconv_cd;
58 #endif
59 };
60
61 static unsigned long yaz_read_ISO8859_1 (yaz_iconv_t cd, unsigned char *inp,
62                                          size_t inbytesleft, size_t *no_read)
63 {
64     unsigned long x = inp[0];
65     *no_read = 1;
66     return x;
67 }
68
69 static size_t yaz_init_UTF8 (yaz_iconv_t cd, unsigned char *inp,
70                              size_t inbytesleft, size_t *no_read)
71 {
72     if (inp[0] != 0xef)
73     {
74         *no_read = 0;
75         return 0;
76     }
77     if (inbytesleft < 3)
78     {
79         cd->my_errno = YAZ_ICONV_EINVAL;
80         return (size_t) -1;
81     }
82     if (inp[1] != 0xbb || inp[2] != 0xbf)
83     {
84         cd->my_errno = YAZ_ICONV_EILSEQ;
85         return (size_t) -1;
86     }
87     *no_read = 3;
88     return 0;
89 }
90
91 static unsigned long yaz_read_UTF8 (yaz_iconv_t cd, unsigned char *inp,
92                                     size_t inbytesleft, size_t *no_read)
93 {
94     unsigned long x = 0;
95
96     if (inp[0] <= 0x7f)
97     {
98         x = inp[0];
99         *no_read = 1;
100     }
101     else if (inp[0] <= 0xbf || inp[0] >= 0xfe)
102     {
103         *no_read = 0;
104         cd->my_errno = YAZ_ICONV_EILSEQ;
105     }
106     else if (inp[0] <= 0xdf && inbytesleft >= 2)
107     {
108         x = ((inp[0] & 0x1f) << 6) | (inp[1] & 0x3f);
109         if (x >= 0x80)
110             *no_read = 2;
111         else
112         {
113             *no_read = 0;
114             cd->my_errno = YAZ_ICONV_EILSEQ;
115         }
116     }
117     else if (inp[0] <= 0xef && inbytesleft >= 3)
118     {
119         x = ((inp[0] & 0x0f) << 12) | ((inp[1] & 0x3f) << 6) |
120             (inp[1] & 0x3f);
121         if (x >= 0x800)
122             *no_read = 3;
123         else
124         {
125             *no_read = 0;
126             cd->my_errno = YAZ_ICONV_EILSEQ;
127         }
128     }
129     else if (inp[0] <= 0xf7 && inbytesleft >= 4)
130     {
131         x =  ((inp[0] & 0x07) << 18) | ((inp[1] & 0x3f) << 12) |
132             ((inp[2] & 0x3f) << 6) | (inp[3] & 0x3f);
133         if (x >= 0x10000)
134             *no_read = 4;
135         else
136         {
137             *no_read = 0;
138             cd->my_errno = YAZ_ICONV_EILSEQ;
139         }
140     }
141     else if (inp[0] <= 0xfb && inbytesleft >= 5)
142     {
143         x =  ((inp[0] & 0x03) << 24) | ((inp[1] & 0x3f) << 18) |
144             ((inp[2] & 0x3f) << 12) | ((inp[3] & 0x3f) << 6) |
145             (inp[4] & 0x3f);
146         if (x >= 0x200000)
147             *no_read = 5;
148         else
149         {
150             *no_read = 0;
151             cd->my_errno = YAZ_ICONV_EILSEQ;
152         }
153     }
154     else if (inp[0] <= 0xfd && inbytesleft >= 6)
155     {
156         x =  ((inp[0] & 0x01) << 30) | ((inp[1] & 0x3f) << 24) |
157             ((inp[2] & 0x3f) << 18) | ((inp[3] & 0x3f) << 12) |
158             ((inp[4] & 0x3f) << 6) | (inp[5] & 0x3f);
159         if (x >= 0x4000000)
160             *no_read = 6;
161         else
162         {
163             *no_read = 0;
164             cd->my_errno = YAZ_ICONV_EILSEQ;
165         }
166     }
167     else
168     {
169         *no_read = 0;
170         cd->my_errno = YAZ_ICONV_EINVAL;
171     }
172     return x;
173 }
174
175 static unsigned long yaz_read_UCS4 (yaz_iconv_t cd, unsigned char *inp,
176                                     size_t inbytesleft, size_t *no_read)
177 {
178     unsigned long x = 0;
179     
180     if (inbytesleft < 4)
181     {
182         cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
183         *no_read = 0;
184     }
185     else
186     {
187         x = (inp[0]<<24) | (inp[1]<<16) | (inp[2]<<8) | inp[3];
188         *no_read = 4;
189     }
190     return x;
191 }
192
193 static unsigned long yaz_read_UCS4LE (yaz_iconv_t cd, unsigned char *inp,
194                                       size_t inbytesleft, size_t *no_read)
195 {
196     unsigned long x = 0;
197     
198     if (inbytesleft < 4)
199     {
200         cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
201         *no_read = 0;
202     }
203     else
204     {
205         x = (inp[3]<<24) | (inp[2]<<16) | (inp[1]<<8) | inp[0];
206         *no_read = 4;
207     }
208     return x;
209 }
210
#if HAVE_WCHAR_H
/*
 * Decode one wchar_t stored in native byte order at inp.
 *
 * On success *no_read is sizeof(wchar_t) and the value is returned.
 * With fewer bytes available *no_read is 0, 0 is returned and
 * cd->my_errno is set to YAZ_ICONV_EINVAL.
 */
static unsigned long yaz_read_wchar_t (yaz_iconv_t cd, unsigned char *inp,
                                       size_t inbytesleft, size_t *no_read)
{
    wchar_t wch;

    if (inbytesleft < sizeof(wch))
    {
        /* incomplete input */
        *no_read = 0;
        cd->my_errno = YAZ_ICONV_EINVAL;
        return 0;
    }
    /* memcpy rather than a pointer cast: inp need not be aligned */
    memcpy (&wch, inp, sizeof(wch));
    *no_read = sizeof(wch);
    return (unsigned long) wch;
}
#endif
232
233 static unsigned long yaz_read_marc8 (yaz_iconv_t cd, unsigned char *inp,
234                                      size_t inbytesleft, size_t *no_read)
235 {
236     *no_read = 0;
237     while(inbytesleft >= 1 && inp[0] == 27)
238     {
239         size_t inbytesleft0 = inbytesleft;
240         inp++;
241         inbytesleft--;
242         while(inbytesleft > 0 && strchr("(,$!", *inp))
243         {
244             inbytesleft--;
245             inp++;
246         }
247         if (inbytesleft <= 0)
248         {
249             *no_read = 0;
250             cd->my_errno = YAZ_ICONV_EINVAL;
251             return 0;
252         }
253         cd->marc8_esc_mode = *inp++;
254         inbytesleft--;
255         (*no_read) += inbytesleft0 - inbytesleft;
256     }
257     if (inbytesleft <= 0)
258         return 0;
259     else
260     {
261         unsigned long x;
262         size_t no_read_sub = 0;
263
264         switch(cd->marc8_esc_mode)
265         {
266         case 'B':  /* Basic ASCII */
267         case 'E':  /* ANSEL */
268         case 's':  /* ASCII */
269             x = yaz_marc8_conv(inp, inbytesleft, &no_read_sub);
270             break;
271         case 'g':  /* Greek */
272             x = yaz_marc8_2_conv(inp, inbytesleft, &no_read_sub);
273             break;
274         case 'b':  /* Subscripts */
275             x = yaz_marc8_3_conv(inp, inbytesleft, &no_read_sub);
276             break;
277         case 'p':  /* Superscripts */
278             x = yaz_marc8_4_conv(inp, inbytesleft, &no_read_sub);
279             break;
280         case '2':  /* Basic Hebrew */
281             x = yaz_marc8_5_conv(inp, inbytesleft, &no_read_sub);
282             break;
283         case 'N':  /* Basic Cyrillic */
284         case 'Q':  /* Extended Cyrillic */
285             x = yaz_marc8_6_conv(inp, inbytesleft, &no_read_sub);
286             break;
287         case '3':  /* Basic Arabic */
288         case '4':  /* Extended Arabic */
289             x = yaz_marc8_7_conv(inp, inbytesleft, &no_read_sub);
290             break;
291         case 'S':  /* Greek */
292             x = yaz_marc8_8_conv(inp, inbytesleft, &no_read_sub);
293             break;
294         case '1':  /* Chinese, Japanese, Korean (EACC) */
295             x = yaz_marc8_9_conv(inp, inbytesleft, &no_read_sub);
296             break;
297         default:
298             *no_read = 0;
299             cd->my_errno = YAZ_ICONV_EILSEQ;
300             return 0;
301         }
302         *no_read += no_read_sub;
303         return x;
304     }
305 }
306
307 static size_t yaz_write_UTF8 (yaz_iconv_t cd, unsigned long x,
308                               char **outbuf, size_t *outbytesleft)
309 {
310     unsigned char *outp = (unsigned char *) *outbuf;
311     if (x <= 0x7f && *outbytesleft >= 1)
312     {
313         *outp++ = (unsigned char) x;
314         (*outbytesleft)--;
315     } 
316     else if (x <= 0x7ff && *outbytesleft >= 2)
317     {
318         *outp++ = (unsigned char) ((x >> 6) | 0xc0);
319         *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
320         (*outbytesleft) -= 2;
321     }
322     else if (x <= 0xffff && *outbytesleft >= 3)
323     {
324         *outp++ = (unsigned char) ((x >> 12) | 0xe0);
325         *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
326         *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
327         (*outbytesleft) -= 3;
328     }
329     else if (x <= 0x1fffff && *outbytesleft >= 4)
330     {
331         *outp++ = (unsigned char) ((x >> 18) | 0xf0);
332         *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
333         *outp++ = (unsigned char) (((x >> 6)  & 0x3f) | 0x80);
334         *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
335         (*outbytesleft) -= 4;
336     }
337     else if (x <= 0x3ffffff && *outbytesleft >= 5)
338     {
339         *outp++ = (unsigned char) ((x >> 24) | 0xf8);
340         *outp++ = (unsigned char) (((x >> 18) & 0x3f) | 0x80);
341         *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
342         *outp++ = (unsigned char) (((x >> 6)  & 0x3f) | 0x80);
343         *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
344         (*outbytesleft) -= 5;
345     }
346     else if (*outbytesleft >= 6)
347     {
348         *outp++ = (unsigned char) ((x >> 30) | 0xfc);
349         *outp++ = (unsigned char) (((x >> 24) & 0x3f) | 0x80);
350         *outp++ = (unsigned char) (((x >> 18) & 0x3f) | 0x80);
351         *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
352         *outp++ = (unsigned char) (((x >> 6)  & 0x3f) | 0x80);
353         *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
354         (*outbytesleft) -= 6;
355     }
356     else 
357     {
358         cd->my_errno = YAZ_ICONV_E2BIG;  /* not room for output */
359         return (size_t)(-1);
360     }
361     *outbuf = (char *) outp;
362     return 0;
363 }
364
365 static size_t yaz_write_ISO8859_1 (yaz_iconv_t cd, unsigned long x,
366                                    char **outbuf, size_t *outbytesleft)
367 {
368     unsigned char *outp = (unsigned char *) *outbuf;
369     if (x > 255 || x < 1)
370     {
371         cd->my_errno = YAZ_ICONV_EILSEQ;
372         return (size_t) -1;
373     }
374     else if (*outbytesleft >= 1)
375     {
376         *outp++ = (unsigned char) x;
377         (*outbytesleft)--;
378     }
379     else 
380     {
381         cd->my_errno = YAZ_ICONV_E2BIG;
382         return (size_t)(-1);
383     }
384     *outbuf = (char *) outp;
385     return 0;
386 }
387
388
389 static size_t yaz_write_UCS4 (yaz_iconv_t cd, unsigned long x,
390                               char **outbuf, size_t *outbytesleft)
391 {
392     unsigned char *outp = (unsigned char *) *outbuf;
393     if (*outbytesleft >= 4)
394     {
395         *outp++ = (unsigned char) (x>>24);
396         *outp++ = (unsigned char) (x>>16);
397         *outp++ = (unsigned char) (x>>8);
398         *outp++ = (unsigned char) x;
399         (*outbytesleft) -= 4;
400     }
401     else
402     {
403         cd->my_errno = YAZ_ICONV_E2BIG;
404         return (size_t)(-1);
405     }
406     *outbuf = (char *) outp;
407     return 0;
408 }
409
410 static size_t yaz_write_UCS4LE (yaz_iconv_t cd, unsigned long x,
411                                 char **outbuf, size_t *outbytesleft)
412 {
413     unsigned char *outp = (unsigned char *) *outbuf;
414     if (*outbytesleft >= 4)
415     {
416         *outp++ = (unsigned char) x;
417         *outp++ = (unsigned char) (x>>8);
418         *outp++ = (unsigned char) (x>>16);
419         *outp++ = (unsigned char) (x>>24);
420         (*outbytesleft) -= 4;
421     }
422     else
423     {
424         cd->my_errno = YAZ_ICONV_E2BIG;
425         return (size_t)(-1);
426     }
427     *outbuf = (char *) outp;
428     return 0;
429 }
430
#if HAVE_WCHAR_H
/*
 * Store x as one wchar_t in native byte order at *outbuf.
 *
 * Returns 0 on success, advancing *outbuf and reducing *outbytesleft by
 * sizeof(wchar_t).  Returns (size_t) -1 with cd->my_errno =
 * YAZ_ICONV_E2BIG when the output buffer is too small.
 */
static size_t yaz_write_wchar_t (yaz_iconv_t cd, unsigned long x,
                                 char **outbuf, size_t *outbytesleft)
{
    unsigned char *dst = (unsigned char *) *outbuf;
    wchar_t wch;

    if (*outbytesleft < sizeof(wch))
    {
        cd->my_errno = YAZ_ICONV_E2BIG;
        return (size_t)(-1);
    }
    wch = x;
    /* memcpy rather than a pointer cast: the output need not be aligned */
    memcpy(dst, &wch, sizeof(wch));
    (*outbytesleft) -= sizeof(wch);
    *outbuf = (char *) (dst + sizeof(wch));
    return 0;
}
#endif
453
454 int yaz_iconv_isbuiltin(yaz_iconv_t cd)
455 {
456     return cd->read_handle && cd->write_handle;
457 }
458
459 yaz_iconv_t yaz_iconv_open (const char *tocode, const char *fromcode)
460 {
461     yaz_iconv_t cd = (yaz_iconv_t) xmalloc (sizeof(*cd));
462
463     cd->write_handle = 0;
464     cd->read_handle = 0;
465     cd->init_handle = 0;
466     cd->my_errno = YAZ_ICONV_UNKNOWN;
467     cd->marc8_esc_mode = 'B';
468
469     /* a useful hack: if fromcode has leading @,
470        the library not use YAZ's own conversions .. */
471     if (fromcode[0] == '@')
472         fromcode++;
473     else
474     {
475         if (!yaz_matchstr(fromcode, "UTF8"))
476         {
477             cd->read_handle = yaz_read_UTF8;
478             cd->init_handle = yaz_init_UTF8;
479         }
480         else if (!yaz_matchstr(fromcode, "ISO88591"))
481             cd->read_handle = yaz_read_ISO8859_1;
482         else if (!yaz_matchstr(fromcode, "UCS4"))
483             cd->read_handle = yaz_read_UCS4;
484         else if (!yaz_matchstr(fromcode, "UCS4LE"))
485             cd->read_handle = yaz_read_UCS4LE;
486         else if (!yaz_matchstr(fromcode, "MARC8"))
487             cd->read_handle = yaz_read_marc8;
488 #if HAVE_WCHAR_H
489         else if (!yaz_matchstr(fromcode, "WCHAR_T"))
490             cd->read_handle = yaz_read_wchar_t;
491 #endif
492         
493         if (!yaz_matchstr(tocode, "UTF8"))
494             cd->write_handle = yaz_write_UTF8;
495         else if (!yaz_matchstr(tocode, "ISO88591"))
496             cd->write_handle = yaz_write_ISO8859_1;
497         else if (!yaz_matchstr (tocode, "UCS4"))
498             cd->write_handle = yaz_write_UCS4;
499         else if (!yaz_matchstr(tocode, "UCS4LE"))
500             cd->write_handle = yaz_write_UCS4LE;
501 #if HAVE_WCHAR_H
502         else if (!yaz_matchstr(tocode, "WCHAR_T"))
503             cd->write_handle = yaz_write_wchar_t;
504 #endif
505     }
506 #if HAVE_ICONV_H
507     cd->iconv_cd = 0;
508     if (!cd->read_handle || !cd->write_handle)
509     {
510         cd->iconv_cd = iconv_open (tocode, fromcode);
511         if (cd->iconv_cd == (iconv_t) (-1))
512         {
513             xfree (cd);
514             return 0;
515         }
516     }
517 #else
518     if (!cd->read_handle || !cd->write_handle)
519     {
520         xfree (cd);
521         return 0;
522     }
523 #endif
524     cd->init_flag = 1;
525     return cd;
526 }
527
528 size_t yaz_iconv (yaz_iconv_t cd, char **inbuf, size_t *inbytesleft,
529                   char **outbuf, size_t *outbytesleft)
530 {
531     char *inbuf0;
532     size_t r = 0;
533 #if HAVE_ICONV_H
534     if (cd->iconv_cd)
535     {
536         size_t r =
537             iconv(cd->iconv_cd, inbuf, inbytesleft, outbuf, outbytesleft);
538         if (r == (size_t)(-1))
539         {
540             switch (yaz_errno())
541             {
542             case E2BIG:
543                 cd->my_errno = YAZ_ICONV_E2BIG;
544                 break;
545             case EINVAL:
546                 cd->my_errno = YAZ_ICONV_EINVAL;
547                 break;
548             case EILSEQ:
549                 cd->my_errno = YAZ_ICONV_EILSEQ;
550                 break;
551             default:
552                 cd->my_errno = YAZ_ICONV_UNKNOWN;
553             }
554         }
555         return r;
556     }
557 #endif
558     if (inbuf == 0 || *inbuf == 0)
559     {
560         cd->init_flag = 1;
561         cd->my_errno = YAZ_ICONV_UNKNOWN;
562         return 0;
563     }
564     inbuf0 = *inbuf;
565
566     if (cd->init_flag)
567     {
568         if (cd->init_handle)
569         {
570             size_t no_read;
571             size_t r = (cd->init_handle)(cd, (unsigned char *) *inbuf,
572                                          *inbytesleft, &no_read);
573             if (r)
574             {
575                 if (cd->my_errno == YAZ_ICONV_EINVAL)
576                     return r;
577                 cd->init_flag = 0;
578                 return r;
579             }
580             *inbytesleft -= no_read;
581             *inbuf += no_read;
582         }
583         cd->init_flag = 0;
584     }
585     while (1)
586     {
587         unsigned long x;
588         size_t no_read;
589
590         if (*inbytesleft == 0)
591         {
592             r = *inbuf - inbuf0;
593             break;
594         }
595         
596         x = (cd->read_handle)(cd, (unsigned char *) *inbuf, *inbytesleft,
597                               &no_read);
598         if (no_read == 0)
599         {
600             r = (size_t)(-1);
601             break;
602         }
603         if (x)
604         {
605             r = (cd->write_handle)(cd, x, outbuf, outbytesleft);
606             if (r)
607                 break;
608         }
609         *inbytesleft -= no_read;
610         (*inbuf) += no_read;
611     }
612     return r;
613 }
614
615 int yaz_iconv_error (yaz_iconv_t cd)
616 {
617     return cd->my_errno;
618 }
619
620 int yaz_iconv_close (yaz_iconv_t cd)
621 {
622 #if HAVE_ICONV_H
623     if (cd->iconv_cd)
624         iconv_close (cd->iconv_cd);
625 #endif
626     xfree (cd);
627     return 0;
628 }
629
630