X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=util%2Fsiconv.c;h=73d7148c06942fe3629c78419fd5f1875cfb5128;hb=c39a893dfdae5f792139177132e7e7a70e010aa7;hp=a40cc0c199bb0e784f29fcb95be00718d097619c;hpb=a19b3326f39623ae79b6679c010c8db04c22fdf4;p=yaz-moved-to-github.git diff --git a/util/siconv.c b/util/siconv.c index a40cc0c..73d7148 100644 --- a/util/siconv.c +++ b/util/siconv.c @@ -1,8 +1,8 @@ /* - * Copyright (c) 1997-2002, Index Data + * Copyright (c) 1997-2003, Index Data * See the file LICENSE for details. * - * $Id: siconv.c,v 1.3 2002-08-28 19:34:36 adam Exp $ + * $Id: siconv.c,v 1.9 2003-01-06 08:20:28 adam Exp $ */ /* mini iconv and wrapper for system iconv library (if present) */ @@ -14,6 +14,9 @@ #include #include #include +#if HAVE_WCHAR_H +#include +#endif #if HAVE_ICONV_H #include @@ -21,6 +24,9 @@ #include +unsigned long yaz_marc8_conv (unsigned char *inp, size_t inbytesleft, + size_t *no_read); + struct yaz_iconv_struct { int my_errno; int init_flag; @@ -185,53 +191,81 @@ static unsigned long yaz_read_UCS4LE (yaz_iconv_t cd, unsigned char *inp, return x; } +#if HAVE_WCHAR_H +static unsigned long yaz_read_wchar_t (yaz_iconv_t cd, unsigned char *inp, + size_t inbytesleft, size_t *no_read) +{ + unsigned long x = 0; + + if (inbytesleft < sizeof(wchar_t)) + { + cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */ + *no_read = 0; + } + else + { + wchar_t wch; + memcpy (&wch, inp, sizeof(wch)); + x = wch; + *no_read = sizeof(wch); + } + return x; +} +#endif + +static unsigned long yaz_read_marc8 (yaz_iconv_t cd, unsigned char *inp, + size_t inbytesleft, size_t *no_read) +{ + return yaz_marc8_conv(inp, inbytesleft, no_read); +} + static size_t yaz_write_UTF8 (yaz_iconv_t cd, unsigned long x, char **outbuf, size_t *outbytesleft) { - unsigned char *outp = *outbuf; + unsigned char *outp = (unsigned char *) *outbuf; if (x <= 0x7f && *outbytesleft >= 1) { - *outp++ = x; + *outp++ = (unsigned char) x; (*outbytesleft)--; } else if (x <= 0x7ff && *outbytesleft >= 2) { - *outp++ = (x >> 6) | 0xc0; - *outp++ = (x & 0x3f) | 0x80; + *outp++ = (unsigned char) ((x >> 6) | 0xc0); + *outp++ = (unsigned char) ((x & 0x3f) | 0x80); (*outbytesleft) -= 2; } else if (x <= 0xffff && *outbytesleft >= 3) { - *outp++ = (x >> 12) | 0xe0; - *outp++ = ((x >> 6) & 0x3f) | 0x80; - *outp++ = (x & 0x3f) | 0x80; + *outp++ = (unsigned char) ((x >> 12) | 0xe0); + *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80); + *outp++ = (unsigned char) ((x & 0x3f) | 0x80); (*outbytesleft) -= 3; } else if (x <= 0x1fffff && *outbytesleft >= 4) { - *outp++ = (x >> 18) | 0xf0; - *outp++ = ((x >> 12) & 0x3f) | 0x80; - *outp++ = ((x >> 6) & 0x3f) | 0x80; - *outp++ = (x & 0x3f) | 0x80; + *outp++ = (unsigned char) ((x >> 18) | 0xf0); + *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80); + *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80); + *outp++ = (unsigned char) ((x & 0x3f) | 0x80); (*outbytesleft) -= 4; } else if (x <= 0x3ffffff && *outbytesleft >= 5) { - *outp++ = (x >> 24) | 0xf8; - *outp++ = ((x >> 18) & 0x3f) | 0x80; - *outp++ = ((x >> 12) & 0x3f) | 0x80; - *outp++ = ((x >> 6) & 0x3f) | 0x80; - *outp++ = (x & 0x3f) | 0x80; + *outp++ = (unsigned char) ((x >> 24) | 0xf8); + *outp++ = (unsigned char) (((x >> 18) & 0x3f) | 0x80); + *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80); + *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80); + *outp++ = (unsigned char) ((x & 0x3f) | 0x80); (*outbytesleft) -= 5; } else if (*outbytesleft >= 6) { - *outp++ = (x >> 30) | 0xfc; - *outp++ = ((x >> 24) & 0x3f) | 0x80; - *outp++ = ((x >> 18) & 0x3f) | 0x80; - *outp++ = ((x >> 12) & 0x3f) | 0x80; - *outp++ = ((x >> 6) & 0x3f) | 0x80; - *outp++ = (x & 0x3f) | 0x80; + *outp++ = (unsigned char) ((x >> 30) | 0xfc); + *outp++ = (unsigned char) (((x >> 24) & 0x3f) | 0x80); + *outp++ = (unsigned char) (((x >> 18) & 0x3f) | 0x80); + *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80); + *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80); + *outp++ = (unsigned char) ((x & 0x3f) | 0x80); (*outbytesleft) -= 6; } else @@ -239,14 +273,14 @@ static size_t yaz_write_UTF8 (yaz_iconv_t cd, unsigned long x, cd->my_errno = YAZ_ICONV_E2BIG; /* not room for output */ return (size_t)(-1); } - *outbuf = outp; + *outbuf = (char *) outp; return 0; } static size_t yaz_write_ISO8859_1 (yaz_iconv_t cd, unsigned long x, char **outbuf, size_t *outbytesleft) { - unsigned char *outp = *outbuf; + unsigned char *outp = (unsigned char *) *outbuf; if (x > 255 || x < 1) { cd->my_errno = YAZ_ICONV_EILSEQ; @@ -254,7 +288,7 @@ static size_t yaz_write_ISO8859_1 (yaz_iconv_t cd, unsigned long x, } else if (*outbytesleft >= 1) { - *outp++ = x; + *outp++ = (unsigned char) x; (*outbytesleft)--; } else @@ -262,7 +296,7 @@ static size_t yaz_write_ISO8859_1 (yaz_iconv_t cd, unsigned long x, cd->my_errno = YAZ_ICONV_E2BIG; return (size_t)(-1); } - *outbuf = outp; + *outbuf = (char *) outp; return 0; } @@ -270,13 +304,13 @@ static size_t yaz_write_ISO8859_1 (yaz_iconv_t cd, unsigned long x, static size_t yaz_write_UCS4 (yaz_iconv_t cd, unsigned long x, char **outbuf, size_t *outbytesleft) { - unsigned char *outp = *outbuf; + unsigned char *outp = (unsigned char *) *outbuf; if (*outbytesleft >= 4) { - *outp++ = x<<24; - *outp++ = x<<16; - *outp++ = x<<8; - *outp++ = x; + *outp++ = (unsigned char) (x<<24); + *outp++ = (unsigned char) (x<<16); + *outp++ = (unsigned char) (x<<8); + *outp++ = (unsigned char) x; (*outbytesleft) -= 4; } else @@ -284,20 +318,20 @@ static size_t yaz_write_UCS4 (yaz_iconv_t cd, unsigned long x, cd->my_errno = YAZ_ICONV_E2BIG; return (size_t)(-1); } - *outbuf = outp; + *outbuf = (char *) outp; return 0; } static size_t yaz_write_UCS4LE (yaz_iconv_t cd, unsigned long x, char **outbuf, size_t *outbytesleft) { - unsigned char *outp = *outbuf; + unsigned char *outp = (unsigned char *) *outbuf; if (*outbytesleft >= 4) { - *outp++ = x; - *outp++ = x<<8; - *outp++ = x<<16; - *outp++ = x<<24; + *outp++ = (unsigned char) x; + *outp++ = (unsigned char) (x<<8); + *outp++ = (unsigned char) (x<<16); + *outp++ = (unsigned char) (x<<24); (*outbytesleft) -= 4; } else @@ -305,40 +339,84 @@ static size_t yaz_write_UCS4LE (yaz_iconv_t cd, unsigned long x, cd->my_errno = YAZ_ICONV_E2BIG; return (size_t)(-1); } - *outbuf = outp; + *outbuf = (char *) outp; return 0; } +#if HAVE_WCHAR_H +static size_t yaz_write_wchar_t (yaz_iconv_t cd, unsigned long x, + char **outbuf, size_t *outbytesleft) +{ + unsigned char *outp = (unsigned char *) *outbuf; + + if (*outbytesleft >= sizeof(wchar_t)) + { + wchar_t wch = x; + memcpy(outp, &wch, sizeof(wch)); + outp += sizeof(wch); + (*outbytesleft) -= sizeof(wch); + } + else + { + cd->my_errno = YAZ_ICONV_E2BIG; + return (size_t)(-1); + } + *outbuf = (char *) outp; + return 0; +} +#endif + +int yaz_iconv_isbuiltin(yaz_iconv_t cd) +{ + return cd->read_handle && cd->write_handle; +} + yaz_iconv_t yaz_iconv_open (const char *tocode, const char *fromcode) { - yaz_iconv_t cd = xmalloc (sizeof(*cd)); + yaz_iconv_t cd = (yaz_iconv_t) xmalloc (sizeof(*cd)); cd->write_handle = 0; cd->read_handle = 0; cd->init_handle = 0; cd->my_errno = YAZ_ICONV_UNKNOWN; - if (!yaz_matchstr(fromcode, "UTF8")) + /* a useful hack: if fromcode has leading @, + the library not use YAZ's own conversions .. */ + if (fromcode[0] == '@') + fromcode++; + else { - cd->read_handle = yaz_read_UTF8; - cd->init_handle = yaz_init_UTF8; + if (!yaz_matchstr(fromcode, "UTF8")) + { + cd->read_handle = yaz_read_UTF8; + cd->init_handle = yaz_init_UTF8; + } + else if (!yaz_matchstr(fromcode, "ISO88591")) + cd->read_handle = yaz_read_ISO8859_1; + else if (!yaz_matchstr(fromcode, "UCS4")) + cd->read_handle = yaz_read_UCS4; + else if (!yaz_matchstr(fromcode, "UCS4LE")) + cd->read_handle = yaz_read_UCS4LE; + else if (!yaz_matchstr(fromcode, "MARC8")) + cd->read_handle = yaz_read_marc8; +#if HAVE_WCHAR_H + else if (!yaz_matchstr(fromcode, "WCHAR_T")) + cd->read_handle = yaz_read_wchar_t; +#endif + + if (!yaz_matchstr(tocode, "UTF8")) + cd->write_handle = yaz_write_UTF8; + else if (!yaz_matchstr(tocode, "ISO88591")) + cd->write_handle = yaz_write_ISO8859_1; + else if (!yaz_matchstr (tocode, "UCS4")) + cd->write_handle = yaz_write_UCS4; + else if (!yaz_matchstr(tocode, "UCS4LE")) + cd->write_handle = yaz_write_UCS4LE; +#if HAVE_WCHAR_H + else if (!yaz_matchstr(tocode, "WCHAR_T")) + cd->write_handle = yaz_write_wchar_t; +#endif } - else if (!yaz_matchstr(fromcode, "ISO88591")) - cd->read_handle = yaz_read_ISO8859_1; - else if (!yaz_matchstr(fromcode, "UCS4")) - cd->read_handle = yaz_read_UCS4; - else if (!yaz_matchstr(fromcode, "UCS4LE")) - cd->read_handle = yaz_read_UCS4LE; - - if (!yaz_matchstr(tocode, "UTF8")) - cd->write_handle = yaz_write_UTF8; - else if (!yaz_matchstr(tocode, "ISO88591")) - cd->write_handle = yaz_write_ISO8859_1; - else if (!yaz_matchstr (tocode, "UCS4")) - cd->write_handle = yaz_write_UCS4; - else if (!yaz_matchstr(tocode, "UCS4LE")) - cd->write_handle = yaz_write_UCS4LE; - #if HAVE_ICONV_H cd->iconv_cd = 0; if (!cd->read_handle || !cd->write_handle) @@ -373,7 +451,7 @@ size_t yaz_iconv (yaz_iconv_t cd, char **inbuf, size_t *inbytesleft, iconv(cd->iconv_cd, inbuf, inbytesleft, outbuf, outbytesleft); if (r == (size_t)(-1)) { - switch (errno) + switch (yaz_errno()) { case E2BIG: cd->my_errno = YAZ_ICONV_E2BIG; @@ -404,7 +482,8 @@ size_t yaz_iconv (yaz_iconv_t cd, char **inbuf, size_t *inbytesleft, if (cd->init_handle) { size_t no_read; - size_t r = (cd->init_handle)(cd, *inbuf, *inbytesleft, &no_read); + size_t r = (cd->init_handle)(cd, (unsigned char *) *inbuf, + *inbytesleft, &no_read); if (r) { if (cd->my_errno == YAZ_ICONV_EINVAL) @@ -428,7 +507,8 @@ size_t yaz_iconv (yaz_iconv_t cd, char **inbuf, size_t *inbytesleft, break; } - x = (cd->read_handle)(cd, *inbuf, *inbytesleft, &no_read); + x = (cd->read_handle)(cd, (unsigned char *) *inbuf, *inbytesleft, + &no_read); if (no_read == 0) { r = (size_t)(-1);