X-Git-Url: http://git.indexdata.com/?p=yaz-moved-to-github.git;a=blobdiff_plain;f=src%2Futf8.c;h=0a1ccb37e59aa069404e8f2e85ca8ce60ecf4827;hp=b893e0d055d5476ff9d6a4242806843c8a94b12d;hb=5c1eb188e9cf6f0dd2e435a1f81938f258515edb;hpb=cccb7ecd623450d5b3ca2391327788c84aed71c8 diff --git a/src/utf8.c b/src/utf8.c index b893e0d..0a1ccb3 100644 --- a/src/utf8.c +++ b/src/utf8.c @@ -1,12 +1,10 @@ -/* - * Copyright (C) 1995-2008, Index Data ApS +/* This file is part of the YAZ toolkit. + * Copyright (C) Index Data * See the file LICENSE for details. - * - * $Id: siconv.c,v 1.50 2008-03-12 08:53:28 adam Exp $ */ /** * \file - * \brief ISO-5428 character mapping (iconv) + * \brief UTF-8 encoding / decoding */ #if HAVE_CONFIG_H @@ -16,14 +14,14 @@ #include #include #include -#include #include "iconv-p.h" -size_t yaz_init_UTF8(yaz_iconv_t cd, unsigned char *inp, - size_t inbytesleft, size_t *no_read) +static size_t init_utf8(yaz_iconv_t cd, yaz_iconv_decoder_t d, + unsigned char *inp, + size_t inbytesleft, size_t *no_read) { - if (inp[0] != 0xef) + if (!inp || inp[0] != 0xef) { *no_read = 0; return 0; @@ -40,7 +38,7 @@ size_t yaz_init_UTF8(yaz_iconv_t cd, unsigned char *inp, return 0; } -unsigned long yaz_read_UTF8_char(unsigned char *inp, +unsigned long yaz_read_UTF8_char(const unsigned char *inp, size_t inbytesleft, size_t *no_read, int *error) { @@ -82,7 +80,7 @@ unsigned long yaz_read_UTF8_char(unsigned char *inp, } else *error = YAZ_ICONV_EILSEQ; - } + } else if (inp[0] <= 0xf7 && inbytesleft >= 4) { if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80 @@ -137,8 +135,9 @@ unsigned long yaz_read_UTF8_char(unsigned char *inp, return x; } -unsigned long yaz_read_UTF8(yaz_iconv_t cd, unsigned char *inp, - size_t inbytesleft, size_t *no_read) +static unsigned long read_utf8(yaz_iconv_t cd, yaz_iconv_decoder_t d, + unsigned char *inp, + size_t inbytesleft, size_t *no_read) { int err = 0; int r = yaz_read_UTF8_char(inp, inbytesleft, no_read, &err); @@ -147,8 +146,9 @@ unsigned long yaz_read_UTF8(yaz_iconv_t cd, unsigned char *inp, } -size_t yaz_write_UTF8(yaz_iconv_t cd, unsigned long x, - char **outbuf, size_t *outbytesleft) +static size_t write_UTF8(yaz_iconv_t cd, yaz_iconv_encoder_t en, + unsigned long x, + char **outbuf, size_t *outbytesleft) { int err = 0; int r = yaz_write_UTF8_char(x, outbuf, outbytesleft, &err); @@ -166,7 +166,7 @@ size_t yaz_write_UTF8_char(unsigned long x, { *outp++ = (unsigned char) x; (*outbytesleft)--; - } + } else if (x <= 0x7ff && *outbytesleft >= 2) { *outp++ = (unsigned char) ((x >> 6) | 0xc0); @@ -207,7 +207,7 @@ size_t yaz_write_UTF8_char(unsigned long x, *outp++ = (unsigned char) ((x & 0x3f) | 0x80); (*outbytesleft) -= 6; } - else + else { *error = YAZ_ICONV_E2BIG; /* not room for output */ return (size_t)(-1); @@ -216,11 +216,55 @@ size_t yaz_write_UTF8_char(unsigned long x, return 0; } +yaz_iconv_encoder_t yaz_utf8_encoder(const char *tocode, + yaz_iconv_encoder_t e) + +{ + if (!yaz_matchstr(tocode, "UTF8")) + { + e->write_handle = write_UTF8; + return e; + } + return 0; +} + +yaz_iconv_decoder_t yaz_utf8_decoder(const char *fromcode, + yaz_iconv_decoder_t d) +{ + if (!yaz_matchstr(fromcode, "UTF8")) + { + d->init_handle = init_utf8; + d->read_handle = read_utf8; + return d; + } + return 0; +} + +int yaz_utf8_check(const char *str) +{ + /* cast OK: yaz_read_UTF8_char is read-only */ + unsigned char *inp = (unsigned char *) str; + size_t inbytesleft = strlen(str); + + while (inbytesleft) + { + int error = 0; + size_t no_read; + yaz_read_UTF8_char(inp, inbytesleft, &no_read, &error); + if (error) + return 0; + inp += no_read; + inbytesleft -= no_read; + } + return 1; +} /* * Local variables: * c-basic-offset: 4 + * c-file-style: "Stroustrup" * indent-tabs-mode: nil * End: * vim: shiftwidth=4 tabstop=8 expandtab */ +