X-Git-Url: http://git.indexdata.com/?p=yaz-moved-to-github.git;a=blobdiff_plain;f=src%2Futf8.c;h=0a1ccb37e59aa069404e8f2e85ca8ce60ecf4827;hp=bf92cd32a2bff72e763bbb577620ae54ab42d1aa;hb=5c1eb188e9cf6f0dd2e435a1f81938f258515edb;hpb=96c6e58f286787106e4a7b3bb3900a36051968d6 diff --git a/src/utf8.c b/src/utf8.c index bf92cd3..0a1ccb3 100644 --- a/src/utf8.c +++ b/src/utf8.c @@ -1,5 +1,5 @@ /* This file is part of the YAZ toolkit. - * Copyright (C) 1995-2008 Index Data + * Copyright (C) Index Data * See the file LICENSE for details. */ /** @@ -14,7 +14,6 @@ #include #include #include -#include #include "iconv-p.h" @@ -22,7 +21,7 @@ static size_t init_utf8(yaz_iconv_t cd, yaz_iconv_decoder_t d, unsigned char *inp, size_t inbytesleft, size_t *no_read) { - if (inp[0] != 0xef) + if (!inp || inp[0] != 0xef) { *no_read = 0; return 0; @@ -39,7 +38,7 @@ static size_t init_utf8(yaz_iconv_t cd, yaz_iconv_decoder_t d, return 0; } -unsigned long yaz_read_UTF8_char(unsigned char *inp, +unsigned long yaz_read_UTF8_char(const unsigned char *inp, size_t inbytesleft, size_t *no_read, int *error) { @@ -81,7 +80,7 @@ unsigned long yaz_read_UTF8_char(unsigned char *inp, } else *error = YAZ_ICONV_EILSEQ; - } + } else if (inp[0] <= 0xf7 && inbytesleft >= 4) { if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80 @@ -167,7 +166,7 @@ size_t yaz_write_UTF8_char(unsigned long x, { *outp++ = (unsigned char) x; (*outbytesleft)--; - } + } else if (x <= 0x7ff && *outbytesleft >= 2) { *outp++ = (unsigned char) ((x >> 6) | 0xc0); @@ -208,7 +207,7 @@ size_t yaz_write_UTF8_char(unsigned long x, *outp++ = (unsigned char) ((x & 0x3f) | 0x80); (*outbytesleft) -= 6; } - else + else { *error = YAZ_ICONV_E2BIG; /* not room for output */ return (size_t)(-1); @@ -219,7 +218,7 @@ size_t yaz_write_UTF8_char(unsigned long x, yaz_iconv_encoder_t yaz_utf8_encoder(const char *tocode, yaz_iconv_encoder_t e) - + { if (!yaz_matchstr(tocode, "UTF8")) { @@ -240,12 +239,32 @@ yaz_iconv_decoder_t yaz_utf8_decoder(const char *fromcode, } return 0; } - + +int yaz_utf8_check(const char *str) +{ + /* cast OK: yaz_read_UTF8_char is read-only */ + unsigned char *inp = (unsigned char *) str; + size_t inbytesleft = strlen(str); + + while (inbytesleft) + { + int error = 0; + size_t no_read; + yaz_read_UTF8_char(inp, inbytesleft, &no_read, &error); + if (error) + return 0; + inp += no_read; + inbytesleft -= no_read; + } + return 1; +} /* * Local variables: * c-basic-offset: 4 + * c-file-style: "Stroustrup" * indent-tabs-mode: nil * End: * vim: shiftwidth=4 tabstop=8 expandtab */ +