-/*
- * Copyright (C) 1995-2008, Index Data ApS
+/* This file is part of the YAZ toolkit.
+ * Copyright (C) Index Data
* See the file LICENSE for details.
- *
- * $Id: siconv.c,v 1.50 2008-03-12 08:53:28 adam Exp $
*/
/**
* \file
- * \brief ISO-5428 character mapping (iconv)
+ * \brief UTF-8 encoding / decoding
*/
#if HAVE_CONFIG_H
#include <assert.h>
#include <errno.h>
#include <string.h>
-#include <ctype.h>
#include "iconv-p.h"
-size_t yaz_init_UTF8(yaz_iconv_t cd, unsigned char *inp,
- size_t inbytesleft, size_t *no_read)
+static size_t init_utf8(yaz_iconv_t cd, yaz_iconv_decoder_t d,
+ unsigned char *inp,
+ size_t inbytesleft, size_t *no_read)
{
- if (inp[0] != 0xef)
+ if (!inp || inp[0] != 0xef)
{
*no_read = 0;
return 0;
return 0;
}
-unsigned long yaz_read_UTF8_char(unsigned char *inp,
+unsigned long yaz_read_UTF8_char(const unsigned char *inp,
size_t inbytesleft, size_t *no_read,
int *error)
{
}
else
*error = YAZ_ICONV_EILSEQ;
- }
+ }
else if (inp[0] <= 0xf7 && inbytesleft >= 4)
{
if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80
return x;
}
-unsigned long yaz_read_UTF8(yaz_iconv_t cd, unsigned char *inp,
- size_t inbytesleft, size_t *no_read)
+static unsigned long read_utf8(yaz_iconv_t cd, yaz_iconv_decoder_t d,
+ unsigned char *inp,
+ size_t inbytesleft, size_t *no_read)
{
int err = 0;
int r = yaz_read_UTF8_char(inp, inbytesleft, no_read, &err);
}
-size_t yaz_write_UTF8(yaz_iconv_t cd, unsigned long x,
- char **outbuf, size_t *outbytesleft)
+static size_t write_UTF8(yaz_iconv_t cd, yaz_iconv_encoder_t en,
+ unsigned long x,
+ char **outbuf, size_t *outbytesleft)
{
int err = 0;
int r = yaz_write_UTF8_char(x, outbuf, outbytesleft, &err);
{
*outp++ = (unsigned char) x;
(*outbytesleft)--;
- }
+ }
else if (x <= 0x7ff && *outbytesleft >= 2)
{
    /* two-byte form: the first byte carries the bits above the low six
       (tagged 0xc0), the second carries the low six bits (tagged 0x80) */
    *outp++ = (unsigned char) ((x >> 6) | 0xc0);
    *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
    /* exactly two bytes were stored above, so exactly two are consumed;
       subtracting more would wrap the unsigned counter whenever fewer
       than that many bytes were free */
    (*outbytesleft) -= 2;
}
- else
+ else
{
*error = YAZ_ICONV_E2BIG; /* not room for output */
return (size_t)(-1);
return 0;
}
+/**
+ * \brief Installs the UTF-8 write handler in an encoder for "UTF8".
+ * \param tocode name of the requested target encoding
+ * \param e encoder to set up
+ * \return e, with write_handle set to write_UTF8, when yaz_matchstr()
+ *         returns zero for tocode against "UTF8"; 0 otherwise
+ */
+yaz_iconv_encoder_t yaz_utf8_encoder(const char *tocode,
+                                     yaz_iconv_encoder_t e)
+{
+    /* non-zero from yaz_matchstr (presumably: no match): e is untouched */
+    if (yaz_matchstr(tocode, "UTF8") != 0)
+        return 0;
+
+    e->write_handle = write_UTF8;
+    return e;
+}
+
+/**
+ * \brief Installs the UTF-8 init and read handlers in a decoder for "UTF8".
+ * \param fromcode name of the requested source encoding
+ * \param d decoder to set up
+ * \return d, with init_handle set to init_utf8 and read_handle set to
+ *         read_utf8, when yaz_matchstr() returns zero for fromcode
+ *         against "UTF8"; 0 otherwise
+ */
+yaz_iconv_decoder_t yaz_utf8_decoder(const char *fromcode,
+                                     yaz_iconv_decoder_t d)
+{
+    /* non-zero from yaz_matchstr (presumably: no match): d is untouched */
+    if (yaz_matchstr(fromcode, "UTF8") != 0)
+        return 0;
+
+    d->read_handle = read_utf8;
+    d->init_handle = init_utf8;
+    return d;
+}
+
+/**
+ * \brief Checks whether a NUL-terminated string decodes cleanly as UTF-8.
+ * \param str string to check; must not be NULL (it is passed to strlen)
+ * \return 1 when yaz_read_UTF8_char() reports no error for every
+ *         character in str (an empty string yields 1); 0 otherwise
+ */
+int yaz_utf8_check(const char *str)
+{
+    /* yaz_read_UTF8_char takes a const pointer, so constness is kept */
+    const unsigned char *inp = (const unsigned char *) str;
+    size_t inbytesleft = strlen(str);
+
+    while (inbytesleft)
+    {
+        int error = 0;
+        size_t no_read = 0;
+
+        yaz_read_UTF8_char(inp, inbytesleft, &no_read, &error);
+        /* besides a reported error, reject a step that makes no progress
+           (would loop forever) or claims more bytes than are left
+           (would wrap inbytesleft) */
+        if (error || no_read == 0 || no_read > inbytesleft)
+            return 0;
+        inp += no_read;
+        inbytesleft -= no_read;
+    }
+    return 1;
+}
/*
* Local variables:
* c-basic-offset: 4
+ * c-file-style: "Stroustrup"
* indent-tabs-mode: nil
* End:
* vim: shiftwidth=4 tabstop=8 expandtab
*/
+