projects
/
yaz-moved-to-github.git
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (parent:
621c7fe
)
Check for malformed UTF-8 characters. Thanks to Gary Anderson for
author
Adam Dickmeiss
<adam@indexdata.dk>
Wed, 23 May 2007 08:50:11 +0000
(08:50 +0000)
committer
Adam Dickmeiss
<adam@indexdata.dk>
Wed, 23 May 2007 08:50:11 +0000
(08:50 +0000)
pointing this out.
src/siconv.c
patch
|
blob
|
history
diff --git
a/src/siconv.c
b/src/siconv.c
index
bdb20ee
..
1420e78
100644
(file)
--- a/
src/siconv.c
+++ b/
src/siconv.c
@@
-2,7
+2,7
@@
* Copyright (C) 1995-2007, Index Data ApS
* See the file LICENSE for details.
*
* Copyright (C) 1995-2007, Index Data ApS
* See the file LICENSE for details.
*
- * $Id: siconv.c,v 1.40 2007-05-03 22:20:45 adam Exp $
+ * $Id: siconv.c,v 1.41 2007-05-23 08:50:11 adam Exp $
*/
/**
* \file siconv.c
*/
/**
* \file siconv.c
@@
-212,6
+212,7
@@
unsigned long yaz_read_UTF8_char(unsigned char *inp,
{
unsigned long x = 0;
{
unsigned long x = 0;
+ *no_read = 0; /* by default */
if (inp[0] <= 0x7f)
{
x = inp[0];
if (inp[0] <= 0x7f)
{
x = inp[0];
@@
-219,75
+220,86
@@
unsigned long yaz_read_UTF8_char(unsigned char *inp,
}
else if (inp[0] <= 0xbf || inp[0] >= 0xfe)
{
}
else if (inp[0] <= 0xbf || inp[0] >= 0xfe)
{
- *no_read = 0;
*error = YAZ_ICONV_EILSEQ;
}
else if (inp[0] <= 0xdf && inbytesleft >= 2)
{
*error = YAZ_ICONV_EILSEQ;
}
else if (inp[0] <= 0xdf && inbytesleft >= 2)
{
- x = ((inp[0] & 0x1f) << 6) | (inp[1] & 0x3f);
- if (x >= 0x80)
- *no_read = 2;
- else
+ if ((inp[1] & 0xc0) == 0x80)
{
{
- *no_read = 0;
- *error = YAZ_ICONV_EILSEQ;
+ x = ((inp[0] & 0x1f) << 6) | (inp[1] & 0x3f);
+ if (x >= 0x80)
+ *no_read = 2;
+ else
+ *error = YAZ_ICONV_EILSEQ;
}
}
+ else
+ *error = YAZ_ICONV_EILSEQ;
}
else if (inp[0] <= 0xef && inbytesleft >= 3)
{
}
else if (inp[0] <= 0xef && inbytesleft >= 3)
{
- x = ((inp[0] & 0x0f) << 12) | ((inp[1] & 0x3f) << 6) |
- (inp[2] & 0x3f);
- if (x >= 0x800)
- *no_read = 3;
- else
+ if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80)
{
{
- *no_read = 0;
- *error = YAZ_ICONV_EILSEQ;
+ x = ((inp[0] & 0x0f) << 12) | ((inp[1] & 0x3f) << 6) |
+ (inp[2] & 0x3f);
+ if (x >= 0x800)
+ *no_read = 3;
+ else
+ *error = YAZ_ICONV_EILSEQ;
}
}
- }
+ else
+ *error = YAZ_ICONV_EILSEQ;
+ }
else if (inp[0] <= 0xf7 && inbytesleft >= 4)
{
else if (inp[0] <= 0xf7 && inbytesleft >= 4)
{
- x = ((inp[0] & 0x07) << 18) | ((inp[1] & 0x3f) << 12) |
- ((inp[2] & 0x3f) << 6) | (inp[3] & 0x3f);
- if (x >= 0x10000)
- *no_read = 4;
- else
+ if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80
+ && (inp[3] & 0xc0) == 0x80)
{
{
- *no_read = 0;
- *error = YAZ_ICONV_EILSEQ;
+ x = ((inp[0] & 0x07) << 18) | ((inp[1] & 0x3f) << 12) |
+ ((inp[2] & 0x3f) << 6) | (inp[3] & 0x3f);
+ if (x >= 0x10000)
+ *no_read = 4;
+ else
+ *error = YAZ_ICONV_EILSEQ;
}
}
+ else
+ *error = YAZ_ICONV_EILSEQ;
}
else if (inp[0] <= 0xfb && inbytesleft >= 5)
{
}
else if (inp[0] <= 0xfb && inbytesleft >= 5)
{
- x = ((inp[0] & 0x03) << 24) | ((inp[1] & 0x3f) << 18) |
- ((inp[2] & 0x3f) << 12) | ((inp[3] & 0x3f) << 6) |
- (inp[4] & 0x3f);
- if (x >= 0x200000)
- *no_read = 5;
- else
+ if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80
+ && (inp[3] & 0xc0) == 0x80 && (inp[4] & 0xc0) == 0x80)
{
{
- *no_read = 0;
- *error = YAZ_ICONV_EILSEQ;
+ x = ((inp[0] & 0x03) << 24) | ((inp[1] & 0x3f) << 18) |
+ ((inp[2] & 0x3f) << 12) | ((inp[3] & 0x3f) << 6) |
+ (inp[4] & 0x3f);
+ if (x >= 0x200000)
+ *no_read = 5;
+ else
+ *error = YAZ_ICONV_EILSEQ;
}
}
+ else
+ *error = YAZ_ICONV_EILSEQ;
}
else if (inp[0] <= 0xfd && inbytesleft >= 6)
{
}
else if (inp[0] <= 0xfd && inbytesleft >= 6)
{
- x = ((inp[0] & 0x01) << 30) | ((inp[1] & 0x3f) << 24) |
- ((inp[2] & 0x3f) << 18) | ((inp[3] & 0x3f) << 12) |
- ((inp[4] & 0x3f) << 6) | (inp[5] & 0x3f);
- if (x >= 0x4000000)
- *no_read = 6;
- else
+ if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80
+ && (inp[3] & 0xc0) == 0x80 && (inp[4] & 0xc0) == 0x80
+ && (inp[5] & 0xc0) == 0x80)
{
{
- *no_read = 0;
- *error = YAZ_ICONV_EILSEQ;
+ x = ((inp[0] & 0x01) << 30) | ((inp[1] & 0x3f) << 24) |
+ ((inp[2] & 0x3f) << 18) | ((inp[3] & 0x3f) << 12) |
+ ((inp[4] & 0x3f) << 6) | (inp[5] & 0x3f);
+ if (x >= 0x4000000)
+ *no_read = 6;
+ else
+ *error = YAZ_ICONV_EILSEQ;
}
}
+ else
+ *error = YAZ_ICONV_EILSEQ;
}
else
}
else
- {
- *no_read = 0;
- *error = YAZ_ICONV_EINVAL;
- }
+ *error = YAZ_ICONV_EINVAL; /* incomplete sentence */
+
return x;
}
return x;
}