Skip to content

Commit

Permalink
unicoder.cpp: CheckForInvalidUtf8() did not work correctly when text …
Browse files Browse the repository at this point in the history
…length is less than 3. fixes bitbucket#57
  • Loading branch information
sdottaka committed Jan 29, 2017
1 parent cbe4ac6 commit 14d87c8
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 3 deletions.
10 changes: 7 additions & 3 deletions Src/Common/unicoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1140,16 +1140,16 @@ bool CheckForInvalidUtf8(const char *pBuffer, size_t size)
return true;
pVal2++;
}
if (size <= 3)
return false;
pVal2 = (unsigned char *)pBuffer;
bool bUTF8 = false;
for (size_t i = 0; i < (size - 3); ++i)
for (size_t i = 0; i < size; ++i)
{
if ((*pVal2 & 0x80) == 0x00)
;
else if ((*pVal2 & 0xE0) == 0xC0)
{
if (i >= size - 1)
return true;
pVal2++;
i++;
if ((*pVal2 & 0xC0) != 0x80)
Expand All @@ -1158,6 +1158,8 @@ bool CheckForInvalidUtf8(const char *pBuffer, size_t size)
}
else if ((*pVal2 & 0xF0) == 0xE0)
{
if (i >= size - 2)
return true;
pVal2++;
i++;
if ((*pVal2 & 0xC0) != 0x80)
Expand All @@ -1170,6 +1172,8 @@ bool CheckForInvalidUtf8(const char *pBuffer, size_t size)
}
else if ((*pVal2 & 0xF8) == 0xF0)
{
if (i >= size - 3)
return true;
pVal2++;
i++;
if ((*pVal2 & 0xC0) != 0x80)
Expand Down
30 changes: 30 additions & 0 deletions Testing/GoogleTest/unicoder/unicoder_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,36 @@ namespace
#endif
}

// Verifies ucr::CheckForInvalidUtf8() against intact, multibyte-free,
// truncated and bit-corrupted input, including buffers of three bytes or fewer.
TEST_F(UnicoderTest, CheckForInvalidUtf8)
{
	const std::string nbsp = ucr::toUTF8(L"\u00a0");
	const std::string smiley = ucr::toUTF8(L"\u263a");
	const std::string joined = ucr::toUTF8(L"\u263a|\u00a0");

	// Intact encodings of non-ASCII characters are expected to yield false.
	EXPECT_FALSE(ucr::CheckForInvalidUtf8(nbsp.c_str(), nbsp.length()));
	EXPECT_FALSE(ucr::CheckForInvalidUtf8(smiley.c_str(), smiley.length()));
	EXPECT_FALSE(ucr::CheckForInvalidUtf8(joined.c_str(), joined.length()));

	// Empty and plain-ASCII buffers of length 0..3 are expected to yield true.
	EXPECT_TRUE(ucr::CheckForInvalidUtf8("", 0));
	EXPECT_TRUE(ucr::CheckForInvalidUtf8(" ", 1));
	EXPECT_TRUE(ucr::CheckForInvalidUtf8("ab", 2));
	EXPECT_TRUE(ucr::CheckForInvalidUtf8("abc", 3));

	// Reporting one byte less than the encoded length must yield true.
	// The full buffer is still passed, so only the size argument shrinks.
	EXPECT_TRUE(ucr::CheckForInvalidUtf8(nbsp.c_str(), nbsp.length() - 1));
	EXPECT_TRUE(ucr::CheckForInvalidUtf8(smiley.c_str(), smiley.length() - 1));

	// Clearing the top bit of the last (or second-to-last) byte must yield true.
	std::string damaged = nbsp;
	damaged[damaged.length() - 1] &= 0x7f;
	EXPECT_TRUE(ucr::CheckForInvalidUtf8(damaged.c_str(), damaged.length()));

	damaged = smiley;
	damaged[damaged.length() - 2] &= 0x7f;
	EXPECT_TRUE(ucr::CheckForInvalidUtf8(damaged.c_str(), damaged.length()));

	damaged = smiley;
	damaged[damaged.length() - 1] &= 0x7f;
	EXPECT_TRUE(ucr::CheckForInvalidUtf8(damaged.c_str(), damaged.length()));
}

TEST_F(UnicoderTest, CrossConvert)
{
wchar_t wbuf[256];
Expand Down

0 comments on commit 14d87c8

Please sign in to comment.