-
-
Notifications
You must be signed in to change notification settings - Fork 138
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add a CBORGenerator feature for lenient unicode encoding
If enabled, the generator will output the Unicode Replacement Character (U+FFFD) for invalid Unicode sequences (invalid surrogate chars in the Java String) instead of failing with an IllegalArgumentException.
- Loading branch information
1 parent
f5853dc
commit 02a2cbc
Showing
3 changed files
with
195 additions
and
69 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
114 changes: 114 additions & 0 deletions
114
cbor/src/test/java/com/fasterxml/jackson/dataformat/cbor/gen/UnicodeGenerationTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
|
||
import java.io.*; | ||
import java.math.BigDecimal; | ||
import java.math.BigInteger; | ||
import java.util.*; | ||
|
||
import org.junit.Assert; | ||
|
||
import com.fasterxml.jackson.core.JsonGenerationException; | ||
|
||
import com.fasterxml.jackson.databind.ObjectMapper; | ||
|
||
import com.fasterxml.jackson.dataformat.cbor.CBORConstants; | ||
import com.fasterxml.jackson.dataformat.cbor.CBORGenerator; | ||
import com.fasterxml.jackson.dataformat.cbor.CBORParser; | ||
import com.fasterxml.jackson.dataformat.cbor.CBORTestBase; | ||
|
||
public class UnicodeGenerationTest extends CBORTestBase | ||
{ | ||
/** | ||
* Test that encoding a String containing invalid surrogates fail with an exception | ||
*/ | ||
public void testFailForInvalidSurrogate() throws Exception | ||
{ | ||
ByteArrayOutputStream out = new ByteArrayOutputStream(); | ||
CBORGenerator gen = cborGenerator(out); | ||
|
||
assertEquals(0, gen.getOutputBuffered()); | ||
|
||
// Unmatched first surrogate character | ||
try { | ||
gen.writeString("x\ud83d"); | ||
} catch (IllegalArgumentException e) { | ||
} | ||
assertEquals(0, gen.getOutputBuffered()); | ||
|
||
// Unmatched second surrogate character | ||
try { | ||
gen.writeString("x\ude01"); | ||
} catch (IllegalArgumentException e) { | ||
} | ||
assertEquals(0, gen.getOutputBuffered()); | ||
|
||
// Unmatched second surrogate character (2) | ||
try { | ||
gen.writeString("x\ude01x"); | ||
} catch (IllegalArgumentException e) { | ||
} | ||
assertEquals(0, gen.getOutputBuffered()); | ||
|
||
// Broken surrogate pair | ||
try { | ||
gen.writeString("x\ud83dx"); | ||
} catch (IllegalArgumentException e) { | ||
} | ||
assertEquals(0, gen.getOutputBuffered()); | ||
} | ||
|
||
/** | ||
* Test that when the lenient unicode feature is enabled, the replacement character is used to fix invalid sequences | ||
*/ | ||
public void testRecoverInvalidSurrogate() throws Exception | ||
{ | ||
ByteArrayOutputStream out; | ||
CBORGenerator gen; | ||
byte[] b; | ||
|
||
out = new ByteArrayOutputStream(); | ||
gen = lenientUnicodeCborGenerator(out); | ||
assertEquals(0, gen.getOutputBuffered()); | ||
|
||
// Unmatched first surrogate character | ||
gen.writeString("x\ud83d"); | ||
gen.close(); | ||
b = "x\ufffd".getBytes("utf-8"); | ||
_verifyBytes(out.toByteArray(), | ||
(byte) (CBORConstants.PREFIX_TYPE_TEXT + b.length), b); | ||
|
||
out = new ByteArrayOutputStream(); | ||
gen = lenientUnicodeCborGenerator(out); | ||
assertEquals(0, gen.getOutputBuffered()); | ||
|
||
// Unmatched second surrogate character | ||
gen.writeString("x\ude01"); | ||
gen.close(); | ||
b = "x\ufffd".getBytes("utf-8"); | ||
_verifyBytes(out.toByteArray(), | ||
(byte) (CBORConstants.PREFIX_TYPE_TEXT + b.length), b); | ||
|
||
out = new ByteArrayOutputStream(); | ||
gen = lenientUnicodeCborGenerator(out); | ||
assertEquals(0, gen.getOutputBuffered()); | ||
|
||
// Unmatched second surrogate character (2) | ||
gen.writeString("x\ude01x"); | ||
gen.close(); | ||
b = "x\ufffdx".getBytes("utf-8"); | ||
_verifyBytes(out.toByteArray(), | ||
(byte) (CBORConstants.PREFIX_TYPE_TEXT + b.length), b); | ||
|
||
out = new ByteArrayOutputStream(); | ||
gen = lenientUnicodeCborGenerator(out); | ||
assertEquals(0, gen.getOutputBuffered()); | ||
|
||
// Broken surrogate pair | ||
gen.writeString("x\ud83dx"); | ||
gen.close(); | ||
b = "x\ufffdx".getBytes("utf-8"); | ||
_verifyBytes(out.toByteArray(), | ||
(byte) (CBORConstants.PREFIX_TYPE_TEXT + b.length), b); | ||
|
||
} | ||
|
||
} |