Skip to content

Commit

Permalink
Try to optimize encoding of surrogate pairs further
Browse files Browse the repository at this point in the history
  • Loading branch information
cowtowncoder committed Sep 17, 2024
1 parent f57c128 commit ef5d673
Showing 1 changed file with 25 additions and 22 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -660,10 +660,6 @@ public void writeUTF8String(byte[] text, int offset, int len) throws IOException
_outputBuffer[_outputTail++] = _quoteChar;
}

/**
 * Checks whether the given UTF-16 code unit is a surrogate, that is, lies in
 * the range {@code 0xD800 - 0xDFFF} (either half of a surrogate pair).
 *<p>
 * NOTE: a plain mask test {@code (ch & 0xD800) == 0xD800} is not sufficient:
 * it only verifies that four bits are set and therefore also accepts
 * {@code 0xF800 - 0xFFFF} (for example U+FFFD), which are ordinary
 * 3-byte characters and not surrogates.
 *
 * @param ch UTF-16 code unit to check
 * @return {@code true} if {@code ch} is a high or low surrogate
 */
private boolean isSurrogatePair(char ch) {
    return Character.isSurrogate(ch);
}

/*
/**********************************************************
/* Output method implementations, unprocessed ("raw")
Expand Down Expand Up @@ -1494,8 +1490,6 @@ private final void _writeStringSegment2(final char[] cbuf, int offset, final int
final byte[] outputBuffer = _outputBuffer;
final int[] escCodes = _outputEscapes;

boolean combineSurrogates = Feature.COMBINE_UNICODE_SURROGATES_IN_UTF8.enabledIn(_features);

while (offset < end) {
int ch = cbuf[offset++];
if (ch <= 0x7F) {
Expand All @@ -1517,14 +1511,17 @@ private final void _writeStringSegment2(final char[] cbuf, int offset, final int
outputBuffer[outputPtr++] = (byte) (0xc0 | (ch >> 6));
outputBuffer[outputPtr++] = (byte) (0x80 | (ch & 0x3f));
} else {
// multibyte character
if (combineSurrogates && isSurrogatePair((char) ch) && offset < end) {
char highSurrogate = (char) ch;
char lowSurrogate = cbuf[offset++];
outputPtr = _outputSurrogatePair(highSurrogate, lowSurrogate, outputPtr);
} else {
outputPtr = _outputMultiByteChar(ch, outputPtr);
// 3- or 4-byte character
if (_isSurrogateChar((char) ch)) {
final boolean combineSurrogates = Feature.COMBINE_UNICODE_SURROGATES_IN_UTF8.enabledIn(_features);
if (combineSurrogates && offset < end) {
char highSurrogate = (char) ch;
char lowSurrogate = cbuf[offset++];
outputPtr = _outputSurrogatePair(highSurrogate, lowSurrogate, outputPtr);
continue;
}
}
outputPtr = _outputMultiByteChar(ch, outputPtr);
}
}
_outputTail = outputPtr;
Expand All @@ -1541,8 +1538,6 @@ private final void _writeStringSegment2(final String text, int offset, final int
final byte[] outputBuffer = _outputBuffer;
final int[] escCodes = _outputEscapes;

boolean combineSurrogates = Feature.COMBINE_UNICODE_SURROGATES_IN_UTF8.enabledIn(_features);

while (offset < end) {
int ch = text.charAt(offset++);
if (ch <= 0x7F) {
Expand All @@ -1564,14 +1559,17 @@ private final void _writeStringSegment2(final String text, int offset, final int
outputBuffer[outputPtr++] = (byte) (0xc0 | (ch >> 6));
outputBuffer[outputPtr++] = (byte) (0x80 | (ch & 0x3f));
} else {
// multibyte character
if (combineSurrogates && isSurrogatePair((char) ch) && offset < end) {
char highSurrogate = (char) ch;
char lowSurrogate = text.charAt(offset++);
outputPtr = _outputSurrogatePair(highSurrogate, lowSurrogate, outputPtr);
} else {
outputPtr = _outputMultiByteChar(ch, outputPtr);
// 3- or 4-byte character
if (_isSurrogateChar((char) ch)) {
final boolean combineSurrogates = Feature.COMBINE_UNICODE_SURROGATES_IN_UTF8.enabledIn(_features);
if (combineSurrogates && offset < end) {
char highSurrogate = (char) ch;
char lowSurrogate = text.charAt(offset++);
outputPtr = _outputSurrogatePair(highSurrogate, lowSurrogate, outputPtr);
continue;
}
}
outputPtr = _outputMultiByteChar(ch, outputPtr);
}
}
_outputTail = outputPtr;
Expand Down Expand Up @@ -2244,5 +2242,10 @@ protected final void _flushBuffer() throws IOException
/**
 * Picks the hex digit table to use: {@code HEX_BYTES_UPPER} when
 * {@code _cfgWriteHexUppercase} is set, {@code HEX_BYTES_LOWER} otherwise.
 *
 * @return the selected hex digit table
 */
private byte[] getHexBytes() {
    if (_cfgWriteHexUppercase) {
        return HEX_BYTES_UPPER;
    }
    return HEX_BYTES_LOWER;
}

/**
 * Checks whether the given UTF-16 code unit is a surrogate, that is, lies in
 * the range {@code 0xD800 - 0xDFFF} (high or low surrogate).
 *<p>
 * NOTE: the earlier mask test {@code (ch & 0xD800) == 0xD800} only verified
 * that four bits were set, and so also accepted {@code 0xF800 - 0xFFFF}
 * (for example U+FFFD or full-width forms). Those are regular 3-byte
 * characters and must not be treated as part of a surrogate pair.
 *
 * @param ch UTF-16 code unit to check
 * @return {@code true} if {@code ch} is a high or low surrogate
 *
 * @since 2.18
 */
private boolean _isSurrogateChar(char ch) {
    return Character.isSurrogate(ch);
}
}

0 comments on commit ef5d673

Please sign in to comment.