Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Under very unfortunate conditions tests that check xContent objects
roundtrip parsing  (like i.e. [SearchHitsTests
testFromXContent](elastic#97716)
can fail when we happen to randomly pick YAML xContent type and create
random (realistic)Unicode character sequences that may contain the
character U+0085 (133) from the [Latin1 code
page](https://de.wikipedia.org/wiki/Unicodeblock_Lateinisch-1,_Erg%C3%A4nzung).

That specific character doesn't get parsed back to its original form for
YAML xContent, which can lead to [rare but hard to diagnose test
failures](elastic#97716 (comment)).

This change adds logic to AbstractXContentTestCase#test() which lies at
the core of most of our  xContent roundtrip tests that disallows test
instances containing that particular character  when using YAML xContent
type.

Closes elastic#97716
  • Loading branch information
cbuescher committed Jan 31, 2025
1 parent bbab741 commit 88f9e1b
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -145,8 +145,21 @@ private XContentTester(
public void test() throws IOException {
for (int runs = 0; runs < numberOfTestRuns; runs++) {
XContentType xContentType = randomFrom(XContentType.values()).canonical();
T testInstance = instanceSupplier.apply(xContentType);
T testInstance = null;
try {
if (xContentType.equals(XContentType.YAML)) {
testInstance = randomValueOtherThanMany(instance -> {
// unicode character U+0085 (NEXT LINE (NEL)) doesn't survive YAML round trip tests (see #97716)
// get a new random instance if we detect this character in the xContent output
try {
return toXContent.apply(instance, xContentType).utf8ToString().contains("\u0085");
} catch (IOException e) {
throw new RuntimeException(e);
}
}, () -> instanceSupplier.apply(xContentType));
} else {
testInstance = instanceSupplier.apply(xContentType);
}
BytesReference originalXContent = toXContent.apply(testInstance, xContentType);
BytesReference shuffledContent = insertRandomFieldsAndShuffle(
originalXContent,
Expand All @@ -173,7 +186,9 @@ public void test() throws IOException {
dispose.accept(parsed);
}
} finally {
dispose.accept(testInstance);
if (testInstance != null) {
dispose.accept(testInstance);
}
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,13 @@
import com.carrotsearch.randomizedtesting.RandomizedContext;

import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.xcontent.ToXContentFragment;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentFactory;
import org.elasticsearch.xcontent.XContentParser;
import org.elasticsearch.xcontent.XContentType;

import java.io.IOException;
import java.util.Map;

import static org.hamcrest.Matchers.equalTo;
Expand Down Expand Up @@ -49,4 +51,42 @@ public void testInsertRandomFieldsAndShuffle() throws Exception {
assertThat(mapOrdered.keySet().iterator().next(), not(equalTo("field")));
}
}

private record TestToXContent(String field, String value) implements ToXContentFragment {

@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
return builder.field(field, value);
}
}

public void testYamlXContentRoundtripSanitization() throws Exception {
var test = new AbstractXContentTestCase<TestToXContent>() {

@Override
protected TestToXContent createTestInstance() {
// we need to randomly create both a "problematic" and an okay version in order to ensure that the sanitization code
// can draw at least one okay version if polled often enough
return randomBoolean() ? new TestToXContent("a\u0085b", "def") : new TestToXContent("a b", "def");
}

@Override
protected TestToXContent doParseInstance(XContentParser parser) throws IOException {
assertEquals(XContentParser.Token.START_OBJECT, parser.nextToken());
assertEquals(XContentParser.Token.FIELD_NAME, parser.nextToken());
String name = parser.currentName();
assertEquals(XContentParser.Token.VALUE_STRING, parser.nextToken());
String value = parser.text();
assertEquals(XContentParser.Token.END_OBJECT, parser.nextToken());
return new TestToXContent(name, value);
};

@Override
protected boolean supportsUnknownFields() {
return false;
}
};
// testFromXContent runs 20 repetitions, enough to hit a YAML xcontent version very likely
test.testFromXContent();
}
}

0 comments on commit 88f9e1b

Please sign in to comment.