Skip to content

Commit

Permalink
Fix rate failures in YAML xContent roundtrip tests
Browse files Browse the repository at this point in the history
Under very unfortunate conditions tests that check xContent objects
roundtrip parsing  (like i.e. [SearchHitsTests
testFromXContent](elastic#97716)
can fail when we happen to randomly pick YAML xContent type and create
random (realistic)Unicode character sequences that may contain the
character U+0085 (133) from the [Latin1 code
page](https://de.wikipedia.org/wiki/Unicodeblock_Lateinisch-1,_Erg%C3%A4nzung).

That specific character doesn't get parsed back to its original form for
YAML xContent, which can lead to [rare but hard to diagnose test
failures](elastic#97716 (comment)).

This change adds logic to AbstractXContentTestCase#test() which lies at
the core of most of our  xContent roundtrip tests that disallows test
instances containing that particular character  when using YAML xContent
type.

Closes elastic#97716
  • Loading branch information
cbuescher committed Feb 3, 2025
1 parent 85f5222 commit 8c6583f
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -145,8 +145,21 @@ private XContentTester(
public void test() throws IOException {
for (int runs = 0; runs < numberOfTestRuns; runs++) {
XContentType xContentType = randomFrom(XContentType.values()).canonical();
T testInstance = instanceSupplier.apply(xContentType);
T testInstance = null;
try {
if (xContentType.equals(XContentType.YAML)) {
testInstance = randomValueOtherThanMany(instance -> {
// unicode character U+0085 (NEXT LINE (NEL)) doesn't survive YAML round trip tests (see #97716)
// get a new random instance if we detect this character in the xContent output
try {
return toXContent.apply(instance, xContentType).utf8ToString().contains("\u0085");
} catch (IOException e) {
throw new RuntimeException(e);
}
}, () -> instanceSupplier.apply(xContentType));
} else {
testInstance = instanceSupplier.apply(xContentType);
}
BytesReference originalXContent = toXContent.apply(testInstance, xContentType);
BytesReference shuffledContent = insertRandomFieldsAndShuffle(
originalXContent,
Expand All @@ -173,7 +186,9 @@ public void test() throws IOException {
dispose.accept(parsed);
}
} finally {
dispose.accept(testInstance);
if (testInstance != null) {
dispose.accept(testInstance);
}
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,13 @@
import com.carrotsearch.randomizedtesting.RandomizedContext;

import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.xcontent.ToXContentFragment;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentFactory;
import org.elasticsearch.xcontent.XContentParser;
import org.elasticsearch.xcontent.XContentType;

import java.io.IOException;
import java.util.Map;

import static org.hamcrest.Matchers.equalTo;
Expand Down Expand Up @@ -49,4 +51,42 @@ public void testInsertRandomFieldsAndShuffle() throws Exception {
assertThat(mapOrdered.keySet().iterator().next(), not(equalTo("field")));
}
}

private record TestToXContent(String field, String value) implements ToXContentFragment {

@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
return builder.field(field, value);
}
}

public void testYamlXContentRoundtripSanitization() throws Exception {
var test = new AbstractXContentTestCase<TestToXContent>() {

@Override
protected TestToXContent createTestInstance() {
// we need to randomly create both a "problematic" and an okay version in order to ensure that the sanitization code
// can draw at least one okay version if polled often enough
return randomBoolean() ? new TestToXContent("a\u0085b", "def") : new TestToXContent("a b", "def");
}

@Override
protected TestToXContent doParseInstance(XContentParser parser) throws IOException {
assertEquals(XContentParser.Token.START_OBJECT, parser.nextToken());
assertEquals(XContentParser.Token.FIELD_NAME, parser.nextToken());
String name = parser.currentName();
assertEquals(XContentParser.Token.VALUE_STRING, parser.nextToken());
String value = parser.text();
assertEquals(XContentParser.Token.END_OBJECT, parser.nextToken());
return new TestToXContent(name, value);
};

@Override
protected boolean supportsUnknownFields() {
return false;
}
};
// testFromXContent runs 20 repetitions, enough to hit a YAML xcontent version very likely
test.testFromXContent();
}
}

0 comments on commit 8c6583f

Please sign in to comment.