Skip to content

Commit

Permalink
HADOOP-19385. S3A: iceberg bulk delete test (Java17+)
Browse files Browse the repository at this point in the history
Add Iceberg core to the hadoop-aws test classpath.

Iceberg is java17+ only, so this adds
* A new test source path src/test/java17
* A new profile "java-17-or-later" which includes this
  and declares the dependency on iceberg-core.

The new test is ITestIcebergBulkDelete; it is parameterized on
Iceberg bulk delete enabled/disabled and s3a multipart
delete enabled/disabled.

There is a superclass contract test
  org.apache.fs.test.formats.AbstractIcebergDeleteTest
to support future stores which implement bulk delete.
This is currently a minimal superclass; all tests
are currently in ITestIcebergBulkDelete

Change-Id: If0aeea6adf8cebbd850cabd72f6e5322e0cf4a4b
  • Loading branch information
steveloughran committed Jan 22, 2025
1 parent d2095fa commit 09b4682
Show file tree
Hide file tree
Showing 13 changed files with 613 additions and 28 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@
import org.apache.hadoop.io.wrappedio.WrappedIO;
import org.apache.hadoop.io.wrappedio.impl.DynamicWrappedIO;

import static org.apache.hadoop.fs.contract.ContractTestUtils.assertSuccessfulBulkDelete;
import static org.apache.hadoop.fs.contract.ContractTestUtils.createListOfPaths;
import static org.apache.hadoop.fs.contract.ContractTestUtils.skip;
import static org.apache.hadoop.fs.contract.ContractTestUtils.touch;
import static org.apache.hadoop.io.wrappedio.WrappedIO.bulkDelete_delete;
Expand Down Expand Up @@ -210,6 +212,20 @@ public void testDeletePathsNotExists() throws Exception {
assertSuccessfulBulkDelete(bulkDelete_delete(getFileSystem(), basePath, paths));
}

/**
 * Delete a path containing awkward characters.
 * This validates that any conversion to URI/string
 * when passing paths to an object store is handled correctly.
 */
@Test
public void testDeleteComplexFilename() throws Exception {
  final List<Path> paths = new ArrayList<>();
  paths.add(new Path(basePath, "child[=comple]x"));
  // the bulk delete call does not probe for the path's existence first,
  // so a nonexistent path is still a valid (and successful) request.
  assertSuccessfulBulkDelete(bulkDelete_delete(getFileSystem(), basePath, paths));
}

@Test
public void testDeletePathsDirectory() throws Exception {
List<Path> paths = new ArrayList<>();
Expand Down Expand Up @@ -333,28 +349,4 @@ public void testChildPaths() throws Exception {
assertSuccessfulBulkDelete(bulkDelete_delete(getFileSystem(), basePath, paths));
}


/**
 * Assert that a bulk delete operation succeeded.
 * A successful bulk delete returns an empty list of (path, error) entries.
 * @param entries entries returned by the bulk delete call
 */
public static void assertSuccessfulBulkDelete(List<Map.Entry<Path, String>> entries) {
  Assertions.assertThat(entries)
      .describedAs("Bulk delete failed, "
          + "return entries should be empty after successful delete")
      .isEmpty();
}

/**
* Create a list of paths with the given count
* under the given base path.
*/
/**
 * Build a list of {@code count} child paths ("file-0" ... "file-&lt;count-1&gt;")
 * under the given base path.
 * @param count number of paths to create
 * @param basePath parent path of every generated entry
 * @return the generated list
 */
private List<Path> createListOfPaths(int count, Path basePath) {
  final List<Path> result = new ArrayList<>(count);
  for (int index = 0; index < count; index++) {
    result.add(new Path(basePath, "file-" + index));
  }
  return result;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Properties;
import java.util.Set;
Expand Down Expand Up @@ -1825,6 +1826,48 @@ public static long totalReadSize(final List<FileRange> fileRanges) {
.sum();
}

/**
 * Assert that a bulk delete operation succeeded:
 * the returned list of (path, error) entries must be empty.
 * @param entries entries returned by the bulk delete call
 */
public static void assertSuccessfulBulkDelete(List<Map.Entry<Path, String>> entries) {
  final String failureMessage =
      "Bulk delete failed, return entries should be empty after successful delete";
  Assertions.assertThat(entries)
      .describedAs(failureMessage)
      .isEmpty();
}

/**
 * Get the status of a path, mapping "file not found" to a null result
 * rather than an exception.
 * @param fs filesystem to probe
 * @param path path to probe
 * @return the file status, or null if the path does not exist
 * @throws IOException any IO failure other than file not found
 */
public static FileStatus getFileStatusOrNull(
    final FileSystem fs,
    final Path path)
    throws IOException {
  try {
    return fs.getFileStatus(path);
  } catch (FileNotFoundException e) {
    // absence is an expected outcome, not a failure
    return null;
  }
}

/**
 * Create a list of {@code count} paths named "file-0" ... "file-&lt;count-1&gt;"
 * under the given base path.
 * @param count number of paths to generate
 * @param basePath parent of all generated paths
 * @return the list of paths
 */
public static List<Path> createListOfPaths(int count, Path basePath) {
  final List<Path> generated = new ArrayList<>();
  int i = 0;
  while (i < count) {
    generated.add(new Path(basePath, "file-" + i));
    i++;
  }
  return generated;
}

/**
* Results of recursive directory creation/scan operations.
*/
Expand Down
12 changes: 11 additions & 1 deletion hadoop-project/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -787,7 +787,17 @@
<version>${hadoop.version}</version>
<type>test-jar</type>
</dependency>

<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-format-testing</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-format-testing</artifactId>
<version>${hadoop.version}</version>
<type>test-jar</type>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
Expand Down
70 changes: 70 additions & 0 deletions hadoop-tools/hadoop-aws/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,10 @@
<job.id>00</job.id>
<!-- are root tests enabled. Set to false when running parallel jobs on same bucket -->
<root.tests.enabled>unset</root.tests.enabled>

<!-- iceberg is java 17+ -->
<!-- This requires the changes of the matching PR -->
<iceberg.version>1.8.0-SNAPSHOT</iceberg.version>
</properties>

<profiles>
Expand Down Expand Up @@ -312,6 +316,69 @@
</properties>
</profile>

<!-- Adds a *test only* java 17 build profile-->
<profile>
<id>java-17-or-later</id>
<activation>
<jdk>[17,)</jdk>
</activation>
<build>
<plugins>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>build-helper-maven-plugin</artifactId>
<executions>
<execution>
<id>add-java17-test-source</id>
<phase>generate-test-sources</phase>
<goals>
<goal>add-test-source</goal>
</goals>
<configuration>
<sources>
<source>${basedir}/src/test/java17</source>
</sources>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
<dependencies>

<!-- Apache Iceberg, used for testing/regression testing BulkDelete -->
<!-- iceberg is java 17+ and so can only be referenced in java 17+ test source trees -->
<dependency>
<groupId>org.apache.iceberg</groupId>
<artifactId>iceberg-core</artifactId>
<version>${iceberg.version}</version>
<scope>test</scope>
<exclusions>
<exclusion>
<groupId>org.apache.httpcomponents.core5</groupId>
<artifactId>*</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.httpcomponents.client5</groupId>
<artifactId>*</artifactId>
</exclusion>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>*</artifactId>
</exclusion>
<exclusion>
<groupId>io.airlift</groupId>
<artifactId>aircompressor</artifactId>
</exclusion>
<exclusion>
<groupId>org.checkerframework</groupId>
<artifactId>checker-qual</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
</profile>

</profiles>

<build>
Expand Down Expand Up @@ -581,5 +648,8 @@
<artifactId>bcpkix-jdk18on</artifactId>
<scope>test</scope>
</dependency>



</dependencies>
</project>
Original file line number Diff line number Diff line change
Expand Up @@ -1518,6 +1518,13 @@ private Constants() {
*/
public static final String FS_S3A_PERFORMANCE_FLAGS =
"fs.s3a.performance.flags";

/**
* All performance flags in the enumeration.
*/
public static final String PERFORMANCE_FLAGS =
"create, delete, mkdir, open";

/**
* Prefix for adding a header to the object when created.
* The actual value must have a "." suffix and then the actual header.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
import java.util.List;
import java.util.Map;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import software.amazon.awssdk.services.s3.model.ObjectIdentifier;

import org.apache.hadoop.fs.BulkDelete;
Expand All @@ -42,6 +44,9 @@
*/
public class BulkDeleteOperation extends AbstractStoreOperation implements BulkDelete {

private static final Logger LOG = LoggerFactory.getLogger(
BulkDeleteOperation.class);

private final BulkDeleteOperationCallbacks callbacks;

private final Path basePath;
Expand Down Expand Up @@ -78,14 +83,18 @@ public Path basePath() {
public List<Map.Entry<Path, String>> bulkDelete(final Collection<Path> paths)
throws IOException, IllegalArgumentException {
requireNonNull(paths);
checkArgument(paths.size() <= pageSize,
"Number of paths (%d) is larger than the page size (%d)", paths.size(), pageSize);
final int size = paths.size();
LOG.debug("bulkDelete() of {} paths with pagesize {}",
size, pageSize);
checkArgument(size <= pageSize,
"Number of paths (%d) is larger than the page size (%d)", size, pageSize);
final StoreContext context = getStoreContext();
final List<ObjectIdentifier> objects = paths.stream().map(p -> {
checkArgument(p.isAbsolute(), "Path %s is not absolute", p);
checkArgument(validatePathIsUnderParent(p, basePath),
"Path %s is not under the base path %s", p, basePath);
final String k = context.pathToKey(p);
LOG.debug("path \"{}\" mapped to \"{}\"", p, k);
return ObjectIdentifier.builder().key(k).build();
}).collect(toList());

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,17 @@ mvn verify -Dparallel-tests -Dprefetch -DtestsThreadCount=8
mvn verify -Dparallel-tests -Dprefetch -Dscale -DtestsThreadCount=8
```

## <a name="java17"></a> Java 17 Tests

This module includes a test source tree which compiles and runs on
Java 17+ _only_. This is to allow external libraries to be used
in testing — libraries which have been built on Java 17 and cannot
be loaded on older versions.

* This source tree is `src/test/java17`.
* It may depend upon any library that is built on Java 17 or later.
* It is for testing only.

## <a name="scale"></a> Scale Tests

There are a set of tests designed to measure the scalability and performance
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
import org.apache.hadoop.fs.statistics.MeanStatistic;

import static java.util.stream.Collectors.toList;
import static org.apache.hadoop.fs.contract.ContractTestUtils.assertSuccessfulBulkDelete;
import static org.apache.hadoop.fs.contract.ContractTestUtils.skip;
import static org.apache.hadoop.fs.contract.ContractTestUtils.touch;
import static org.apache.hadoop.fs.s3a.S3ATestUtils.*;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@
import org.apache.hadoop.fs.s3a.statistics.CommitterStatistics;
import org.apache.hadoop.io.wrappedio.WrappedIO;

import static org.apache.hadoop.fs.contract.AbstractContractBulkDeleteTest.assertSuccessfulBulkDelete;
import static org.apache.hadoop.fs.contract.ContractTestUtils.assertSuccessfulBulkDelete;
import static org.apache.hadoop.fs.contract.ContractTestUtils.skip;
import static org.apache.hadoop.fs.contract.ContractTestUtils.touch;
import static org.apache.hadoop.fs.s3a.Constants.*;
Expand Down
Loading

0 comments on commit 09b4682

Please sign in to comment.