Skip to content

Commit

Permalink
Interop test data from htslib.
Browse files Browse the repository at this point in the history
  • Loading branch information
cmnbroad committed Nov 13, 2024
1 parent 7e57f10 commit 6347964
Show file tree
Hide file tree
Showing 224 changed files with 14,147 additions and 102 deletions.
24 changes: 7 additions & 17 deletions src/test/java/htsjdk/samtools/cram/CRAMInteropTestUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,29 +8,19 @@
import java.util.ArrayList;
import java.util.List;

import htsjdk.utils.SamtoolsTestUtils;

/**
* Interop test data is kept in a separate repository, currently at https://github.com/samtools/htscodecs
* so it can be shared across htslib/samtools/htsjdk.
* Interop test data originates in a separate repository, currently at https://github.com/samtools/htslib, in htscodecs,
* but we keep a copy in htsjdk so we can use it for round trip tests in CI without needing to clone a second repo.
*/
public class CRAMInteropTestUtils {
public static final String INTEROP_TEST_FILES_PATH = SamtoolsTestUtils.getCRAMInteropData();

/**
 * Check whether the interop test data location exists.
 *
 * @return true if the path returned by {@link #getInteropTestDataLocation()} exists, otherwise false
 */
public static boolean isInteropTestDataAvailable() {
    return Files.exists(getInteropTestDataLocation());
}
public static final String INTEROP_TEST_FILES_PATH = "src/test/resources/htsjdk/samtools/cram/htslib_interop/";

/**
* @return the name and location of the local interop test data as specified by the
* variable INTEROP_TEST_FILES_PATH
*/
public static Path getInteropTestDataLocation() {
return Paths.get(INTEROP_TEST_FILES_PATH);
public static Path getCRAM31_Htslib_InteropTestDataLocation() {
return Paths.get(INTEROP_TEST_FILES_PATH + "cram31/");
}

// the input files have embedded newlines that the tests remove before round-tripping...
Expand Down Expand Up @@ -59,7 +49,7 @@ protected static final byte[] filterEmbeddedNewlines(final byte[] rawBytes) thro
protected static List<Path> getInteropCompressedFilePaths(final String compressedDir) throws IOException {
final List<Path> paths = new ArrayList<>();
Files.newDirectoryStream(
CRAMInteropTestUtils.getInteropTestDataLocation().resolve("dat/"+compressedDir),
CRAMInteropTestUtils.getCRAM31_Htslib_InteropTestDataLocation().resolve("dat/"+compressedDir),
path -> Files.isRegularFile(path))
.forEach(path -> paths.add(path));
return paths;
Expand Down Expand Up @@ -88,7 +78,7 @@ private static final String getUncompressedFileName(final String compressedFileN
protected static final List<Path> getInteropRawTestFiles() throws IOException {
final List<Path> paths = new ArrayList<>();
Files.newDirectoryStream(
CRAMInteropTestUtils.getInteropTestDataLocation().resolve("dat"),
CRAMInteropTestUtils.getCRAM31_Htslib_InteropTestDataLocation().resolve("dat"),
path -> (Files.isRegularFile(path)) && !Files.isHidden(path))
.forEach(path -> paths.add(path));
return paths;
Expand Down
9 changes: 0 additions & 9 deletions src/test/java/htsjdk/samtools/cram/FQZCompInteropTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -42,16 +42,7 @@ public Object[][] getDecodeOnlyTestCases() throws IOException {
return testCases.toArray(new Object[][]{});
}

@Test(description = "Test if CRAM Interop Test Data is available")
public void testHtsCodecsCorpusIsAvailable() {
    // Raise a SkipException (instead of failing) when the interop data cannot be found.
    final boolean corpusPresent = CRAMInteropTestUtils.isInteropTestDataAvailable();
    if (corpusPresent) {
        return;
    }
    final String message = String.format("CRAM Interop Test Data is not available at %s",
            CRAMInteropTestUtils.INTEROP_TEST_FILES_PATH);
    throw new SkipException(message);
}

@Test (
dependsOnMethods = "testHtsCodecsCorpusIsAvailable",
dataProvider = "decodeOnlyTestCases",
description = "Uncompress the existing compressed file using htsjdk FQZComp and compare it with the original file.")
public void testDecodeOnly(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,17 +38,7 @@ public Object[][] getAllNameTokenizationInteropTests() throws IOException {
return testCases.toArray(new Object[][]{});
}

//TODO: check in the interop streams and get rid of these getCorpus methods
@Test(description = "Test if CRAM Interop Test Data is available")
public void testGetHTSCodecsCorpus() {
    // Raise a SkipException (instead of failing) when the interop data cannot be found.
    final boolean corpusPresent = CRAMInteropTestUtils.isInteropTestDataAvailable();
    if (corpusPresent) {
        return;
    }
    final String message = String.format("CRAM Interop Test Data is not available at %s",
            CRAMInteropTestUtils.INTEROP_TEST_FILES_PATH);
    throw new SkipException(message);
}

@Test (
dependsOnMethods = "testGetHTSCodecsCorpus",
dataProvider = "allNameTokenizationInteropTests",
description = "Roundtrip using htsjdk NameTokenization Codec. Compare the output with the original file" )
public void testNameTokenizationRoundTrip(
Expand Down Expand Up @@ -76,7 +66,6 @@ public void testNameTokenizationRoundTrip(
}

@Test (
dependsOnMethods = "testGetHTSCodecsCorpus",
dataProvider = "allNameTokenizationInteropTests",
description = "Compress the original file using htsjdk NameTokenization Codec and compare it with the existing compressed file. " +
"Uncompress the existing compressed file using htsjdk NameTokenization Codec and compare it with the original file.")
Expand Down Expand Up @@ -106,7 +95,7 @@ public void testNameTokenizationDecompress(
private List<Path> getPreCompressedInteropNameTokTestFiles() throws IOException {
final List<Path> paths = new ArrayList<>();
Files.newDirectoryStream(
CRAMInteropTestUtils.getInteropTestDataLocation().resolve("names/"+COMPRESSED_TOK_DIR),
CRAMInteropTestUtils.getCRAM31_Htslib_InteropTestDataLocation().resolve("names/"+COMPRESSED_TOK_DIR),
path -> Files.isRegularFile(path))
.forEach(path -> paths.add(path));
return paths;
Expand Down
17 changes: 1 addition & 16 deletions src/test/java/htsjdk/samtools/cram/RANSInteropTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,7 @@

/**
* RANSInteropTest tests if the htsjdk RANS4x8 and RANSNx16 implementations are interoperable
* with the htslib implementations. The test files for Interop tests are kept in a separate repository,
* currently at https://github.com/samtools/htscodecs so it can be shared across htslib/samtools/htsjdk.
*
* For local development env, the Interop test files must be downloaded locally and made available at "../htscodecs/tests"
* For CI env, the Interop test files are made available from the existing samtools installation
* at "/samtools-1.14/htslib-1.14/htscodecs/tests"
* with the htslib implementations. The test files for Interop tests are from the htslib repo.
*/
public class RANSInteropTest extends HtsjdkTest {
public static final String COMPRESSED_RANS4X8_DIR = "r4x8";
Expand Down Expand Up @@ -151,16 +146,7 @@ public Object[][] getDecodeOnlyTestCases() throws IOException {
.toArray(Object[][]::new);
}

@Test(description = "Test if CRAM Interop Test Data is available")
public void testHtsCodecsCorpusIsAvailable() {
    // Raise a SkipException (instead of failing) when the interop data cannot be found.
    final boolean corpusPresent = CRAMInteropTestUtils.isInteropTestDataAvailable();
    if (corpusPresent) {
        return;
    }
    final String message = String.format("CRAM Interop Test Data is not available at %s",
            CRAMInteropTestUtils.INTEROP_TEST_FILES_PATH);
    throw new SkipException(message);
}

@Test (
dependsOnMethods = "testHtsCodecsCorpusIsAvailable",
dataProvider = "roundTripTestCases",
description = "Roundtrip using htsjdk RANS. Compare the output with the original file" )
public void testRANSRoundTrip(
Expand Down Expand Up @@ -188,7 +174,6 @@ public void testRANSRoundTrip(
}

@Test (
dependsOnMethods = "testHtsCodecsCorpusIsAvailable",
dataProvider = "decodeOnlyTestCases",
description = "Uncompress the existing compressed file using htsjdk RANS and compare it with the original file.")
public void testDecodeOnly(
Expand Down
15 changes: 4 additions & 11 deletions src/test/java/htsjdk/samtools/cram/RangeInteropTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@
import java.util.Arrays;
import java.util.List;

//TODO: interop test failures:
// u32.4, u32.65, u32.1, u32.9
//
public class RangeInteropTest extends HtsjdkTest {
public static final String COMPRESSED_RANGE_DIR = "arith";

Expand Down Expand Up @@ -74,16 +77,7 @@ public Object[][] getDecodeOnlyTestCases() throws IOException {
return testCases.toArray(new Object[][]{});
}

@Test(description = "Test if CRAM Interop Test Data is available")
public void testHtsCodecsCorpusIsAvailable() {
    // Raise a SkipException (instead of failing) when the interop data cannot be found.
    final boolean corpusPresent = CRAMInteropTestUtils.isInteropTestDataAvailable();
    if (corpusPresent) {
        return;
    }
    final String message = String.format("CRAM Interop Test Data is not available at %s",
            CRAMInteropTestUtils.INTEROP_TEST_FILES_PATH);
    throw new SkipException(message);
}

@Test (
dependsOnMethods = "testHtsCodecsCorpusIsAvailable",
dataProvider = "roundTripTestCases",
description = "Roundtrip using htsjdk Range Codec. Compare the output with the original file" )
public void testRangeRoundTrip(
Expand All @@ -109,7 +103,6 @@ public void testRangeRoundTrip(
}

@Test (
dependsOnMethods = "testHtsCodecsCorpusIsAvailable",
dataProvider = "decodeOnlyTestCases",
description = "Uncompress the existing compressed file using htsjdk Range codec and compare it with the original file.")
public void testDecodeOnly(
Expand All @@ -124,7 +117,7 @@ public void testDecodeOnly(
// and compare the results

final ByteBuffer uncompressedInteropBytes;
if (uncompressedInteropPath.toString().contains("htscodecs/tests/dat/u")) {
if (uncompressedInteropPath.toString().endsWith("dat/u32")) {
uncompressedInteropBytes = ByteBuffer.wrap(IOUtils.toByteArray(uncompressedInteropStream));
} else {
uncompressedInteropBytes = ByteBuffer.wrap(CRAMInteropTestUtils.filterEmbeddedNewlines(IOUtils.toByteArray(uncompressedInteropStream)));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,47 +30,47 @@ public Object[][] getNameTokenisationTestData() {

// a subset of read names from
// src/test/resources/htsjdk/samtools/cram/CEUTrio.HiSeq.WGS.b37.NA12878.20.first.8000.bam
readNamesList.add("20FUKAAXX100202:6:27:4968:125377\n" +
"20FUKAAXX100202:6:27:4986:125375\n" +
"20FUKAAXX100202:5:62:8987:1929\n" +
"20GAVAAXX100126:1:28:4295:139802\n" +
"20FUKAAXX100202:4:23:8516:117251\n" +
"20FUKAAXX100202:6:23:6442:37469\n" +
"20FUKAAXX100202:8:24:10477:24196\n" +
"20GAVAAXX100126:8:63:5797:158250\n" +
"20FUKAAXX100202:1:45:12798:104365\n" +
"20GAVAAXX100126:3:23:6419:199245\n" +
"20FUKAAXX100202:8:48:6663:137967\n" +
readNamesList.add("20FUKAAXX100202:6:27:4968:125377\0" +
"20FUKAAXX100202:6:27:4986:125375\0" +
"20FUKAAXX100202:5:62:8987:1929\0" +
"20GAVAAXX100126:1:28:4295:139802\0" +
"20FUKAAXX100202:4:23:8516:117251\0" +
"20FUKAAXX100202:6:23:6442:37469\0" +
"20FUKAAXX100202:8:24:10477:24196\0" +
"20GAVAAXX100126:8:63:5797:158250\0" +
"20FUKAAXX100202:1:45:12798:104365\0" +
"20GAVAAXX100126:3:23:6419:199245\0" +
"20FUKAAXX100202:8:48:6663:137967\0" +
"20FUKAAXX100202:6:68:17726:162601");

// a subset of read names from
// src/test/resources/htsjdk/samtools/longreads/NA12878.m64020_190210_035026.chr21.5011316.5411316.unmapped.bam
readNamesList.add("m64020_190210_035026/44368402/ccs\n");
readNamesList.add("m64020_190210_035026/44368402/ccs\0");
readNamesList.add("m64020_190210_035026/44368402/ccs");
readNamesList.add("m64020_190210_035026/44368402/ccs\n" +
"m64020_190210_035026/124127126/ccs\n" +
"m64020_190210_035026/4981311/ccs\n" +
"m64020_190210_035026/80022195/ccs\n" +
"m64020_190210_035026/17762104/ccs\n" +
"m64020_190210_035026/62981096/ccs\n" +
"m64020_190210_035026/86968803/ccs\n" +
"m64020_190210_035026/46400955/ccs\n" +
"m64020_190210_035026/137561592/ccs\n" +
"m64020_190210_035026/52233471/ccs\n" +
"m64020_190210_035026/97127189/ccs\n" +
"m64020_190210_035026/115278035/ccs\n" +
"m64020_190210_035026/155256324/ccs\n" +
"m64020_190210_035026/163644151/ccs\n" +
"m64020_190210_035026/162728365/ccs\n" +
"m64020_190210_035026/160238116/ccs\n" +
"m64020_190210_035026/147719983/ccs\n" +
"m64020_190210_035026/60883331/ccs\n" +
"m64020_190210_035026/1116165/ccs\n" +
readNamesList.add("m64020_190210_035026/44368402/ccs\0" +
"m64020_190210_035026/124127126/ccs\0" +
"m64020_190210_035026/4981311/ccs\0" +
"m64020_190210_035026/80022195/ccs\0" +
"m64020_190210_035026/17762104/ccs\0" +
"m64020_190210_035026/62981096/ccs\0" +
"m64020_190210_035026/86968803/ccs\0" +
"m64020_190210_035026/46400955/ccs\0" +
"m64020_190210_035026/137561592/ccs\0" +
"m64020_190210_035026/52233471/ccs\0" +
"m64020_190210_035026/97127189/ccs\0" +
"m64020_190210_035026/115278035/ccs\0" +
"m64020_190210_035026/155256324/ccs\0" +
"m64020_190210_035026/163644151/ccs\0" +
"m64020_190210_035026/162728365/ccs\0" +
"m64020_190210_035026/160238116/ccs\0" +
"m64020_190210_035026/147719983/ccs\0" +
"m64020_190210_035026/60883331/ccs\0" +
"m64020_190210_035026/1116165/ccs\0" +
"m64020_190210_035026/75893199/ccs");

// source: https://gatk.broadinstitute.org/hc/en-us/articles/360035890671-Read-groups
readNamesList.add(
"H0164ALXX140820:2:1101:10003:23460\n" +
"H0164ALXX140820:2:1101:10003:23460\0" +
"H0164ALXX140820:2:1101:15118:25288");

final List<Object[]> testCases = new ArrayList<>();
Expand Down
5 changes: 0 additions & 5 deletions src/test/java/htsjdk/utils/SamtoolsTestUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,6 @@ public static String getSamtoolsBin() {
return samtoolsPath == null ? "/usr/local/bin/samtools" : samtoolsPath;
}

/**
 * Choose the directory holding the htscodecs interop test files.
 *
 * @return "../htscodecs/tests" when the samtools environment variable is unset; otherwise the
 * htscodecs tests directory under the versioned samtools/htslib directories, relative to the working directory
 */
public static String getCRAMInteropData() {
    if (System.getenv(SAMTOOLS_BINARY_ENV_VARIABLE) == null) {
        return "../htscodecs/tests";
    }
    return "./samtools-" + expectedSamtoolsVersion + "/htslib-" + expectedHtslibVersion + "/htscodecs/tests";
}

/**
* Execute a samtools command line if a local samtools executable is available see {@link #isSamtoolsAvailable()}.
*
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Test streams taken from htslib/htscodecs/tests, commit hash 0f8bedc11e3ee039060a058b5cd5ec99e0b593a2, for use
as CRAM 3.1 interop tests.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading

0 comments on commit 6347964

Please sign in to comment.