From 55f6086c408d47500d15aeb969331b41f8b47668 Mon Sep 17 00:00:00 2001 From: yash-puligundla Date: Mon, 29 Jan 2024 15:22:34 -0500 Subject: [PATCH] Addressing feedback from nov 21, nov 28 - part 3 --- .../cram/compression/CompressionUtils.java | 4 +- .../compression/RANSExternalCompressor.java | 4 +- .../compression/RangeExternalCompressor.java | 4 +- .../cram/compression/range/Constants.java | 1 + .../cram/compression/range/RangeCoder.java | 6 +- .../cram/compression/range/RangeDecode.java | 86 ++++++++++--------- .../cram/compression/range/RangeEncode.java | 24 ++---- .../cram/compression/range/RangeParams.java | 2 +- .../rans/ransnx16/RANSNx16Decode.java | 4 +- .../samtools/cram/CRAMInteropTestUtils.java | 19 ++-- .../htsjdk/samtools/cram/RANSInteropTest.java | 36 +++----- .../samtools/cram/RangeInteropTest.java | 47 ++++++++-- .../cram/compression/range/RangeTest.java | 7 +- 13 files changed, 122 insertions(+), 122 deletions(-) diff --git a/src/main/java/htsjdk/samtools/cram/compression/CompressionUtils.java b/src/main/java/htsjdk/samtools/cram/compression/CompressionUtils.java index 6d9a725696..d4d1408448 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/CompressionUtils.java +++ b/src/main/java/htsjdk/samtools/cram/compression/CompressionUtils.java @@ -147,15 +147,13 @@ else if (numSymbols <= 16){ return outBufferPack; } - - public static ByteBuffer allocateOutputBuffer(final int inSize) { // This calculation is identical to the one in samtools rANS_static.c // Presumably the frequency table (always big enough for order 1) = 257*257, // then * 3 for each entry (byte->symbol, 2 bytes -> scaled frequency), // + 9 for the header (order byte, and 2 int lengths for compressed/uncompressed lengths). final int compressedSize = (int) (inSize + 257 * 257 * 3 + 9); - final ByteBuffer outputBuffer = ByteBuffer.allocate(compressedSize).order(ByteOrder.LITTLE_ENDIAN); + final ByteBuffer outputBuffer = allocateByteBuffer(compressedSize); if (outputBuffer.remaining() < compressedSize) { throw new CRAMException("Failed to allocate sufficient buffer size for RANS coder."); } diff --git a/src/main/java/htsjdk/samtools/cram/compression/RANSExternalCompressor.java b/src/main/java/htsjdk/samtools/cram/compression/RANSExternalCompressor.java index 848d7a2906..dd4794b0e3 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/RANSExternalCompressor.java +++ b/src/main/java/htsjdk/samtools/cram/compression/RANSExternalCompressor.java @@ -68,13 +68,13 @@ public RANSExternalCompressor( @Override public byte[] compress(final byte[] data) { final RANS4x8Params params = new RANS4x8Params(order); - final ByteBuffer buffer = ransEncode.compress(ByteBuffer.wrap(data), params); + final ByteBuffer buffer = ransEncode.compress(CompressionUtils.wrap(data), params); return toByteArray(buffer); } @Override public byte[] uncompress(byte[] data) { - final ByteBuffer buf = ransDecode.uncompress(ByteBuffer.wrap(data)); + final ByteBuffer buf = ransDecode.uncompress(CompressionUtils.wrap(data)); return toByteArray(buf); } diff --git a/src/main/java/htsjdk/samtools/cram/compression/RangeExternalCompressor.java b/src/main/java/htsjdk/samtools/cram/compression/RangeExternalCompressor.java index 1c2a87982c..650ac7c275 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/RangeExternalCompressor.java +++ b/src/main/java/htsjdk/samtools/cram/compression/RangeExternalCompressor.java @@ -32,13 +32,13 @@ public RangeExternalCompressor( @Override public byte[] compress(byte[] data) { final RangeParams params = new RangeParams(formatFlags); - final ByteBuffer buffer = rangeEncode.compress(ByteBuffer.wrap(data), params); + final ByteBuffer buffer = rangeEncode.compress(CompressionUtils.wrap(data), params); return toByteArray(buffer); } @Override public byte[] uncompress(byte[] data) { - final ByteBuffer buf = rangeDecode.uncompress(ByteBuffer.wrap(data)); + final ByteBuffer buf = rangeDecode.uncompress(CompressionUtils.wrap(data)); return toByteArray(buf); } diff --git a/src/main/java/htsjdk/samtools/cram/compression/range/Constants.java b/src/main/java/htsjdk/samtools/cram/compression/range/Constants.java index 25066b1d2e..e2e941a549 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/range/Constants.java +++ b/src/main/java/htsjdk/samtools/cram/compression/range/Constants.java @@ -4,4 +4,5 @@ final public class Constants { public static final int NUMBER_OF_SYMBOLS = 256; public static final int MAX_FREQ = ((1<<16)-17); public static final int STEP = 16; + public static final long MAX_RANGE = 0xFFFFFFFFL; } \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/range/RangeCoder.java b/src/main/java/htsjdk/samtools/cram/compression/range/RangeCoder.java index 022cd106d3..a7d7b21828 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/range/RangeCoder.java +++ b/src/main/java/htsjdk/samtools/cram/compression/range/RangeCoder.java @@ -4,8 +4,6 @@ public class RangeCoder { - private static final long MAX_RANGE = 0xFFFFFFFFL; - private long low; private long range; private long code; @@ -16,7 +14,7 @@ public class RangeCoder { protected RangeCoder() { // Spec: RangeEncodeStart this.low = 0; - this.range = MAX_RANGE; // 4 bytes of all 1's + this.range = Constants.MAX_RANGE; // 4 bytes of all 1's this.code = 0; this.FFnum = 0; this.carry = false; @@ -27,7 +25,7 @@ protected void rangeDecodeStart(final ByteBuffer inBuffer){ for (int i = 0; i < 5; i++){ code = (code << 8) + (inBuffer.get() & 0xFF); } - code &= MAX_RANGE; + code &= Constants.MAX_RANGE; } protected void rangeDecode(final ByteBuffer inBuffer, final int cumulativeFrequency, final int symbolFrequency){ diff --git a/src/main/java/htsjdk/samtools/cram/compression/range/RangeDecode.java b/src/main/java/htsjdk/samtools/cram/compression/range/RangeDecode.java index c8f7e62127..5987630170 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/range/RangeDecode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/range/RangeDecode.java @@ -11,14 +11,19 @@ public class RangeDecode { - private static final ByteBuffer EMPTY_BUFFER = ByteBuffer.allocate(0); + private static final ByteBuffer EMPTY_BUFFER = CompressionUtils.allocateByteBuffer(0); + // This method assumes that inBuffer is already rewound. + // It uncompresses the data in the inBuffer, leaving it consumed. + // Returns a rewound ByteBuffer containing the uncompressed data. public ByteBuffer uncompress(final ByteBuffer inBuffer) { + + // For Range decoding, the bytes are read in little endian from the input stream inBuffer.order(ByteOrder.LITTLE_ENDIAN); return uncompress(inBuffer, 0); } - private ByteBuffer uncompress(final ByteBuffer inBuffer, int outSize) { + private ByteBuffer uncompress(final ByteBuffer inBuffer, final int outSize) { if (inBuffer.remaining() == 0) { return EMPTY_BUFFER; } @@ -28,11 +33,11 @@ private ByteBuffer uncompress(final ByteBuffer inBuffer, int outSize) { final RangeParams rangeParams = new RangeParams(formatFlags); // noSz - outSize = rangeParams.isNosz() ? outSize : CompressionUtils.readUint7(inBuffer); + int uncompressedSize = rangeParams.isNosz() ? outSize : CompressionUtils.readUint7(inBuffer); // stripe if (rangeParams.isStripe()) { - return decodeStripe(inBuffer, outSize); + return decodeStripe(inBuffer, uncompressedSize); } // pack @@ -41,7 +46,7 @@ private ByteBuffer uncompress(final ByteBuffer inBuffer, int outSize) { int numSymbols = 0; byte[] packMappingTable = null; if (rangeParams.isPack()){ - packDataLength = outSize; + packDataLength = uncompressedSize; numSymbols = inBuffer.get() & 0xFF; // if (numSymbols > 16 or numSymbols==0), raise exception @@ -50,43 +55,49 @@ private ByteBuffer uncompress(final ByteBuffer inBuffer, int outSize) { for (int i = 0; i < numSymbols; i++) { packMappingTable[i] = inBuffer.get(); } - outSize = CompressionUtils.readUint7(inBuffer); + uncompressedSize = CompressionUtils.readUint7(inBuffer); } else { - throw new CRAMException("Bit Packing is not permitted when number of distinct symbols is greater than 16 or equal to 0. Number of distinct symbols: " + numSymbols); + throw new CRAMException("Bit Packing is not permitted when number of distinct symbols is greater than 16 or equal to 0. " + + "Number of distinct symbols: " + numSymbols); } } - ByteBuffer outBuffer = ByteBuffer.allocate(outSize); + ByteBuffer outBuffer; if (rangeParams.isCAT()){ - byte[] data = new byte[outSize]; - inBuffer.get( data,0, outSize); - outBuffer = ByteBuffer.wrap(data); + outBuffer = CompressionUtils.slice(inBuffer); + outBuffer.limit(uncompressedSize); + // While resetting the position to the end is not strictly necessary, + // it is being done for the sake of completeness and + // to meet the requirements of the tests that verify the boundary conditions. + inBuffer.position(inBuffer.position()+uncompressedSize); } else if (rangeParams.isExternalCompression()){ - byte[] extCompressedBytes = new byte[inBuffer.remaining()]; + final byte[] extCompressedBytes = new byte[inBuffer.remaining()]; int extCompressedBytesIdx = 0; - int start = inBuffer.position(); - int end = inBuffer.limit(); + final int start = inBuffer.position(); + final int end = inBuffer.limit(); for (int i = start; i < end; i++) { extCompressedBytes[extCompressedBytesIdx] = inBuffer.get(); extCompressedBytesIdx++; } - uncompressEXT(extCompressedBytes, outBuffer); + outBuffer = uncompressEXT(extCompressedBytes); } else if (rangeParams.isRLE()){ + outBuffer = CompressionUtils.allocateByteBuffer(uncompressedSize); switch (rangeParams.getOrder()) { case ZERO: - uncompressRLEOrder0(inBuffer, outBuffer, outSize); + uncompressRLEOrder0(inBuffer, outBuffer, uncompressedSize); break; case ONE: - uncompressRLEOrder1(inBuffer, outBuffer, outSize); + uncompressRLEOrder1(inBuffer, outBuffer, uncompressedSize); break; } } else { - switch (rangeParams.getOrder()) { + outBuffer = CompressionUtils.allocateByteBuffer(uncompressedSize); + switch (rangeParams.getOrder()){ case ZERO: - uncompressOrder0(inBuffer, outBuffer, outSize); + uncompressOrder0(inBuffer, outBuffer, uncompressedSize); break; case ONE: - uncompressOrder1(inBuffer, outBuffer, outSize); + uncompressOrder1(inBuffer, outBuffer, uncompressedSize); break; } } @@ -100,7 +111,7 @@ private ByteBuffer uncompress(final ByteBuffer inBuffer, int outSize) { } - private ByteBuffer uncompressOrder0( + private void uncompressOrder0( final ByteBuffer inBuffer, final ByteBuffer outBuffer, final int outSize) { @@ -115,10 +126,9 @@ private ByteBuffer uncompressOrder0( for (int i = 0; i < outSize; i++) { outBuffer.put(i, (byte) byteModel.modelDecode(inBuffer, rangeCoder)); } - return outBuffer; } - private ByteBuffer uncompressOrder1( + private void uncompressOrder1( final ByteBuffer inBuffer, final ByteBuffer outBuffer, final int outSize) { @@ -135,17 +145,16 @@ private ByteBuffer uncompressOrder1( last = byteModelList.get(last).modelDecode(inBuffer, rangeCoder); outBuffer.put(i, (byte) last); } - return outBuffer; } - private ByteBuffer uncompressRLEOrder0( + private void uncompressRLEOrder0( final ByteBuffer inBuffer, final ByteBuffer outBuffer, final int outSize) { int maxSymbols = inBuffer.get() & 0xFF; maxSymbols = maxSymbols == 0 ? 256 : maxSymbols; - ByteModel modelLit = new ByteModel(maxSymbols); + final ByteModel modelLit = new ByteModel(maxSymbols); final List byteModelRunsList = new ArrayList(258); for (int i=0; i <=257; i++){ byteModelRunsList.add(i, new ByteModel(4)); @@ -156,7 +165,8 @@ private ByteBuffer uncompressRLEOrder0( int i = 0; while (i < outSize) { outBuffer.put(i,(byte) modelLit.modelDecode(inBuffer, rangeCoder)); - int part = byteModelRunsList.get(outBuffer.get(i)&0xFF).modelDecode(inBuffer,rangeCoder); + final int last = outBuffer.get(i) & (0xFF); + int part = byteModelRunsList.get(last).modelDecode(inBuffer,rangeCoder); int run = part; int rctx = 256; while (part == 3) { @@ -165,14 +175,13 @@ private ByteBuffer uncompressRLEOrder0( run += part; } for (int j = 1; j <= run; j++){ - outBuffer.put(i+j, outBuffer.get(i)); + outBuffer.put(i+j, (byte) last); } i += run+1; } - return outBuffer; } - private ByteBuffer uncompressRLEOrder1( + private void uncompressRLEOrder1( final ByteBuffer inBuffer, final ByteBuffer outBuffer, final int outSize) { @@ -188,7 +197,7 @@ private ByteBuffer uncompressRLEOrder1( byteModelRunsList.add(i, new ByteModel(4)); } - RangeCoder rangeCoder = new RangeCoder(); + final RangeCoder rangeCoder = new RangeCoder(); rangeCoder.rangeDecodeStart(inBuffer); int last = 0; @@ -196,7 +205,7 @@ private ByteBuffer uncompressRLEOrder1( while (i < outSize) { outBuffer.put(i,(byte) byteModelLitList.get(last).modelDecode(inBuffer, rangeCoder)); last = outBuffer.get(i) & 0xFF; - int part = byteModelRunsList.get(outBuffer.get(i)&0xFF).modelDecode(inBuffer,rangeCoder); + int part = byteModelRunsList.get(last).modelDecode(inBuffer,rangeCoder); int run = part; int rctx = 256; while (part == 3) { @@ -205,24 +214,19 @@ private ByteBuffer uncompressRLEOrder1( run += part; } for (int j = 1; j <= run; j++){ - outBuffer.put(i+j, outBuffer.get(i)); + outBuffer.put(i+j, (byte)last); } i += run+1; } - return outBuffer; } - private ByteBuffer uncompressEXT( - final byte[] extCompressedBytes, - final ByteBuffer outBuffer) { + private ByteBuffer uncompressEXT(final byte[] extCompressedBytes) { final BZIP2ExternalCompressor compressor = new BZIP2ExternalCompressor(); final byte [] extUncompressedBytes = compressor.uncompress(extCompressedBytes); - outBuffer.put(extUncompressedBytes); - return outBuffer; + return CompressionUtils.wrap(extUncompressedBytes); } - private ByteBuffer decodeStripe(ByteBuffer inBuffer, final int outSize){ - + private ByteBuffer decodeStripe(final ByteBuffer inBuffer, final int outSize){ final int numInterleaveStreams = inBuffer.get() & 0xFF; // read lengths of compressed interleaved streams diff --git a/src/main/java/htsjdk/samtools/cram/compression/range/RangeEncode.java b/src/main/java/htsjdk/samtools/cram/compression/range/RangeEncode.java index 9216d9ee55..437dc7ccbb 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/range/RangeEncode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/range/RangeEncode.java @@ -10,8 +10,11 @@ public class RangeEncode { - private static final ByteBuffer EMPTY_BUFFER = ByteBuffer.allocate(0); + private static final ByteBuffer EMPTY_BUFFER = CompressionUtils.allocateByteBuffer(0); + // This method assumes that inBuffer is already rewound. + // It compresses the data in the inBuffer, leaving it consumed. + // Returns a rewound ByteBuffer containing the compressed data. public ByteBuffer compress(final ByteBuffer inBuffer, final RangeParams rangeParams) { if (inBuffer.remaining() == 0) { return EMPTY_BUFFER; @@ -110,9 +113,7 @@ private void compressOrder0( maxSymbol = inBuffer.get(i) & 0xFF; } } - maxSymbol++; // TODO: Is this correct? Not what spec states!! - - // TODO: initialize byteModel -> set and reset symbols? + maxSymbol++; final ByteModel byteModel = new ByteModel(maxSymbol); outBuffer.put((byte) maxSymbol); final RangeCoder rangeCoder = new RangeCoder(); @@ -134,28 +135,19 @@ private void compressOrder1( maxSymbol = inBuffer.get(i) & 0xFF; } } - maxSymbol++; // TODO: Is this correct? Not what spec states!! - + maxSymbol++; final List byteModelList = new ArrayList(); - - // TODO: initialize byteModel -> set and reset symbols? - for (int i = 0; i < maxSymbol; i++) { byteModelList.add(i, new ByteModel(maxSymbol)); } outBuffer.put((byte) maxSymbol); - - // TODO: should we pass outBuffer to rangecoder? final RangeCoder rangeCoder = new RangeCoder(); - int last = 0; for (int i = 0; i < inSize; i++) { byteModelList.get(last).modelEncode(outBuffer, rangeCoder, inBuffer.get(i) & 0xFF); last = inBuffer.get(i) & 0xFF; } rangeCoder.rangeEncodeEnd(outBuffer); - - // TODO: should we set littleEndian true somehwere? outBuffer.limit(outBuffer.position()); outBuffer.rewind(); } @@ -180,8 +172,6 @@ private void compressRLEOrder0( } outBuffer.put((byte) maxSymbols); final RangeCoder rangeCoder = new RangeCoder(); - - int i = 0; while (i < inSize) { modelLit.modelEncode(outBuffer, rangeCoder, inBuffer.get(i) & 0xFF); @@ -230,8 +220,6 @@ private void compressRLEOrder1( } outBuffer.put((byte) maxSymbols); final RangeCoder rangeCoder = new RangeCoder(); - - int i = 0; int last = 0; while (i < inSize) { diff --git a/src/main/java/htsjdk/samtools/cram/compression/range/RangeParams.java b/src/main/java/htsjdk/samtools/cram/compression/range/RangeParams.java index b017aa9e54..7759f8c853 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/range/RangeParams.java +++ b/src/main/java/htsjdk/samtools/cram/compression/range/RangeParams.java @@ -30,7 +30,7 @@ public static RangeParams.ORDER fromInt(final int orderValue) { } } - public RangeParams(int formatFlags) { + public RangeParams(final int formatFlags) { this.formatFlags = formatFlags; } diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java index dcb81c8d5f..9cf18cae13 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java @@ -82,11 +82,11 @@ private ByteBuffer uncompress(final ByteBuffer inBuffer, final int outSize) { // If CAT is set then, the input is uncompressed if (ransNx16Params.isCAT()) { outBuffer = CompressionUtils.slice(inBuffer); - + outBuffer.limit(uncompressedSize); // While resetting the position to the end is not strictly necessary, // it is being done for the sake of completeness and // to meet the requirements of the tests that verify the boundary conditions. - inBuffer.position(inBuffer.limit()); + inBuffer.position(inBuffer.position()+uncompressedSize); } else { outBuffer = CompressionUtils.allocateByteBuffer(uncompressedSize); diff --git a/src/test/java/htsjdk/samtools/cram/CRAMInteropTestUtils.java b/src/test/java/htsjdk/samtools/cram/CRAMInteropTestUtils.java index 71b7b39555..eaee961bf5 100644 --- a/src/test/java/htsjdk/samtools/cram/CRAMInteropTestUtils.java +++ b/src/test/java/htsjdk/samtools/cram/CRAMInteropTestUtils.java @@ -84,17 +84,14 @@ private static final String getUncompressedFileName(final String compressedFileN } } - protected static final int getParamsFormatFlags(final Path compressedInteropPath){ - // Returns formatFlags from compressed file path - final String compressedFileName = compressedInteropPath.getFileName().toString(); - final int lastDotIndex = compressedFileName.lastIndexOf("."); - if (lastDotIndex >= 0 && lastDotIndex < compressedFileName.length() - 1) { - return Integer.parseInt(compressedFileName.substring(lastDotIndex + 1)); - } else { - throw new CRAMException("The format of the compressed File Name is not as expected. " + - "The name of the compressed file should contain a period followed by a number that" + - "indicates the order of compression. Actual compressed file name = "+ compressedFileName); - } + // return a list of all raw test files in the htscodecs/tests/dat directory + protected static final List getInteropRawTestFiles() throws IOException { + final List paths = new ArrayList<>(); + Files.newDirectoryStream( + CRAMInteropTestUtils.getInteropTestDataLocation().resolve("dat"), + path -> (Files.isRegularFile(path)) && !Files.isHidden(path)) + .forEach(path -> paths.add(path)); + return paths; } } \ No newline at end of file diff --git a/src/test/java/htsjdk/samtools/cram/RANSInteropTest.java b/src/test/java/htsjdk/samtools/cram/RANSInteropTest.java index 9c1abafc29..3b5358075c 100644 --- a/src/test/java/htsjdk/samtools/cram/RANSInteropTest.java +++ b/src/test/java/htsjdk/samtools/cram/RANSInteropTest.java @@ -51,7 +51,7 @@ public Object[][] get4x8RoundTripTestCases() throws IOException { RANSParams.ORDER.ZERO, RANSParams.ORDER.ONE); final List testCases = new ArrayList<>(); - getInteropRawTestFiles() + CRAMInteropTestUtils.getInteropRawTestFiles() .forEach(path -> rans4x8ParamsOrderList.stream().map(rans4x8ParamsOrder -> new Object[]{ path, @@ -84,7 +84,7 @@ public Object[][] getNx16RoundTripTestCases() throws IOException { RANSNx16Params.RLE_FLAG_MASK | RANSNx16Params.PACK_FLAG_MASK, RANSNx16Params.RLE_FLAG_MASK | RANSNx16Params.PACK_FLAG_MASK | RANSNx16Params.ORDER_FLAG_MASK); final List testCases = new ArrayList<>(); - getInteropRawTestFiles() + CRAMInteropTestUtils.getInteropRawTestFiles() .forEach(path -> ransNx16ParamsFormatFlagList.stream().map(ransNx16ParamsFormatFlag -> new Object[]{ path, @@ -100,15 +100,13 @@ public Object[][] get4x8DecodeOnlyTestCases() throws IOException { // params: // compressed testfile path, uncompressed testfile path, - // RANS encoder, RANS decoder, RANS params + // RANS decoder final List testCases = new ArrayList<>(); for (Path path : CRAMInteropTestUtils.getInteropCompressedFilePaths(COMPRESSED_RANS4X8_DIR)) { Object[] objects = new Object[]{ path, CRAMInteropTestUtils.getUnCompressedFilePath(path), - new RANS4x8Encode(), - new RANS4x8Decode(), - new RANS4x8Params(RANSParams.ORDER.fromInt(CRAMInteropTestUtils.getParamsFormatFlags(path))) + new RANS4x8Decode() }; testCases.add(objects); } @@ -120,15 +118,13 @@ public Object[][] getNx16DecodeOnlyTestCases() throws IOException { // params: // compressed testfile path, uncompressed testfile path, - // RANS encoder, RANS decoder, RANS params + // RANS decoder final List testCases = new ArrayList<>(); for (Path path : CRAMInteropTestUtils.getInteropCompressedFilePaths(COMPRESSED_RANSNX16_DIR)) { Object[] objects = new Object[]{ path, CRAMInteropTestUtils.getUnCompressedFilePath(path), - new RANSNx16Encode(), - new RANSNx16Decode(), - new RANSNx16Params(CRAMInteropTestUtils.getParamsFormatFlags(path)) + new RANSNx16Decode() }; testCases.add(objects); } @@ -139,7 +135,7 @@ public Object[][] getNx16DecodeOnlyTestCases() throws IOException { public Object[][] getRoundTripTestCases() throws IOException { // params: - // compressed testfile path, uncompressed testfile path, + // uncompressed testfile path, // RANS encoder, RANS decoder, RANS params return Stream.concat(Arrays.stream(get4x8RoundTripTestCases()), Arrays.stream(getNx16RoundTripTestCases())) .toArray(Object[][]::new); @@ -150,7 +146,7 @@ public Object[][] getDecodeOnlyTestCases() throws IOException { // params: // compressed testfile path, uncompressed testfile path, - // RANS encoder, RANS decoder, RANS params + // RANS decoder return Stream.concat(Arrays.stream(get4x8DecodeOnlyTestCases()), Arrays.stream(getNx16DecodeOnlyTestCases())) .toArray(Object[][]::new); } @@ -158,7 +154,7 @@ public Object[][] getDecodeOnlyTestCases() throws IOException { @Test(description = "Test if CRAM Interop Test Data is available") public void testHtsCodecsCorpusIsAvailable() { if (!CRAMInteropTestUtils.isInteropTestDataAvailable()) { - throw new SkipException(String.format("RANS Interop Test Data is not available at %s", + throw new SkipException(String.format("CRAM Interop Test Data is not available at %s", CRAMInteropTestUtils.INTEROP_TEST_FILES_PATH)); } } @@ -198,9 +194,7 @@ public void testRANSRoundTrip( public void testDecodeOnly( final Path compressedFilePath, final Path uncompressedInteropPath, - final RANSEncode unusedRansEncode, - final RANSDecode ransDecode, - final RANSParams unusedRansParams) throws IOException { + final RANSDecode ransDecode) throws IOException { try (final InputStream uncompressedInteropStream = Files.newInputStream(uncompressedInteropPath); final InputStream preCompressedInteropStream = Files.newInputStream(compressedFilePath) ) { @@ -222,14 +216,4 @@ public void testDecodeOnly( } } - // return a list of all raw test files in the htscodecs/tests/dat directory - private List getInteropRawTestFiles() throws IOException { - final List paths = new ArrayList<>(); - Files.newDirectoryStream( - CRAMInteropTestUtils.getInteropTestDataLocation().resolve("dat"), - path -> (Files.isRegularFile(path)) && !Files.isHidden(path)) - .forEach(path -> paths.add(path)); - return paths; - } - } \ No newline at end of file diff --git a/src/test/java/htsjdk/samtools/cram/RangeInteropTest.java b/src/test/java/htsjdk/samtools/cram/RangeInteropTest.java index 1f9547e744..72a88da1fc 100644 --- a/src/test/java/htsjdk/samtools/cram/RangeInteropTest.java +++ b/src/test/java/htsjdk/samtools/cram/RangeInteropTest.java @@ -17,25 +17,57 @@ import java.nio.file.NoSuchFileException; import java.nio.file.Path; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; public class RangeInteropTest extends HtsjdkTest { public static final String COMPRESSED_RANGE_DIR = "arith"; + // enumerates the different flag combinations @DataProvider(name = "roundTripTestCases") public Object[][] getRoundTripTestCases() throws IOException { // params: - // compressed testfile path, uncompressed testfile path, + // uncompressed testfile path, // Range encoder, Range decoder, Range params + final List rangeParamsFormatFlagList = Arrays.asList( + 0x00, + RangeParams.ORDER_FLAG_MASK, + RangeParams.RLE_FLAG_MASK, + RangeParams.RLE_FLAG_MASK | RangeParams.ORDER_FLAG_MASK, + RangeParams.CAT_FLAG_MASK, + RangeParams.CAT_FLAG_MASK | RangeParams.ORDER_FLAG_MASK, + RangeParams.PACK_FLAG_MASK, + RangeParams.PACK_FLAG_MASK | RangeParams. ORDER_FLAG_MASK, + RangeParams.PACK_FLAG_MASK | RangeParams.RLE_FLAG_MASK, + RangeParams.PACK_FLAG_MASK | RangeParams.RLE_FLAG_MASK | RangeParams.ORDER_FLAG_MASK, + RangeParams.EXT_FLAG_MASK, + RangeParams.EXT_FLAG_MASK | RangeParams.PACK_FLAG_MASK); + final List testCases = new ArrayList<>(); + CRAMInteropTestUtils.getInteropRawTestFiles() + .forEach(path -> + rangeParamsFormatFlagList.stream().map(rangeParamsFormatFlag -> new Object[]{ + path, + new RangeEncode(), + new RangeDecode(), + new RangeParams(rangeParamsFormatFlag) + }).forEach(testCases::add)); + return testCases.toArray(new Object[][]{}); + } + + // uses the available compressed interop test files + @DataProvider(name = "decodeOnlyTestCases") + public Object[][] getDecodeOnlyTestCases() throws IOException { + + // params: + // compressed testfile path, uncompressed testfile path, + // Range decoder final List testCases = new ArrayList<>(); for (Path path : CRAMInteropTestUtils.getInteropCompressedFilePaths(COMPRESSED_RANGE_DIR)) { Object[] objects = new Object[]{ path, CRAMInteropTestUtils.getUnCompressedFilePath(path), - new RangeEncode(), - new RangeDecode(), - new RangeParams(CRAMInteropTestUtils.getParamsFormatFlags(path)) + new RangeDecode() }; testCases.add(objects); } @@ -55,7 +87,6 @@ public void testHtsCodecsCorpusIsAvailable() { dataProvider = "roundTripTestCases", description = "Roundtrip using htsjdk Range Codec. Compare the output with the original file" ) public void testRangeRoundTrip( - final Path unusedCompressedFilePath, final Path uncompressedFilePath, final RangeEncode rangeEncode, final RangeDecode rangeDecode, @@ -79,14 +110,12 @@ public void testRangeRoundTrip( @Test ( dependsOnMethods = "testHtsCodecsCorpusIsAvailable", - dataProvider = "roundTripTestCases", + dataProvider = "decodeOnlyTestCases", description = "Uncompress the existing compressed file using htsjdk Range codec and compare it with the original file.") public void testDecodeOnly( final Path compressedFilePath, final Path uncompressedInteropPath, - final RangeEncode unusedRangeEncode, - final RangeDecode rangeDecode, - final RangeParams unusedRangeParams) throws IOException { + final RangeDecode rangeDecode) throws IOException { try (final InputStream uncompressedInteropStream = Files.newInputStream(uncompressedInteropPath); final InputStream preCompressedInteropStream = Files.newInputStream(compressedFilePath) ) { diff --git a/src/test/java/htsjdk/samtools/cram/compression/range/RangeTest.java b/src/test/java/htsjdk/samtools/cram/compression/range/RangeTest.java index 1c3fb865dd..37a9081574 100644 --- a/src/test/java/htsjdk/samtools/cram/compression/range/RangeTest.java +++ b/src/test/java/htsjdk/samtools/cram/compression/range/RangeTest.java @@ -2,6 +2,7 @@ import htsjdk.HtsjdkTest; import htsjdk.samtools.cram.CRAMException; +import htsjdk.samtools.cram.compression.CompressionUtils; import htsjdk.samtools.util.TestUtil; import htsjdk.utils.TestNGUtils; import org.testng.Assert; @@ -143,7 +144,7 @@ public void testRoundTrip(final RangeEncode rangeEncode, final RangeDecode rangeDecode, final RangeParams rangeParams, final TestDataEnvelope td) { - rangeRoundTrip(rangeEncode, rangeDecode, rangeParams, ByteBuffer.wrap(td.testArray)); + rangeRoundTrip(rangeEncode, rangeDecode, rangeParams, CompressionUtils.wrap(td.testArray)); } @Test(dataProvider = "allRangeCodecsAndDataForTinySmallLarge") @@ -154,7 +155,7 @@ public void testRoundTripTinySmallLarge( final TestDataEnvelope td, final Integer lowerLimit, final Integer upperLimit){ - final ByteBuffer in = ByteBuffer.wrap(td.testArray); + final ByteBuffer in = CompressionUtils.wrap(td.testArray); for (int size = lowerLimit; size < upperLimit; size++) { in.position(0); in.limit(size); @@ -174,7 +175,7 @@ public void testRangeEncodeStripe( // When td is not Empty, Encoding with Stripe Flag should throw an Exception // as Encode Stripe is not implemented - final ByteBuffer compressed = rangeEncode.compress(ByteBuffer.wrap(td.testArray), params); + final ByteBuffer compressed = rangeEncode.compress(CompressionUtils.wrap(td.testArray), params); } // testRangeBuffersMeetBoundaryExpectations