Skip to content

Commit

Permalink
Addressing feedback from nov 21, nov 28 - part 3
Browse files Browse the repository at this point in the history
  • Loading branch information
yash-puligundla committed Jan 29, 2024
1 parent 4a416f7 commit 55f6086
Show file tree
Hide file tree
Showing 13 changed files with 122 additions and 122 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -147,15 +147,13 @@ else if (numSymbols <= 16){
return outBufferPack;
}



public static ByteBuffer allocateOutputBuffer(final int inSize) {
// This calculation is identical to the one in samtools rANS_static.c
// Presumably the frequency table (always big enough for order 1) = 257*257,
// then * 3 for each entry (byte->symbol, 2 bytes -> scaled frequency),
// + 9 for the header (order byte, and 2 int lengths for compressed/uncompressed lengths).
final int compressedSize = (int) (inSize + 257 * 257 * 3 + 9);
final ByteBuffer outputBuffer = ByteBuffer.allocate(compressedSize).order(ByteOrder.LITTLE_ENDIAN);
final ByteBuffer outputBuffer = allocateByteBuffer(compressedSize);
if (outputBuffer.remaining() < compressedSize) {
throw new CRAMException("Failed to allocate sufficient buffer size for RANS coder.");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,13 +68,13 @@ public RANSExternalCompressor(
@Override
public byte[] compress(final byte[] data) {
final RANS4x8Params params = new RANS4x8Params(order);
final ByteBuffer buffer = ransEncode.compress(ByteBuffer.wrap(data), params);
final ByteBuffer buffer = ransEncode.compress(CompressionUtils.wrap(data), params);
return toByteArray(buffer);
}

@Override
public byte[] uncompress(byte[] data) {
final ByteBuffer buf = ransDecode.uncompress(ByteBuffer.wrap(data));
final ByteBuffer buf = ransDecode.uncompress(CompressionUtils.wrap(data));
return toByteArray(buf);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,13 @@ public RangeExternalCompressor(
@Override
public byte[] compress(byte[] data) {
final RangeParams params = new RangeParams(formatFlags);
final ByteBuffer buffer = rangeEncode.compress(ByteBuffer.wrap(data), params);
final ByteBuffer buffer = rangeEncode.compress(CompressionUtils.wrap(data), params);
return toByteArray(buffer);
}

@Override
public byte[] uncompress(byte[] data) {
final ByteBuffer buf = rangeDecode.uncompress(ByteBuffer.wrap(data));
final ByteBuffer buf = rangeDecode.uncompress(CompressionUtils.wrap(data));
return toByteArray(buf);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@ final public class Constants {
public static final int NUMBER_OF_SYMBOLS = 256;
public static final int MAX_FREQ = ((1<<16)-17);
public static final int STEP = 16;
public static final long MAX_RANGE = 0xFFFFFFFFL;
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@

public class RangeCoder {

private static final long MAX_RANGE = 0xFFFFFFFFL;

private long low;
private long range;
private long code;
Expand All @@ -16,7 +14,7 @@ public class RangeCoder {
protected RangeCoder() {
// Spec: RangeEncodeStart
this.low = 0;
this.range = MAX_RANGE; // 4 bytes of all 1's
this.range = Constants.MAX_RANGE; // 4 bytes of all 1's
this.code = 0;
this.FFnum = 0;
this.carry = false;
Expand All @@ -27,7 +25,7 @@ protected void rangeDecodeStart(final ByteBuffer inBuffer){
for (int i = 0; i < 5; i++){
code = (code << 8) + (inBuffer.get() & 0xFF);
}
code &= MAX_RANGE;
code &= Constants.MAX_RANGE;
}

protected void rangeDecode(final ByteBuffer inBuffer, final int cumulativeFrequency, final int symbolFrequency){
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,19 @@

public class RangeDecode {

private static final ByteBuffer EMPTY_BUFFER = ByteBuffer.allocate(0);
private static final ByteBuffer EMPTY_BUFFER = CompressionUtils.allocateByteBuffer(0);

// Entry point for Range decoding.
// Precondition: the caller hands in inBuffer already rewound.
// Postcondition: inBuffer has been consumed, and the ByteBuffer handed back is rewound
// and holds the uncompressed data.
// Side effect: the byte order of inBuffer is switched to little endian.
public ByteBuffer uncompress(final ByteBuffer inBuffer) {
    // Range-coded input is read in little endian. order(...) returns the very same buffer,
    // so it can be passed straight on to the two-argument overload (outSize starts at 0).
    return uncompress(inBuffer.order(ByteOrder.LITTLE_ENDIAN), 0);
}

private ByteBuffer uncompress(final ByteBuffer inBuffer, int outSize) {
private ByteBuffer uncompress(final ByteBuffer inBuffer, final int outSize) {
if (inBuffer.remaining() == 0) {
return EMPTY_BUFFER;
}
Expand All @@ -28,11 +33,11 @@ private ByteBuffer uncompress(final ByteBuffer inBuffer, int outSize) {
final RangeParams rangeParams = new RangeParams(formatFlags);

// noSz
outSize = rangeParams.isNosz() ? outSize : CompressionUtils.readUint7(inBuffer);
int uncompressedSize = rangeParams.isNosz() ? outSize : CompressionUtils.readUint7(inBuffer);

// stripe
if (rangeParams.isStripe()) {
return decodeStripe(inBuffer, outSize);
return decodeStripe(inBuffer, uncompressedSize);
}

// pack
Expand All @@ -41,7 +46,7 @@ private ByteBuffer uncompress(final ByteBuffer inBuffer, int outSize) {
int numSymbols = 0;
byte[] packMappingTable = null;
if (rangeParams.isPack()){
packDataLength = outSize;
packDataLength = uncompressedSize;
numSymbols = inBuffer.get() & 0xFF;

// if (numSymbols > 16 or numSymbols==0), raise exception
Expand All @@ -50,43 +55,49 @@ private ByteBuffer uncompress(final ByteBuffer inBuffer, int outSize) {
for (int i = 0; i < numSymbols; i++) {
packMappingTable[i] = inBuffer.get();
}
outSize = CompressionUtils.readUint7(inBuffer);
uncompressedSize = CompressionUtils.readUint7(inBuffer);
} else {
throw new CRAMException("Bit Packing is not permitted when number of distinct symbols is greater than 16 or equal to 0. Number of distinct symbols: " + numSymbols);
throw new CRAMException("Bit Packing is not permitted when number of distinct symbols is greater than 16 or equal to 0. " +
"Number of distinct symbols: " + numSymbols);
}
}

ByteBuffer outBuffer = ByteBuffer.allocate(outSize);
ByteBuffer outBuffer;
if (rangeParams.isCAT()){
byte[] data = new byte[outSize];
inBuffer.get( data,0, outSize);
outBuffer = ByteBuffer.wrap(data);
outBuffer = CompressionUtils.slice(inBuffer);
outBuffer.limit(uncompressedSize);
// While resetting the position to the end is not strictly necessary,
// it is being done for the sake of completeness and
// to meet the requirements of the tests that verify the boundary conditions.
inBuffer.position(inBuffer.position()+uncompressedSize);
} else if (rangeParams.isExternalCompression()){
byte[] extCompressedBytes = new byte[inBuffer.remaining()];
final byte[] extCompressedBytes = new byte[inBuffer.remaining()];
int extCompressedBytesIdx = 0;
int start = inBuffer.position();
int end = inBuffer.limit();
final int start = inBuffer.position();
final int end = inBuffer.limit();
for (int i = start; i < end; i++) {
extCompressedBytes[extCompressedBytesIdx] = inBuffer.get();
extCompressedBytesIdx++;
}
uncompressEXT(extCompressedBytes, outBuffer);
outBuffer = uncompressEXT(extCompressedBytes);
} else if (rangeParams.isRLE()){
outBuffer = CompressionUtils.allocateByteBuffer(uncompressedSize);
switch (rangeParams.getOrder()) {
case ZERO:
uncompressRLEOrder0(inBuffer, outBuffer, outSize);
uncompressRLEOrder0(inBuffer, outBuffer, uncompressedSize);
break;
case ONE:
uncompressRLEOrder1(inBuffer, outBuffer, outSize);
uncompressRLEOrder1(inBuffer, outBuffer, uncompressedSize);
break;
}
} else {
switch (rangeParams.getOrder()) {
outBuffer = CompressionUtils.allocateByteBuffer(uncompressedSize);
switch (rangeParams.getOrder()){
case ZERO:
uncompressOrder0(inBuffer, outBuffer, outSize);
uncompressOrder0(inBuffer, outBuffer, uncompressedSize);
break;
case ONE:
uncompressOrder1(inBuffer, outBuffer, outSize);
uncompressOrder1(inBuffer, outBuffer, uncompressedSize);
break;
}
}
Expand All @@ -100,7 +111,7 @@ private ByteBuffer uncompress(final ByteBuffer inBuffer, int outSize) {

}

private ByteBuffer uncompressOrder0(
private void uncompressOrder0(
final ByteBuffer inBuffer,
final ByteBuffer outBuffer,
final int outSize) {
Expand All @@ -115,10 +126,9 @@ private ByteBuffer uncompressOrder0(
for (int i = 0; i < outSize; i++) {
outBuffer.put(i, (byte) byteModel.modelDecode(inBuffer, rangeCoder));
}
return outBuffer;
}

private ByteBuffer uncompressOrder1(
private void uncompressOrder1(
final ByteBuffer inBuffer,
final ByteBuffer outBuffer,
final int outSize) {
Expand All @@ -135,17 +145,16 @@ private ByteBuffer uncompressOrder1(
last = byteModelList.get(last).modelDecode(inBuffer, rangeCoder);
outBuffer.put(i, (byte) last);
}
return outBuffer;
}

private ByteBuffer uncompressRLEOrder0(
private void uncompressRLEOrder0(
final ByteBuffer inBuffer,
final ByteBuffer outBuffer,
final int outSize) {

int maxSymbols = inBuffer.get() & 0xFF;
maxSymbols = maxSymbols == 0 ? 256 : maxSymbols;
ByteModel modelLit = new ByteModel(maxSymbols);
final ByteModel modelLit = new ByteModel(maxSymbols);
final List<ByteModel> byteModelRunsList = new ArrayList(258);
for (int i=0; i <=257; i++){
byteModelRunsList.add(i, new ByteModel(4));
Expand All @@ -156,7 +165,8 @@ private ByteBuffer uncompressRLEOrder0(
int i = 0;
while (i < outSize) {
outBuffer.put(i,(byte) modelLit.modelDecode(inBuffer, rangeCoder));
int part = byteModelRunsList.get(outBuffer.get(i)&0xFF).modelDecode(inBuffer,rangeCoder);
final int last = outBuffer.get(i) & (0xFF);
int part = byteModelRunsList.get(last).modelDecode(inBuffer,rangeCoder);
int run = part;
int rctx = 256;
while (part == 3) {
Expand All @@ -165,14 +175,13 @@ private ByteBuffer uncompressRLEOrder0(
run += part;
}
for (int j = 1; j <= run; j++){
outBuffer.put(i+j, outBuffer.get(i));
outBuffer.put(i+j, (byte) last);
}
i += run+1;
}
return outBuffer;
}

private ByteBuffer uncompressRLEOrder1(
private void uncompressRLEOrder1(
final ByteBuffer inBuffer,
final ByteBuffer outBuffer,
final int outSize) {
Expand All @@ -188,15 +197,15 @@ private ByteBuffer uncompressRLEOrder1(
byteModelRunsList.add(i, new ByteModel(4));
}

RangeCoder rangeCoder = new RangeCoder();
final RangeCoder rangeCoder = new RangeCoder();
rangeCoder.rangeDecodeStart(inBuffer);

int last = 0;
int i = 0;
while (i < outSize) {
outBuffer.put(i,(byte) byteModelLitList.get(last).modelDecode(inBuffer, rangeCoder));
last = outBuffer.get(i) & 0xFF;
int part = byteModelRunsList.get(outBuffer.get(i)&0xFF).modelDecode(inBuffer,rangeCoder);
int part = byteModelRunsList.get(last).modelDecode(inBuffer,rangeCoder);
int run = part;
int rctx = 256;
while (part == 3) {
Expand All @@ -205,24 +214,19 @@ private ByteBuffer uncompressRLEOrder1(
run += part;
}
for (int j = 1; j <= run; j++){
outBuffer.put(i+j, outBuffer.get(i));
outBuffer.put(i+j, (byte)last);
}
i += run+1;
}
return outBuffer;
}

private ByteBuffer uncompressEXT(
final byte[] extCompressedBytes,
final ByteBuffer outBuffer) {
private ByteBuffer uncompressEXT(final byte[] extCompressedBytes) {
final BZIP2ExternalCompressor compressor = new BZIP2ExternalCompressor();
final byte [] extUncompressedBytes = compressor.uncompress(extCompressedBytes);
outBuffer.put(extUncompressedBytes);
return outBuffer;
return CompressionUtils.wrap(extUncompressedBytes);
}

private ByteBuffer decodeStripe(ByteBuffer inBuffer, final int outSize){

private ByteBuffer decodeStripe(final ByteBuffer inBuffer, final int outSize){
final int numInterleaveStreams = inBuffer.get() & 0xFF;

// read lengths of compressed interleaved streams
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,11 @@

public class RangeEncode<T extends RangeParams> {

private static final ByteBuffer EMPTY_BUFFER = ByteBuffer.allocate(0);
private static final ByteBuffer EMPTY_BUFFER = CompressionUtils.allocateByteBuffer(0);

// This method assumes that inBuffer is already rewound.
// It compresses the data in the inBuffer, leaving it consumed.
// Returns a rewound ByteBuffer containing the compressed data.
public ByteBuffer compress(final ByteBuffer inBuffer, final RangeParams rangeParams) {
if (inBuffer.remaining() == 0) {
return EMPTY_BUFFER;
Expand Down Expand Up @@ -110,9 +113,7 @@ private void compressOrder0(
maxSymbol = inBuffer.get(i) & 0xFF;
}
}
maxSymbol++; // TODO: Is this correct? Not what spec states!!

// TODO: initialize byteModel -> set and reset symbols?
maxSymbol++;
final ByteModel byteModel = new ByteModel(maxSymbol);
outBuffer.put((byte) maxSymbol);
final RangeCoder rangeCoder = new RangeCoder();
Expand All @@ -134,28 +135,19 @@ private void compressOrder1(
maxSymbol = inBuffer.get(i) & 0xFF;
}
}
maxSymbol++; // TODO: Is this correct? Not what spec states!!

maxSymbol++;
final List<ByteModel> byteModelList = new ArrayList();

// TODO: initialize byteModel -> set and reset symbols?

for (int i = 0; i < maxSymbol; i++) {
byteModelList.add(i, new ByteModel(maxSymbol));
}
outBuffer.put((byte) maxSymbol);

// TODO: should we pass outBuffer to rangecoder?
final RangeCoder rangeCoder = new RangeCoder();

int last = 0;
for (int i = 0; i < inSize; i++) {
byteModelList.get(last).modelEncode(outBuffer, rangeCoder, inBuffer.get(i) & 0xFF);
last = inBuffer.get(i) & 0xFF;
}
rangeCoder.rangeEncodeEnd(outBuffer);

// TODO: should we set littleEndian true somewhere?
outBuffer.limit(outBuffer.position());
outBuffer.rewind();
}
Expand All @@ -180,8 +172,6 @@ private void compressRLEOrder0(
}
outBuffer.put((byte) maxSymbols);
final RangeCoder rangeCoder = new RangeCoder();


int i = 0;
while (i < inSize) {
modelLit.modelEncode(outBuffer, rangeCoder, inBuffer.get(i) & 0xFF);
Expand Down Expand Up @@ -230,8 +220,6 @@ private void compressRLEOrder1(
}
outBuffer.put((byte) maxSymbols);
final RangeCoder rangeCoder = new RangeCoder();


int i = 0;
int last = 0;
while (i < inSize) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ public static RangeParams.ORDER fromInt(final int orderValue) {
}
}

public RangeParams(int formatFlags) {
public RangeParams(final int formatFlags) {
this.formatFlags = formatFlags;
}

Expand Down
Loading

0 comments on commit 55f6086

Please sign in to comment.