diff --git a/src/main/java/htsjdk/samtools/cram/compression/fqzcomp/FQZCompDecode.java b/src/main/java/htsjdk/samtools/cram/compression/fqzcomp/FQZCompDecode.java index 926f0b4ac2..332c37cbd5 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/fqzcomp/FQZCompDecode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/fqzcomp/FQZCompDecode.java @@ -10,89 +10,63 @@ import java.util.List; public class FQZCompDecode { - - public static final int MULTI_PARAM_FLAG_MASK = 0x01; - public static final int STAB_FLAG_MASK = 0x02; - public static final int DO_REV_FLAG_MASK = 0x04; - - public static final int DEDUP_FLAG_MASK = 0x02; - public static final int FIXED_LEN_FLAG_MASK = 0x04; - public static final int SEL_FLAG_MASK = 0x08; - public static final int QMAP_FLAG_MASK = 0x10; - public static final int PTAB_FLAG_MASK = 0x20; - public static final int DTAB_FLAG_MASK = 0x40; - public static final int QTAB_FLAG_MASK = 0x80; + private static final int NUMBER_OF_SYMBOLS = 256; public static ByteBuffer uncompress( final ByteBuffer inBuffer) { - - ArrayList QLens = new ArrayList<>(); - - - final int n_out = Utils.readUint7(inBuffer); - - int gparams_max_sym = 0; + final int bufferLength = Utils.readUint7(inBuffer); final int version = inBuffer.get() & 0xFF; if (version != 5) { - throw new CRAMException("Invalid FQZComp version number: " + version); + throw new CRAMException("Invalid FQZComp format version number: " + version); } - - final int globalFlags = inBuffer.get() & 0xFF; - final int numParam = ((globalFlags & MULTI_PARAM_FLAG_MASK)!=0) ? inBuffer.get() : 1; - int max_sel = (numParam > 1) ? (numParam - 1) : 0; - - final int[] stab = new int[256]; - if ((globalFlags & STAB_FLAG_MASK) != 0) { - max_sel = inBuffer.get() & 0xFF; - readArray(inBuffer, stab, 256); + final FQZGlobalFlags globalFlags = new FQZGlobalFlags(inBuffer.get() & 0xFF); + final int numParamBlock = globalFlags.isMultiParam()?inBuffer.get() : 1; + int maxSelector = (numParamBlock > 1) ? (numParamBlock - 1) : 0; + final int[] selectorTable = new int[NUMBER_OF_SYMBOLS]; + if (globalFlags.hasSelectorTable()) { + maxSelector = inBuffer.get() & 0xFF; + readArray(inBuffer, selectorTable, NUMBER_OF_SYMBOLS); } else { - for (int i = 0; i < numParam; i++) { - stab[i] = i; + for (int i = 0; i < numParamBlock; i++) { + selectorTable[i] = i; } - for (int i = numParam; i < 256; i++) { - stab[i] = numParam - 1; + for (int i = numParamBlock; i < NUMBER_OF_SYMBOLS; i++) { + selectorTable[i] = numParamBlock - 1; } } - - final boolean gparams_do_rev = ((globalFlags & DO_REV_FLAG_MASK)!=0); - final int[] gparams_stab = stab; - final int gparams_max_sel = max_sel; - List globalFlags_params = new ArrayList(numParam); - for (int p=0; p < numParam; p++){ - globalFlags_params.add(p,decodeFQZSingleParam(inBuffer)); - if(gparams_max_sym < globalFlags_params.get(p).getMaxSymbols()){ - gparams_max_sym = globalFlags_params.get(p).getMaxSymbols(); + final List fqzParamList = new ArrayList(numParamBlock); + int maxSymbols = 0; // maximum number of distinct Quality values across all param sets + for (int p=0; p < numParamBlock; p++){ + fqzParamList.add(p,decodeFQZSingleParam(inBuffer)); + if(maxSymbols < fqzParamList.get(p).getMaxSymbols()){ + maxSymbols = fqzParamList.get(p).getMaxSymbols(); } } - int[] rev = new int[QLens.size()]; - FQZModel model = fqzCreateModels(gparams_max_sym, gparams_max_sel); - - final RangeCoder rangeCoder = new RangeCoder(); - rangeCoder.rangeDecodeStart(inBuffer); - ByteBuffer outBuffer = ByteBuffer.allocate(n_out); - FQZState fqzState = new FQZState(); - - // main decode loop int i = 0; + final FQZState fqzState = new FQZState(); + final RangeCoder rangeCoder = new RangeCoder(); + rangeCoder.rangeDecodeStart(inBuffer); + final FQZModel model = fqzCreateModels(maxSymbols, maxSelector); + final List QualityLengths = new ArrayList<>(); + FQZParam params = null; int last = 0; - FQZParam params = new FQZParam(); - while (i 0) - R[j++] = run; + rle[j++] = run; } last = run; } - // Now expand runs in R to tab, noting 255 is max run + // Now expand runs in rle to table, noting 255 is max run int i = 0; j = 0; z = 0; @@ -180,61 +125,54 @@ public static void readArray(ByteBuffer src, int[] tab, int size) { while (z < size) { int run_len = 0; do { - part = R[j++]; + part = rle[j++]; run_len += part; } while (part == 255); while (run_len-- > 0) - tab[z++] = i; + table[z++] = i; i++; } } - public static FQZModel fqzCreateModels(final int gparams_max_sym, final int gparams_max_sel){ - // put it in place - FQZModel fqzModel = new FQZModel(); + public static FQZModel fqzCreateModels(final int maxSymbols, final int maxSelector){ + final FQZModel fqzModel = new FQZModel(); fqzModel.setQuality(new ByteModel[1 << 16]); for (int i = 0; i < (1 << 16); i++) { - fqzModel.getQuality()[i] = new ByteModel(gparams_max_sym + 1); // +1 as max value not num. values + fqzModel.getQuality()[i] = new ByteModel(maxSymbols + 1); // +1 as max value not num. values } - fqzModel.setLength(new ByteModel[4]); for (int i = 0; i < 4; i++) { - fqzModel.getLength()[i] = new ByteModel(256); + fqzModel.getLength()[i] = new ByteModel(NUMBER_OF_SYMBOLS); } - fqzModel.setReverse(new ByteModel(2)); fqzModel.setDuplicate(new ByteModel(2)); - - if (gparams_max_sel > 0) { - fqzModel.setSelector(new ByteModel(gparams_max_sel + 1)); + if (maxSelector > 0) { + fqzModel.setSelector(new ByteModel(maxSelector + 1)); } - return fqzModel; } // If duplicate returns 1, else 0 - public static FQZState decodeFQZNewRecord( + public static void decodeFQZNewRecord( final ByteBuffer inBuffer, final RangeCoder rangeCoder, final FQZModel model, final FQZState state, - final int gparams_max_sym, - final int gparams_max_sel, - final boolean gparams_do_rev, - final int[] gparams_stab, - final List gparams_params, + final int maxSelector, + final boolean doReverse, + final int[] selectorTable, + final List fqzParamList, final int[] rev){ // Parameter selector - if (gparams_max_sel > 0) { + if (maxSelector > 0) { state.setSelector(model.getSelector().modelDecode(inBuffer, rangeCoder)); } else { state.setSelector(0); } - state.setSelectorTable(gparams_stab[state.getSelector()]); - - FQZParam params = gparams_params.get(state.getSelectorTable()); + state.setSelectorTable(selectorTable[state.getSelector()]); + final FQZParam params = fqzParamList.get(state.getSelectorTable()); // Reset contexts at the start of each new record int len; @@ -251,33 +189,28 @@ public static FQZState decodeFQZNewRecord( len = -params.getFixedLen(); } state.setRecordLength(len); - - if (gparams_do_rev) { + if (doReverse) { rev[state.getRecordNumber()] = model.getReverse().modelDecode(inBuffer, rangeCoder); } - - state.setIsDup(false); + state.setIsDuplicate(false); if (params.isDoDedup()) { if (model.getDuplicate().modelDecode(inBuffer, rangeCoder) != 0) { - state.setIsDup(true); + state.setIsDuplicate(true); } } - state.setBases(len); // number of remaining bytes in this record state.setDelta(0); state.setQualityContext(0); state.setPreviousQuality(0); state.setRecordNumber(state.getRecordNumber() + 1); - - return state; } - public static int fqzUpdateContext(FQZParam params, - FQZState state, - int Q){ + public static int fqzUpdateContext(final FQZParam params, + final FQZState state, + final int quality){ int last = params.getContext(); - state.setQualityContext(((state.getQualityContext() << params.getQualityContextShift()) + params.getQualityContextTable()[Q]) >>> 0); + state.setQualityContext(((state.getQualityContext() << params.getQualityContextShift()) + params.getQualityContextTable()[quality]) >>> 0); last += ((state.getQualityContext() & ((1 << params.getQualityContextBits()) - 1)) << params.getQualityContextLocation()) >>> 0; if (params.isDoPos()) @@ -285,21 +218,17 @@ public static int fqzUpdateContext(FQZParam params, if (params.isDoDelta()) { last += params.getDeltaContextTable()[Math.min(state.getDelta(), 255)] << params.getDeltaContextLocation(); - state.setDelta(state.getDelta()+ ((state.getPreviousQuality() != Q) ? 1 : 0)); - state.setPreviousQuality(Q); + state.setDelta(state.getDelta()+ ((state.getPreviousQuality() != quality) ? 1 : 0)); + state.setPreviousQuality(quality); } - if (params.isDoSel()) last += state.getSelector() << params.getSelectorContextLocation(); - state.setBases(state.getBases()-1); - return last & 0xffff; - } public static FQZParam decodeFQZSingleParam(ByteBuffer inBuffer) { - FQZParam param = new FQZParam(); + final FQZParam param = new FQZParam(); param.setContext((inBuffer.get() & 0xFF) | ((inBuffer.get() & 0xFF) << 8)); param.setParameterFlags(inBuffer.get() & 0xFF); param.setMaxSymbols(inBuffer.get() & 0xFF); @@ -314,13 +243,13 @@ public static FQZParam decodeFQZSingleParam(ByteBuffer inBuffer) { param.setDeltaContextLocation(z & 0x0F); // Read Quality Map. Example: "unbin" Illumina Qualities - param.setQualityMap(new int[256]); + param.setQualityMap(new int[NUMBER_OF_SYMBOLS]); if (param.isDoQmap()) { for (int i = 0; i < param.getMaxSymbols(); i++) { param.getQualityMap()[i] = inBuffer.get() & 0xFF; } } else { - for (int i = 0; i < 256; i++) { + for (int i = 0; i < NUMBER_OF_SYMBOLS; i++) { param.getQualityMap()[i] = i; } } @@ -328,9 +257,9 @@ public static FQZParam decodeFQZSingleParam(ByteBuffer inBuffer) { // Read tables param.setQualityContextTable(new int[1024]); if (param.getQualityContextBits() > 0 && param.isDoQtab()) { - readArray(inBuffer, param.getQualityContextTable(), 256); + readArray(inBuffer, param.getQualityContextTable(), NUMBER_OF_SYMBOLS); } else { - for (int i = 0; i < 256; i++) { + for (int i = 0; i < NUMBER_OF_SYMBOLS; i++) { param.getQualityContextTable()[i] = i; // NOP } } @@ -338,11 +267,35 @@ public static FQZParam decodeFQZSingleParam(ByteBuffer inBuffer) { if (param.isDoPos()) { readArray(inBuffer, param.getPositionContextTable(), 1024); } - param.setDeltaContextTable(new int[256]); + param.setDeltaContextTable(new int[NUMBER_OF_SYMBOLS]); if (param.isDoDelta()) { - readArray(inBuffer, param.getDeltaContextTable(), 256); + readArray(inBuffer, param.getDeltaContextTable(), NUMBER_OF_SYMBOLS); } return param; } + public static void reverseQualities( + final ByteBuffer outBuffer, + final int bufferLength, + final int[] rev, + final List QualityLengths + ){ + int rec = 0; + int idx = 0; + while (idx< bufferLength) { + if (rev[rec]==1) { + int j = 0; + int k = QualityLengths.get(rec) - 1; + while (j < k) { + byte tmp = outBuffer.get(idx + j); + outBuffer.put(idx + j,outBuffer.get(idx + k)); + outBuffer.put(idx + k, tmp); + j++; + k--; + } + } + idx += QualityLengths.get(rec++); + } + } + } \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/fqzcomp/FQZGlobalFlags.java b/src/main/java/htsjdk/samtools/cram/compression/fqzcomp/FQZGlobalFlags.java new file mode 100644 index 0000000000..937b7eed62 --- /dev/null +++ b/src/main/java/htsjdk/samtools/cram/compression/fqzcomp/FQZGlobalFlags.java @@ -0,0 +1,28 @@ +package htsjdk.samtools.cram.compression.fqzcomp; + +public class FQZGlobalFlags { + public static final int MULTI_PARAM_FLAG_MASK = 0x01; + public static final int SELECTOR_TABLE_FLAG_MASK = 0x02; + public static final int DO_REVERSE_FLAG_MASK = 0x04; + + private int globalFlags; + + public FQZGlobalFlags(final int globalFlags) { + this.globalFlags = globalFlags; + } + + // returns True if more than one parameter block is present + public boolean isMultiParam(){ + return ((globalFlags & MULTI_PARAM_FLAG_MASK)!=0); + } + + // returns True if the parameter selector is mapped through selector table + public boolean hasSelectorTable(){ + return ((globalFlags & SELECTOR_TABLE_FLAG_MASK)!=0); + } + + public boolean doReverse(){ + return ((globalFlags & DO_REVERSE_FLAG_MASK)!=0); + } + +} \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/fqzcomp/FQZParam.java b/src/main/java/htsjdk/samtools/cram/compression/fqzcomp/FQZParam.java index 6541058fa7..eaf9b9d08c 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/fqzcomp/FQZParam.java +++ b/src/main/java/htsjdk/samtools/cram/compression/fqzcomp/FQZParam.java @@ -24,13 +24,13 @@ public class FQZParam { private int[] positionContextTable; // Position context lookup table private int[] deltaContextTable; // Delta context lookup table - public static final int DEDUP_FLAG_MASK = 0x02; - public static final int FIXED_LEN_FLAG_MASK = 0x04; - public static final int SEL_FLAG_MASK = 0x08; - public static final int QMAP_FLAG_MASK = 0x10; - public static final int PTAB_FLAG_MASK = 0x20; - public static final int DTAB_FLAG_MASK = 0x40; - public static final int QTAB_FLAG_MASK = 0x80; + private static final int DEDUP_FLAG_MASK = 0x02; + private static final int FIXED_LEN_FLAG_MASK = 0x04; + private static final int SEL_FLAG_MASK = 0x08; + private static final int QMAP_FLAG_MASK = 0x10; + private static final int PTAB_FLAG_MASK = 0x20; + private static final int DTAB_FLAG_MASK = 0x40; + private static final int QTAB_FLAG_MASK = 0x80; public FQZParam() { } diff --git a/src/main/java/htsjdk/samtools/cram/compression/fqzcomp/FQZState.java b/src/main/java/htsjdk/samtools/cram/compression/fqzcomp/FQZState.java index 15f8b4921d..3a4981029c 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/fqzcomp/FQZState.java +++ b/src/main/java/htsjdk/samtools/cram/compression/fqzcomp/FQZState.java @@ -70,12 +70,12 @@ public void setRecordLength(int recordLength) { this.recordLength = recordLength; } - public boolean getIsDup() { + public boolean getIsDuplicate() { return isDuplicate; } - public void setIsDup(boolean isDup) { - this.isDuplicate = isDup; + public void setIsDuplicate(boolean isDuplicate) { + this.isDuplicate = isDuplicate; } public int getRecordNumber() { diff --git a/src/test/java/htsjdk/samtools/cram/FQZCompInteropTest.java b/src/test/java/htsjdk/samtools/cram/FQZCompInteropTest.java index 9e0ab825b4..d07317bb3d 100644 --- a/src/test/java/htsjdk/samtools/cram/FQZCompInteropTest.java +++ b/src/test/java/htsjdk/samtools/cram/FQZCompInteropTest.java @@ -51,7 +51,7 @@ public void testHtsCodecsCorpusIsAvailable() { @Test ( dependsOnMethods = "testHtsCodecsCorpusIsAvailable", dataProvider = "decodeOnlyTestCases", - description = "Uncompress the existing compressed file using htsjdk FQZcomp codec and compare it with the original file.") + description = "Uncompress the existing compressed file using htsjdk FQZComp and compare it with the original file.") public void testDecodeOnly( final Path compressedFilePath, final Path uncompressedInteropPath, @@ -62,17 +62,12 @@ public void testDecodeOnly( // preprocess the uncompressed data (to match what the htscodecs-library test harness does) // by filtering out the embedded newlines, and then round trip through FQZComp codec // and compare the results - + final ByteBuffer uncompressedInteropBytes = ByteBuffer.wrap(CRAMInteropTestUtils.filterEmbeddedNewlines(IOUtils.toByteArray(uncompressedInteropStream))); final ByteBuffer preCompressedInteropBytes = ByteBuffer.wrap(IOUtils.toByteArray(preCompressedInteropStream)); // Use htsjdk to uncompress the precompressed file from htscodecs repo final ByteBuffer uncompressedHtsjdkBytes = fqzcompDecode.uncompress(preCompressedInteropBytes); - final ByteBuffer uncompressedInteropBytes = ByteBuffer.wrap(IOUtils.toByteArray(uncompressedInteropStream)); - // TODO!!: the precompressed interop files seem different to the other codecs. - // For htslib compression using FQZComp, "\n" was not filtered out of these test files - // unlike what we observed in the RANS, Range or Name Tokenizer compressed interop test files. - // Compare the htsjdk uncompressed bytes with the original input file from htscodecs repo Assert.assertEquals(uncompressedHtsjdkBytes, uncompressedInteropBytes); } catch (final NoSuchFileException ex){