Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding support for 0-length B arrays in SAM files to conform to 1.6 spec #1194

Merged
merged 20 commits into from
Oct 24, 2018
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ dependencies {
testRuntime 'org.pegdown:pegdown:1.6.0' // Necessary for generating HTML reports with ScalaTest
testCompile "org.testng:testng:6.14.3"
testCompile "com.google.jimfs:jimfs:1.1"
testCompile "com.google.guava:guava:26.0-jre"
}

sourceCompatibility = 1.8
Expand Down
5 changes: 2 additions & 3 deletions src/main/java/htsjdk/samtools/SAMLineParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -465,10 +465,9 @@ private void reportErrorParsingLine(final String reason) {
private void reportErrorParsingLine(final Exception e) {
final String errorMessage = makeErrorString(e.getMessage());
if (validationStringency == ValidationStringency.STRICT) {
throw new SAMFormatException(errorMessage);
throw new SAMFormatException(errorMessage, e);
} else if (validationStringency == ValidationStringency.LENIENT) {
System.err
.println("Ignoring SAM validation error due to lenient parsing:");
System.err.println("Ignoring SAM validation error due to lenient parsing:");
System.err.println(errorMessage);
}
}
Expand Down
3 changes: 0 additions & 3 deletions src/main/java/htsjdk/samtools/SAMRecord.java
Original file line number Diff line number Diff line change
Expand Up @@ -1419,9 +1419,6 @@ public Object getAttribute(final short tag) {
* String values are not validated to ensure that they conform to SAM spec.
*/
public void setAttribute(final String tag, final Object value) {
if (value != null && value.getClass().isArray() && Array.getLength(value) == 0) {
throw new IllegalArgumentException("Empty value passed for tag " + tag);
}
setAttribute(SAMTagUtil.getSingleton().makeBinaryTag(tag), value);
}

Expand Down
60 changes: 47 additions & 13 deletions src/main/java/htsjdk/samtools/TextTagCodec.java
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,14 @@ public class TextTagCodec {
// 3 fields for non-empty strings 2 fields if the string is empty.
private static final int NUM_TAG_FIELDS = 3;

private static final byte[] EMPTY_BYTE_ARRAY = new byte[0];
private static final TagValueAndUnsignedArrayFlag EMPTY_UNSIGNED_BYTE_ARRAY = new TagValueAndUnsignedArrayFlag(EMPTY_BYTE_ARRAY, true);
private static final short[] EMPTY_SHORT_ARRAY = new short[0];
private static final TagValueAndUnsignedArrayFlag EMPTY_UNSIGNED_SHORT_ARRAY = new TagValueAndUnsignedArrayFlag(EMPTY_SHORT_ARRAY, true);
private static final int[] EMPTY_INT_ARRAY = new int[0];
private static final TagValueAndUnsignedArrayFlag EMPTY_UNSIGNED_INT_ARRAY = new TagValueAndUnsignedArrayFlag(EMPTY_INT_ARRAY, true);
private static final float[] EMPTY_FLOAT_ARRAY = new float[0];

/**
* This is really a local variable of decode(), but allocated here to reduce allocations.
*/
Expand All @@ -52,7 +60,7 @@ public class TextTagCodec {
/**
* Convert in-memory representation of tag to SAM text representation.
* @param tagName Two-character tag name.
* @param value Tag value as approriate Object subclass.
* @param value Tag value as appropriate Object subclass.
* @return SAM text String representation, i.e. name:type:value
*/
public String encode(final String tagName, Object value) {
Expand All @@ -71,7 +79,7 @@ public String encode(final String tagName, Object value) {
// H should never happen anymore.
value = StringUtil.bytesToHexString((byte[])value);
} else if (tagType == 'B') {
value = getArrayType(value, false) + "," + encodeArrayValue(value);
value = getArrayType(value, false) + encodeArrayValue(value);
} else if (tagType == 'i') {
final long longVal = ((Number) value).longValue();
// as the spec says: [-2^31, 2^32)
Expand All @@ -83,7 +91,7 @@ public String encode(final String tagName, Object value) {
return sb.toString();
}

private char getArrayType(final Object array, final boolean isUnsigned) {
private static char getArrayType(final Object array, final boolean isUnsigned) {
final char type;
final Class<?> componentType = array.getClass().getComponentType();
if (componentType == Float.TYPE) {
Expand All @@ -97,18 +105,18 @@ private char getArrayType(final Object array, final boolean isUnsigned) {
return (isUnsigned? Character.toUpperCase(type): type);
}

private String encodeArrayValue(final Object value) {
final StringBuilder ret = new StringBuilder(Array.get(value, 0).toString());
private static String encodeArrayValue(final Object value) {
final int length = Array.getLength(value);
for (int i = 1; i < length; ++i) {
final StringBuilder ret = new StringBuilder();
for (int i = 0; i < length; ++i) {
ret.append(',');
ret.append(Array.get(value, i).toString());
}
return ret.toString();

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

remove nl

}

private long[] widenToUnsigned(final Object array) {
private static long[] widenToUnsigned(final Object array) {
final Class<?> componentType = array.getClass().getComponentType();
final long mask;
if (componentType == Byte.TYPE) mask = 0xffL;
Expand All @@ -127,7 +135,7 @@ String encodeUnsignedArray(final String tagName, final Object array) {
throw new IllegalArgumentException("Non-array passed to encodeUnsignedArray: " + array.getClass());
}
final long[] widened = widenToUnsigned(array);
return tagName + ":B:" + getArrayType(array, true) + "," + encodeArrayValue(widened);
return tagName + ":B:" + getArrayType(array, true) + encodeArrayValue(widened);
}

/**
Expand Down Expand Up @@ -175,7 +183,7 @@ public Object setValue(final Object o) {
};
}

private Object convertStringToObject(final String type, final String stringVal) {
private static Object convertStringToObject(final String type, final String stringVal) {
if (type.equals("Z")) {
return stringVal;
} else if (type.equals("A")) {
Expand Down Expand Up @@ -219,15 +227,20 @@ else if (SAMUtils.isValidUnsignedIntegerAttribute(lValue)) {
}
}

private Object covertStringArrayToObject(final String stringVal) {
private static Object covertStringArrayToObject(final String stringVal) {
final String[] elementTypeAndValue = new String[2];
if (StringUtil.splitConcatenateExcessTokens(stringVal, elementTypeAndValue, ',') != 2) {
throw new SAMFormatException("Tag of type B should have an element type followed by comma");
}

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

remove NL

final int numberOfTokens = StringUtil.splitConcatenateExcessTokens(stringVal, elementTypeAndValue, ',');

if (elementTypeAndValue[0].length() != 1) {
throw new SAMFormatException("Unrecognized element type for array tag value: " + elementTypeAndValue[0]);
}

final char elementType = elementTypeAndValue[0].charAt(0);
if (numberOfTokens == 1) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can this be removed and fall through with length==0?

return createEmptyArray(elementType);
}

final String[] stringValues = elementTypeAndValue[1].split(",");
if (stringValues.length == 0) throw new SAMFormatException("Tag of type B should have at least one element");
if (elementType == 'f') {
Expand Down Expand Up @@ -316,6 +329,27 @@ private Object covertStringArrayToObject(final String stringVal) {
}
}

private static Object createEmptyArray(char elementType) {
switch ( elementType ) {
lbergelson marked this conversation as resolved.
Show resolved Hide resolved
case 'c':
return EMPTY_BYTE_ARRAY;
case 'C':
return EMPTY_UNSIGNED_BYTE_ARRAY;
case 's':
return EMPTY_SHORT_ARRAY;
case 'S':
return EMPTY_UNSIGNED_SHORT_ARRAY;
case 'i':
return EMPTY_INT_ARRAY;
case 'I':
return EMPTY_UNSIGNED_INT_ARRAY;
case 'f':
//note that F is not a valid option since there is no signed/unsigned float distinction
lbergelson marked this conversation as resolved.
Show resolved Hide resolved
return EMPTY_FLOAT_ARRAY;
default: { throw new SAMFormatException("Unrecognized array tag element type: " + elementType); }
lbergelson marked this conversation as resolved.
Show resolved Hide resolved
}
}

Iso8601Date decodeDate(final String dateStr) {
try {
return new Iso8601Date(dateStr);
Expand Down
86 changes: 81 additions & 5 deletions src/test/java/htsjdk/samtools/SAMRecordUnitTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,21 +25,25 @@
package htsjdk.samtools;

import htsjdk.HtsjdkTest;
import htsjdk.samtools.cram.build.CramIO;
import htsjdk.samtools.util.BinaryCodec;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.TestUtil;
import htsjdk.utils.TestNGUtils;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import java.io.*;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Optional;
import java.lang.reflect.Array;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.*;

public class SAMRecordUnitTest extends HtsjdkTest {

private static final String ARRAY_TAG = "xa";

@DataProvider(name = "serializationTestData")
public Object[][] getSerializationTestData() {
return new Object[][] {
Expand Down Expand Up @@ -1169,4 +1173,76 @@ private Object[][] hasAttributeTestData() throws IOException {
public void testHasAttribute(final SAMRecord samRecord, final String tag, final boolean expectedHasAttribute) {
Assert.assertEquals(samRecord.hasAttribute(tag), expectedHasAttribute);
}

@Test
public void test_setAttribute_empty_array() {
final SAMFileHeader header = new SAMFileHeader();
final SAMRecord record = new SAMRecord(header);
Assert.assertNull(record.getStringAttribute(ARRAY_TAG));
record.setAttribute(ARRAY_TAG, new int[0]);
Assert.assertNotNull(record.getSignedIntArrayAttribute(ARRAY_TAG));
Assert.assertEquals(record.getSignedIntArrayAttribute(ARRAY_TAG), new int[0]);
Assert.assertEquals(record.getAttribute(ARRAY_TAG), new char[0]);
record.setAttribute(ARRAY_TAG, null);
Assert.assertNull(record.getStringAttribute(ARRAY_TAG));
}

private static Object[][] getEmptyArrays() {
return new Object[][]{
{new int[0], int[].class},
{new short[0], short[].class},
{new byte[0], byte[].class},
{new float[0], float[].class},
};
}

private static Object[][] getFileExtensions(){
return new Object[][]{
{BamFileIoUtils.BAM_FILE_EXTENSION}, {IOUtil.SAM_FILE_EXTENSION}, {CramIO.CRAM_FILE_EXTENSION}
};
}

@DataProvider
public Object[][] getEmptyArraysAndExtensions(){
return TestNGUtils.cartesianProduct(getEmptyArrays(), getFileExtensions());
}

@Test(dataProvider = "getEmptyArraysAndExtensions")
public void testWriteSamWithEmptyArray(Object emptyArray, Class<?> arrayClass, String fileExtension) throws IOException {
Assert.assertEquals(emptyArray.getClass(), arrayClass);
Assert.assertEquals(Array.getLength(emptyArray), 0);

final SAMRecordSetBuilder samRecords = new SAMRecordSetBuilder();
samRecords.addFrag("Read", 0, 100, false);
final SAMRecord record = samRecords.getRecords().iterator().next();
record.setAttribute(ARRAY_TAG, emptyArray);
checkArrayIsEmpty(ARRAY_TAG, record, arrayClass);

final Path tmp = Files.createTempFile("tmp", fileExtension);
IOUtil.deleteOnExit(tmp);

final SAMFileWriterFactory writerFactory = new SAMFileWriterFactory()
.setCreateMd5File(false)
.setCreateIndex(false);
final Path reference = IOUtil.getPath("src/test/resources/htsjdk/samtools/one-contig.fasta");
try (final SAMFileWriter samFileWriter = writerFactory.makeWriter(samRecords.getHeader(), false, tmp, reference)) {
samFileWriter.addAlignment(record);
}

try (final SamReader reader = SamReaderFactory.makeDefault()
.referenceSequence(reference)
.open(tmp)) {
final SAMRecordIterator iterator = reader.iterator();
Assert.assertTrue(iterator.hasNext());
final SAMRecord recordFromDisk = iterator.next();
checkArrayIsEmpty(ARRAY_TAG, recordFromDisk, arrayClass);
}
}

private static void checkArrayIsEmpty(String arrayTag, SAMRecord recordFromDisk, Class<?> expectedClass) {
final Object attribute = recordFromDisk.getAttribute(arrayTag);
Assert.assertNotNull(attribute);
Assert.assertEquals(attribute.getClass(), expectedClass);
Assert.assertEquals(Array.getLength(attribute), 0);
}
}
20 changes: 20 additions & 0 deletions src/test/java/htsjdk/samtools/SAMTextReaderTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,15 @@
import htsjdk.samtools.util.CloseableIterator;
import htsjdk.samtools.util.CloserUtil;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;

public class SAMTextReaderTest extends HtsjdkTest {
private static final String ARRAY_TAG = "xa";

// Simple input, spot check that parsed correctly, and make sure nothing blows up.
@Test
public void testBasic() throws Exception {
Expand Down Expand Up @@ -135,4 +138,21 @@ public void testTagWithColon() {
Assert.assertEquals(recFromText.getAttribute(SAMTag.CQ.name()), valueWithColons);
CloserUtil.close(reader);
}

@DataProvider
public Object[][] getRecordsWithArrays(){
final String recordBase = "Read\t4\tchr1\t1\t0\t*\t*\t0\t0\tG\t%\t";
return new Object[][]{
{recordBase + ARRAY_TAG + ":B:i", new int[0]},
{recordBase + ARRAY_TAG + ":B:i,", new int[0]},
{recordBase + ARRAY_TAG + ":B:i,1,2,3,", new int[]{1,2,3}},
};
}

@Test(dataProvider = "getRecordsWithArrays")
public void testSamRecordCanHandleArrays(String samRecord, Object array){
final SAMLineParser samLineParser = new SAMLineParser(new SAMFileHeader());
final SAMRecord record = samLineParser.parseLine(samRecord);
Assert.assertEquals(record.getAttribute(ARRAY_TAG), array);
}
}
14 changes: 11 additions & 3 deletions src/test/java/htsjdk/samtools/SAMTextWriterTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@

public class SAMTextWriterTest extends HtsjdkTest {

private SAMRecordSetBuilder getSAMReader(final boolean sortForMe, final SAMFileHeader.SortOrder sortOrder) {
private SAMRecordSetBuilder getSamRecordSet(final boolean sortForMe, final SAMFileHeader.SortOrder sortOrder) {
final SAMRecordSetBuilder ret = new SAMRecordSetBuilder(sortForMe, sortOrder);
ret.addPair("readB", 20, 200, 300);
ret.addPair("readA", 20, 100, 150);
Expand All @@ -45,7 +45,7 @@ private SAMRecordSetBuilder getSAMReader(final boolean sortForMe, final SAMFileH

@Test
public void testNullHeader() throws Exception {
final SAMRecordSetBuilder recordSetBuilder = getSAMReader(true, SAMFileHeader.SortOrder.coordinate);
final SAMRecordSetBuilder recordSetBuilder = getSamRecordSet(true, SAMFileHeader.SortOrder.coordinate);
for (final SAMRecord rec : recordSetBuilder.getRecords()) {
rec.setHeader(null);
}
Expand Down Expand Up @@ -77,7 +77,7 @@ private void doTest(final SAMRecordSetBuilder recordSetBuilder) throws Exception
}

private void doTest(final SamFlagField samFlagField) throws Exception {
doTest(getSAMReader(true, SAMFileHeader.SortOrder.coordinate), samFlagField);
doTest(getSamRecordSet(true, SAMFileHeader.SortOrder.coordinate), samFlagField);
}

private void doTest(final SAMRecordSetBuilder recordSetBuilder, final SamFlagField samFlagField) throws Exception {
Expand Down Expand Up @@ -128,4 +128,12 @@ private void doTest(final SAMRecordSetBuilder recordSetBuilder, final SamFlagFie
Assert.assertFalse(newSAMIt.hasNext());
inputSAM.close();
}

@Test
public void testEmptyArrayAttributeHasNoCommaWhenWrittenToSAM(){
final SAMFileHeader header = new SAMFileHeader();
final SAMRecord record = new SAMRecord(header);
record.setAttribute("xa", new int[0]);
Copy link
Contributor

@yfarjoun yfarjoun Oct 22, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ARRAY_TAG?

Assert.assertTrue(record.getSAMString().endsWith("xa:B:i\n"));
}
}
Loading