-
Notifications
You must be signed in to change notification settings - Fork 59
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Chord: Implemented extractSigsSnv() R function in java
- Loading branch information
1 parent
d8cd755
commit 4a7cd4d
Showing
1 changed file
with
123 additions
and
0 deletions.
There are no files selected for viewing
123 changes: 123 additions & 0 deletions
123
chord/src/main/java/com/hartwig/hmftools/chord/prep/SnvPrep.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,123 @@ | ||
package com.hartwig.hmftools.chord.prep; | ||
|
||
import static com.hartwig.hmftools.chord.ChordConstants.CHORD_LOGGER; | ||
|
||
import java.nio.file.NoSuchFileException; | ||
import java.util.ArrayList; | ||
import java.util.LinkedHashMap; | ||
import java.util.List; | ||
import java.util.Map; | ||
|
||
import com.hartwig.hmftools.chord.ChordConfig; | ||
import com.hartwig.hmftools.chord.variant.VcfFile; | ||
import com.hartwig.hmftools.common.sigs.SnvSigUtils; | ||
import com.hartwig.hmftools.common.variant.SageVcfTags; | ||
|
||
import htsjdk.variant.variantcontext.VariantContext; | ||
|
||
public class SnvPrep | ||
{ | ||
private final ChordConfig mConfig; | ||
|
||
public SnvPrep(ChordConfig config) | ||
{ | ||
mConfig = config; | ||
} | ||
|
||
private List<VariantContext> loadVariants(String sampleId) throws NoSuchFileException | ||
{ | ||
VcfFile vcfFile = new VcfFile(mConfig.purpleSomaticVcfFile(sampleId), mConfig.IncludeNonPass); | ||
return vcfFile.loadVariants(); | ||
} | ||
|
||
private static Map<String,Integer> countTriNucContexts(List<VariantContext> variantContexts) | ||
{ | ||
// Get bin names | ||
// Returns: C>A_ACA -> 0, C>A_ACC -> 1, C>A_ACG -> 2, etc. We only need the name (i.e. key) | ||
Map<String,Integer> triNucNameIndexMap = new LinkedHashMap<>(); | ||
SnvSigUtils.populateBucketMap(triNucNameIndexMap); | ||
|
||
// Initialize count vector | ||
Map<String,Integer> triNucNameCountsMap = new LinkedHashMap<>(); | ||
triNucNameIndexMap.keySet().forEach(i -> triNucNameCountsMap.put(i, 0)); | ||
|
||
// Count trinucleotide contexts | ||
int snvCount = 0; | ||
for(VariantContext variantContext : variantContexts) | ||
{ | ||
String refSeq = variantContext.getReference().getBaseString(); | ||
String altSeq = variantContext.getAlternateAllele(0).getDisplayString(); | ||
|
||
boolean isSnv = refSeq.length()==1 && altSeq.length()==1; | ||
if(!isSnv) | ||
continue; | ||
|
||
String triNucSequence = variantContext.getAttributeAsString(SageVcfTags.TRINUCLEOTIDE_CONTEXT, null); | ||
String triNucContext = SnvSigUtils.variantContext(refSeq, altSeq, triNucSequence); | ||
|
||
// CHORD_LOGGER.trace("{}:{}:{}:{} {}", | ||
// variantContext.getContig(), variantContext.getStart(), refSeq, altSeq, renameTriNucBin(triNucContext)); | ||
|
||
triNucNameCountsMap.compute(triNucContext, (k,v) -> v + 1); | ||
|
||
snvCount++; | ||
} | ||
|
||
CHORD_LOGGER.debug("Counted trinucleotide contexts for {} SNVs", snvCount); | ||
|
||
return triNucNameCountsMap; | ||
} | ||
|
||
private static String renameTriNucBin(String bucketName) | ||
{ | ||
// Convert e.g. "C>A_ACA" to "A[C>A]A" | ||
// The latter is the format required by the CHORD random forest model | ||
|
||
String[] bucketNameSplit = bucketName.split("_"); | ||
|
||
String substitutionType = bucketNameSplit[0]; | ||
char[] triNucSequence = bucketNameSplit[1].toCharArray(); | ||
|
||
return String.format("%s[%s]%s", triNucSequence[0], substitutionType, triNucSequence[2]); | ||
} | ||
|
||
private static List<MutTypeCount> makeCountsList(Map<String,Integer> triNucCounts) | ||
{ | ||
List<MutTypeCount> counts = new ArrayList<>(); | ||
|
||
for(String binName : triNucCounts.keySet()) | ||
{ | ||
String newBinName = renameTriNucBin(binName); | ||
int count = triNucCounts.get(binName); | ||
|
||
MutTypeCount mutTypeCount = new MutTypeCount(newBinName, count); | ||
|
||
CHORD_LOGGER.trace(mutTypeCount); | ||
|
||
counts.add(new MutTypeCount(newBinName, count)); | ||
} | ||
|
||
return counts; | ||
} | ||
|
||
public List<MutTypeCount> extractSampleData(String sampleId) | ||
{ | ||
try | ||
{ | ||
CHORD_LOGGER.info("Counting SNV 96 trinucleotide contexts"); | ||
|
||
List<VariantContext> variantContexts = loadVariants(sampleId); | ||
Map<String,Integer> triNucCountsMap = countTriNucContexts(variantContexts); | ||
List<MutTypeCount> triNucCountsList = makeCountsList(triNucCountsMap); | ||
|
||
return triNucCountsList; | ||
} | ||
catch(Exception e) | ||
{ | ||
CHORD_LOGGER.error("sample({}) failed to count SNV trinucleotide contexts:", sampleId); | ||
e.printStackTrace(); | ||
System.exit(1); | ||
return null; | ||
} | ||
} | ||
} |