-
Notifications
You must be signed in to change notification settings - Fork 33
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge remote-tracking branch 'upstream/master' into MachineLearningProjectModule
- Loading branch information
Showing
64 changed files
with
1,838 additions
and
685 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,142 @@ | ||
using System.Text; | ||
using Chemistry; | ||
using Omics.Modifications; | ||
|
||
namespace Omics; | ||
|
||
public static class BioPolymerWithSetModsExtensions
{
    /// <summary>
    /// Builds the sequence with every modification written as a bracketed monoisotopic mass
    /// instead of a modification name, for external tools that only accept numeric mass shifts.
    /// </summary>
    /// <remarks>
    /// Modifications are looked up in <c>AllModsOneIsNterminus</c>: key 1 is the N-terminus, key
    /// <c>r + 2</c> is the residue at zero-based position <c>r</c>, and key <c>Length + 2</c> is the
    /// C-terminus. The N-terminal mass shift is written in front of the first residue without a '+'
    /// sign; residue and C-terminal masses greater than zero are written with one.
    /// </remarks>
    /// <param name="withSetMods">Bio-polymer whose residues and modifications are written out.</param>
    /// <returns>The residues interleaved with bracketed mass shifts.</returns>
    public static string FullSequenceWithMassShift(this IBioPolymerWithSetMods withSetMods)
    {
        var output = new StringBuilder();

        // N-terminal mass shift goes before the first residue and never carries a '+'.
        if (withSetMods.AllModsOneIsNterminus.TryGetValue(1, out Modification found))
        {
            AppendMassShift(output, found, false);
        }

        for (int position = 0; position < withSetMods.Length; position++)
        {
            output.Append(withSetMods[position]);

            // Mass shift attached to this residue, if any.
            if (withSetMods.AllModsOneIsNterminus.TryGetValue(position + 2, out found))
            {
                AppendMassShift(output, found, true);
            }
        }

        // C-terminal mass shift follows the last residue.
        if (withSetMods.AllModsOneIsNterminus.TryGetValue(withSetMods.Length + 2, out found))
        {
            AppendMassShift(output, found, true);
        }

        return output.ToString();
    }

    /// <summary>
    /// Builds the sequence annotated only with modifications whose <c>ModificationType</c> is a key
    /// of <paramref name="modstoWritePruned"/>; all other modifications are left out.
    /// </summary>
    /// <param name="withSetMods">Bio-polymer whose residues and modifications are written out.</param>
    /// <param name="modstoWritePruned">
    /// Modification types to keep (only the keys are consulted). When null, the unannotated
    /// <c>BaseSequence</c> is returned.
    /// </param>
    /// <returns>The residues interleaved with <c>[ModificationType:IdWithMotif]</c> labels for the kept modifications.</returns>
    public static string EssentialSequence(this IBioPolymerWithSetMods withSetMods,
        IReadOnlyDictionary<string, int> modstoWritePruned)
    {
        string unmodified = withSetMods.BaseSequence;
        if (modstoWritePruned == null)
        {
            return unmodified;
        }

        var output = new StringBuilder();

        // Writes the modification stored under the given key, provided its type is one of the kept types.
        void AppendIfKept(int key)
        {
            if (withSetMods.AllModsOneIsNterminus.TryGetValue(key, out Modification candidate)
                && modstoWritePruned.ContainsKey(candidate.ModificationType))
            {
                AppendModificationLabel(output, candidate);
            }
        }

        // Key 1 is the N-terminus.
        AppendIfKept(1);

        for (int position = 0; position < withSetMods.Length; position++)
        {
            output.Append(withSetMods[position]);
            AppendIfKept(position + 2);
        }

        // Key Length + 2 is the C-terminus.
        AppendIfKept(withSetMods.Length + 2);

        return output.ToString();
    }

    /// <summary>
    /// Builds the full sequence of a bio-polymer from its residues and all of its modifications,
    /// writing each modification as <c>[ModificationType:IdWithMotif]</c>.
    /// </summary>
    /// <param name="withSetMods">Bio-polymer whose residues and modifications are written out.</param>
    /// <returns>The residues interleaved with a label for every modification.</returns>
    public static string DetermineFullSequence(this IBioPolymerWithSetMods withSetMods)
    {
        var output = new StringBuilder();

        // N-terminal modification goes before the first residue.
        if (withSetMods.AllModsOneIsNterminus.TryGetValue(1, out Modification found))
        {
            AppendModificationLabel(output, found);
        }

        for (int position = 0; position < withSetMods.Length; position++)
        {
            output.Append(withSetMods[position]);

            // Modification attached to this residue, if any.
            if (withSetMods.AllModsOneIsNterminus.TryGetValue(position + 2, out found))
            {
                AppendModificationLabel(output, found);
            }
        }

        // C-terminal modification follows the last residue.
        if (withSetMods.AllModsOneIsNterminus.TryGetValue(withSetMods.Length + 2, out found))
        {
            AppendModificationLabel(output, found);
        }

        return output.ToString();
    }

    /// <summary>
    /// Appends the monoisotopic mass of <paramref name="mod"/>, passed through <c>RoundedDouble(6)</c>,
    /// in square brackets. A '+' is written after the opening bracket only when
    /// <paramref name="signPositive"/> is set and the unrounded mass is greater than zero.
    /// </summary>
    private static void AppendMassShift(StringBuilder builder, Modification mod, bool signPositive)
    {
        string opening = signPositive && mod.MonoisotopicMass > 0 ? "[+" : "[";
        builder.Append(opening + mod.MonoisotopicMass.RoundedDouble(6).ToString() + ']');
    }

    /// <summary>
    /// Appends the <c>[ModificationType:IdWithMotif]</c> label of <paramref name="mod"/>.
    /// </summary>
    private static void AppendModificationLabel(StringBuilder builder, Modification mod)
    {
        builder.Append($"[{mod.ModificationType}:{mod.IdWithMotif}]");
    }
}
2 changes: 1 addition & 1 deletion
2
...oteolyticDigestion/CleavageSpecificity.cs → mzLib/Omics/Digestion/CleavageSpecificity.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
namespace Proteomics.ProteolyticDigestion | ||
namespace Omics.Digestion | ||
{ | ||
public enum CleavageSpecificity | ||
{ | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
using Omics.Modifications; | ||
using System; | ||
using System.Collections.Generic; | ||
using System.Linq; | ||
using System.Text; | ||
using System.Threading.Tasks; | ||
|
||
namespace Omics.Digestion
{
    /// <summary>
    /// Base class for an agent that cleaves a sequence at the sites matched by its digestion motifs.
    /// </summary>
    public abstract class DigestionAgent
    {
        /// <summary>
        /// Initializes the agent's name, specificity, motifs and cleavage modification.
        /// </summary>
        /// <param name="name">Name of the agent; also returned by <see cref="ToString"/>.</param>
        /// <param name="cleavageSpecificity">Cleavage specificity of the agent.</param>
        /// <param name="motifList">Motifs that determine where the agent cuts; null is replaced by an empty list.</param>
        /// <param name="cleavageMod">Modification stored as <see cref="CleavageMod"/>.</param>
        protected DigestionAgent(string name, CleavageSpecificity cleavageSpecificity, List<DigestionMotif> motifList, Modification cleavageMod)
        {
            Name = name;
            CleavageSpecificity = cleavageSpecificity;
            DigestionMotifs = motifList ?? new List<DigestionMotif>();
            CleavageMod = cleavageMod;
        }

        public string Name { get; init; }
        public CleavageSpecificity CleavageSpecificity { get; init; }
        public List<DigestionMotif> DigestionMotifs { get; init; }
        public Modification CleavageMod { get; set; }

        /// <summary>
        /// Returns the name of the agent.
        /// </summary>
        public override string ToString() => Name;

        /// <summary>
        /// Checks a peptide length against both the minimum and the maximum.
        /// </summary>
        /// <param name="length">Length to check.</param>
        /// <param name="minLength">Smallest accepted length (inclusive).</param>
        /// <param name="maxLength">Largest accepted length (inclusive).</param>
        /// <returns>True when the length lies within both bounds.</returns>
        protected static bool ValidLength(int length, int minLength, int maxLength)
            => ValidMinLength(length, minLength) && ValidMaxLength(length, maxLength);

        /// <summary>
        /// Checks a peptide length against the minimum.
        /// </summary>
        /// <param name="length">Length to check.</param>
        /// <param name="minLength">Smallest accepted length (inclusive).</param>
        /// <returns>True when the length is at least the minimum.</returns>
        protected static bool ValidMinLength(int length, int minLength)
            => length >= minLength;

        /// <summary>
        /// Checks a peptide length against the maximum.
        /// </summary>
        /// <param name="length">Length to check; a null length is always accepted.</param>
        /// <param name="maxLength">Largest accepted length (inclusive).</param>
        /// <returns>True when the length is null or does not exceed the maximum.</returns>
        protected static bool ValidMaxLength(int? length, int maxLength)
            => !length.HasValue || length <= maxLength;

        /// <summary>
        /// Gets the indices after which this agent will cleave the given sequence.
        /// </summary>
        /// <param name="sequence">Sequence to scan for cleavage sites.</param>
        /// <returns>
        /// The distinct cleavage indices in ascending order. The result always contains 0 and
        /// <c>sequence.Length</c>, so that the N-terminal and C-terminal peptides are retained.
        /// </returns>
        public List<int> GetDigestionSiteIndices(string sequence)
        {
            // A sorted set keeps the indices unique and in ascending order as they are collected.
            var cutSites = new SortedSet<int>();

            for (int position = 0; position < sequence.Length; position++)
            {
                int latestCut = -1; // -1 means no motif has produced a cut for this position
                bool cleavageBlocked = false;

                foreach (DigestionMotif motif in DigestionMotifs)
                {
                    var fit = motif.Fits(sequence, position);

                    // Item1: the motif fits here. Keep the furthest cut that still lies inside the sequence.
                    if (fit.Item1)
                    {
                        int candidate = position + motif.CutIndex;
                        if (candidate < sequence.Length && candidate > latestCut)
                        {
                            latestCut = candidate;
                        }
                    }

                    // Item2: the motif prevents cleavage. A single such motif vetoes this position.
                    cleavageBlocked |= fit.Item2;
                }

                if (latestCut != -1 && !cleavageBlocked)
                {
                    cutSites.Add(latestCut);
                }
            }

            cutSites.Add(0); // The start of the sequence is treated as a cleavage site to retain the n-terminal peptide
            cutSites.Add(sequence.Length); // The end of the sequence is treated as a cleavage site to retain the c-terminal peptide
            return new List<int>(cutSites);
        }
    }
}
8 changes: 3 additions & 5 deletions
8
...cs/ProteolyticDigestion/DigestionMotif.cs → mzLib/Omics/Digestion/DigestionMotif.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.