Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master' into MachineLearningPr…
Browse files Browse the repository at this point in the history
…ojectModule
  • Loading branch information
elaboy committed Dec 8, 2023
2 parents 992448b + 167fa9b commit 84de29c
Show file tree
Hide file tree
Showing 64 changed files with 1,838 additions and 685 deletions.
49 changes: 45 additions & 4 deletions mzLib/Chemistry/ChemicalFormula.cs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ namespace Chemistry
/// Formula can change!!! If isotopes or elements are changed.
/// </summary>
[Serializable]
public sealed class ChemicalFormula : IEquatable<ChemicalFormula>
public sealed class ChemicalFormula : IEquatable<ChemicalFormula>, IHasChemicalFormula
{
// Main data stores, the isotopes and elements

Expand Down Expand Up @@ -59,12 +59,14 @@ public ChemicalFormula()
Elements = new Dictionary<Element, int>();
}

// NOTE(review): this span appears to interleave the pre-change and post-change lines of a diff
// (two signatures, two pairs of assignments) — confirm which version is current before editing.
/// <summary>
/// Copy constructor: assigns Isotopes and Elements to newly created dictionaries that are
/// populated from the corresponding dictionaries of <paramref name="capFormula"/>.
/// </summary>
/// <param name="capFormula">Source of the isotope and element counts; it is dereferenced without a null check.</param>
public ChemicalFormula(ChemicalFormula capFormula)
public ChemicalFormula(IHasChemicalFormula capFormula)
{
Isotopes = new Dictionary<Isotope, int>(capFormula.Isotopes);
Elements = new Dictionary<Element, int>(capFormula.Elements);
Isotopes = new Dictionary<Isotope, int>(capFormula.ThisChemicalFormula.Isotopes);
Elements = new Dictionary<Element, int>(capFormula.ThisChemicalFormula.Elements);
}

/// <summary>
/// Returns this instance itself, so a ChemicalFormula can be passed wherever an
/// IHasChemicalFormula is expected.
/// </summary>
public ChemicalFormula ThisChemicalFormula => this;

/// <summary>
/// Gets the average mass of this chemical formula
/// </summary>
Expand Down Expand Up @@ -523,5 +525,44 @@ private string GetHillNotation()
otherParts.Sort();
return s + string.Join("", otherParts);
}

/// <summary>
/// Renders this formula as its Formula string followed by " : " and its monoisotopic mass.
/// </summary>
/// <returns>A string of the form "Formula : MonoisotopicMass".</returns>
public override string ToString() => $"{Formula} : {MonoisotopicMass}";

/// <summary>
/// Subtracts <paramref name="right"/> from <paramref name="left"/> and returns the result as a new formula;
/// neither operand is modified.
/// </summary>
/// <param name="left">Formula to subtract from; when null an empty formula is used as the starting point.</param>
/// <param name="right">Formula to remove; when null nothing is removed.</param>
/// <returns>
/// Null when both operands are null; otherwise a new formula built from a copy of <paramref name="left"/>
/// (or an empty formula) with <paramref name="right"/> removed from it.
/// </returns>
public static ChemicalFormula operator -(ChemicalFormula left, IHasChemicalFormula right)
{
    if (left == null && right == null)
    {
        return null;
    }

    // Work on a fresh instance so the caller's operands stay untouched.
    ChemicalFormula difference = left == null
        ? new ChemicalFormula()
        : new ChemicalFormula(left);

    if (right != null)
    {
        difference.Remove(right);
    }

    return difference;
}

/// <summary>
/// Adds <paramref name="right"/> to <paramref name="left"/> and returns the result as a new formula;
/// neither operand is modified.
/// </summary>
/// <param name="left">First operand; may be null.</param>
/// <param name="right">Second operand; may be null.</param>
/// <returns>
/// Null when both operands are null, a copy of the non-null operand when exactly one is null,
/// otherwise a copy of <paramref name="left"/> with <paramref name="right"/> added to it.
/// </returns>
public static ChemicalFormula operator +(ChemicalFormula left, IHasChemicalFormula right)
{
    if (left == null)
    {
        if (right == null)
        {
            return null;
        }

        return new ChemicalFormula(right);
    }

    var sum = new ChemicalFormula(left);
    if (right != null)
    {
        sum.Add(right);
    }

    return sum;
}
}
}
142 changes: 142 additions & 0 deletions mzLib/Omics/BioPolymerWithSetModsExtensions.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
using System.Text;
using Chemistry;
using Omics.Modifications;

namespace Omics;

/// <summary>
/// Extension methods that render an <see cref="IBioPolymerWithSetMods"/> as an annotated sequence string.
/// </summary>
/// <remarks>
/// Every method looks modifications up in <c>AllModsOneIsNterminus</c> with the same keys:
/// 1 for the N-terminus, <c>r + 2</c> for the residue at zero-based index <c>r</c>,
/// and <c>Length + 2</c> for the C-terminus.
/// </remarks>
public static class BioPolymerWithSetModsExtensions
{
    /// <summary>
    /// This method returns the full sequence with mass shifts INSTEAD OF PTMs in brackets [].
    /// Some external tools cannot parse PTMs, instead requiring a numerical input indicating the mass of a PTM
    /// in brackets after the position of that modification.
    /// N-terminal mass shifts are written in brackets prior to the first residue and never carry a '+' sign;
    /// residue and C-terminal mass shifts are prefixed with '+' when the mass is greater than zero.
    /// Masses are passed through RoundedDouble(6) and formatted with the invariant culture, so the
    /// decimal separator is '.' regardless of the machine's locale.
    /// </summary>
    /// <param name="withSetMods">The biopolymer whose sequence and modifications are written.</param>
    /// <returns>The sequence with a bracketed mass shift after each modified position.</returns>
    public static string FullSequenceWithMassShift(this IBioPolymerWithSetMods withSetMods)
    {
        var subsequence = new StringBuilder();

        // modification on peptide N-terminus; the missing '+' sign is kept to preserve the existing output format
        if (withSetMods.AllModsOneIsNterminus.TryGetValue(1, out Modification mod))
        {
            subsequence.Append('[').Append(FormatMassShift(mod)).Append(']');
        }

        for (int r = 0; r < withSetMods.Length; r++)
        {
            subsequence.Append(withSetMods[r]);

            // modification on this residue
            if (withSetMods.AllModsOneIsNterminus.TryGetValue(r + 2, out mod))
            {
                AppendSignedMassShift(subsequence, mod);
            }
        }

        // modification on peptide C-terminus
        if (withSetMods.AllModsOneIsNterminus.TryGetValue(withSetMods.Length + 2, out mod))
        {
            AppendSignedMassShift(subsequence, mod);
        }

        return subsequence.ToString();
    }

    /// <summary>
    /// This method returns the full sequence only with the modifications whose ModificationType is a key
    /// of the <paramref name="modstoWritePruned"/> dictionary.
    /// </summary>
    /// <param name="withSetMods">The biopolymer whose sequence and modifications are written.</param>
    /// <param name="modstoWritePruned">
    /// Modification types to write (only the keys are consulted). When null, the base sequence is returned unannotated.
    /// </param>
    /// <returns>The sequence annotated with "[ModificationType:IdWithMotif]" for each retained modification.</returns>
    public static string EssentialSequence(this IBioPolymerWithSetMods withSetMods,
        IReadOnlyDictionary<string, int> modstoWritePruned)
    {
        if (modstoWritePruned == null)
        {
            return withSetMods.BaseSequence;
        }

        var sbsequence = new StringBuilder();

        // variable modification on peptide N-terminus
        if (withSetMods.AllModsOneIsNterminus.TryGetValue(1, out Modification mod)
            && modstoWritePruned.ContainsKey(mod.ModificationType))
        {
            AppendModificationLabel(sbsequence, mod);
        }

        for (int r = 0; r < withSetMods.Length; r++)
        {
            sbsequence.Append(withSetMods[r]);

            // variable modification on this residue
            if (withSetMods.AllModsOneIsNterminus.TryGetValue(r + 2, out mod)
                && modstoWritePruned.ContainsKey(mod.ModificationType))
            {
                AppendModificationLabel(sbsequence, mod);
            }
        }

        // variable modification on peptide C-terminus
        if (withSetMods.AllModsOneIsNterminus.TryGetValue(withSetMods.Length + 2, out mod)
            && modstoWritePruned.ContainsKey(mod.ModificationType))
        {
            AppendModificationLabel(sbsequence, mod);
        }

        return sbsequence.ToString();
    }

    /// <summary>
    /// Determines the full sequence of a BioPolymerWithSetMods from its base sequence and modifications.
    /// </summary>
    /// <param name="withSetMods">The biopolymer whose sequence and modifications are written.</param>
    /// <returns>The sequence annotated with "[ModificationType:IdWithMotif]" after each modified position.</returns>
    public static string DetermineFullSequence(this IBioPolymerWithSetMods withSetMods)
    {
        var subSequence = new StringBuilder();

        // modification on peptide N-terminus
        if (withSetMods.AllModsOneIsNterminus.TryGetValue(1, out Modification mod))
        {
            AppendModificationLabel(subSequence, mod);
        }

        for (int r = 0; r < withSetMods.Length; r++)
        {
            subSequence.Append(withSetMods[r]);

            // modification on this residue
            if (withSetMods.AllModsOneIsNterminus.TryGetValue(r + 2, out mod))
            {
                AppendModificationLabel(subSequence, mod);
            }
        }

        // modification on peptide C-terminus
        if (withSetMods.AllModsOneIsNterminus.TryGetValue(withSetMods.Length + 2, out mod))
        {
            AppendModificationLabel(subSequence, mod);
        }

        return subSequence.ToString();
    }

    /// <summary>
    /// Formats the modification's monoisotopic mass, after RoundedDouble(6), with the invariant culture.
    /// Returns an empty string when there is no mass value.
    /// </summary>
    private static string FormatMassShift(Modification mod)
    {
        double? mass = mod.MonoisotopicMass.RoundedDouble(6);
        return mass?.ToString(System.Globalization.CultureInfo.InvariantCulture) ?? string.Empty;
    }

    /// <summary>
    /// Appends "[+mass]" when the monoisotopic mass is greater than zero, otherwise "[mass]".
    /// </summary>
    private static void AppendSignedMassShift(StringBuilder builder, Modification mod)
    {
        builder.Append(mod.MonoisotopicMass > 0 ? "[+" : "[")
               .Append(FormatMassShift(mod))
               .Append(']');
    }

    /// <summary>
    /// Appends the modification as "[ModificationType:IdWithMotif]".
    /// </summary>
    private static void AppendModificationLabel(StringBuilder builder, Modification mod)
    {
        builder.Append('[')
               .Append(mod.ModificationType)
               .Append(':')
               .Append(mod.IdWithMotif)
               .Append(']');
    }
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
namespace Proteomics.ProteolyticDigestion
namespace Omics.Digestion
{
public enum CleavageSpecificity
{
Expand Down
107 changes: 107 additions & 0 deletions mzLib/Omics/Digestion/DigestionAgent.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
using Omics.Modifications;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace Omics.Digestion
{
/// <summary>
/// Base class for an agent that cleaves a sequence at positions described by a set of digestion motifs.
/// </summary>
public abstract class DigestionAgent
{
    /// <summary>
    /// Initializes the agent's name, specificity, motifs and cleavage modification.
    /// </summary>
    /// <param name="name">Name of the agent; this is what <see cref="ToString"/> returns.</param>
    /// <param name="cleavageSpecificity">Cleavage specificity of the agent.</param>
    /// <param name="motifList">Motifs consulted when locating cleavage sites; null is replaced by an empty list.</param>
    /// <param name="cleavageMod">Modification stored in <see cref="CleavageMod"/>.</param>
    protected DigestionAgent(string name, CleavageSpecificity cleavageSpecificity, List<DigestionMotif> motifList, Modification cleavageMod)
    {
        Name = name;
        CleavageSpecificity = cleavageSpecificity;
        CleavageMod = cleavageMod;
        DigestionMotifs = motifList ?? new List<DigestionMotif>();
    }

    /// <summary>Name of the agent.</summary>
    public string Name { get; init; }

    /// <summary>Cleavage specificity of the agent.</summary>
    public CleavageSpecificity CleavageSpecificity { get; init; }

    /// <summary>Motifs consulted by <see cref="GetDigestionSiteIndices"/>.</summary>
    public List<DigestionMotif> DigestionMotifs { get; init; }

    /// <summary>Modification associated with cleavage; settable after construction.</summary>
    public Modification CleavageMod { get; set; }

    /// <summary>
    /// Returns the agent's <see cref="Name"/>.
    /// </summary>
    public override string ToString() => Name;

    /// <summary>
    /// Checks a length against both an inclusive minimum and an inclusive maximum.
    /// </summary>
    /// <param name="length">Length to check.</param>
    /// <param name="minLength">Smallest acceptable length.</param>
    /// <param name="maxLength">Largest acceptable length.</param>
    /// <returns>True when minLength &lt;= length &lt;= maxLength.</returns>
    protected static bool ValidLength(int length, int minLength, int maxLength)
        => length >= minLength && length <= maxLength;

    /// <summary>
    /// Checks a length against an inclusive minimum.
    /// </summary>
    /// <param name="length">Length to check.</param>
    /// <param name="minLength">Smallest acceptable length.</param>
    /// <returns>True when length is at least minLength.</returns>
    protected static bool ValidMinLength(int length, int minLength)
        => length >= minLength;

    /// <summary>
    /// Checks an optional length against an inclusive maximum.
    /// </summary>
    /// <param name="length">Length to check; a missing value is always accepted.</param>
    /// <param name="maxLength">Largest acceptable length.</param>
    /// <returns>True when length has no value or does not exceed maxLength.</returns>
    protected static bool ValidMaxLength(int? length, int maxLength)
        => length is null || length.Value <= maxLength;

    /// <summary>
    /// Gets the indices after which this agent will cleave a given sequence.
    /// </summary>
    /// <param name="sequence">Sequence to scan for cleavage sites.</param>
    /// <returns>
    /// Ascending list of distinct cut indices; 0 and <c>sequence.Length</c> are always included.
    /// </returns>
    public List<int> GetDigestionSiteIndices(string sequence)
    {
        // A sorted set yields the distinct, ascending result without a separate de-duplication/sort pass.
        var cutSites = new SortedSet<int>();

        for (int position = 0; position < sequence.Length; position++)
        {
            int furthestCut = -1;
            bool blocked = false;

            foreach (DigestionMotif motif in DigestionMotifs)
            {
                var fit = motif.Fits(sequence, position);

                // Item1: the motif matches here; keep the furthest cut that still lies inside the sequence.
                if (fit.Item1 && position + motif.CutIndex < sequence.Length)
                {
                    furthestCut = Math.Max(position + motif.CutIndex, furthestCut);
                }

                // Item2: the motif vetoes cleavage; one veto from any motif blocks this position.
                blocked |= fit.Item2;
            }

            if (blocked || furthestCut == -1)
            {
                continue;
            }

            cutSites.Add(furthestCut);
        }

        cutSites.Add(0); // The start is treated as a cleavage site to retain the n-terminal peptide
        cutSites.Add(sequence.Length); // The end is treated as a cleavage site to retain the c-terminal peptide
        return cutSites.ToList();
    }
}
}
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
using MzLibUtil;
using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;
using System.Text.RegularExpressions;
using MzLibUtil;

namespace Proteomics.ProteolyticDigestion
namespace Omics.Digestion
{
public class DigestionMotif
{
Expand Down
Loading

0 comments on commit 84de29c

Please sign in to comment.