Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update Omics to read additional SpectrumMatchFromTsv types #807

Draft
wants to merge 12 commits into
base: master
Choose a base branch
from
124 changes: 124 additions & 0 deletions mzLib/Omics/SpectrumMatch/CrosslinkLibrarySpectrum.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
using Omics.Fragmentation;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;

namespace Omics.SpectrumMatch
{
public class CrosslinkLibrarySpectrum : LibrarySpectrum
{
/// <summary>
/// Spectrum of the beta peptide. Assigned by the constructor only when a non-null
/// beta spectrum is supplied; otherwise it is left null.
/// </summary>
public CrosslinkLibrarySpectrum BetaPeptideSpectrum { get; }
/// <summary>
/// First piece of <see cref="UniqueSequence"/> after splitting it on <see cref="CrosslinkRegex"/>;
/// null when the split yields fewer than two pieces.
/// </summary>
public string AlphaPeptideSequence { get; private set; }
/// <summary>
/// Second piece of <see cref="UniqueSequence"/> after splitting it on <see cref="CrosslinkRegex"/>;
/// null when the split yields fewer than two pieces.
/// </summary>
public string BetaPeptideSequence { get; private set; }
/// <summary>
/// The unique sequence string passed to the constructor; also used to build <see cref="Name"/>.
/// </summary>
public string UniqueSequence { get; private set; }
/// <summary>
/// True when this instance was constructed without a beta spectrum, which is how the
/// nested beta-peptide spectrum itself is created.
/// </summary>
public bool IsBetaPeptide { get; }

Check warning on line 17 in mzLib/Omics/SpectrumMatch/CrosslinkLibrarySpectrum.cs

View check run for this annotation

Codecov / codecov/patch

mzLib/Omics/SpectrumMatch/CrosslinkLibrarySpectrum.cs#L17

Added line #L17 was not covered by tests
/// <summary>
/// Matches a parenthesized run of digits such as "(4)". Used to split a unique
/// sequence into its peptide parts. Read-only so that callers cannot swap the shared pattern.
/// </summary>
public static readonly Regex CrosslinkRegex = new Regex(@"\(\d+\)");

/// <summary>
/// Library entry name: the unique sequence, a "/" and the charge state.
/// Declared as an override (LibrarySpectrum.Name is virtual) so that the same value is
/// returned when the spectrum is accessed through a LibrarySpectrum reference.
/// </summary>
public override string Name => UniqueSequence + "/" + ChargeState;

/// <summary>
/// Creates a crosslink library spectrum from separate alpha and beta fragment ion lists.
/// The beta ions are wrapped in a nested beta-only <see cref="CrosslinkLibrarySpectrum"/>
/// that shares the unique sequence, precursor m/z, charge and retention time of the alpha spectrum.
/// </summary>
/// <param name="uniqueSequence">Unique sequence of the crosslinked pair.</param>
/// <param name="precursorMz">Precursor m/z.</param>
/// <param name="precursorCharge">Precursor charge state.</param>
/// <param name="peaks">Matched fragment ions of the alpha peptide.</param>
/// <param name="rt">Retention time.</param>
/// <param name="betaPeaks">Matched fragment ions of the beta peptide.</param>
/// <param name="isDecoy">Whether the match is a decoy; applied to both the alpha and the nested beta spectrum.</param>
public CrosslinkLibrarySpectrum(
    string uniqueSequence,
    double precursorMz,
    int precursorCharge,
    List<MatchedFragmentIon> peaks,
    double rt,
    List<MatchedFragmentIon> betaPeaks,
    bool isDecoy = false) : this(
    uniqueSequence,
    precursorMz,
    precursorCharge,
    peaks,
    rt,
    // Named arguments select the beta-only overload and carry the decoy flag along,
    // so the nested beta spectrum is not silently reported as a target.
    new CrosslinkLibrarySpectrum(uniqueSequence, precursorMz, precursorCharge, betaPeaks, rt, betaSpectrum: null, isDecoy: isDecoy),
    isDecoy)
{ }

/// <summary>
/// Creates a crosslink library spectrum. When <paramref name="betaSpectrum"/> is null the
/// instance is flagged as a beta peptide; otherwise the supplied spectrum is stored as
/// <see cref="BetaPeptideSpectrum"/>. The alpha and beta sequences are then derived from
/// <paramref name="uniqueSequence"/>.
/// </summary>
/// <param name="uniqueSequence">Unique sequence of the crosslinked pair.</param>
/// <param name="precursorMz">Precursor m/z.</param>
/// <param name="precursorCharge">Precursor charge state.</param>
/// <param name="peaks">Matched fragment ions of this spectrum.</param>
/// <param name="rt">Retention time.</param>
/// <param name="betaSpectrum">Spectrum of the beta peptide, or null for a beta-only instance.</param>
/// <param name="isDecoy">Whether the match is a decoy.</param>
public CrosslinkLibrarySpectrum(
    string uniqueSequence,
    double precursorMz,
    int precursorCharge,
    List<MatchedFragmentIon> peaks,
    double rt,
    CrosslinkLibrarySpectrum betaSpectrum = null,
    bool isDecoy = false)
    : base(uniqueSequence, precursorMz, precursorCharge, peaks, rt, isDecoy)
{
    UniqueSequence = uniqueSequence;

    // No beta spectrum means this instance is the beta peptide itself.
    IsBetaPeptide = betaSpectrum is null;
    BetaPeptideSpectrum = betaSpectrum;

    SetAlphaBetaSequence();
}

/// <summary>
/// Splits <see cref="UniqueSequence"/> on <see cref="CrosslinkRegex"/> and stores the first
/// two pieces as the alpha and beta peptide sequences. Both are set to null when the
/// split produces fewer than two pieces.
/// </summary>
private void SetAlphaBetaSequence()
{
    string[] pieces = CrosslinkRegex.Split(UniqueSequence);
    bool hasBothPeptides = pieces.Length >= 2;

    AlphaPeptideSequence = hasBothPeptides ? pieces[0] : null;
    BetaPeptideSequence = hasBothPeptides ? pieces[1] : null;
}

/// <summary>
/// Writes the spectrum as a text library entry: a header (name, precursor m/z, retention time,
/// alpha/beta peak counts) followed by one tab-separated line per fragment ion. Intensities are
/// reported as a fraction of the most intense ion across the alpha and beta peptides, and
/// beta-peptide ions carry a trailing "BetaPeptideIon" column.
/// </summary>
/// <remarks>
/// A beta-only instance has no nested <see cref="BetaPeptideSpectrum"/>, and an ion list may be
/// empty; both cases are written with zero peaks instead of throwing.
/// </remarks>
/// <returns>The library entry text with leading and trailing whitespace trimmed.</returns>
public override string ToString()
{
    // Treat a missing ion list (or a missing beta spectrum) as "no ions".
    List<MatchedFragmentIon> alphaIons = MatchedFragmentIons ?? new List<MatchedFragmentIon>();
    List<MatchedFragmentIon> betaIons = BetaPeptideSpectrum?.MatchedFragmentIons ?? new List<MatchedFragmentIon>();

    StringBuilder spectrum = new();
    spectrum.AppendLine("Name: " + Name);
    spectrum.AppendLine("MW: " + PrecursorMz);
    spectrum.AppendLine("Comment: Parent=" + PrecursorMz + " RT=" + RetentionTime);
    spectrum.AppendLine("Num alpha peaks: " + alphaIons.Count + ", Num beta peaks: " + betaIons.Count);

    // Max over both ion sets; the default only applies when there are no ions at all,
    // in which case nothing is divided by it.
    double maxIntensity = alphaIons.Concat(betaIons)
        .Select(ion => ion.Intensity)
        .DefaultIfEmpty(1.0)
        .Max();

    // Shared formatter for alpha and beta ions; suffix is appended after the quoted annotation.
    void AppendIons(IEnumerable<MatchedFragmentIon> ions, string suffix)
    {
        foreach (MatchedFragmentIon matchedIon in ions)
        {
            var product = matchedIon.NeutralTheoreticalProduct;
            double intensityFraction = matchedIon.Intensity / maxIntensity;

            // Null concatenates as an empty string, so ions without a neutral loss get no "-x" part.
            string neutralLoss = product.NeutralLoss != 0 ? "-" + product.NeutralLoss : null;

            spectrum.AppendLine(matchedIon.Mz + "\t" + intensityFraction + "\t" + "\"" +
                                product.ProductType.ToString() +
                                product.FragmentNumber.ToString() + "^" +
                                matchedIon.Charge + neutralLoss + "/" + 0 + "ppm" + "\"" +
                                suffix);
        }
    }

    AppendIons(alphaIons, string.Empty);
    AppendIons(betaIons, "\t" + "BetaPeptideIon");

    return spectrum.ToString().Trim();
}
}
}
2 changes: 1 addition & 1 deletion mzLib/Omics/SpectrumMatch/LibrarySpectrum.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ public class LibrarySpectrum : MzSpectrum
public List<MatchedFragmentIon> MatchedFragmentIons { get; set; }
public bool IsDecoy { get; set; }

public string Name
public virtual string Name
{
get { return Sequence + "/" + ChargeState; }
}
Expand Down
13 changes: 8 additions & 5 deletions mzLib/Omics/SpectrumMatch/SpectrumMatchFromTsv.cs
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@
}


protected static List<MatchedFragmentIon> ReadFragmentIonsFromString(string matchedMzString, string matchedIntensityString, string peptideBaseSequence, string matchedMassErrorDaString = null)
protected static List<MatchedFragmentIon> ReadFragmentIonsFromString(string matchedMzString, string matchedIntensityString, string peptideBaseSequence, string matchedMassErrorDaString = null, bool isProtein = true)
{
List<MatchedFragmentIon> matchedIons = new List<MatchedFragmentIon>();

Expand Down Expand Up @@ -225,11 +225,14 @@
}

//get terminus
if (TerminusSpecificProductTypes.ProductTypeToFragmentationTerminus.TryGetValue(productType,
out terminus));

if (isProtein)
TerminusSpecificProductTypes.ProductTypeToFragmentationTerminus.TryGetValue(productType, out terminus);
else
terminus = Omics.Fragmentation.Oligo.TerminusSpecificProductTypes.GetRnaTerminusType(productType);

Check warning on line 232 in mzLib/Omics/SpectrumMatch/SpectrumMatchFromTsv.cs

View check run for this annotation

Codecov / codecov/patch

mzLib/Omics/SpectrumMatch/SpectrumMatchFromTsv.cs#L232

Added line #L232 was not covered by tests

//get amino acid position
aminoAcidPosition = terminus == FragmentationTerminus.C ?
aminoAcidPosition = terminus is FragmentationTerminus.C or FragmentationTerminus.ThreePrime ?
peptideBaseSequence.Split('|')[0].Length - fragmentNumber :
fragmentNumber;
}
Expand Down Expand Up @@ -365,7 +368,7 @@
{
return FullSequence;
}
public LibrarySpectrum ToLibrarySpectrum()
public virtual LibrarySpectrum ToLibrarySpectrum()
{
bool isDecoy = this.DecoyContamTarget == "D";

Expand Down
9 changes: 1 addition & 8 deletions mzLib/Omics/SpectrumMatch/SpectrumMatchFromTsvHeader.cs
Original file line number Diff line number Diff line change
@@ -1,13 +1,5 @@
namespace Omics.SpectrumMatch
{
//for glyco
public enum LocalizationLevel
{
Level1,
Level1b,
Level2,
Level3
}
public class SpectrumMatchFromTsvHeader
{
// File and scan information
Expand All @@ -28,6 +20,7 @@ public class SpectrumMatchFromTsvHeader
public const string BaseSequence = "Base Sequence";
public const string FullSequence = "Full Sequence";
public const string EssentialSequence = "Essential Sequence";
public const string UniqueSequence = "Unique Sequence"; //Used for crosslinked peptides
public const string AmbiguityLevel = "Ambiguity Level";
public const string SpectrumMatchCount = "Spectrum Match Count";
public const string Mods = "Mods";
Expand Down
77 changes: 66 additions & 11 deletions mzLib/Proteomics/PSM/PsmFromTsv.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
using System.Globalization;
using System.IO;
using System.Linq;
using Easy.Common.Extensions;
using Omics.Fragmentation;
using Omics.SpectrumMatch;

Expand Down Expand Up @@ -34,6 +35,10 @@
public int? BetaPeptideRank { get; }
public List<MatchedFragmentIon> BetaPeptideMatchedIons { get; }
public Dictionary<int, List<MatchedFragmentIon>> BetaPeptideChildScanMatchedIons { get; }
/// <summary>
/// If Crosslink, this contains the alpha and beta sequences. Otherwise, it contains the full sequence
/// </summary>
public string UniqueSequence { get; }
public double? XLTotalScore { get; }
public string ParentIons { get; }

Expand Down Expand Up @@ -135,6 +140,12 @@
((spl[parsedHeader[SpectrumMatchFromTsvHeader.BetaPeptideMatchedIonsLabel]].StartsWith("{")) ? ReadChildScanMatchedIons(spl[parsedHeader[SpectrumMatchFromTsvHeader.BetaPeptideMatchedIonsLabel]].Trim(), spl[parsedHeader[SpectrumMatchFromTsvHeader.BetaPeptideMatchedIonIntensitiesLabel]].Trim(), BetaPeptideBaseSequence).First().Value : ReadFragmentIonsFromString(spl[parsedHeader[SpectrumMatchFromTsvHeader.BetaPeptideMatchedIonsLabel]].Trim(), spl[parsedHeader[SpectrumMatchFromTsvHeader.BetaPeptideMatchedIonIntensitiesLabel]].Trim(), BetaPeptideBaseSequence));
XLTotalScore = (parsedHeader[SpectrumMatchFromTsvHeader.XLTotalScoreLabel] < 0) ? null : (double?)double.Parse(spl[parsedHeader[SpectrumMatchFromTsvHeader.XLTotalScoreLabel]].Trim(), CultureInfo.InvariantCulture);
ParentIons = (parsedHeader[SpectrumMatchFromTsvHeader.ParentIonsLabel] < 0) ? null : spl[parsedHeader[SpectrumMatchFromTsvHeader.ParentIonsLabel]].Trim();
// This ensures backwards compatibility with old Crosslink Search Results
// This works because the alpha and beta peptide full sequences are written to tsv with their crosslink site included (e.g., PEPTIDEK(4))
if (UniqueSequence == null && BetaPeptideFullSequence != null)
{
UniqueSequence = FullSequence + BetaPeptideFullSequence;
}

// child scan matched ions for xlink and glyco. we are getting them all above and then deleting primary scan ions here.
ChildScanMatchedIons = (!spl[parsedHeader[SpectrumMatchFromTsvHeader.MatchedIonMzRatios]].StartsWith("{")) ? null : ReadChildScanMatchedIons(spl[parsedHeader[SpectrumMatchFromTsvHeader.MatchedIonMzRatios]].Trim(), spl[parsedHeader[SpectrumMatchFromTsvHeader.MatchedIonIntensities]].Trim(), BaseSeq);
Expand All @@ -152,18 +163,30 @@
}

//For Glyco
GlycanMass = (parsedHeader[SpectrumMatchFromTsvHeader.GlycanMass] < 0) ? null : (double?)double.Parse(spl[parsedHeader[SpectrumMatchFromTsvHeader.GlycanMass]], CultureInfo.InvariantCulture);
GlycanComposition = (parsedHeader[SpectrumMatchFromTsvHeader.GlycanComposition] < 0) ? null : spl[parsedHeader[SpectrumMatchFromTsvHeader.GlycanComposition]];
GlycanStructure = (parsedHeader[SpectrumMatchFromTsvHeader.GlycanStructure] < 0) ? null : spl[parsedHeader[SpectrumMatchFromTsvHeader.GlycanStructure]];
var localizationLevel = (parsedHeader[SpectrumMatchFromTsvHeader.GlycanLocalizationLevel] < 0) ? null : spl[parsedHeader[SpectrumMatchFromTsvHeader.GlycanLocalizationLevel]];
if (localizationLevel != null)
try // Try is so that glyco and non-glyco psms can be read from the same file
{
if (localizationLevel.Equals("NA"))
GlycanLocalizationLevel = null;
else
GlycanLocalizationLevel = (LocalizationLevel)Enum.Parse(typeof(LocalizationLevel), localizationLevel);
GlycanMass = (parsedHeader[PsmTsvHeader_Glyco.GlycanMass] < 0) ? null : (double?)double.Parse(spl[parsedHeader[PsmTsvHeader_Glyco.GlycanMass]], CultureInfo.InvariantCulture);
GlycanComposition = (parsedHeader[PsmTsvHeader_Glyco.GlycanComposition] < 0) ? null : spl[parsedHeader[PsmTsvHeader_Glyco.GlycanComposition]];
GlycanStructure = (parsedHeader[PsmTsvHeader_Glyco.GlycanStructure] < 0) ? null : spl[parsedHeader[PsmTsvHeader_Glyco.GlycanStructure]];
var localizationLevel = (parsedHeader[PsmTsvHeader_Glyco.GlycanLocalizationLevel] < 0) ? null : spl[parsedHeader[PsmTsvHeader_Glyco.GlycanLocalizationLevel]];
if (localizationLevel != null)
{
if (localizationLevel.Equals("NA"))
GlycanLocalizationLevel = null;
else
GlycanLocalizationLevel = (LocalizationLevel)Enum.Parse(typeof(LocalizationLevel), localizationLevel);
}
LocalizedGlycan = (parsedHeader[PsmTsvHeader_Glyco.LocalizedGlycan] < 0) ? null : spl[parsedHeader[PsmTsvHeader_Glyco.LocalizedGlycan]];

}
catch
{
GlycanMass = null;
GlycanComposition = null;
GlycanStructure = null;
GlycanLocalizationLevel = null;
LocalizedGlycan = null;

Check warning on line 188 in mzLib/Proteomics/PSM/PsmFromTsv.cs

View check run for this annotation

Codecov / codecov/patch

mzLib/Proteomics/PSM/PsmFromTsv.cs#L182-L188

Added lines #L182 - L188 were not covered by tests
}
LocalizedGlycan = (parsedHeader[SpectrumMatchFromTsvHeader.LocalizedGlycan] < 0) ? null : spl[parsedHeader[SpectrumMatchFromTsvHeader.LocalizedGlycan]];
}

/// <summary>
Expand Down Expand Up @@ -259,6 +282,38 @@
LocalizedGlycan = psm.LocalizedGlycan;
}


/// <summary>
/// Converts this PSM to a library spectrum. When beta peptide matched ions are present the
/// result is a <see cref="CrosslinkLibrarySpectrum"/>; otherwise it is a plain
/// <see cref="LibrarySpectrum"/>. Fragment intensities are divided by the summed intensity of
/// the (alpha) matched ions, with that sum floored at 1.0.
/// </summary>
/// <returns>The library spectrum built from this match, carrying its decoy status.</returns>
public override LibrarySpectrum ToLibrarySpectrum()
{
    bool isDecoy = this.DecoyContamTarget == "D";

    // The floor of 1.0 keeps the division below well defined when no intensity was recorded.
    double matchedIonIntensitySum = Math.Max(1.0, this.MatchedIons.Select(i => i.Intensity).Sum());

    // Copies each ion with a fresh Product and an intensity scaled by the alpha intensity sum.
    List<MatchedFragmentIon> CopyWithScaledIntensity(IEnumerable<MatchedFragmentIon> ions)
    {
        List<MatchedFragmentIon> copies = new List<MatchedFragmentIon>();
        foreach (MatchedFragmentIon ion in ions)
        {
            Product product = new Product(ion.NeutralTheoreticalProduct.ProductType, ion.NeutralTheoreticalProduct.Terminus, ion.NeutralTheoreticalProduct.NeutralMass, ion.NeutralTheoreticalProduct.FragmentNumber, ion.NeutralTheoreticalProduct.AminoAcidPosition, ion.NeutralTheoreticalProduct.NeutralLoss);
            copies.Add(new MatchedFragmentIon(product, ion.Mz, ion.Intensity / matchedIonIntensitySum, ion.Charge));
        }
        return copies;
    }

    List<MatchedFragmentIon> fragments = CopyWithScaledIntensity(this.MatchedIons);
    double retentionTime = RetentionTime ?? -1;

    if (BetaPeptideMatchedIons.IsNotNullOrEmpty())
    {
        List<MatchedFragmentIon> betaFragments = CopyWithScaledIntensity(BetaPeptideMatchedIons);
        string uniqueSequence = UniqueSequence ?? FullSequence + BetaPeptideFullSequence;

        // Pass isDecoy explicitly; the constructor default (false) would turn decoy
        // crosslink matches into target library spectra.
        return new CrosslinkLibrarySpectrum(uniqueSequence, PrecursorMz, PrecursorCharge, fragments, retentionTime, betaFragments, isDecoy);
    }

    return (new(this.FullSequence, this.PrecursorMz, this.PrecursorCharge, fragments, retentionTime, isDecoy));
}
}
}
8 changes: 4 additions & 4 deletions mzLib/Proteomics/PSM/PsmTsvHeader_Glyco.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@
{
public enum LocalizationLevel
{
Level1,
Level1b,
Level2,
Level3
Level1 = 0,
Level1b = 1,
Level2 = 2,
Level3 = 3
}
public class PsmTsvHeader_Glyco
{
Expand Down
1 change: 1 addition & 0 deletions mzLib/Readers/Readers.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
<ItemGroup>
<ProjectReference Include="..\MassSpectrometry\MassSpectrometry.csproj" />
<ProjectReference Include="..\MzLibUtil\MzLibUtil.csproj" />
<ProjectReference Include="..\Transcriptomics\Transcriptomics.csproj" />
<ProjectReference Include="..\UsefulProteomicsDatabases\UsefulProteomicsDatabases.csproj" />
</ItemGroup>

Expand Down
19 changes: 10 additions & 9 deletions mzLib/Readers/SearchResults/SpectrumMatchTsvReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -53,14 +53,15 @@ public static List<SpectrumMatchFromTsv> ReadTsv(string filePath, out List<strin
{
switch (filePath.ParseFileType())
{
//case SupportedFileType.osmtsv:
// psms.Add(new OsmFromTsv(line, Split, parsedHeader));
// break;

case SupportedFileType.psmtsv:
case SupportedFileType.IntralinkResults:
default:
psms.Add(new PsmFromTsv(line, Split, parsedHeader));
break;

// TODO: Create an osmtsv case when transcriptomics is merged

default:
throw new ArgumentOutOfRangeException();
}
}
catch (Exception e)
Expand Down Expand Up @@ -123,8 +124,8 @@ public static Dictionary<string, int> ParseHeader(string header)
parsedHeader.Add(SpectrumMatchFromTsvHeader.Name, Array.IndexOf(spl, SpectrumMatchFromTsvHeader.Name));
parsedHeader.Add(SpectrumMatchFromTsvHeader.Description, Array.IndexOf(spl, SpectrumMatchFromTsvHeader.Description));
parsedHeader.Add(SpectrumMatchFromTsvHeader.StartAndEndResiduesInFullSequence, Array.IndexOf(spl, SpectrumMatchFromTsvHeader.StartAndEndResiduesInFullSequence));
parsedHeader.Add(SpectrumMatchFromTsvHeader.NextResidue, Array.IndexOf(spl, SpectrumMatchFromTsvHeader.PreviousResidue));
parsedHeader.Add(SpectrumMatchFromTsvHeader.PreviousResidue, Array.IndexOf(spl, SpectrumMatchFromTsvHeader.NumExperimentalPeaks));
parsedHeader.Add(SpectrumMatchFromTsvHeader.NextResidue, Array.IndexOf(spl, SpectrumMatchFromTsvHeader.NextResidue));
parsedHeader.Add(SpectrumMatchFromTsvHeader.PreviousResidue, Array.IndexOf(spl, SpectrumMatchFromTsvHeader.PreviousResidue));
}
else
{
Expand All @@ -134,8 +135,8 @@ public static Dictionary<string, int> ParseHeader(string header)
parsedHeader.Add(SpectrumMatchFromTsvHeader.Name, Array.IndexOf(spl, SpectrumMatchFromTsvHeader.ProteinName));
parsedHeader.Add(SpectrumMatchFromTsvHeader.Description, Array.IndexOf(spl, SpectrumMatchFromTsvHeader.PeptideDescription));
parsedHeader.Add(SpectrumMatchFromTsvHeader.StartAndEndResiduesInFullSequence, Array.IndexOf(spl, SpectrumMatchFromTsvHeader.StartAndEndResiduesInProtein));
parsedHeader.Add(SpectrumMatchFromTsvHeader.NextResidue, Array.IndexOf(spl, SpectrumMatchFromTsvHeader.PreviousAminoAcid));
parsedHeader.Add(SpectrumMatchFromTsvHeader.PreviousResidue, Array.IndexOf(spl, SpectrumMatchFromTsvHeader.NextAminoAcid));
parsedHeader.Add(SpectrumMatchFromTsvHeader.NextResidue, Array.IndexOf(spl, SpectrumMatchFromTsvHeader.NextAminoAcid));
parsedHeader.Add(SpectrumMatchFromTsvHeader.PreviousResidue, Array.IndexOf(spl, SpectrumMatchFromTsvHeader.PreviousAminoAcid));
}

parsedHeader.Add(SpectrumMatchFromTsvHeader.GeneName, Array.IndexOf(spl, SpectrumMatchFromTsvHeader.GeneName));
Expand Down
Loading
Loading