From d147f5421446fdb647158d5aea82f43a4e845dc4 Mon Sep 17 00:00:00 2001 From: Nic Bollis Date: Fri, 24 Nov 2023 08:40:12 -0600 Subject: [PATCH] Above Averaging Algorithm Rework and Crash Fix (#739) --- .../Algorithms/SpectraFileAveraging.cs | 122 ++++++-------- .../Enums/SpectraFileAveragingType.cs | 1 - .../TestAveragingSpectraWriteFile.cs | 2 +- .../TestSpectraFileAveraging.cs | 159 ++++++------------ 4 files changed, 107 insertions(+), 177 deletions(-) diff --git a/mzLib/SpectralAveraging/Algorithms/SpectraFileAveraging.cs b/mzLib/SpectralAveraging/Algorithms/SpectraFileAveraging.cs index 6c51d050e..28a573ac8 100644 --- a/mzLib/SpectralAveraging/Algorithms/SpectraFileAveraging.cs +++ b/mzLib/SpectralAveraging/Algorithms/SpectraFileAveraging.cs @@ -1,7 +1,6 @@ using System.Collections.Generic; using System.Linq; using MassSpectrometry; -using MathNet.Numerics.Statistics; using MzLibUtil; namespace SpectralAveraging; @@ -34,10 +33,6 @@ public static MsDataScan[] AverageSpectraFile(List scans, SpectralAv return AverageEverynScans(scans, parameters); case SpectraFileAveragingType.AverageDdaScans: - parameters.ScanOverlap = 0; - return AverageDdaScans(scans, parameters); - - case SpectraFileAveragingType.AverageDdaScansWithOverlap: return AverageDdaScans(scans, parameters); default: throw new MzLibException("Averaging spectra file processing type not yet implemented"); @@ -68,7 +63,6 @@ private static MsDataScan[] AverageAll(IReadOnlyCollection scans, Sp private static MsDataScan[] AverageEverynScans(List scans, SpectralAveragingParameters parameters) { List averagedScans = new(); - var scanNumberIndex = 1; for (var i = 0; i < scans.Count; i += parameters.NumberOfScansToAverage - parameters.ScanOverlap) { // get the scans to be averaged @@ -81,19 +75,14 @@ private static MsDataScan[] AverageEverynScans(List scans, SpectralA scansToProcess = scans.GetRange(i, parameters.NumberOfScansToAverage); // average scans - var representativeScan = scansToProcess.First(); + int middleIndex = scansToProcess.Count / 2; + MsDataScan representativeScan = scansToProcess.Count % 2 == 0 ? + scansToProcess[middleIndex - 1] : + scansToProcess[middleIndex]; + var averagedSpectrum = scansToProcess.AverageSpectra(parameters); - MsDataScan averagedScan = new(averagedSpectrum, scanNumberIndex, 1, - representativeScan.IsCentroid, representativeScan.Polarity, - scansToProcess.Select(p => p.RetentionTime).Minimum(), - averagedSpectrum.Range, null, representativeScan.MzAnalyzer, - scansToProcess.Select(p => p.TotalIonCurrent).Average(), - scansToProcess.Select(p => p.InjectionTime).Average(), null, representativeScan.NativeId); - var newNativeId = - averagedScan.NativeId.Replace(averagedScan.NativeId.Split("=").Last(), scanNumberIndex.ToString()); - averagedScan.SetNativeID(newNativeId); + MsDataScan averagedScan = GetAveragedDataScanFromAveragedSpectrum(averagedSpectrum, representativeScan); averagedScans.Add(averagedScan); - scanNumberIndex++; } return averagedScans.ToArray(); @@ -102,64 +91,61 @@ private static MsDataScan[] AverageEverynScans(List scans, SpectralA private static MsDataScan[] AverageDdaScans(List scans, SpectralAveragingParameters parameters) { List averagedScans = new(); - var ms1Scans = scans.Where(p => p.MsnOrder == 1).ToList(); - var ms2Scans = scans.Where(p => p.MsnOrder == 2).ToList(); List scansToProcess = new(); - var scanNumberIndex = 1; - for (var i = 0; i < ms1Scans.Count; i += parameters.NumberOfScansToAverage - parameters.ScanOverlap) + int representativeScanMs1Index = parameters.NumberOfScansToAverage % 2 == 0 ? // central scan + parameters.NumberOfScansToAverage / 2 - 1 : parameters.NumberOfScansToAverage / 2; + + // iterate through all MS1 scans and average them + foreach (var scan in scans.Where(p => p.MsnOrder == 1)) { - // get the scans to be averaged - scansToProcess.Clear(); - IEnumerable ms2ScansFromAveragedScans; - if (i + parameters.NumberOfScansToAverage > ms1Scans.Count) // very end of the file - break; + scansToProcess.Add(scan); + // average with new scan from iteration, then remove first scan from list + if (scansToProcess.Count != parameters.NumberOfScansToAverage) continue; - scansToProcess = ms1Scans.GetRange(i, parameters.NumberOfScansToAverage); - // if next iteration breaks the loop (end of file), then add the rest of the MS2's - if (i + parameters.NumberOfScansToAverage - parameters.ScanOverlap + parameters.NumberOfScansToAverage > - ms1Scans.Count) - ms2ScansFromAveragedScans = ms2Scans.Where(p => - scansToProcess.Any(m => m.OneBasedScanNumber == p.OneBasedPrecursorScanNumber)); - // if not, add MS2 scans from MS1's that will not be averaged in the next iteration - else - ms2ScansFromAveragedScans = ms2Scans.Where(p => - scansToProcess.GetRange(0, parameters.NumberOfScansToAverage - parameters.ScanOverlap) - .Any(m => m.OneBasedScanNumber == p.OneBasedPrecursorScanNumber)); - - // average scans and add to averaged list - var representativeScan = scansToProcess.First(); + MsDataScan centralScan = scansToProcess[representativeScanMs1Index]; var averagedSpectrum = scansToProcess.AverageSpectra(parameters); - MsDataScan averagedScan = new(averagedSpectrum, scanNumberIndex, 1, - representativeScan.IsCentroid, representativeScan.Polarity, representativeScan.RetentionTime, - averagedSpectrum.Range, null, representativeScan.MzAnalyzer, - scansToProcess.Select(p => p.TotalIonCurrent).Average(), - scansToProcess.Select(p => p.InjectionTime).Average(), representativeScan.NoiseData, - representativeScan.NativeId, representativeScan.SelectedIonMZ, - representativeScan.SelectedIonChargeStateGuess, - representativeScan.SelectedIonIntensity, representativeScan.IsolationMz, - representativeScan.IsolationWidth, - representativeScan.DissociationType, representativeScan.OneBasedPrecursorScanNumber, - representativeScan.SelectedIonMonoisotopicGuessIntensity); - var newNativeId = - averagedScan.NativeId.Replace(averagedScan.NativeId.Split("=").Last(), scanNumberIndex.ToString()); - averagedScan.SetNativeID(newNativeId); + var averagedScan = GetAveragedDataScanFromAveragedSpectrum(averagedSpectrum, centralScan); + averagedScans.Add(averagedScan); - var precursorScanIndex = scanNumberIndex; - scanNumberIndex++; - - foreach (var scan in ms2ScansFromAveragedScans) - { - newNativeId = - scan.NativeId.Replace(scan.NativeId.Split("=").Last(), scanNumberIndex.ToString()); - scan.SetNativeID(newNativeId); - scan.SetOneBasedScanNumber(scanNumberIndex); - scan.SetOneBasedPrecursorScanNumber(precursorScanIndex); - averagedScans.Add(scan); - scanNumberIndex++; - } + scansToProcess.RemoveAt(0); } + + // add all scans that did not get averaged + // this includes the MS1 scans from start and end of file and all MS2+ scans + foreach (var unaveragedScan in scans.Where(original => + !averagedScans.Select(avg => avg.OneBasedScanNumber).Contains(original.OneBasedScanNumber))) + averagedScans.Add(unaveragedScan); + + return averagedScans.OrderBy(p => p.OneBasedScanNumber).ToArray(); + } - return averagedScans.ToArray(); + private static MsDataScan GetAveragedDataScanFromAveragedSpectrum(MzSpectrum averagedSpectrum, + MsDataScan centralScan) + { + MsDataScan averagedScan = new(averagedSpectrum, + centralScan.OneBasedScanNumber, + 1, + centralScan.IsCentroid, + centralScan.Polarity, + centralScan.RetentionTime, + averagedSpectrum.Range, null, + centralScan.MzAnalyzer, + averagedSpectrum.SumOfAllY, + centralScan.InjectionTime, + centralScan.NoiseData, + centralScan.NativeId, + centralScan.SelectedIonMZ, + centralScan.SelectedIonChargeStateGuess, + centralScan.SelectedIonIntensity, + centralScan.IsolationMz, + centralScan.IsolationWidth, + centralScan.DissociationType, + centralScan.OneBasedPrecursorScanNumber, + centralScan.SelectedIonMonoisotopicGuessIntensity); + var newNativeId = + averagedScan.NativeId.Replace(averagedScan.NativeId.Split("=").Last(), centralScan.OneBasedScanNumber.ToString()); + averagedScan.SetNativeID(newNativeId); + return averagedScan; } } \ No newline at end of file diff --git a/mzLib/SpectralAveraging/DataStructures/Enums/SpectraFileAveragingType.cs b/mzLib/SpectralAveraging/DataStructures/Enums/SpectraFileAveragingType.cs index cf5895d40..00a6e7ec9 100644 --- a/mzLib/SpectralAveraging/DataStructures/Enums/SpectraFileAveragingType.cs +++ b/mzLib/SpectralAveraging/DataStructures/Enums/SpectraFileAveragingType.cs @@ -6,5 +6,4 @@ public enum SpectraFileAveragingType AverageEverynScans, AverageEverynScansWithOverlap, AverageDdaScans, - AverageDdaScansWithOverlap } \ No newline at end of file diff --git a/mzLib/Test/AveragingTests/TestAveragingSpectraWriteFile.cs b/mzLib/Test/AveragingTests/TestAveragingSpectraWriteFile.cs index c1387e273..511eeb35c 100644 --- a/mzLib/Test/AveragingTests/TestAveragingSpectraWriteFile.cs +++ b/mzLib/Test/AveragingTests/TestAveragingSpectraWriteFile.cs @@ -29,7 +29,7 @@ public static void OneTimeSetup() SpectraPath = Path.Combine(OutputDirectory, "TDYeastFractionMS1.mzML"); Scans = MsDataFileReader.GetDataFile(SpectraPath).GetAllScansList().Take(50).ToList(); - Parameters.SpectraFileAveragingType = SpectraFileAveragingType.AverageDdaScansWithOverlap; + Parameters.SpectraFileAveragingType = SpectraFileAveragingType.AverageDdaScans; DdaCompositeSpectra = SpectraFileAveraging.AverageSpectraFile(Scans, Parameters); Assert.That(DdaCompositeSpectra.Length > 1); } diff --git a/mzLib/Test/AveragingTests/TestSpectraFileAveraging.cs b/mzLib/Test/AveragingTests/TestSpectraFileAveraging.cs index 0009a187d..3fb4c8428 100644 --- a/mzLib/Test/AveragingTests/TestSpectraFileAveraging.cs +++ b/mzLib/Test/AveragingTests/TestSpectraFileAveraging.cs @@ -266,7 +266,6 @@ public static void OneTimeSetup() NativeId = ActualScans.First().NativeId; } - [Test] public static void TestAverageAll() { @@ -360,125 +359,71 @@ public static void TestAverageEveryNScansWithOverlap() } [Test] - public static void TestAverageDDAScansWithoutOverlapInOrder() + [TestCase(5, nameof(DummyDDAScansOutOfOrder))] + [TestCase(4, nameof(DummyDDAScansOutOfOrder))] + [TestCase(9, nameof(DummyDDAScansOutOfOrder))] + [TestCase(5, nameof(DummyDDAScansInOrder))] + [TestCase(4, nameof(DummyDDAScansInOrder))] + [TestCase(5, nameof(DummyAllMs1Scans))] + [TestCase(4, nameof(DummyAllMs1Scans))] + [TestCase(5, nameof(ActualScans))] + public static void TestAverageDdaScans(int numScansToAverage, string listPropertyName) { + var scansToAverage = typeof(TestSpectraFileAveraging).GetProperty(listPropertyName)?.GetValue(null) as List; SpectralAveragingParameters.SpectraFileAveragingType = SpectraFileAveragingType.AverageDdaScans; - SpectralAveragingParameters.NumberOfScansToAverage = 2; - MsDataScan[] averagedScans = SpectraFileAveraging.AverageSpectraFile(DummyDDAScansInOrder, SpectralAveragingParameters); - double[] expected = new double[] { 5, 10 }; - Assert.That(averagedScans.Count(p => p.MsnOrder == 1) == 2); - Assert.That(averagedScans.Count(p => p.MsnOrder == 2) == 12); - Assert.That(averagedScans[0].MassSpectrum.YArray.SequenceEqual(expected)); - int?[] expectedNullable = new int?[] { null, 1, 1, 1, 1, 1, 1, null, 8, 8, 8, 8, 8, 8 }; - Assert.That(averagedScans.Select(p => p.OneBasedPrecursorScanNumber).ToArray().SequenceEqual(expectedNullable)); + SpectralAveragingParameters.NumberOfScansToAverage = numScansToAverage; + + int ms1Count = scansToAverage.Count(p => p.MsnOrder == 1); + int ms2Count = scansToAverage.Count(p => p.MsnOrder == 2); + int?[] precursorScanNumbers = scansToAverage.Select(p => p.OneBasedPrecursorScanNumber).ToArray(); + int[] scanNumbers = scansToAverage.Select(p => p.OneBasedScanNumber).ToArray(); + + MsDataScan[] averagedScans = SpectraFileAveraging.AverageSpectraFile(scansToAverage, SpectralAveragingParameters); + + // Assert that there are the same number of ms1 and ms2 scans, and that they are in the same order + Assert.That(averagedScans.Count(p => p.MsnOrder == 1), Is.EqualTo(ms1Count)); + Assert.That(averagedScans.Count(p => p.MsnOrder == 2), Is.EqualTo(ms2Count)); + CollectionAssert.AreEquivalent(precursorScanNumbers, averagedScans.Select(p => p.OneBasedPrecursorScanNumber)); + CollectionAssert.AreEquivalent(scanNumbers, averagedScans.Select(p => p.OneBasedScanNumber)); + + // ensure no duplicates in scan numbers and precursor scan numbers + CollectionAssert.AllItemsAreUnique(averagedScans.Select(p => p.OneBasedScanNumber)); + CollectionAssert.AllItemsAreUnique(averagedScans.Select(p => (p.OneBasedScanNumber, p.OneBasedPrecursorScanNumber))); } [Test] - public static void TestAverageDDAScansWithoutOverlapOutOfOrder() + public static void TestOnRealData() { + string datapath = Path.Combine(TestContext.CurrentContext.TestDirectory, "DataFiles", + "SmallCalibratibleYeast.mzml"); + var scansToAverage = MsDataFileReader.GetDataFile(datapath).GetAllScansList(); SpectralAveragingParameters.SpectraFileAveragingType = SpectraFileAveragingType.AverageDdaScans; - SpectralAveragingParameters.NumberOfScansToAverage = 2; - MsDataScan[] averagedScans = SpectraFileAveraging.AverageSpectraFile(DummyDDAScansOutOfOrder, SpectralAveragingParameters); - double[] expected = new double[] { 5, 10 }; - Assert.That(averagedScans.Count(p => p.MsnOrder == 1) == 2); - Assert.That(averagedScans.Count(p => p.MsnOrder == 2) == 11); - Assert.That(averagedScans[0].MassSpectrum.YArray.SequenceEqual(expected)); - int?[] expectedNullable = new int?[] { null, 1, 1, 1, 1, 1, 1, null, 8, 8, 8, 8, 8 }; - Assert.That(averagedScans.Select(p => p.OneBasedPrecursorScanNumber).ToArray().SequenceEqual(expectedNullable)); - } + SpectralAveragingParameters.NumberOfScansToAverage = 5; - [Test] - public static void TestAverageDDAScansWithOverlapInOrder() - { - SpectralAveragingParameters.SpectraFileAveragingType = SpectraFileAveragingType.AverageDdaScansWithOverlap; - SpectralAveragingParameters.NumberOfScansToAverage = 2; - SpectralAveragingParameters.ScanOverlap = 1; - MsDataScan[] averagedScans = SpectraFileAveraging.AverageSpectraFile(DummyDDAScansInOrder, SpectralAveragingParameters); - double[] expected = new double[] { 5, 10 }; - Assert.That(averagedScans.Count(p => p.MsnOrder == 1) == 4); - Assert.That(averagedScans.Count(p => p.MsnOrder == 2) == 15); - Assert.That(averagedScans[0].MassSpectrum.YArray.SequenceEqual(expected)); - expected = new double[] { 2.5, 5 }; - //Assert.AreEqual(expected, averagedScans.Last(p => p.MsnOrder == 1).MassSpectrum.YArray); - int?[] expectedNullable = new int?[] - { - null, 1, 1, 1, null, 5, 5, 5, null, 9, 9, 9, null, 13, 13, 13, 13, 13, 13 - }; - Assert.That(averagedScans.Select(p => p.OneBasedPrecursorScanNumber).ToArray().SequenceEqual(expectedNullable)); + int ms1Count = scansToAverage.Count(p => p.MsnOrder == 1); + int ms2Count = scansToAverage.Count(p => p.MsnOrder == 2); + int?[] precursorScanNumbers = scansToAverage.Select(p => p.OneBasedPrecursorScanNumber).ToArray(); + int[] scanNumbers = scansToAverage.Select(p => p.OneBasedScanNumber).ToArray(); - SpectralAveragingParameters.NumberOfScansToAverage = 3; - averagedScans = SpectraFileAveraging.AverageSpectraFile(DummyDDAScansInOrder, SpectralAveragingParameters); - expected = new double[] { 5, 10 }; - Assert.That(averagedScans.Count(p => p.MsnOrder == 1) == 2); - Assert.That(averagedScans.Count(p => p.MsnOrder == 2) == 15); - Assert.That(averagedScans[0].MassSpectrum.YArray.SequenceEqual(expected)); - expected = new double[] { 5.0 * (2.0 / 3.0), 10.0 * (2.0 / 3.0) }; - Assert.AreEqual(expected.Select(p => Math.Round(p, 4)), averagedScans.Last(p => p.MsnOrder == 1).MassSpectrum.YArray.Select(p => Math.Round(p, 4))); - expectedNullable = new int?[] - { - null, 1, 1, 1, 1, 1, 1, null, 8, 8, 8, 8, 8, 8, 8, 8, 8 - }; - Assert.That(averagedScans.Select(p => p.OneBasedPrecursorScanNumber).ToArray().SequenceEqual(expectedNullable)); + MsDataScan[] averagedScans = SpectraFileAveraging.AverageSpectraFile(scansToAverage, SpectralAveragingParameters); - SpectralAveragingParameters.NumberOfScansToAverage = 3; - SpectralAveragingParameters.ScanOverlap = 2; - averagedScans = SpectraFileAveraging.AverageSpectraFile(DummyDDAScansInOrder, SpectralAveragingParameters); - expected = new double[] { 5, 10 }; - Assert.That(averagedScans.Count(p => p.MsnOrder == 1) == 3); - Assert.That(averagedScans.Count(p => p.MsnOrder == 2) == 15); - Assert.That(averagedScans[0].MassSpectrum.YArray.SequenceEqual(expected)); - expected = new double[] { 5.0 * (2.0 / 3.0), 10.0 * (2.0 / 3.0) }; - Assert.AreEqual(expected.Select(p => Math.Round(p, 4)), averagedScans.Last(p => p.MsnOrder == 1).MassSpectrum.YArray.Select(p => Math.Round(p, 4))); - expectedNullable = new int?[] - { - null, 1, 1, 1, null, 5, 5, 5, null, 9, 9, 9, 9, 9, 9, 9, 9, 9 - }; - Assert.AreEqual(expectedNullable, averagedScans.Select(p => p.OneBasedPrecursorScanNumber).ToArray()); - } + // Assert that there are the same number of ms1 and ms2 scans, and that they are in the same order + Assert.That(averagedScans.Count(p => p.MsnOrder == 1), Is.EqualTo(ms1Count)); + Assert.That(averagedScans.Count(p => p.MsnOrder == 2), Is.EqualTo(ms2Count)); + CollectionAssert.AreEquivalent(precursorScanNumbers, averagedScans.Select(p => p.OneBasedPrecursorScanNumber)); + CollectionAssert.AreEquivalent(scanNumbers, averagedScans.Select(p => p.OneBasedScanNumber)); - [Test] - public static void TestAverageDDAScansWithOverlapOutOfOrder() - { - SpectralAveragingParameters.SpectraFileAveragingType = SpectraFileAveragingType.AverageDdaScansWithOverlap; - SpectralAveragingParameters.NumberOfScansToAverage = 2; - SpectralAveragingParameters.ScanOverlap = 1; - MsDataScan[] averagedScans = SpectraFileAveraging.AverageSpectraFile(DummyDDAScansOutOfOrder, SpectralAveragingParameters); - double[] expected = new double[] { 5, 10 }; - Assert.That(averagedScans.Count(p => p.MsnOrder == 1) == 4); - Assert.That(averagedScans.Count(p => p.MsnOrder == 2) == 15); - Assert.That(averagedScans[0].MassSpectrum.YArray.SequenceEqual(expected)); - Assert.AreEqual(expected, averagedScans.Last(p => p.MsnOrder == 1).MassSpectrum.YArray); - int?[] expectedNullable = new int?[] - { - null, 1, 1, 1, null, 5, 5, 5, null, 9, 9, 9, null, 13, 13, 13, 13, 13, 13 - }; - Assert.That(averagedScans.Select(p => p.OneBasedPrecursorScanNumber).ToArray().SequenceEqual(expectedNullable)); + // ensure no duplicates in scan numbers and precursor scan numbers + CollectionAssert.AllItemsAreUnique(averagedScans.Select(p => p.OneBasedScanNumber)); + CollectionAssert.AllItemsAreUnique(averagedScans.Select(p =>(p.OneBasedScanNumber, p.OneBasedPrecursorScanNumber))); - SpectralAveragingParameters.NumberOfScansToAverage = 3; - averagedScans = SpectraFileAveraging.AverageSpectraFile(DummyDDAScansOutOfOrder, SpectralAveragingParameters); - expected = new double[] { 5.0 * (2.0 / 3.0), 10.0 * (2.0 / 3.0) }; - Assert.That(averagedScans.Count(p => p.MsnOrder == 1) == 2); - Assert.That(averagedScans.Count(p => p.MsnOrder == 2) == 15); - Assert.AreEqual(expected.Select(p => Math.Round(p, 4)), averagedScans[0].MassSpectrum.YArray.Select(p => Math.Round(p, 4))); - Assert.AreEqual(expected.Select(p => Math.Round(p, 4)), averagedScans.Last(p => p.MsnOrder == 1).MassSpectrum.YArray.Select(p => Math.Round(p, 4))); - expectedNullable = new int?[] + // ensure retention times are sequentially increasing + double previousRt = 0; + foreach (var rt in averagedScans.Select(p => p.RetentionTime)) { - null, 1, 1, 1, 1, 1, 1, null, 8, 8, 8, 8, 8, 8, 8, 8, 8 - }; - Assert.That(averagedScans.Select(p => p.OneBasedPrecursorScanNumber).ToArray().SequenceEqual(expectedNullable)); - - SpectralAveragingParameters.NumberOfScansToAverage = 3; - SpectralAveragingParameters.ScanOverlap = 2; - averagedScans = SpectraFileAveraging.AverageSpectraFile(DummyDDAScansOutOfOrder, SpectralAveragingParameters); - Assert.That(averagedScans.Count(p => p.MsnOrder == 1) == 3); - Assert.That(averagedScans.Count(p => p.MsnOrder == 2) == 15); - Assert.AreEqual(expected.Select(p => Math.Round(p, 4)), averagedScans[0].MassSpectrum.YArray.Select(p => Math.Round(p, 4))); - Assert.AreEqual(expected.Select(p => Math.Round(p, 4)), averagedScans.Last(p => p.MsnOrder == 1).MassSpectrum.YArray.Select(p => Math.Round(p, 4))); - expectedNullable = new int?[] - { - null, 1, 1, 1, null, 5, 5, 5, null, 9, 9, 9, 9, 9, 9, 9, 9, 9 - }; - Assert.AreEqual(expectedNullable, averagedScans.Select(p => p.OneBasedPrecursorScanNumber).ToArray()); + Assert.That(rt > previousRt); + previousRt = rt; + } } } }