Skip to content

Commit

Permalink
Merge branch 'master' into GetClosestOneBasedScanRefactor
Browse files Browse the repository at this point in the history
  • Loading branch information
elaboy authored Aug 30, 2023
2 parents ba41098 + d905341 commit fc3a51f
Show file tree
Hide file tree
Showing 7 changed files with 70 additions and 39 deletions.
19 changes: 19 additions & 0 deletions mzLib/FlashLFQ/ChromatographicPeak.cs
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,25 @@ public override string ToString()
sb.Append(string.Join("|", Identifications.Select(p => p.BaseSequence).Distinct()) + '\t');
sb.Append(string.Join("|", Identifications.Select(p => p.ModifiedSequence).Distinct()) + '\t');

//The semi-colon here splitting the protein groups requires some explanation
//During protein parsimony, you can get situations where all peptides are shared between two or more proteins. In other words, there is no unique peptide that could resolve the parsimony.
//In this case you would see something like P00001 | P00002.

//That’s the easy part and you already understand that.

// Now imagine another scenario where you have some other peptides (that are not in either P00001 or P00002) that give you a second group, like the one above. Let’s call it P00003 | P00004.
// Everything is still fine here.

// Now you have two protein groups each with two proteins.

// Here is where the semi-colon comes in.
//Imagine you now find a new peptide (totally different from any of the peptides used to create the two original protein groups) that is shared across all four proteins. The original peptides
//require that two different protein groups exist, but this new peptide could come from either or both. We don’t know. So, the quantification of that peptide must be allowed to be
//assigned to either or both groups. For this peptide, the protein accession in the output will be P00001 | P00002; P00003 | P00004.

// You could see an output that looks like P0000A; P0000B. Here there is only one protein in each protein group (as decided by parsimony), and you have a peptide that is shared. This would
// never be reported as P0000A | P0000B because each protein has a unique peptide that confirms its existence.

var t = Identifications.SelectMany(p => p.ProteinGroups.Select(v => v.ProteinGroupName)).Distinct().OrderBy(p => p);
if (t.Any())
{
Expand Down
57 changes: 24 additions & 33 deletions mzLib/MassSpectrometry/MsDataFile.cs
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,9 @@ namespace MassSpectrometry
/// <summary>
/// A class for interacting with data collected from a Mass Spectrometer, and stored in a file
/// </summary>
public abstract class MsDataFile
public abstract class MsDataFile : IEnumerable<MsDataScan>

Check failure on line 32 in mzLib/MassSpectrometry/MsDataFile.cs

View workflow job for this annotation

GitHub Actions / build

'MsDataFile' does not implement interface member 'IEnumerable.GetEnumerator()'. 'MsDataFile.GetEnumerator()' cannot implement 'IEnumerable.GetEnumerator()' because it does not have the matching return type of 'IEnumerator'.

Check failure on line 32 in mzLib/MassSpectrometry/MsDataFile.cs

View workflow job for this annotation

GitHub Actions / build

'MsDataFile' does not implement interface member 'IEnumerable.GetEnumerator()'. 'MsDataFile.GetEnumerator()' cannot implement 'IEnumerable.GetEnumerator()' because it does not have the matching return type of 'IEnumerator'.
{
public MsDataScan[] Scans { get; set; }
public MsDataScan[] Scans { get; protected set; }
public SourceFile SourceFile { get; set; }
public int NumSpectra => Scans.Length;
public string FilePath { get; }
Expand Down Expand Up @@ -73,33 +73,28 @@ public abstract MsDataScan GetOneBasedScanFromDynamicConnection(int oneBasedScan
/// <summary>
/// Returns the <see cref="Scans"/> array, first calling <see cref="LoadAllStaticData"/>
/// when <see cref="CheckIfScansLoaded"/> reports that no scans are present.
/// </summary>
/// <returns>The array held by <see cref="Scans"/>.</returns>
public virtual MsDataScan[] GetMsDataScans()
{
    bool scansAvailable = CheckIfScansLoaded();
    if (scansAvailable == false)
    {
        LoadAllStaticData();
    }

    return Scans;
}

/// <summary>
/// Returns a new list containing the elements of <see cref="Scans"/>, first calling
/// <see cref="LoadAllStaticData"/> when <see cref="CheckIfScansLoaded"/> reports that
/// no scans are present.
/// </summary>
/// <returns>A freshly allocated list copied from <see cref="Scans"/>.</returns>
public virtual List<MsDataScan> GetAllScansList()
{
    bool scansAvailable = CheckIfScansLoaded();
    if (scansAvailable == false)
    {
        LoadAllStaticData();
    }

    // Copy into a new list so callers never receive the backing array itself.
    var scanList = new List<MsDataScan>(Scans);
    return scanList;
}

public virtual IEnumerable<MsDataScan> GetMS1Scans()
{
if (!CheckIfScansLoaded())
{
LoadAllStaticData();
}

LoadAllStaticData();

for (int i = 1; i <= NumSpectra; i++)
{
var scan = GetOneBasedScan(i);
if (scan != null && scan.MsnOrder == 1)
if (scan.MsnOrder == 1)
{
yield return scan;
}
Expand All @@ -109,35 +104,26 @@ public virtual IEnumerable<MsDataScan> GetMS1Scans()
public virtual MsDataScan GetOneBasedScan(int scanNumber)
{
if (!CheckIfScansLoaded())
{
LoadAllStaticData();
}

LoadAllStaticData();

return Scans.SingleOrDefault(i => i.OneBasedScanNumber == scanNumber);
}

public virtual IEnumerable<MsDataScan> GetMsScansInIndexRange(int firstSpectrumNumber, int lastSpectrumNumber)
{
if (!CheckIfScansLoaded())
{
LoadAllStaticData();
}


for (int oneBasedSpectrumNumber = firstSpectrumNumber;
oneBasedSpectrumNumber <= lastSpectrumNumber;
oneBasedSpectrumNumber++)
{
oneBasedSpectrumNumber <= lastSpectrumNumber; oneBasedSpectrumNumber++)
yield return GetOneBasedScan(oneBasedSpectrumNumber);
}
}

public virtual IEnumerable<MsDataScan> GetMsScansInTimeRange(double firstRT, double lastRT)
{
if (!CheckIfScansLoaded())
{
LoadAllStaticData();
}


int oneBasedSpectrumNumber = GetClosestOneBasedSpectrumNumber(firstRT) + 1;

while (oneBasedSpectrumNumber < NumSpectra + 1)
Expand Down Expand Up @@ -173,20 +159,13 @@ public virtual IEnumerable<MsDataScan> GetMsScansInTimeRange(double firstRT, dou
/// <summary>
/// Looks up the scan whose retention time is closest to <paramref name="retentionTime"/>.
/// Calls <see cref="LoadAllStaticData"/> first when <see cref="CheckIfScansLoaded"/>
/// reports that no scans are present.
/// </summary>
/// <param name="retentionTime">Retention time to search for.</param>
/// <returns>
/// The value returned by <c>ClassExtensions.GetClosestIndex</c> over the retention times
/// of <see cref="Scans"/>, passed through unchanged.
/// NOTE(review): despite the method name, the result looks like a zero-based index
/// (a caller in this file adds 1 to it) - confirm against GetClosestIndex.
/// </returns>
public virtual int GetClosestOneBasedSpectrumNumber(double retentionTime)
{
    bool scansAvailable = CheckIfScansLoaded();
    if (scansAvailable == false)
    {
        LoadAllStaticData();
    }

    // Project every scan to its retention time, in scan order, then search that array.
    var retentionTimes = Scans.Select(scan => scan.RetentionTime).ToArray();
    return ClassExtensions.GetClosestIndex(retentionTimes, retentionTime);
}

public virtual IEnumerator<MsDataScan> GetEnumerator()
{
return GetMsScansInIndexRange(1, NumSpectra).GetEnumerator();
}

public virtual int[] GetMsOrderByScanInDynamicConnection()
{
throw new NotImplementedException();
Expand All @@ -198,5 +177,17 @@ public virtual bool CheckIfScansLoaded()
{
return (Scans != null && Scans.Length > 0);
}

/// <summary>
/// Enumerates the entries of <see cref="Scans"/>, skipping any null entries.
/// </summary>
/// <returns>An enumerator over the non-null scans, in array order.</returns>
public IEnumerator<MsDataScan> GetEnumerator()
{
    // The array may contain null placeholders; filter them out so consumers only see real scans.
    IEnumerable<MsDataScan> nonNullScans = Scans.Where(candidate => candidate is not null);
    return nonNullScans.GetEnumerator();
}

/// <summary>
/// Non-generic enumerator required by <see cref="System.Collections.IEnumerable"/>;
/// forwards to the generic <see cref="GetEnumerator"/>.
/// </summary>
/// <remarks>
/// The interface and return type are fully qualified on purpose: without a
/// <c>using System.Collections;</c> directive in this file, the bare names
/// <c>IEnumerable</c>/<c>IEnumerator</c> bind to the generic types from
/// System.Collections.Generic and the build fails ("requires 1 type arguments",
/// "'IEnumerable' in explicit interface declaration is not an interface").
/// </remarks>
/// <returns>The same enumerator the generic overload returns.</returns>
System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator()
{
    return GetEnumerator();
}
}


}
1 change: 1 addition & 0 deletions mzLib/MassSpectrometry/MsDataScan.cs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

using MzLibUtil;
using System;
using System.Collections;
using System.Collections.Generic;
using System.Linq;

Expand Down
27 changes: 24 additions & 3 deletions mzLib/Readers/Mgf/Mgf.cs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,12 @@ namespace Readers
{
public class Mgf : MsDataFile
{
public Mgf(string filePath) : base(filePath) { }

protected MsDataScan[] IndexedScans { get; set; }
public Mgf(string filePath) : base(filePath)
{

}

public override MsDataFile LoadAllStaticData(FilteringParams filterParams = null, int maxThreads = 1)
{
Expand Down Expand Up @@ -56,8 +61,19 @@ public override MsDataFile LoadAllStaticData(FilteringParams filterParams = null
}
}
}
SourceFile = GetSourceFile();
Scans = scans.OrderBy(x => x.OneBasedScanNumber).ToArray();

SourceFile = GetSourceFile();

// ensures that if a scan (OneBasedScanNumber) does not exist,
// the final scans array will contain a null value
// this unique case is due to the nature of loading MGF files
var orderedScans = scans.OrderBy(x => x.OneBasedScanNumber).ToArray();
var indexedScans = new MsDataScan[orderedScans[^1].OneBasedScanNumber];
foreach (var scan in orderedScans)
indexedScans[scan.OneBasedScanNumber - 1] = scan;

IndexedScans = indexedScans;
Scans = orderedScans;
return this;
}

Expand All @@ -66,6 +82,11 @@ public override SourceFile GetSourceFile()
return new SourceFile("no nativeID format", "mgf format", null, null, null);
}

/// <summary>
/// Returns the scan with the given one-based scan number by direct lookup in
/// <see cref="IndexedScans"/>, where position <c>scanNumber - 1</c> holds that scan
/// or null when the MGF file has no scan with that number.
/// </summary>
/// <param name="scanNumber">One-based scan number to look up.</param>
/// <returns>
/// The matching scan, or null when no scan with that number exists, including
/// numbers below 1 or beyond the highest scan number in the file.
/// </returns>
public override MsDataScan GetOneBasedScan(int scanNumber)
{
    // IndexedScans is only populated by LoadAllStaticData; load on demand, as the
    // base-class implementation does, instead of failing with a NullReferenceException.
    if (IndexedScans is null)
    {
        LoadAllStaticData();
    }

    // Out-of-range numbers mean "no such scan": return null rather than throwing
    // IndexOutOfRangeException, matching the base class's SingleOrDefault lookup.
    int zeroBasedIndex = scanNumber - 1;
    if (zeroBasedIndex < 0 || zeroBasedIndex >= IndexedScans.Length)
    {
        return null;
    }

    return IndexedScans[zeroBasedIndex];
}

public override MsDataScan GetOneBasedScanFromDynamicConnection(int scanNumber, IFilteringParams filterParams = null)
{
if (_streamReader == null)
Expand Down
1 change: 0 additions & 1 deletion mzLib/Test/FileReadingTests/FakeMsDataFile.cs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ public class FakeMsDataFile : MsDataFile
{
public FakeMsDataFile(MsDataScan[] FakeScans) : base(FakeScans, new SourceFile(@"scan number only nativeID format", "mzML format", null, "SHA-1", @"C:\fake.mzML", null))
{
this.Scans = FakeScans;
}

public int GetClosestOneBasedSpectrumNumber(double retentionTime)
Expand Down
2 changes: 1 addition & 1 deletion mzLib/UsefulProteomicsDatabases/ProteinDbRetriever.cs
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,7 @@ public enum IncludeIsoforms

/// <summary>
/// Columns to select for retrieving results in tab or xls format.
/// https://legacy.uniprot.org/help/uniprotkb_column_names
/// https://www.uniprot.org/help/return_fields
/// </summary>
public enum Columns
{
Expand Down
2 changes: 1 addition & 1 deletion mzLib/mzLib.nuspec
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
<package>
<metadata>
<id>mzLib</id>
<version>5.0.539</version>
<version>5.0.540</version>
<title>mzLib</title>
<authors>Stef S.</authors>
<owners>Stef S.</owners>
Expand Down

0 comments on commit fc3a51f

Please sign in to comment.