Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master'
Browse files Browse the repository at this point in the history
  • Loading branch information
MICHAEL SHORTREED committed Jul 26, 2022
2 parents 28e05ae + 3c77adf commit 0a7c609
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,10 @@ public class PeptideWithSetModifications : ProteolyticPeptide
{
public string FullSequence { get; private set; } //sequence with modifications
public readonly int NumFixedMods;

// Parameter to store a hash code corresponding to a Decoy or a Target peptide
// If the peptide in question is a decoy, this pairs it to the target it was generated from
// If the peptide in question is a target, this pairs it to its corresponding decoy
public int? PairedTargetDecoyHash { get; private set; }
/// <summary>
/// Dictionary of modifications on the peptide. The N terminus is index 1.
/// The key indicates which residue modification is on (with 1 being N terminus).
Expand All @@ -28,14 +31,12 @@ public class PeptideWithSetModifications : ProteolyticPeptide
[NonSerialized] private DigestionParams _digestionParams;
private static readonly double WaterMonoisotopicMass = PeriodicTable.GetElement("H").PrincipalIsotope.AtomicMass * 2 + PeriodicTable.GetElement("O").PrincipalIsotope.AtomicMass;
private readonly string ProteinAccession; // used to get protein object after deserialization


/// <summary>
/// Creates a PeptideWithSetModifications object from a protein. Used when a Protein is digested.
/// </summary>
public PeptideWithSetModifications(Protein protein, DigestionParams digestionParams, int oneBasedStartResidueInProtein,
int oneBasedEndResidueInProtein, CleavageSpecificity cleavageSpecificity, string peptideDescription, int missedCleavages,
Dictionary<int, Modification> allModsOneIsNterminus, int numFixedMods, string baseSequence = null)
Dictionary<int, Modification> allModsOneIsNterminus, int numFixedMods, string baseSequence = null, int? pairedTargetDecoyHash = null)
: base(protein, oneBasedStartResidueInProtein, oneBasedEndResidueInProtein, missedCleavages, cleavageSpecificity, peptideDescription, baseSequence)
{
_allModsOneIsNterminus = allModsOneIsNterminus;
Expand All @@ -44,6 +45,7 @@ public PeptideWithSetModifications(Protein protein, DigestionParams digestionPar
DetermineFullSequence();
ProteinAccession = protein.Accession;
UpdateCleavageSpecificity();
PairedTargetDecoyHash = pairedTargetDecoyHash; // Added PairedTargetDecoyHash as a nullable integer
}

/// <summary>
Expand All @@ -53,7 +55,7 @@ public PeptideWithSetModifications(Protein protein, DigestionParams digestionPar
public PeptideWithSetModifications(string sequence, Dictionary<string, Modification> allKnownMods, int numFixedMods = 0,
DigestionParams digestionParams = null, Protein p = null, int oneBasedStartResidueInProtein = int.MinValue,
int oneBasedEndResidueInProtein = int.MinValue, int missedCleavages = int.MinValue,
CleavageSpecificity cleavageSpecificity = CleavageSpecificity.Full, string peptideDescription = null)
CleavageSpecificity cleavageSpecificity = CleavageSpecificity.Full, string peptideDescription = null, int? pairedTargetDecoyHash = null)
: base(p, oneBasedStartResidueInProtein, oneBasedEndResidueInProtein, missedCleavages, cleavageSpecificity, peptideDescription)
{
if (sequence.Contains("|"))
Expand All @@ -66,6 +68,7 @@ public PeptideWithSetModifications(string sequence, Dictionary<string, Modificat
GetModsAfterDeserialization(allKnownMods);
NumFixedMods = numFixedMods;
_digestionParams = digestionParams;
PairedTargetDecoyHash = pairedTargetDecoyHash; // Added PairedTargetDecoyHash as a nullable integer

if (p != null)
{
Expand Down Expand Up @@ -1132,6 +1135,11 @@ private HashSet<double> AddNeutralLossesFromMods(Modification mod, HashSet<doubl
//Occasionally, this process results in peptide with exactly the same sequence. Therefore, there is a stop-gap measure
//that returns the mirror image of the original. N-terminal mods are preserved, but other mods are also reversed.
//this should yield a unique decoy for each target sequence.
//This function also adds a hash code to both the original PeptideWithSetModifications and the decoy
//generated by this function, pairing the two together by each other's FullSequence.
//The original target peptide is given a hash code corresponding to the decoy's full sequence,
//and the decoy is given a hash code corresponding to the original target peptide's sequence.
//This hash code is stored in the PairedTargetDecoyHash property of PeptideWithSetModifications.
public PeptideWithSetModifications GetReverseDecoyFromTarget(int[] revisedAminoAcidOrder)
{
Dictionary<int, Modification> newModificationsDictionary = new Dictionary<int, Modification>();
Expand Down Expand Up @@ -1217,16 +1225,28 @@ public PeptideWithSetModifications GetReverseDecoyFromTarget(int[] revisedAminoA
Protein decoyProtein = new Protein(proteinSequence, "DECOY_" + this.Protein.Accession, null, new List<Tuple<string, string>>(), new Dictionary<int, List<Modification>>(), null, null, null, true);
DigestionParams d = this.DigestionParams;

// Creates a hash code corresponding to the target's sequence
int targetHash = GetHashCode();
PeptideWithSetModifications decoyPeptide;
//Make the "peptideDescription" store the corresponding target's sequence
if (newBaseString != this.BaseSequence)
{
return new PeptideWithSetModifications(decoyProtein, d, this.OneBasedStartResidueInProtein, this.OneBasedEndResidueInProtein, this.CleavageSpecificityForFdrCategory, this.FullSequence, this.MissedCleavages, newModificationsDictionary, this.NumFixedMods, newBaseString);
decoyPeptide = new PeptideWithSetModifications(decoyProtein, d, this.OneBasedStartResidueInProtein, this.OneBasedEndResidueInProtein, this.CleavageSpecificityForFdrCategory, this.FullSequence, this.MissedCleavages, newModificationsDictionary, this.NumFixedMods, newBaseString);
// Sets PairedTargetDecoyHash of the original target peptide to the hash code of the decoy sequence
PairedTargetDecoyHash = decoyPeptide.GetHashCode();
// Sets PairedTargetDecoyHash of the decoy peptide to the hash code of the target sequence
decoyPeptide.PairedTargetDecoyHash = targetHash;
return decoyPeptide;

}
else
{
//The reverse decoy procedure failed to create a PeptideWithSetModifications with a different sequence. Therefore,
//we return the mirror image peptide.
return this.GetPeptideMirror(revisedAminoAcidOrder);
decoyPeptide = this.GetPeptideMirror(revisedAminoAcidOrder);
PairedTargetDecoyHash = decoyPeptide.GetHashCode();
decoyPeptide.PairedTargetDecoyHash = targetHash;
return decoyPeptide;
}

}
Expand Down Expand Up @@ -1260,6 +1280,7 @@ public PeptideWithSetModifications GetPeptideMirror(int[] revisedOrderNisOne)
proteinSequence = aStringBuilder.ToString();

Protein decoyProtein = new Protein(proteinSequence, "DECOY_" + this.Protein.Accession, null, new List<Tuple<string, string>>(), new Dictionary<int, List<Modification>>(), null, null, null, true);

DigestionParams d = this.DigestionParams;

//now fill in the revised amino acid order
Expand Down
12 changes: 12 additions & 0 deletions mzLib/Test/TestPeptideWithSetMods.cs
Original file line number Diff line number Diff line change
Expand Up @@ -756,6 +756,12 @@ public static void TestReverseDecoyFromTarget()

int[] newAminoAcidPositions = new int["PEPTIDEK".Length];
PeptideWithSetModifications reverse = p.GetReverseDecoyFromTarget(newAminoAcidPositions);
// Hash code corresponding to the target sequence, should be PairedTargetDecoyHash for reverse
int testTargetHash = p.GetHashCode();
// Hash code corresponding to the decoy sequence, should be PairedTargetDecoyHash for target
int testDecoyHash = reverse.GetHashCode();
Assert.AreEqual(reverse.PairedTargetDecoyHash, testTargetHash);
Assert.AreEqual(p.PairedTargetDecoyHash, testDecoyHash);
Assert.AreEqual("EDITPEPK", reverse.BaseSequence);
Assert.AreEqual(new int[] { 6, 5, 4, 3, 2, 1, 0, 7 }, newAminoAcidPositions);
Assert.IsTrue(reverse.Protein.IsDecoy);
Expand Down Expand Up @@ -826,6 +832,12 @@ public static void TestReverseDecoyFromTarget()
newAminoAcidPositions = new int["VTIRTVR".Length];
PeptideWithSetModifications p_tryp = new PeptideWithSetModifications(new Protein("VTIRTVR", "DECOY_TRYP"), new DigestionParams(protease: "trypsin"), 1, 7, CleavageSpecificity.Full, null, 0, VTIRTVR_modsDictionary, 0, null);
PeptideWithSetModifications p_tryp_reverse = p_tryp.GetReverseDecoyFromTarget(newAminoAcidPositions);
// Hash code corresponding to the target sequence, should be PairedTargetDecoyHash for reverse
int testMirrorTargetHash = p_tryp.GetHashCode();
// Hash code corresponding to the decoy sequence, should be PairedTargetDecoyHash for target
int testMirrorDecoyHash = p_tryp_reverse.GetHashCode();
Assert.AreEqual(testMirrorTargetHash, p_tryp_reverse.PairedTargetDecoyHash);
Assert.AreEqual(testMirrorDecoyHash, p_tryp.PairedTargetDecoyHash);
Assert.AreEqual("RVTRITV", p_tryp_reverse.BaseSequence);
Assert.AreEqual(new int[] { 6, 5, 4, 3, 2, 1, 0 }, newAminoAcidPositions);
Assert.IsTrue(p_tryp_reverse.AllModsOneIsNterminus.ContainsKey(1));//n-term acetyl
Expand Down
2 changes: 1 addition & 1 deletion mzLib/UsefulProteomicsDatabases/Loaders.cs
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ private static void DownloadElements(string elementLocation)

// Requests UniProt's ptmlist.txt through DownloadContent, passing uniprotLocation + ".temp" as the second argument.
// NOTE(review): two DownloadContent calls are visible here (www.uniprot.org and legacy.uniprot.org); this looks like
// the old and new lines of a diff shown together — confirm that only the legacy.uniprot.org call is meant to remain.
private static void DownloadUniprot(string uniprotLocation)
{
DownloadContent(@"http://www.uniprot.org/docs/ptmlist.txt", uniprotLocation + ".temp");
DownloadContent(@"http://legacy.uniprot.org/docs/ptmlist.txt", uniprotLocation + ".temp");
}
}
}
27 changes: 21 additions & 6 deletions mzLib/UsefulProteomicsDatabases/ProteinDbRetriever.cs
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,22 @@ public static class ProteinDbRetriever
/// <param name="reviewed">if yes file contains only reviewed proteins</param>
/// <param name="compress">if yes file is saved as .gz</param>
/// <param name="absolutePathToStorageDirectory"></param>
public static string RetrieveProteome(string proteomeID, string absolutePathToStorageDirectory, ProteomeFormat format, Reviewed reviewed, Compress compress, IncludeIsoforms include)
public static string RetrieveProteome(string proteomeID, string absolutePathToStorageDirectory, ProteomeFormat format,
Reviewed reviewed, Compress compress, IncludeIsoforms include)
{
if (Directory.Exists(absolutePathToStorageDirectory))
{
string htmlQueryString = "";
string filename = "\\" + proteomeID;
bool compressBool = false;
bool isoformBool = false;
bool reviewedBool = false;
if (format == ProteomeFormat.fasta)
{
if (reviewed == Reviewed.yes)
{
filename += "_reviewed";
reviewedBool = true;
}
else
{
Expand All @@ -39,19 +44,25 @@ public static string RetrieveProteome(string proteomeID, string absolutePathToSt
if (include == IncludeIsoforms.yes)
{
filename += "_isoform";
isoformBool = true;
}
filename += ".fasta";
if (compress == Compress.yes)
{
filename += ".gz";
compressBool = true;
}
htmlQueryString = "https://www.uniprot.org/uniprot/?query=proteome:" + proteomeID + " reviewed:" + reviewed + "&compress=" + compress + "&format=" + format + "&include:" + include;

htmlQueryString = "https://rest.uniprot.org/uniprot/search?query=" + proteomeID + "+AND+" + "reviewed:" + reviewedBool.ToString().ToLower() +
"&compressed=" + compressBool.ToString().ToLower() + "&format=" + format + "&includeIsoforms:" + isoformBool.ToString().ToLower();

}
else if (format == ProteomeFormat.xml)
{
if (reviewed == Reviewed.yes)
{
filename += "_reviewed";
reviewedBool = true;
}
else
{
Expand All @@ -61,8 +72,11 @@ public static string RetrieveProteome(string proteomeID, string absolutePathToSt
if (compress == Compress.yes)
{
filename += ".gz";
compressBool = true;
}
htmlQueryString = "https://www.uniprot.org/uniprot/?query=proteome:" + proteomeID + " reviewed:" + reviewed + "&compress=" + compress + "&format=" + format;
htmlQueryString = "https://rest.uniprot.org/proteome/search?query=" + proteomeID + "+AND+reviewed:" + reviewedBool.ToString().ToLower()
+ "&compressed=" + compressBool.ToString().ToLower() + "&format=" + format;

}
if (htmlQueryString.Length > 0)
{
Expand All @@ -85,8 +99,9 @@ public static string RetrieveProteome(string proteomeID, string absolutePathToSt
public static string DownloadAvailableUniProtProteomes(string destinationFolder)
{
if (Directory.Exists(destinationFolder))
{
string htmlQueryString = "https://www.uniprot.org/proteomes/?query=*&format=tab&compress=yes&columns=id,name,organism-id,proteincount,busco,cpd,assembly%20representation";
{
string htmlQueryString = "https://rest.uniprot.org/proteomes/search?query=*&format=tsv&compressed=true";

string filename = "availableUniProtProteomes.txt.gz";

string filepath = Path.Combine(destinationFolder, filename);
Expand Down Expand Up @@ -224,7 +239,7 @@ public enum IncludeIsoforms

/// <summary>
/// Columns to select for retrieving results in tab or xls format.
/// https://www.uniprot.org/help/uniprotkb_column_names
/// https://legacy.uniprot.org/help/uniprotkb_column_names
/// </summary>
public enum Columns
{
Expand Down

0 comments on commit 0a7c609

Please sign in to comment.