-
Notifications
You must be signed in to change notification settings - Fork 2
/
BA4_G++ -> exercises for tryocidineB1.py
129 lines (91 loc) · 3.41 KB
/
BA4_G++ -> exercises for tryocidineB1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
"""
Fixed from BA4G:
-> uses linear_spectra score to trim leaderboard
-> return a list with all equally scoring peptides instead of just one
"""
def Leaderboard_Cyclopeptide_Sequencing(Spectrum, N):
def Expand_Peptides(peptides):
new_peptides = []
for peptide in peptides:
for aa in mass:
new_peptides.append(peptide + aa)
return new_peptides
#
def Linear_Spectrum(peptide):
spectrum =[0]
for i in range(len(peptide)):
for j in range(i+1, len(peptide)+1):
spectrum.append(sum(mass[aa] for aa in peptide[i:j]))
return spectrum
#
def Cyclo_Spectrum(Peptide):
if Peptide in cyclospectra:
return cyclospectra[Peptide]
else:
spectrum = [0, int(sum(mass[aa] for aa in Peptide))]
cycle = Peptide * 2
for i in range( len( Peptide )):
for j in range( i + 1, i + len( Peptide )):
spectrum.append( sum( mass[aa] for aa in cycle[i:j] ))
cyclospectra[Peptide] = spectrum
return cyclospectra[Peptide]
#
def Score(Peptide_spectrum, Spectrum):
peaks = list(Spectrum)
score = 0
for fragment in Peptide_spectrum:
if fragment in peaks:
score += 1
peaks.remove(fragment)
elif fragment > Parent_Mass:
return 0
return score
#
cyclospectra = {}
Parent_Mass = Spectrum[-1]
Leaderboard = ['']
LeaderPeptides = []
Best = 1
while Leaderboard:
Leaderboard = Expand_Peptides(Leaderboard)
linear_scores = []
for Peptide in Leaderboard:
score = Score(Linear_Spectrum(Peptide), Spectrum)
Mass_Peptide = max(Cyclo_Spectrum(Peptide))
if Mass_Peptide == Parent_Mass:
Final_Score = Score(Cyclo_Spectrum(Peptide),Spectrum)
if Final_Score > Best:
LeaderPeptides = [Peptide]
Best = Final_Score
elif Final_Score == Best:
LeaderPeptides.append(Peptide)
elif Mass_Peptide > Parent_Mass:
score = 0
linear_scores.append(score)
if len(Leaderboard) > N:
cut_off = sorted(linear_scores)[-N]
leaders = []
for i in range(len(Leaderboard)):
if linear_scores[i] >= cut_off and linear_scores[i] > 0:
leaders.append(Leaderboard[i])
Leaderboard = leaders
return LeaderPeptides
#######
mass = {
'G':57,'A':71,'S':87,'P':97,'V':99,'T':101,'C':103,\
'I':113,'N':114,'D':115,'E':129,'K':128, 'M':131,'H':137,\
'F':147,'R': 156,'Y': 163,'W': 186
}
#######
f = open("/Users/jasonmoggridge/Desktop/dataset_102_10.txt",'r')
N = int(f.readline())
Spectrum = [int(i) for i in f.readline().split(' ')]
#
Leaders = Leaderboard_Cyclopeptide_Sequencing(Spectrum, N)
with open("/Users/jasonmoggridge/Desktop/rosalind_tyrocidine_ms25_output.txt",'w') as f:
for Leader in Leaders:
masses = [str(mass[aa]) for aa in Leader]
string = '-'.join(masses)
f.write(string+' ')
f.close()
#######