-
Notifications
You must be signed in to change notification settings - Fork 2
/
BA11_I -Prob_spec_dict.py
61 lines (41 loc) · 1.62 KB
/
BA11_I -Prob_spec_dict.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
"""
BA11-H ->
Spectral dictionaries...
https://stepik.org/lesson/11866/step/9?unit=8305
"""
def Pr_spectral_dict(spectrum, min_s, max_s):
""" Recursive function to count all possible peptides
scoring between min -> max score (*note -> never let t<0 in path)"""
def Pr(i,t):
# do not count if score dips below zero (silly but yeah)
if i < 0 or t < 0:
return 0
# recursion -> get sum of peptides leading to (i,t)
# size(i,t) = sum -> size(i-|aa|,t-s[i]) -> for all AAs
elif (i,t) not in pr_memo.keys():
pr_memo[(i,t)] =sum((1/20)*Pr(i - aa, t - spectrum[i]) for aa in AAs)
return pr_memo[(i,t)]
## wrapper
# initialize size_memo: an (i*t) matrix as dictionary
# size: sum of peptides in spectral_dict
pr_memo = {(0,0):1}
prob_dict = []
mass = len(spectrum)-1
# compute size(m,score) for all scores t in range(acceptable scores)
for t in range(min_s, max_s + 1):
prob_dict.append(Pr(mass, t))
return prob_dict
# Main --
# list of AA masses:
##
AAs = [57, 71, 87, 97, 99, 101, 103, 113, 113, 114, 115,\
128, 128, 129, 131, 137, 147, 156, 163, 186]
#AAs = [4,5] # AAs for sample
# Parsing
with open('data/rosalind_ba11i.txt', 'r') as infile:
spectrum = [0] + [int(i) for i in infile.readline().strip().split()]
t = int(infile.readline())
max_score = int(infile.readline())
# Get array of sizes of spectral dicts for each score in range t->max
prob_dict = Pr_spectral_dict(spectrum, t, max_score)
print('>Pr(Dict_t(Spectrum) = ',sum(prob_dict))