Skip to content

Commit

Permalink
#38: split logic into two main subroutines
Browse files Browse the repository at this point in the history
  • Loading branch information
weka511 committed Oct 26, 2020
1 parent 6ba9df8 commit 37eef43
Showing 1 changed file with 62 additions and 38 deletions.
100 changes: 62 additions & 38 deletions sims.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,49 +17,73 @@

import numpy as np

def get_score(c1,c2, match=1,mismatch=-1):
return match if c1==c2 else mismatch
def get_score(a,b, match=1,mismatch=-1):
return match if a==b else mismatch

def score_string(s,t):
return sum(get_score(c1,c2) for (c1,c2) in zip(s,t))
return sum(get_score(a,b) for (a,b) in zip(s,t))

def sims(s,t, match=1,mismatch=-1):
scores = np.zeros((len(t),len(s)),dtype=int)
link_back = {}
for j in range(len(s)):
scores[0,j] = get_score(s[j],t[0],match=match,mismatch=mismatch)
for i in range(1,len(t)):
for j in range(i,len(s)):
score_diag = scores[i-1][j-1] + get_score(s[j],t[i],match=match,mismatch=mismatch)
score_horizontal = scores[i-1][j] + mismatch
if score_horizontal>score_diag:
scores[i,j] = score_horizontal
link_back[(i,j)] = (i,j-1)

def dynamic_programming():
scores = np.zeros((len(t),len(s)),dtype=int)
predecessor = {}
for j in range(len(s)):
scores[0] = get_score(s[j],t[0],match=match,mismatch=mismatch)
for i in range(1,len(t)):
scores[i,0] = get_score(s[0],t[i],match=match,mismatch=mismatch)
for j in range(1,len(s)):
score_diag = scores[i-1][j-1] + get_score(s[j],t[i],match=match,mismatch=mismatch)
score_horizontal = scores[i][j-1] + mismatch
score_vertical = scores[i-1][j] + mismatch
scores[i,j] = max(score_diag,
score_horizontal,
score_vertical)

predecessor[(i,j)] = (i-1, j-1) if scores[i,j] == score_diag else \
(i, j-1) if scores[i,j] == score_horizontal else \
(i-1, j) if scores[i,j] == score_vertical else \
None
return scores,predecessor

def trace_back(scores,predecessor):
i = len(scores)-1
j = np.argmax(scores[i])
print (f'Number of maxima={sum(1 for s in scores[i] if s>=scores[i,j])}')
s_match = [s[j]]
t_match = [t[i]]
while (i,j) in predecessor:
i_pre,j_pre = predecessor[(i,j)]
if i==i_pre:
assert j != j_pre
t_match.append('-')
s_match.append(s[j_pre])
elif j==j_pre:
t_match.append(t[i_pre])
s_match.append('-')
else:
scores[i,j] = score_diag
link_back[(i,j)] = (i-1,j-1)
for i in range(len(scores)):
print (scores[i])
t_match.append(t[i_pre])
s_match.append(s[j_pre])
print (f'{(i_pre,j_pre)}=>{(i,j)} s={s_match[-1]} t={t_match[-1]}')
i,j = i_pre,j_pre

return (s_match,t_match)

scores,predecessor = dynamic_programming()

print (max(scores[-1]))
j0 = np.argmax(scores[-1])
i0 = len(scores)-1
s_match = [s[j0]]
t_match = [t[i0]]
while (i0,j0) in link_back:
i1,j1=link_back[(i0,j0)]
if i0==i1:
t_match.append('-')
else:
t_match.append(t[i1])
s_match.append(s[j1])
i0,j0= i1,j1
return (max(scores[-1]),''.join(s_match[::-1]),''.join(t_match[::-1]))
print (scores[-1])

s_match,t_match = trace_back(scores,predecessor)

return (max(scores[-1]), ''.join(s_match[::-1]), ''.join(t_match[::-1]))

if __name__=='__main__':
print(score_string('ACCATAAGCCCTACGTG-CCG',
'GCCGTCAGGC-TG-GTGTCCG'))

print (
sims('GCAAACCATAAGCCCTACGTGCCGCCTGTTTAAACTCGCGAACTGAATCTTCTGCTTCACGGTGAAAGTACCACAATGGTATCACACCCCAAGGAAAC',
'GCCGTCAGGCTGGTGTCCG'))


score,s1,t1 = sims('GCAAACCATAAGCCCTACGTGCCGCCTGTTTAAACTCGCGAACTGAATCTTCTGCTTCACGGTGAAAGTACCACAATGGTATCACACCCCAAGGAAAC',
'GCCGTCAGGCTGGTGTCCG')
print (score)
print (s1)
print (t1)
print (score_string(s1,t1))

0 comments on commit 37eef43

Please sign in to comment.