-
Notifications
You must be signed in to change notification settings - Fork 2
/
BA5_L - Hirschberg algo - Linear space Alignment*** INCOMPLETE.py
145 lines (114 loc) · 3.98 KB
/
BA5_L - Hirschberg algo - Linear space Alignment*** INCOMPLETE.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Jan 9 21:27:20 2020
@author: jasonmoggridge
Global Alignment in Linear Space Problem:
Find the highest-scoring alignment between two strings using a scoring matrix
in linear space.
Given:
Two long amino acid strings (of length approximately 10,000).
Return:
The maximum alignment score of these strings, followed by an
alignment achieving this maximum score. Use the BLOSUM62 scoring matrix
and indel penalty σ = 5.
LinearSpaceAlignment(v, w, top, bottom, left, right)
# Base cases - zero-area rectangles: return straight indel seq
if left = right
output path formed by bottom − top vertical edges
if top = bottom
output path formed by right − left horizontal edges
middle ← ⌊ (left + right)/2⌋
midEdge ← MiddleEdge(v, w, top, bottom, left, right)
midNode ← vertical coordinate of the initial node of midEdge
LinearSpaceAlignment(v, w, top, midNode, left, middle)
output midEdge
if midEdge = "→" or midEdge = "↘"
middle ← middle + 1
if midEdge = "↓" or midEdge ="↘"
midNode ← midNode + 1
LinearSpaceAlignment(v, w, midNode, bottom, middle, right)
"""
#def Blosum62():
# B62 = {}
# with open("/Users/jasonmoggridge/Dropbox/Rosalind/textbook_track/Course3_Alignments/BLOSUM62.txt", 'r') as file:
# AA = file.readline().strip().split(' ')
# arrays = []
# for _ in AA:
# array = list(file.readline().strip().split(' '))
# while '' in array:
# array.remove('')
# array = list(int(i) for i in array[1:])
# arrays.append(array)
# for i in range(len(AA)):
# for j in range(len(AA)):
# B62[(AA[i],AA[j])] = arrays[i][j]
# return B62
#
#
#def Mid_Edge(v,w):
#
# def Paths_to_mid(v,w):
# S = [[0,-sigma]] + list([-sigma*i,-float('inf')] for i in range(1,len(v)+1))
# for j in range(1,len(w)+1):
# for i in range(len(v)+1):
# match = mu[(v[i-1], w[j-1])]
# S[i][j%2] = max(\
# S[i-1][j%2] - sigma,\
# S[i][(j-1)%2] - sigma,\
# S[i-1][(j-1)%2] + match)
# return list(S[i][j%2] for i in range(len(v)+1))
#
# mid_j = len(w)//2
# From_Source = Paths_to_mid(v, w[:mid_j])
# To_Sink = Paths_to_mid(v, w[len(w):mid_j:-1])
# best = -float('inf')
# for i in range(len(v)+1):
# len_ipath = From_Source[i] + To_Sink[i]
# if len_ipath > best:
# best = len_ipath
# mid_i = i
#
# if mu[(v[mid_i], w[mid_j])] < -sigma:
# return mid_i, (0, 1)
# else:
# return mid_i, (1, 1)
def Linear_space_align( v, w, top, bottom, left, right):
print('\n rectangle t/b/l/r:', top,bottom,left,right)
if left == right:
print('\tleft == right', left, right)
string =''
for i in range(top, bottom):
string += 'D'
path.append(string)
print(string)
return
if top == bottom:
print('\tleft == right', left, right)
string = ''
for j in range(left, right):
string += 'H'
path.append(string)
print(string)
return
mid_j = (left+right)//2
mid_i, edge = Mid_Edge(v[top:bottom],w[left:right])
print('\tNew mid_ij, edge', mid_i, mid_j,edge)
Linear_space_align( v, w, top, mid_i, left, mid_j)
if edge == (0,1):
path.append('H')
print('\t\t H')
elif edge == (1,1):
path.append('M')
print('\t\t M')
mid_i += edge[0]
mid_j += edge[1]
Linear_space_align( v, w, mid_i, bottom, mid_j, right)
###
path =['$']
with open("/Users/jasonmoggridge/Desktop/rosalind_test.txt",'r') as infile:
v = infile.readline().strip()
w = infile.readline().strip()
mu = dict(Blosum62())
sigma = 5
Linear_space_align(v,w,0,len(v)+1, 0, len(w)+1)