# FreqArray.py
import math
def computeFreqs(text, k_mer):
    """Count every window of length ``k_mer`` in ``text`` and print the counts.

    The result is a frequency array with ``4 ** k_mer`` slots.  Slot ``i``
    holds the number of (overlapping) windows of ``text`` whose
    ``patternToNumber`` index is ``i``.  The counts are printed on one line,
    separated by single spaces, and the same list is returned so callers can
    use it programmatically.

    Args:
        text: Sequence to scan; each window is passed to ``patternToNumber``.
        k_mer: Window length.  Must be a non-negative integer.

    Returns:
        The list of counts that was printed.

    Raises:
        ValueError: If ``k_mer`` is negative (no array size makes sense).
    """
    if k_mer < 0:
        raise ValueError("k_mer must be non-negative, got %r" % (k_mer,))

    # Integer exponentiation keeps the size an exact int; no float round trip.
    counts = [0] * (4 ** k_mer)

    # When text is shorter than k_mer this is <= 0 and the loop does not run,
    # leaving an all-zero array.
    window_count = len(text) - k_mer + 1
    for start in range(window_count):
        counts[patternToNumber(text[start:start + k_mer])] += 1

    print(" ".join(str(count) for count in counts))
    return counts
def patternToNumber(pat):
    """Return the base-4 index of ``pat``.

    Each symbol is converted to a digit by ``symbolToNumber`` and the digits
    are read left to right as a base-4 number, so the first symbol is the
    most significant digit.  An empty pattern maps to 0.

    Args:
        pat: Sequence of symbols accepted by ``symbolToNumber``.

    Returns:
        The integer index of the pattern.
    """
    # Iterative fold instead of recursion: no per-symbol stack frame (so long
    # patterns cannot hit the recursion limit) and no repeated prefix slicing.
    number = 0
    for symbol in pat:
        number = 4 * number + symbolToNumber(symbol)
    return number
def symbolToNumber(letter):
    """Map a symbol to its numeric code: A -> 0, C -> 1, G -> 2, T -> 3.

    Any other value yields ``None``.
    """
    # The position of each base in this string is its numeric code.
    for code, base in enumerate("ACGT"):
        if letter == base:
            return code
    return None
# Sample run: print the frequency array of all 6-mers in the sequence below.
computeFreqs(
    text='TACTATCTCGTGATTAGTACAGCCGAACCAACACATGATCCACATTTTCAGGAGGAACGTTACCATGCGCTTCCCCTTAAGTTAATCAGAGTACAAAAGCTCCTGGACAGCTGCGGATGGCTGCACATGTTGTCGAACTTCATATTCTCGATACTAGTCACGCTCCCGTAAGAAGTGCTGATCCGAAATTTGGCTATATTACAGCAGCTGGCTGAAGTGAGTCTGTCTGGAATGGGTCAAGAGGGCTATCGTCCAGTCGCAAAGTTGATCGCTCCGTTAGCTAATTAACTACTGCCCGTCATTGGTGCTCACGGATTACTGAGTCTTGACTGTTCGGCTGATTTGGCCATCTTGGGTGACCCGAGACAATTGATCCCCGTACTAAGTAGATACTCACACAAACGGGTGGTTGTAATGTGGCCGTCTCCCTCGTGGCACGACGGCACTCAGCCCGCAGCGTCCAAAGGAGGACGTAGATCATGTACTTGTTACGTTATGACTGACGTATGGTATCCAGAGCATCTCCGTCCTCGAGGCACTCTTGAAGTAAGTCAGAAAGCCCTCGGTTGTAAACGGGTTTCACGAACGGACCAGATTGAGCCCCTTCGGAGTATCTAGCCGGATCATTGAGTCGCCGGGCCGCCCGCAAGCACGTAGAAGCCGTC',
    k_mer=6,
)