-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDNA,RNA,AA classes.py
114 lines (98 loc) · 4.96 KB
/
DNA,RNA,AA classes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#### Classes homework ####
class NuclAcid:
    """Base class holding a nucleic acid sequence (shared by DNA and RNA).

    The sequence is stored upper-cased in ``seq``. Instances support
    ``len()``, repeated ``for`` iteration over the bases, and ``next()``.
    """

    def __init__(self, seq):
        """Store the upper-cased sequence and validate its alphabet.

        Args:
            seq: the sequence as a string; case is ignored.

        Raises:
            ValueError: if the sequence contains both T and U, or contains
                any character other than A, C, G, T or U.
        """
        letters = {'A', 'C', 'T', 'G', 'U'}
        self.seq = seq.upper()  # normalise case so the checks below are reliable
        self.index = 0  # cursor consumed by __next__
        # T is DNA-only and U is RNA-only, so both together is never valid.
        # This is checked once, before the per-character alphabet check, so
        # the T/U message keeps priority exactly as before.
        if 'T' in self.seq and 'U' in self.seq:
            raise ValueError('Wrong sequence: it is impossible to have T and U together in a nucleic acid sequence. Check your data and try again.')
        if not letters.issuperset(self.seq):
            raise ValueError(self.seq + ' is not a nucleic acid sequence. Perhaps you loaded an amino acid sequence. Try again.')

    def __len__(self):
        """Return the number of bases in the sequence."""
        return len(self.seq)

    def __iter__(self):
        """Return a fresh iterator over the bases.

        A new iterator is created on every call, so the object can be
        looped over any number of times (and in nested loops) instead of
        being exhausted after the first pass.
        """
        return iter(self.seq)

    def __next__(self):
        """Return the base at the internal cursor and advance the cursor.

        Kept so that ``next(obj)`` keeps working; it uses ``self.index``
        and is independent of the iterators handed out by ``__iter__``.

        Raises:
            StopIteration: when the cursor has passed the last base.
        """
        try:
            result = self.seq[self.index]
        except IndexError:
            raise StopIteration from None
        self.index += 1
        return result

    def gc_content(self):
        """Return the percentage of G and C bases, rounded to 2 decimals.

        An empty sequence has no bases to count, so 0.0 is returned rather
        than dividing by zero.
        """
        if not self.seq:
            return 0.0
        gc = self.seq.count('G') + self.seq.count('C')
        return round(gc / len(self) * 100, 2)
class DNA(NuclAcid):
    """DNA sequence: a nucleic acid restricted to the bases A, C, G and T."""

    def __init__(self, seq):
        """Run the shared NuclAcid checks, then reject any non-DNA base.

        Raises:
            ValueError: from the base-class checks, or when the sequence
                contains a character outside A, C, G, T.
        """
        super().__init__(seq)
        dna_alphabet = {'A', 'C', 'T', 'G'}
        if any(base not in dna_alphabet for base in self.seq):
            raise ValueError(self.seq + ' is not a DNA sequence. Check your data and try again.')

    def DNA_complement(self):
        """Return the complementary DNA strand as a plain string.

        Bases are paired A<->T and G<->C, position by position (the result
        is not reversed).
        """
        pairing = {'A': 'T', 'T': 'A', 'G': 'C', 'C': 'G'}
        return ''.join(map(pairing.get, self.seq))

    def reverse_complement(self):
        """Return the complement of the sequence read in reverse order."""
        return self.DNA_complement()[::-1]

    def DNA_to_RNA_transcription(self):
        """Transcribe the sequence and return the product as an RNA object.

        Each base is replaced by its RNA partner (A->U, T->A, G->C, C->G),
        so the result is the RNA complementary to the stored strand.
        """
        partner = {'A': 'U', 'T': 'A', 'G': 'C', 'C': 'G'}
        rna_text = ''.join(map(partner.get, self.seq))
        return RNA(rna_text)  # wrap in the RNA class so RNA methods are available
class RNA(NuclAcid):
    """RNA sequence: a nucleic acid restricted to the bases A, C, G and U."""

    # Codon -> one-letter amino acid; '_' marks a stop codon.
    # Built once at class level instead of on every translation call.
    _CODON_TABLE = {
        'AUA': 'I', 'AUC': 'I', 'AUU': 'I', 'AUG': 'M',
        'ACA': 'T', 'ACC': 'T', 'ACG': 'T', 'ACU': 'T',
        'AAC': 'N', 'AAU': 'N', 'AAA': 'K', 'AAG': 'K',
        'AGC': 'S', 'AGU': 'S', 'AGA': 'R', 'AGG': 'R',
        'CUA': 'L', 'CUC': 'L', 'CUG': 'L', 'CUU': 'L',
        'CCA': 'P', 'CCC': 'P', 'CCG': 'P', 'CCU': 'P',
        'CAC': 'H', 'CAU': 'H', 'CAA': 'Q', 'CAG': 'Q',
        'CGA': 'R', 'CGC': 'R', 'CGG': 'R', 'CGU': 'R',
        'GUA': 'V', 'GUC': 'V', 'GUG': 'V', 'GUU': 'V',
        'GCA': 'A', 'GCC': 'A', 'GCG': 'A', 'GCU': 'A',
        'GAC': 'D', 'GAU': 'D', 'GAA': 'E', 'GAG': 'E',
        'GGA': 'G', 'GGC': 'G', 'GGG': 'G', 'GGU': 'G',
        'UCA': 'S', 'UCC': 'S', 'UCG': 'S', 'UCU': 'S',
        'UUC': 'F', 'UUU': 'F', 'UUA': 'L', 'UUG': 'L',
        'UAC': 'Y', 'UAU': 'Y', 'UAA': '_', 'UAG': '_',
        'UGC': 'C', 'UGU': 'C', 'UGA': '_', 'UGG': 'W',
    }

    def __init__(self, seq):
        """Run the shared NuclAcid checks, then reject any non-RNA base.

        Raises:
            ValueError: from the base-class checks, or when the sequence
                contains a character outside A, C, G, U.
        """
        super().__init__(seq)
        for i in self.seq:
            if i not in {'A', 'U', 'G', 'C'}:
                raise ValueError(self.seq + ' is not a RNA sequence. Check your data and try again.')

    def RNA_complement(self):
        """Return the complementary RNA strand as a plain string.

        Bases are paired A<->U and G<->C, position by position (the result
        is not reversed).
        """
        RNA_compl_rules = {'A': 'U', 'U': 'A', 'G': 'C', 'C': 'G'}
        return ''.join(RNA_compl_rules.get(base) for base in self.seq)

    def RNA_to_protein_translation(self):
        """Translate the sequence into a one-letter amino acid string.

        Translation starts at the first base of the sequence (it does not
        search for an AUG start codon) and reads consecutive triplets.
        It ends at the first stop codon (UAA, UAG, UGA), which is not
        included in the result. Bases left over after the last complete
        triplet are ignored.

        Returns:
            The protein sequence as a string; empty if the sequence is
            shorter than one codon or begins with a stop codon.
        """
        codon_table = self._CODON_TABLE
        rna = self.seq
        # Only walk over complete codons; a trailing 1-2 base remainder
        # cannot be translated.
        translatable = len(rna) - len(rna) % 3
        residues = []
        for start in range(0, translatable, 3):
            # Slice the string (not a list) so the codon is a hashable key.
            residue = codon_table[rna[start:start + 3]]
            if residue == '_':  # stop codon: end of the protein
                break
            residues.append(residue)
        return ''.join(residues)