2
|
1 """
|
|
2 Translate nucleotide code to polypeptide
|
|
Because the codon table never changes, it is hard-coded for read/execution
performance rather than written in XML or another file format, or stored in a database.
TODO: Need to add a codon table for RNA
|
|
6
|
|
7 Author Nedias
|
|
8 """
|
|
9
|
|
10 from Bio.Alphabet import generic_protein
|
|
11 from Bio.Seq import Seq
|
|
12
|
|
13
|
|
def condon_table():
    """Build the nucleotide-to-polypeptide lookup table for DNA codons.

    A fresh dict is created on every call. It contains:

    * 64 entries mapping each three-letter DNA codon to its one-letter
      residue code; the three stop codons map to the string ``"stop"``.
    * one extra entry per distinct value (each residue letter and
      ``"stop"``) mapping back to a codon. When several codons share a
      value, the codon kept is the last one met while iterating the table.

    Returns:
        dict: the combined codon -> residue and residue -> codon mapping.
    """
    # Each row is (value, codons sharing that value); rows follow the
    # order in which the codons are inserted into the table.
    residue_runs = (
        ("F", "TTT TTC"),
        ("L", "TTA TTG CTT CTC CTA CTG"),
        ("I", "ATT ATC ATA"),
        ("M", "ATG"),
        ("V", "GTT GTC GTA GTG"),
        ("S", "TCT TCA TCC TCG"),
        ("P", "CCT CCC CCA CCG"),
        ("T", "ACT ACC ACA ACG"),
        ("A", "GCT GCC GCA GCG"),
        ("Y", "TAT TAC"),
        ("stop", "TAA TAG"),
        ("H", "CAT CAC"),
        ("Q", "CAA CAG"),
        ("N", "AAT AAC"),
        ("K", "AAA AAG"),
        ("D", "GAT GAC"),
        ("E", "GAA GAG"),
        ("C", "TGT TGC"),
        ("stop", "TGA"),
        ("W", "TGG"),
        ("R", "CGT CGC CGA CGG"),
        ("S", "AGT AGC"),
        ("R", "AGA AGG"),
        ("G", "GGT GGC GGA GGG"),
    )

    table = {}
    for residue, codons in residue_runs:
        for codon in codons.split():
            table[codon] = residue

    # Reverse entries (value -> codon); later codons overwrite earlier ones.
    inverse = {}
    for codon in table:
        inverse[table[codon]] = codon
    table.update(inverse)

    return table
|
|
85
|
|
86
|
|
def rev_condon_table():
    """Build the nucleotide-to-polypeptide lookup table for the complementary sequence.

    Every key is a three-letter triplet written with complementary bases
    (for example ``"TAC"`` maps to ``"M"`` and ``"AAA"`` to ``"F"``), so a
    complementary sequence can be looked up without being complemented first.

    A fresh dict is created on every call. It contains:

    * 64 entries mapping each triplet to its one-letter residue code; the
      three stop triplets map to the string ``"stop"``.
    * one extra entry per distinct value (each residue letter and
      ``"stop"``) mapping back to a triplet. When several triplets share a
      value, the triplet kept is the last one met while iterating the table.

    Returns:
        dict: the combined triplet -> residue and residue -> triplet mapping.
    """
    # Each row is (value, triplets sharing that value); rows follow the
    # order in which the triplets are inserted into the table.
    residue_runs = (
        ("F", "AAA AAG"),
        ("L", "AAT AAC GAA GAG GAT GAC"),
        ("I", "TAA TAG TAT"),
        ("M", "TAC"),
        ("V", "CAA CAG CAT CAC"),
        ("S", "AGA AGT AGG AGC"),
        ("P", "GGA GGG GGT GGC"),
        ("T", "TGA TGG TGT TGC"),
        ("A", "CGA CGG CGT CGC"),
        ("Y", "ATA ATG"),
        ("stop", "ATT ATC"),
        ("H", "GTA GTG"),
        ("Q", "GTT GTC"),
        ("N", "TTA TTG"),
        ("K", "TTT TTC"),
        ("D", "CTA CTG"),
        ("E", "CTT CTC"),
        ("C", "ACA ACG"),
        ("stop", "ACT"),
        ("W", "ACC"),
        ("R", "GCA GCG GCT GCC"),
        ("S", "TCA TCG"),
        ("R", "TCT TCC"),
        ("G", "CCA CCG CCT CCC"),
    )

    table = {}
    for residue, triplets in residue_runs:
        for triplet in triplets.split():
            table[triplet] = residue

    # Reverse entries (value -> triplet); later triplets overwrite earlier ones.
    inverse = {}
    for triplet in table:
        inverse[table[triplet]] = triplet
    table.update(inverse)

    return table
|
|
159
|
|
160
|
|
def check_seq(seq):
    """Report whether the length of a sequence is a multiple of 3.

    Args:
        seq: any object supporting ``len()``, e.g. a nucleotide sequence
            or a plain string.

    Returns:
        bool: ``True`` when ``len(seq)`` is divisible by 3, else ``False``.
    """
    leftover = len(seq) % 3
    return leftover == 0
|
|
170
|
|
171
|
|
def nucleotide_to_polypeptide(seq, rev):
    """Translate a nucleotide sequence into a polypeptide sequence.

    Args:
        seq: nucleotide sequence. It must support ``len()``, and ``str(seq)``
            must yield the nucleotide letters that are looked up in the
            codon table.
        rev: truthy for the -strand, which uses the complementary mapping
            from ``rev_condon_table()``; falsy for the +strand, which uses
            the normal mapping from ``condon_table()``.

    Returns:
        A ``Seq`` created with ``generic_protein``. It is empty when the
        length of ``seq`` is not a multiple of 3.

    Raises:
        KeyError: if a triplet is not a key of the selected table.
    """
    # NOTE(review): a length that is not a multiple of 3 yields an empty Seq
    # here, so the return type is always the same; confirm no caller relies
    # on receiving None for that case.
    if not check_seq(seq):
        return Seq("", generic_protein)

    c_table = rev_condon_table() if rev else condon_table()
    str_seq = str(seq)

    # NOTE(review): the range stops before the final triplet, so the last
    # codon is never translated. This is kept as-is; presumably it drops the
    # terminal stop codon -- confirm against callers. Stop triplets that are
    # translated contribute the table's literal "stop" text.
    # ``range`` (not ``xrange``) keeps this working on Python 2 and Python 3.
    residues = [
        c_table[str_seq[start:start + 3]]
        for start in range(0, len(str_seq) - 3, 3)
    ]
    return Seq("".join(residues), generic_protein)
|
|
193
|
|
194
|
|
195
|
|
196
|