Mercurial > repos > nedias > orf_tools
comparison GTranslator.py @ 2:c56b8a6bd02e draft
Uploaded
author | nedias |
---|---|
date | Wed, 12 Oct 2016 00:03:46 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
1:3814470e221a | 2:c56b8a6bd02e |
---|---|
1 """ | |
2 Translate nucleotide code to polypeptide | |
3 Because the codon table never changes, it is hard-coded for read/execution performance, | |
4 rather than written in XML or another file format, or stored in a database | |
5 TODO: Need to add codon table for RNA | |
6 | |
7 Author Nedias | |
8 """ | |
9 | |
10 from Bio.Alphabet import generic_protein | |
11 from Bio.Seq import Seq | |
12 | |
13 | |
# Nucleotide to Polypeptide mapping
# input: none
# return: dict mapping every DNA codon (3-letter string) to its one-letter
#         amino acid code, with the three stop codons mapped to "stop".
#         The dict additionally holds one reverse entry per residue
#         (residue -> a single codon: whichever codon of that residue is
#         visited last when iterating the table).
def condon_table():
    # (residue, codons) rows, kept in the order the codons must be inserted
    rows = (
        ("F", "TTT TTC"),
        ("L", "TTA TTG CTT CTC CTA CTG"),
        ("I", "ATT ATC ATA"),
        ("M", "ATG"),
        ("V", "GTT GTC GTA GTG"),
        ("S", "TCT TCA TCC TCG"),
        ("P", "CCT CCC CCA CCG"),
        ("T", "ACT ACC ACA ACG"),
        ("A", "GCT GCC GCA GCG"),
        ("Y", "TAT TAC"),
        ("stop", "TAA TAG"),
        ("H", "CAT CAC"),
        ("Q", "CAA CAG"),
        ("N", "AAT AAC"),
        ("K", "AAA AAG"),
        ("D", "GAT GAC"),
        ("E", "GAA GAG"),
        ("C", "TGT TGC"),
        ("stop", "TGA"),
        ("W", "TGG"),
        ("R", "CGT CGC CGA CGG"),
        ("S", "AGT AGC"),
        ("R", "AGA AGG"),
        ("G", "GGT GGC GGA GGG"),
    )

    c_table = dict()
    for residue, codons in rows:
        for codon in codons.split():
            c_table[codon] = residue

    # Build the residue -> codon entries separately, then merge them in,
    # so the table is not modified while it is being iterated
    reverse = dict()
    for codon in c_table:
        reverse[c_table[codon]] = codon
    c_table.update(reverse)

    return c_table
85 | |
86 | |
# Nucleotide to Polypeptide mapping for complementary sequence
# input: none
# return: dict mapping each 3-letter key to a one-letter amino acid code
#         ("stop" for the three stop entries); every key is the base-wise
#         complement (A<->T, C<->G) of a codon in the normal table.
#         The dict additionally holds one reverse entry per residue
#         (residue -> a single key: whichever key of that residue is
#         visited last when iterating the table).
def rev_condon_table():
    # (residue, complemented codons) rows, kept in insertion order
    rows = (
        ("F", "AAA AAG"),
        ("L", "AAT AAC GAA GAG GAT GAC"),
        ("I", "TAA TAG TAT"),
        ("M", "TAC"),
        ("V", "CAA CAG CAT CAC"),
        ("S", "AGA AGT AGG AGC"),
        ("P", "GGA GGG GGT GGC"),
        ("T", "TGA TGG TGT TGC"),
        ("A", "CGA CGG CGT CGC"),
        ("Y", "ATA ATG"),
        ("stop", "ATT ATC"),
        ("H", "GTA GTG"),
        ("Q", "GTT GTC"),
        ("N", "TTA TTG"),
        ("K", "TTT TTC"),
        ("D", "CTA CTG"),
        ("E", "CTT CTC"),
        ("C", "ACA ACG"),
        ("stop", "ACT"),
        ("W", "ACC"),
        ("R", "GCA GCG GCT GCC"),
        ("S", "TCA TCG"),
        ("R", "TCT TCC"),
        ("G", "CCA CCG CCT CCC"),
    )

    c_table = dict()
    for residue, codons in rows:
        for codon in codons.split():
            c_table[codon] = residue

    # Build the residue -> key entries separately, then merge them in,
    # so the table is not modified while it is being iterated
    reverse = dict()
    for codon in c_table:
        reverse[c_table[codon]] = codon
    c_table.update(reverse)

    return c_table
159 | |
160 | |
# Check if the sequence length is a multiple of 3
# input: Nucleotide sequence; any object supporting len() (an empty
#        sequence counts as a multiple of 3)
# output: True when len(seq) is divisible by 3, False otherwise
def check_seq(seq):
    remainder = len(seq) % 3
    return remainder == 0
170 | |
171 | |
# Translate Nucleotide to Polypeptide
# input: 1.seq: Nucleotide sequence; must support len() and str(seq) must
#               yield the raw bases (e.g. a Bio.Seq.Seq or a plain string)
#        2.rev: True for -strand (use complementary mapping),
#               False for +strand (use normal mapping)
# return: Polypeptide sequence in Seq format, or None when the sequence
#         length is not a multiple of 3
# raises: KeyError if a codon is not present in the mapping table
def nucleotide_to_polypeptide(seq, rev):

    # If -strand, use complementary mapping; if +strand, use normal mapping
    if rev:
        c_table = rev_condon_table()
    else:
        c_table = condon_table()

    # Only sequences whose length is a multiple of 3 can be translated
    if not check_seq(seq):
        return None

    str_seq = str(seq)
    # Translate every 3 nucleotides to one residue.
    # NOTE(review): the upper bound len - 3 leaves the final codon
    # untranslated, exactly as before -- presumably because it is the ORF's
    # stop codon; confirm against the callers before changing it.
    # range() is used instead of xrange() so this runs on Python 2 and 3.
    residues = [c_table[str_seq[i:i + 3]]
                for i in range(0, len(str_seq) - 3, 3)]
    return Seq("".join(residues), generic_protein)
193 | |
194 | |
195 | |
196 |