Mercurial > repos > davidmurphy > codonlogo
comparison corebio/transform.py @ 7:8d676bbd1f2d
Uploaded
author | davidmurphy |
---|---|
date | Mon, 16 Jan 2012 07:03:36 -0500 |
parents | c55bdc2fb9fa |
children |
comparison
equal
deleted
inserted
replaced
6:4a4aca3d57c9 | 7:8d676bbd1f2d |
---|---|
1 # Copyright (c) 2006 John Gilman | |
2 # | |
3 # This software is distributed under the MIT Open Source License. | |
4 # <http://www.opensource.org/licenses/mit-license.html> | |
5 # | |
6 # Permission is hereby granted, free of charge, to any person obtaining a | |
7 # copy of this software and associated documentation files (the "Software"), | |
8 # to deal in the Software without restriction, including without limitation | |
9 # the rights to use, copy, modify, merge, publish, distribute, sublicense, | |
10 # and/or sell copies of the Software, and to permit persons to whom the | |
11 # Software is furnished to do so, subject to the following conditions: | |
12 # | |
13 # The above copyright notice and this permission notice shall be included | |
14 # in all copies or substantial portions of the Software. | |
15 # | |
16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
17 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
18 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
19 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
20 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
21 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |
22 # THE SOFTWARE. | |
23 | |
24 """ Transformations of Seqs (alphabetic sequences). | |
25 | |
26 | |
27 | |
28 Classes : | |
29 - Transform -- Simple transforms of alphabetic strings. | |
30 - GeneticCode -- The genetic mapping of dna to protein. | |
31 | |
32 Functions : | |
33 - mask_low_complexity -- Implementation of Seg algorithm to remove low complexity | |
34 regions from protein sequences. | |
35 | |
36 | |
37 """ | |
38 | |
39 | |
40 from corebio.data import dna_extended_letters, dna_ambiguity | |
41 from corebio.seq import Seq, protein_alphabet, nucleic_alphabet, dna_alphabet | |
42 from string import maketrans | |
43 from corebio.moremath import log2 , entropy | |
44 | |
45 __all__ = [ | |
46 'Transform', | |
47 'mask_low_complexity', | |
48 'GeneticCode' | |
49 ] | |
50 | |
class Transform(object):
    """A translation between alphabetic strings.

    (This class is not called 'Translation' to avoid confusion with the
    biological translation of rna to protein.)

    A Transform maps each character of `source` to the character at the
    same position in `target`. Calling the transform on a sequence returns
    a new sequence of the target's class and alphabet.

    Example:
        trans = Transform(
            Seq("ACGTRYSWKMBDHVN-acgtUuryswkmbdhvnXx?.~", dna_alphabet),
            Seq("ACGTRYSWKMNNNNN-acgtUuryswkmbnnnnXx?.~", reduced_nucleic_alphabet)
        )
        s0 = Seq("AAAAAV", nucleic_alphabet)
        s1 = trans(s0)
        assert(s1.alphabet == reduced_nucleic_alphabet)
        assert(s1 == Seq("AAAAAN", reduced_nucleic_alphabet))

    Status : Beta
    """

    __slots__ = ["table", "source", "target"]

    def __init__(self, source, target):
        """Build the character translation table.

        Args:
            -- source - Sequence of characters to translate from.
            -- target - Sequence of replacement characters; must have the
                        same length as `source`.
        """
        # Python 3 exposes the table builder as str.maketrans; Python 2 only
        # has string.maketrans. Pick whichever exists so the class works on
        # both interpreters.
        make_table = getattr(str, "maketrans", None)
        if make_table is None:
            from string import maketrans as make_table

        self.table = make_table(source, target)
        self.source = source
        self.target = target

    def __call__(self, seq):
        """Translate sequence.

        Raises:
            ValueError -- If `seq` contains characters outside the source
                          sequence's alphabet.
        """
        if not self.source.alphabet.alphabetic(seq):
            raise ValueError("Incompatable alphabets")
        translated = str.translate(seq, self.table)
        # The result takes the target's class and alphabet, but keeps the
        # name and description of the input sequence.
        cls = self.target.__class__
        return cls(translated, self.target.alphabet, seq.name, seq.description)
# End class Transform
85 | |
# FIXME: Test, document, add to seq.
# Complement transform for DNA: each character of `source` is replaced by the
# character at the same position in `target`. Case is preserved, 'U'/'u' are
# complemented to 'A'/'a', and the characters "N-Xx?.~" map to themselves.
dna_complement = Transform(
    source=Seq("ACGTRYSWKMBDHVN-acgtUuryswkmbdhvnXx?.~", dna_alphabet),
    target=Seq("TGCAYRSWMKVHDBN-tgcaAayrswmkvhdbnXx?.~", dna_alphabet),
)
91 | |
92 | |
93 | |
def mask_low_complexity(seq, width=12, trigger=1.8, extension=2.0, mask='X'):
    """Mask low complexity regions in protein sequences.

    Implements the Seg method [1] of Wootton & Federhen [2]. The sequence is
    scanned with overlapping windows of `width` residues and the entropy of
    each window is computed. A window is low complexity when its entropy is
    less than `trigger`, or when its entropy is less than `extension` and the
    window scanned just before it was itself low complexity. The scan is
    performed twice, left to right and then right to left. Every residue
    covered by a low complexity window is replaced by the mask character and
    the masked sequence is returned.

    The default parameters, width=12, trigger=1.8, extension=2.0, mask='X' are
    suitable for masking protein sequences before a database search. The
    standard default seg parameters are width=12, trigger=2.2, extension=2.5

    Arguments:
        Seq seq         -- An alphabetic sequence
        int width       -- Window width
        float trigger   -- Entropy in bits between 0 and 4.3.. ( =log_2(20) )
        float extension -- Entropy in bits between 0 and 4.3.. ( =log_2(20) ),
                           and not less than trigger
        char mask       -- The mask character (default: 'X')
    Returns :
        Seq -- A masked alphabetic sequence. If the window is wider than
               the sequence, the input sequence is returned unchanged.
    Raises :
        ValueError -- On invalid arguments
    Refs:
        [1] seg man page:
            http://bioportal.weizmann.ac.il/education/materials/gcg/seg.html
        [2] Wootton & Federhen (Computers and Chemistry 17; 149-163, (1993))
    Authors:
        GEC 2005
    Future :
        - Optional mask character.
        - Option to lower case masked symbols.
        - Remove arbitrary restriction to protein.
    """
    max_bits = log2(20)
    if trigger > max_bits or trigger < 0:
        raise ValueError("Invalid trigger complexity: %f"% trigger)
    if extension < trigger or extension > max_bits or extension < 0:
        raise ValueError("Invalid extension complexity: %f"% extension)
    # NOTE(review): width == 0 passes this check; confirm entropy() copes
    # with an empty window before relying on it.
    if width < 0:
        raise ValueError("Invalid width: %d"% width)

    # A window longer than the sequence can never be placed.
    if len(seq) < width:
        return seq

    residues = seq.ords()
    mask_ord = seq.alphabet.ord(mask)
    nwindows = len(seq) - width + 1

    # Entropy of every window, computed by sliding the window one residue at
    # a time and updating the per-symbol counts incrementally.
    counts = [0] * len(seq.alphabet)
    for symbol in residues[0:width]:
        counts[symbol] += 1
    window_entropy = [entropy(counts, 2)]
    for start in range(1, nwindows):
        counts[residues[start - 1]] -= 1
        counts[residues[start + width - 1]] += 1
        window_entropy.append(entropy(counts, 2))

    # Scan forwards, then redo the scan backwards, so that a triggered
    # window can extend the masked region in both directions.
    scan_orders = (range(0, nwindows), range(nwindows - 1, -1, -1))
    for order in scan_orders:
        prev_segged = False
        for start in order:
            bits = window_entropy[start]
            segged = bits < trigger or (prev_segged and bits < extension)
            if segged:
                for pos in range(start, start + width):
                    residues[pos] = mask_ord
            prev_segged = segged

    return seq.alphabet.chrs(residues)
# end mask_low_complexity()
181 | |
182 | |
class GeneticCode(object):
    """An encoding of amino acids by DNA triplets.

    Example :

        Genetic Code [1]: Standard
                T         C         A         G
             +---------+---------+---------+---------+
           T | TTT F   | TCT S   | TAT Y   | TGT C   | T
           T | TTC F   | TCC S   | TAC Y   | TGC C   | C
           T | TTA L   | TCA S   | TAA Stop| TGA Stop| A
           T | TTG L(s)| TCG S   | TAG Stop| TGG W   | G
             +---------+---------+---------+---------+
           C | CTT L   | CCT P   | CAT H   | CGT R   | T
           C | CTC L   | CCC P   | CAC H   | CGC R   | C
           C | CTA L   | CCA P   | CAA Q   | CGA R   | A
           C | CTG L(s)| CCG P   | CAG Q   | CGG R   | G
             +---------+---------+---------+---------+
           A | ATT I   | ACT T   | AAT N   | AGT S   | T
           A | ATC I   | ACC T   | AAC N   | AGC S   | C
           A | ATA I   | ACA T   | AAA K   | AGA R   | A
           A | ATG M(s)| ACG T   | AAG K   | AGG R   | G
             +---------+---------+---------+---------+
           G | GTT V   | GCT A   | GAT D   | GGT G   | T
           G | GTC V   | GCC A   | GAC D   | GGC G   | C
           G | GTA V   | GCA A   | GAA E   | GGA G   | A
           G | GTG V   | GCG A   | GAG E   | GGG G   | G
             +---------+---------+---------+---------+

    In a translated sequence, '?' marks a codon whose ambiguity codes allow
    more than one translation, at least one of which is a stop codon ('*').
    Other ambiguous codons translate to 'B' (D or N), 'Z' (E or Q),
    'J' (I or L), or 'X' (anything else).

    See Also :
    -- http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=c
    -- http://www.ncbi.nlm.nih.gov/projects/collab/FT/index.html#7.5
    Authors:
        JXG, GEC
    """
    # TODO: Does translate fail with an appropriate exception when fed gaps?
    # TODO: Can back_translate handle gaps?

    def __init__(self, ident, description,
                 amino_acid, start, base1, base2, base3):
        """Create a new GeneticCode.

        Args:
        -- ident - Standard identifier (Or zero). An integer
        -- description
        -- amino acid - A sequence of amino acids and stop codons. e.g.
            "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG"
        -- start - A sequence indicating start codons, e.g.,
            "---M---------------M---------------M----------------------------"
        -- base1 - The first base of each codon. e.g.,
            "TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG"
        -- base2 - The second base of each codon. e.g.,
            "TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG"
        -- base3 - The last base of each codon. e.g.,
            "TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG"

        The five strings are read in parallel: position i of amino_acid and
        start describes the codon base1[i] + base2[i] + base3[i].
        """
        self.ident = ident
        self.description = description

        self.amino_acid = amino_acid
        self.start = start
        self.base1 = base1
        self.base2 = base2
        self.base3 = base3

        stop_codons = []
        start_codons = []
        for i, amino in enumerate(amino_acid):
            codon = base1[i] + base2[i] + base3[i]
            if amino == '*':
                stop_codons.append(codon)
            if start[i] == 'M':
                start_codons.append(codon)

        self.stop_codons = tuple(stop_codons)
        self.start_codons = tuple(start_codons)

        # Building the full translation table is expensive,
        # so we avoid doing so until necessary.
        self._table = None
        self._back_table = None

    @staticmethod
    def std_list():
        "Return a list of standard genetic codes."
        return _codon_tables

    @staticmethod
    def std():
        "The standard 'universal' genetic code."
        return _codon_tables[0]

    @staticmethod
    def by_name(name):
        """Find a genetic code in the code list by name or identifier.

        Raises:
            ValueError -- If no standard code has `name` as its identifier
                          or description.
        """
        for code in _codon_tables:
            if code.ident == name or code.description == name:
                return code
        raise ValueError("No such translation table: %s" % str(name) )

    def _get_table(self):
        if self._table is None:
            self._create_table()
        return self._table
    table = property(_get_table, None, "A map between codons and amino acids")

    def _get_back_table(self):
        if self._back_table is None:
            self._create_table()
        return self._back_table
    back_table = property(_get_back_table, None, "A map between amino acids and codons")

    def _create_table(self):
        """Build the codon -> amino acid and amino acid -> codon maps."""
        amino_acid = self.amino_acid
        base1 = self.base1
        base2 = self.base2
        base3 = self.base3

        # Construct a table of unambiguous codon translations
        table = {}
        for i, amino in enumerate(amino_acid):
            table[base1[i] + base2[i] + base3[i]] = amino

        # Build the back table. Codons are visited in reverse alphabetical
        # order so that the alphabetically first codon for each amino acid
        # is written last and therefore wins. sorted() is used rather than
        # list.sort() because dict.items() is not a list on Python 3.
        back_table = {}
        for codon, amino in sorted(table.items(), reverse=True):
            back_table[amino] = codon
        for ambiguous in ('X', 'B', 'Z', 'J'):
            back_table[ambiguous] = 'NNN'
        self._back_table = back_table

        letters = dna_extended_letters + 'U'  # include RNA in table

        # Amino acid ambiguity codes for specific pairs of translations.
        pair_codes = {('D', 'N'): 'B', ('E', 'Q'): 'Z', ('I', 'L'): 'J'}

        # For each (possibly ambiguous) codon, construct all compatible
        # unambiguous codons, translate them, and collect the set of
        # possible amino acids. If there is more than one translation,
        # look for a matching amino acid ambiguity code.
        full_table = {}
        for c1 in letters:
            for c2 in letters:
                for c3 in letters:
                    codon = c1 + c2 + c3
                    dna = codon.replace('U', 'T')  # Convert rna codon to dna
                    translated = sorted(set(
                        table[b1 + b2 + b3]
                        for b1 in dna_ambiguity[dna[0]]
                        for b2 in dna_ambiguity[dna[1]]
                        for b3 in dna_ambiguity[dna[2]]
                    ))
                    if len(translated) == 1:
                        trans = translated[0]
                    elif tuple(translated) in pair_codes:
                        trans = pair_codes[tuple(translated)]
                    elif '*' in translated:
                        trans = '?'
                    else:
                        trans = 'X'
                    full_table[codon] = trans

        self._table = full_table
    # End create tables

    def translate(self, seq, frame=0):
        """Translate a DNA sequence to a polypeptide using full
        IUPAC ambiguities in DNA/RNA and amino acid codes.

        Args:
        -- seq   - A DNA or RNA sequence. Case is ignored.
        -- frame - Offset of the first codon (default 0). Trailing bases
                   that do not fill a complete codon are ignored.

        Returns :
        -- Seq - A polypeptide sequence
        """
        # TODO: Optimize.
        # TODO: Insanity check alphabet.
        seq = str(seq)
        table = self.table
        trans = [table[seq[i:i + 3].upper()]
                 for i in range(frame, len(seq) - 2, 3)]
        return Seq(''.join(trans), protein_alphabet)

    def back_translate(self, seq):
        """Convert protein back into coding DNA.

        Each amino acid is replaced by its alphabetically first codon;
        the ambiguity codes X, B, Z and J become 'NNN'.

        Args:
        -- seq - A polypeptide sequence.

        Returns :
        -- Seq - A dna sequence
        """
        # TODO: Optimize
        # TODO: Insanity check alphabet.
        table = self.back_table
        seq = str(seq)
        trans = [table[amino] for amino in seq]
        return Seq(''.join(trans), dna_alphabet)

    #TODO: translate_orf(self, seq, start) ?
    #TODO: translate_to_stop(self, seq, frame) ?
    #TODO: translate_all_frames(self,seq) -> 6 translations.

    def __repr__(self):
        parts = [
            'GeneticCode( %d, "' % self.ident,
            self.description,
            '", \n',
            ' amino_acid = "',
            self.amino_acid,
            '",\n',
            ' start = "',
            self.start,
            '",\n',
            ' base1 = "',
            self.base1,
            '",\n',
            ' base2 = "',
            self.base2,
            '",\n',
            ' base3 = "',
            self.base3,
            '" )',
        ]
        return ''.join(parts)

    def __str__(self):
        """Returns a text representation of this genetic code."""
        # Inspired by http://bugzilla.open-bio.org/show_bug.cgi?id=1963
        letters = "TCAG"  # Conventional ordering for codon tables.
        parts = []

        if self.ident:
            parts.append('Genetic Code [%d]: ' % self.ident)
        else:
            parts.append('Genetic Code: ')
        parts.append(self.description or '')

        parts.append("\n ")
        parts.append(" ".join([" %s " % c2 for c2 in letters]))

        rule = "\n +" + "+".join(["---------" for c2 in letters]) + "+ "
        parts.append(rule)

        table = self.table

        # One block of rows per first base, one row per third base, and
        # one column per second base.
        for c1 in letters:
            for c3 in letters:
                parts.append('\n ')
                parts.append(c1)
                parts.append(" |")
                for c2 in letters:
                    codon = c1 + c2 + c3
                    parts.append(" " + codon)
                    if codon in self.stop_codons:
                        parts.append(" Stop|")
                    else:
                        amino = table.get(codon, '?')
                        if codon in self.start_codons:
                            parts.append(" %s(s)|" % amino)
                        else:
                            parts.append(" %s |" % amino)
                parts.append(" " + c3)

            parts.append(rule)
        parts.append('\n')
        return ''.join(parts)
# end class GeneticCode
472 | |
473 | |
474 # Data from http://www.ncbi.nlm.nih.gov/projects/collab/FT/index.html#7.5 | |
475 # Aug. 2006 | |
476 # Genetic Code Tables | |
477 # | |
478 # Authority International Sequence Databank Collaboration | |
479 # Contact NCBI | |
480 # Scope /transl_table qualifier | |
481 # URL http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=c | |
# Every table lists the 64 codons in the same order, so the three base
# strings are written once here and shared by all entries. Each row below is
# (identifier, description, amino acids, start codons).
_codon_tables = tuple(
    GeneticCode(
        ident, description, amino_acid, start,
        "TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG",
        "TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG",
        "TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG")
    for ident, description, amino_acid, start in (
        (1, "Standard",
         "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
         "---M---------------M---------------M----------------------------"),

        (2, "Vertebrate Mitochondrial",
         "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG",
         "--------------------------------MMMM---------------M------------"),

        (3, "Yeast Mitochondrial",
         "FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
         "----------------------------------MM----------------------------"),

        (4, "Mold, Protozoan, Coelenterate Mitochondrial & Mycoplasma/Spiroplasma",
         "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
         "--MM---------------M------------MMMM---------------M------------"),

        (5, "Invertebrate Mitochondrial",
         "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG",
         "---M----------------------------MMMM---------------M------------"),

        (6, "Ciliate, Dasycladacean and Hexamita Nuclear",
         "FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
         "-----------------------------------M----------------------------"),

        (9, "Echinoderm and Flatworm Mitochondrial",
         "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
         "-----------------------------------M---------------M------------"),

        (10, "Euplotid Nuclear",
         "FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
         "-----------------------------------M----------------------------"),

        (11, "Bacterial and Plant Plastid",
         "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
         "---M---------------M------------MMMM---------------M------------"),

        (12, "Alternative Yeast Nuclear",
         "FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
         "-------------------M---------------M----------------------------"),

        (13, "Ascidian Mitochondrial",
         "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG",
         "-----------------------------------M----------------------------"),

        (14, "Alternative Flatworm Mitochondrial",
         "FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
         "-----------------------------------M----------------------------"),

        (15, "Blepharisma Nuclear",
         "FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
         "-----------------------------------M----------------------------"),

        (16, "Chlorophycean Mitochondrial",
         "FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
         "-----------------------------------M----------------------------"),

        (21, "Trematode Mitochondrial",
         "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
         "-----------------------------------M---------------M------------"),

        (22, "Scenedesmus obliquus Mitochondrial",
         "FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
         "-----------------------------------M----------------------------"),

        (23, "Thraustochytrium Mitochondrial",
         "FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
         "--------------------------------M--M---------------M------------"),
    )
)
602 | |
603 | |
604 | |
605 |