comparison corebio/transform.py @ 7:8d676bbd1f2d

Uploaded
author davidmurphy
date Mon, 16 Jan 2012 07:03:36 -0500
parents c55bdc2fb9fa
children
comparison
equal deleted inserted replaced
6:4a4aca3d57c9 7:8d676bbd1f2d
1 # Copyright (c) 2006 John Gilman
2 #
3 # This software is distributed under the MIT Open Source License.
4 # <http://www.opensource.org/licenses/mit-license.html>
5 #
6 # Permission is hereby granted, free of charge, to any person obtaining a
7 # copy of this software and associated documentation files (the "Software"),
8 # to deal in the Software without restriction, including without limitation
9 # the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 # and/or sell copies of the Software, and to permit persons to whom the
11 # Software is furnished to do so, subject to the following conditions:
12 #
13 # The above copyright notice and this permission notice shall be included
14 # in all copies or substantial portions of the Software.
15 #
16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 # THE SOFTWARE.
23
24 """ Transformations of Seqs (alphabetic sequences).
25
26
27
28 Classes :
29 - Transform -- Simple transforms of alphabetic strings.
30 - GeneticCode -- The genetic mapping of dna to protein.
31
32 Functions :
33 - mask_low_complexity -- Implementation of Seg algorithm to remove low complexity
34 regions from protein sequences.
35
36
37 """
38
39
40 from corebio.data import dna_extended_letters, dna_ambiguity
41 from corebio.seq import Seq, protein_alphabet, nucleic_alphabet, dna_alphabet
42 from string import maketrans
43 from corebio.moremath import log2 , entropy
44
# Names exported by a star-import of this module.
__all__ = ['Transform', 'mask_low_complexity', 'GeneticCode']
50
class Transform(object):
    """A letter-for-letter mapping between two alphabetic strings.

    (The class is named 'Transform' rather than 'Translation' to avoid
    confusion with the biological translation of rna to protein.)

    `source` and `target` are sequences of equal length: the letter at
    position i of `source` is mapped to the letter at position i of
    `target`. Calling the transform with a sequence returns a new sequence
    built with the class and the alphabet of `target`, carrying over the
    name and description of the input.

    Example:
        trans = Transform(
            Seq("ACGTRYSWKMBDHVN-acgtUuryswkmbdhvnXx?.~", dna_alphabet),
            Seq("ACGTRYSWKMNNNNN-acgtUuryswkmnnnnnXx?.~",
                reduced_nucleic_alphabet)
        )
        s0 = Seq("AAAAAV", nucleic_alphabet)
        s1 = trans(s0)
        assert(s1.alphabet == reduced_nucleic_alphabet)
        assert(s1 == Seq("AAAAAN", reduced_nucleic_alphabet))

    Status : Beta
    """

    __slots__ = ["table", "source", "target"]

    def __init__(self, source, target):
        """Build the character table that maps `source` onto `target`."""
        self.source = source
        self.target = target
        self.table = maketrans(source, target)

    def __call__(self, seq):
        """Return `seq` with every letter mapped through the table.

        Raises :
            ValueError -- If `seq` is not alphabetic with respect to the
                          alphabet of the source sequence.
        """
        if not self.source.alphabet.alphabetic(seq):
            raise ValueError("Incompatable alphabets")

        mapped = str.translate(seq, self.table)
        # The result takes its type and alphabet from the target sequence,
        # but keeps the identity (name, description) of the input.
        result_type = self.target.__class__
        return result_type(
            mapped, self.target.alphabet, seq.name, seq.description)
# End class Transform
85
# FIXME: Test, document, add to seq.
# Each symbol of the first string is replaced by the symbol at the same
# position in the second: bases and two/three-base ambiguity codes swap with
# their complements (in both cases), 'U'/'u' become 'A'/'a', and the
# remaining symbols map to themselves.
_complement_source = Seq("ACGTRYSWKMBDHVN-acgtUuryswkmbdhvnXx?.~", dna_alphabet)
_complement_target = Seq("TGCAYRSWMKVHDBN-tgcaAayrswmkvhdbnXx?.~", dna_alphabet)
dna_complement = Transform(_complement_source, _complement_target)
91
92
93
def _mask_low_windows(ords, ent, window_order, width, trigger, extension,
                      mask_ord):
    """Overwrite the low complexity windows of `ords` with `mask_ord`, in place.

    Windows are visited in the order given by `window_order`. A window is
    masked when its entropy is below `trigger`, or when the previously
    visited window was masked and its entropy is below `extension`.
    """
    prev_segged = False
    for i in window_order:
        prev_segged = (ent[i] < trigger or
                       (prev_segged and ent[i] < extension))
        if prev_segged:
            for j in range(i, i + width):
                ords[j] = mask_ord


def mask_low_complexity(seq, width=12, trigger=1.8, extension=2.0, mask='X'):
    """ Mask low complexity regions in protein sequences.

    Uses the method of Seg [1] by Wootton & Federhen [2] to divide a sequence
    into regions of high and low complexity. The sequence is divided into
    overlapping windows. Low complexity windows either have a sequence entropy
    less than the trigger complexity, or have an entropy less than the
    extension complexity and neighbor other low-complexity windows. The
    sequence within low complexity regions is replaced with the mask
    character (default 'X'), and the masked alphabetic sequence is returned.

    The default parameters, width=12, trigger=1.8, extension=2.0, mask='X' are
    suitable for masking protein sequences before a database search. The
    standard default seg parameters are width=12, trigger=2.2, extension=2.5

    Arguments:
        Seq seq         -- An alphabetic sequence
        int width       -- Window width; must be at least 1
        float trigger   -- Entropy in bits between 0 and 4.3.. ( =log_2(20) )
        float extension -- Entropy in bits between 0 and 4.3.. ( =log_2(20) );
                           may not be smaller than the trigger
        char mask       -- The mask character (default: 'X')
    Returns :
        Seq -- A masked alphabetic sequence. If the window is wider than the
               sequence, the input sequence is returned unchanged.
    Raises :
        ValueError -- On invalid arguments
    Refs:
        [1] seg man page:
            http://bioportal.weizmann.ac.il/education/materials/gcg/seg.html
        [2] Wootton & Federhen (Computers and Chemistry 17; 149-163, (1993))
    Authors:
        GEC 2005
    Future :
        - Option to lower case masked symbols.
        - Remove arbitary restriction to protein.
    """
    # A window has to hold at least one symbol: an empty window has no
    # meaningful entropy, so zero is rejected along with negative widths.
    if width <= 0:
        raise ValueError("Invalid width: %d" % width)

    lg20 = log2(20)
    if trigger < 0 or trigger > lg20:
        raise ValueError("Invalid trigger complexity: %f" % trigger)
    if extension < 0 or extension > lg20 or extension < trigger:
        raise ValueError("Invalid extension complexity: %f" % extension)

    # Not even one complete window fits: nothing to mask.
    if width > len(seq):
        return seq

    s = seq.ords()
    X = seq.alphabet.ord(mask)

    nwindows = len(seq) - width + 1
    ent = [0] * nwindows
    count = [0] * len(seq.alphabet)

    # Symbol counts, and hence entropy, of the first window ...
    for c in s[0:width]:
        count[c] += 1
    ent[0] = entropy(count, 2)

    # ... then slide the window one position at a time, dropping the symbol
    # that leaves on the left and adding the one that enters on the right.
    for i in range(1, nwindows):
        count[s[i - 1]] -= 1
        count[s[i + width - 1]] += 1
        ent[i] = entropy(count, 2)

    # All entropies were computed from the unmasked symbols above, so the two
    # passes below only differ in the direction low complexity is extended.
    _mask_low_windows(s, ent, range(0, nwindows),
                      width, trigger, extension, X)
    # Redo, only backwards
    _mask_low_windows(s, ent, range(nwindows - 1, -1, -1),
                      width, trigger, extension, X)

    return seq.alphabet.chrs(s)
# end mask_low_complexity()
181
182
class GeneticCode(object):
    """An encoding of amino acids by DNA triplets.

    A code is described by five parallel strings (amino acid, start flag and
    the three codon bases); position i of each string describes one codon.
    The lookup tables used by translate() and back_translate() are built
    lazily, the first time they are needed.

    Example :

    Genetic Code [1]: Standard
    T C A G
    +---------+---------+---------+---------+
    T | TTT F | TCT S | TAT Y | TGT C | T
    T | TTC F | TCC S | TAC Y | TGC C | C
    T | TTA L | TCA S | TAA Stop| TGA Stop| A
    T | TTG L(s)| TCG S | TAG Stop| TGG W | G
    +---------+---------+---------+---------+
    C | CTT L | CCT P | CAT H | CGT R | T
    C | CTC L | CCC P | CAC H | CGC R | C
    C | CTA L | CCA P | CAA Q | CGA R | A
    C | CTG L(s)| CCG P | CAG Q | CGG R | G
    +---------+---------+---------+---------+
    A | ATT I | ACT T | AAT N | AGT S | T
    A | ATC I | ACC T | AAC N | AGC S | C
    A | ATA I | ACA T | AAA K | AGA R | A
    A | ATG M(s)| ACG T | AAG K | AGG R | G
    +---------+---------+---------+---------+
    G | GTT V | GCT A | GAT D | GGT G | T
    G | GTC V | GCC A | GAC D | GGC G | C
    G | GTA V | GCA A | GAA E | GGA G | A
    G | GTG V | GCG A | GAG E | GGG G | G
    +---------+---------+---------+---------+


    See Also :
    -- http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=c
    -- http://www.ncbi.nlm.nih.gov/projects/collab/FT/index.html#7.5
    Authors:
        JXG, GEC
    """
    # TODO: Does translate fails with aproriate execption when fed gaps?
    # TODO: Can back_translate handle gaps?

    def __init__(self, ident, description,
                 amino_acid, start, base1, base2, base3):
        """Create a new GeneticCode.

        Args:
        -- ident - Standard identifier (Or zero). An integer
        -- description
        -- amino acid - A sequence of amino acids and stop codons. e.g.
        "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG"
        -- start - A sequence indicating start codons, e.g.,
        "---M---------------M---------------M----------------------------"
        -- base1 - The first base of each codon. e.g.,
        "TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG"
        -- base2 - The second base of each codon. e.g.,
        "TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG"
        -- base3 - The last base of each codon. e.g.,
        "TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG"
        """
        self.ident = ident
        self.description = description

        self.amino_acid = amino_acid
        self.start = start
        self.base1 = base1
        self.base2 = base2
        self.base3 = base3

        # Collect the codons flagged as stops ('*' in amino_acid) and as
        # starts ('M' in start), in the order the codons are listed.
        stop_codons = []
        start_codons = []
        for i, a in enumerate(amino_acid):
            codon = base1[i] + base2[i] + base3[i]
            if a == '*':
                stop_codons.append(codon)
            if start[i] == 'M':
                start_codons.append(codon)

        self.stop_codons = tuple(stop_codons)
        self.start_codons = tuple(start_codons)

        # Building the full translation table is expensive,
        # so we avoid doing so until necessary.
        self._table = None
        self._back_table = None

    #@staticmethod
    def std_list():
        "Return a list of standard genetic codes."
        return _codon_tables
    std_list = staticmethod(std_list)

    #@staticmethod
    def std():
        "The standard 'universal' genetic code."
        return _codon_tables[0]
    std = staticmethod(std)

    #@staticmethod
    def by_name(name):
        """Find a genetic code in the code list by name or identifier.

        Raises ValueError if no code has `name` as its identifier or
        description.
        """
        for t in _codon_tables:
            if t.ident == name or t.description == name:
                return t
        raise ValueError("No such translation table: %s" % str(name))
    by_name = staticmethod(by_name)

    def _get_table(self):
        if self._table is None:
            self._create_table()
        return self._table
    table = property(_get_table, None, "A map between codons and amino acids")

    def _get_back_table(self):
        # Back translation only needs the 64 unambiguous codons, so the
        # expensive table of ambiguous codons is not built here.
        if self._back_table is None:
            self._create_back_table()
        return self._back_table
    back_table = property(_get_back_table, None, "A map between amino acids and codons")

    def _codon_map(self):
        """Return a dict mapping each unambiguous codon to its amino acid."""
        base1 = self.base1
        base2 = self.base2
        base3 = self.base3
        codon_map = {}
        for i, a in enumerate(self.amino_acid):
            codon_map[base1[i] + base2[i] + base3[i]] = a
        return codon_map

    def _create_back_table(self, codon_map=None):
        """Build the amino acid -> codon table used by back_translate()."""
        if codon_map is None:
            codon_map = self._codon_map()

        # Copy the items into a real list before sorting: dict.items() is
        # not guaranteed to return something that can be sorted in place.
        pairs = list(codon_map.items())
        pairs.sort()
        pairs.reverse()

        # Walking the codons from last to first means the entry left
        # standing for each amino acid is its first codon, alphabetically.
        back_table = {}
        for codon, amino in pairs:
            back_table[amino] = codon

        # Unknown or ambiguous amino acids cannot be pinned to one codon.
        for ambiguous in ('X', 'B', 'Z', 'J'):
            back_table[ambiguous] = 'NNN'
        self._back_table = back_table

    def _create_table(self):
        """Build both lookup tables, including every ambiguous DNA/RNA codon."""
        # Construct a table of unambiguous codon translations
        table = self._codon_map()

        # Build the back table.
        self._create_back_table(table)

        letters = dna_extended_letters + 'U'  # include RNA in table

        # Amino acid ambiguity codes for pairs of possible translations.
        pair_codes = {('D', 'N'): 'B', ('E', 'Q'): 'Z', ('I', 'L'): 'J'}

        # For each ambiguous codon, construct all compatible unambiguous codons.
        # Translate and collect a set of all possible translated amino acids.
        # If more than one translation look for possible amino acid ambiguity
        # codes.
        ltable = {}
        for l1 in letters:
            for l2 in letters:
                for l3 in letters:
                    C = l1 + l2 + l3
                    c = C.replace('U', 'T')  # Convert rna codon to dna

                    translated = dict()  # Use dict, because no set in py2.3
                    for c1 in dna_ambiguity[c[0]]:
                        for c2 in dna_ambiguity[c[1]]:
                            for c3 in dna_ambiguity[c[2]]:
                                translated[table[c1 + c2 + c3]] = ''
                    translated = list(translated.keys())
                    translated.sort()

                    if len(translated) == 1:
                        trans = translated[0]
                    elif tuple(translated) in pair_codes:
                        trans = pair_codes[tuple(translated)]
                    elif '*' in translated:
                        # May or may not be a stop codon.
                        trans = '?'
                    else:
                        trans = 'X'
                    ltable[C] = trans

        self._table = ltable
    # End create tables

    def translate(self, seq, frame=0):
        """Translate a DNA sequence to a polypeptide using full
        IUPAC ambiguities in DNA/RNA and amino acid codes.

        Translation starts at offset `frame` and proceeds one complete codon
        at a time; a trailing partial codon is ignored. Lower case input is
        accepted. An ambiguous codon that could be either a stop codon or an
        amino acid is translated to '?'; other ambiguous codons become 'B',
        'Z', 'J' or 'X'.

        Returns :
        -- Seq - A polypeptide sequence
        """
        # TODO: Optimize.
        # TODO: Insanity check alphabet.
        seq = str(seq).upper()
        table = self.table
        trans = [table[seq[i:i + 3]] for i in range(frame, len(seq) - 2, 3)]
        return Seq(''.join(trans), protein_alphabet)

    def back_translate(self, seq):
        """Convert protein back into coding DNA.

        Each amino acid is replaced by its alphabetically first codon;
        'X', 'B', 'Z' and 'J' are replaced by 'NNN'.

        Args:
        -- seq - A polypeptide sequence.

        Returns :
        -- Seq - A dna sequence
        """
        # TODO: Optimzie
        # TODO: Insanity check alphabet.
        table = self.back_table
        trans = [table[a] for a in str(seq)]
        return Seq(''.join(trans), dna_alphabet)

    #TODO: translate_orf(self, seq, start) ?
    #TODO: translate_to_stop(self, seq, frame) ?
    #TODO: translate_all_frames(self,seq) -> 6 translations.

    def __repr__(self):
        # Collect whole pieces and join once, rather than growing a list
        # one character at a time.
        parts = [
            'GeneticCode( %d, "' % self.ident,
            self.description,
            '", \n',
            ' amino_acid = "', self.amino_acid, '",\n',
            ' start = "', self.start, '",\n',
            ' base1 = "', self.base1, '",\n',
            ' base2 = "', self.base2, '",\n',
            ' base3 = "', self.base3, '" )',
        ]
        return ''.join(parts)

    def __str__(self):
        """Returns a text representation of this genetic code."""
        # Inspired by http://bugzilla.open-bio.org/show_bug.cgi?id=1963
        # NOTE(review): the padding inside the literals below is reproduced
        # from the text under review -- confirm the column alignment.
        letters = "TCAG"  # Conventional ordering for codon tables.
        rule = "+".join(["---------" for c2 in letters])
        parts = []

        if self.ident:
            parts.append('Genetic Code [%d]: ' % self.ident)
        else:
            parts.append('Genetic Code: ')
        parts.append(self.description or '')

        # Column header (second base) followed by the top rule.
        parts.append("\n ")
        parts.append(" ".join([" %s " % c2 for c2 in letters]))

        parts.append("\n +")
        parts.append(rule + "+ ")

        table = self.table

        # One group of four rows per first base; within a group one row per
        # third base and one cell per second base.
        for c1 in letters:
            for c3 in letters:
                parts.append('\n ')
                parts.append(c1)
                parts.append(" |")
                for c2 in letters:
                    codon = c1 + c2 + c3
                    parts.append(" " + codon)
                    if codon in self.stop_codons:
                        parts.append(" Stop|")
                    else:
                        amino = table.get(codon, '?')
                        if codon in self.start_codons:
                            parts.append(" %s(s)|" % amino)
                        else:
                            parts.append(" %s |" % amino)
                parts.append(" " + c3)

            parts.append("\n +")
            parts.append(rule)
            parts.append("+ ")
        parts.append('\n')
        return ''.join(parts)
# end class GeneticCode
472
473
# Data from http://www.ncbi.nlm.nih.gov/projects/collab/FT/index.html#7.5
# Aug. 2006
# Genetic Code Tables
#
# Authority International Sequence Databank Collaboration
# Contact NCBI
# Scope /transl_table qualifier
# URL http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=c

# Every table below lists its 64 codons in the same order, so the three base
# strings are written once and shared. Each entry is then: identifier,
# description, amino acids ('*' = stop) and start codon flags ('M' = start).
_BASE1 = "TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG"
_BASE2 = "TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG"
_BASE3 = "TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG"

_codon_tables = (
    GeneticCode(
        1, "Standard",
        "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
        "---M---------------M---------------M----------------------------",
        _BASE1, _BASE2, _BASE3),

    GeneticCode(
        2, "Vertebrate Mitochondrial",
        "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG",
        "--------------------------------MMMM---------------M------------",
        _BASE1, _BASE2, _BASE3),

    GeneticCode(
        3, "Yeast Mitochondrial",
        "FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
        "----------------------------------MM----------------------------",
        _BASE1, _BASE2, _BASE3),

    GeneticCode(
        4, "Mold, Protozoan, Coelenterate Mitochondrial & Mycoplasma/Spiroplasma",
        "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
        "--MM---------------M------------MMMM---------------M------------",
        _BASE1, _BASE2, _BASE3),

    GeneticCode(
        5, "Invertebrate Mitochondrial",
        "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG",
        "---M----------------------------MMMM---------------M------------",
        _BASE1, _BASE2, _BASE3),

    GeneticCode(
        6, "Ciliate, Dasycladacean and Hexamita Nuclear",
        "FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
        "-----------------------------------M----------------------------",
        _BASE1, _BASE2, _BASE3),

    GeneticCode(
        9, "Echinoderm and Flatworm Mitochondrial",
        "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
        "-----------------------------------M---------------M------------",
        _BASE1, _BASE2, _BASE3),

    GeneticCode(
        10, "Euplotid Nuclear",
        "FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
        "-----------------------------------M----------------------------",
        _BASE1, _BASE2, _BASE3),

    GeneticCode(
        11, "Bacterial and Plant Plastid",
        "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
        "---M---------------M------------MMMM---------------M------------",
        _BASE1, _BASE2, _BASE3),

    GeneticCode(
        12, "Alternative Yeast Nuclear",
        "FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
        "-------------------M---------------M----------------------------",
        _BASE1, _BASE2, _BASE3),

    GeneticCode(
        13, "Ascidian Mitochondrial",
        "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG",
        "-----------------------------------M----------------------------",
        _BASE1, _BASE2, _BASE3),

    GeneticCode(
        14, "Alternative Flatworm Mitochondrial",
        "FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
        "-----------------------------------M----------------------------",
        _BASE1, _BASE2, _BASE3),

    GeneticCode(
        15, "Blepharisma Nuclear",
        "FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
        "-----------------------------------M----------------------------",
        _BASE1, _BASE2, _BASE3),

    GeneticCode(
        16, "Chlorophycean Mitochondrial",
        "FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
        "-----------------------------------M----------------------------",
        _BASE1, _BASE2, _BASE3),

    GeneticCode(
        21, "Trematode Mitochondrial",
        "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
        "-----------------------------------M---------------M------------",
        _BASE1, _BASE2, _BASE3),

    GeneticCode(
        22, "Scenedesmus obliquus Mitochondrial",
        "FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
        "-----------------------------------M----------------------------",
        _BASE1, _BASE2, _BASE3),

    GeneticCode(
        23, "Thraustochytrium Mitochondrial",
        "FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
        "--------------------------------M--M---------------M------------",
        _BASE1, _BASE2, _BASE3),
)
602
603
604
605