Mercurial > repos > davidmurphy > codonlogo
comparison corebio/transform.py @ 7:8d676bbd1f2d
Uploaded
| author | davidmurphy |
|---|---|
| date | Mon, 16 Jan 2012 07:03:36 -0500 |
| parents | c55bdc2fb9fa |
| children |
comparison
equal
deleted
inserted
replaced
| 6:4a4aca3d57c9 | 7:8d676bbd1f2d |
|---|---|
| 1 # Copyright (c) 2006 John Gilman | |
| 2 # | |
| 3 # This software is distributed under the MIT Open Source License. | |
| 4 # <http://www.opensource.org/licenses/mit-license.html> | |
| 5 # | |
| 6 # Permission is hereby granted, free of charge, to any person obtaining a | |
| 7 # copy of this software and associated documentation files (the "Software"), | |
| 8 # to deal in the Software without restriction, including without limitation | |
| 9 # the rights to use, copy, modify, merge, publish, distribute, sublicense, | |
| 10 # and/or sell copies of the Software, and to permit persons to whom the | |
| 11 # Software is furnished to do so, subject to the following conditions: | |
| 12 # | |
| 13 # The above copyright notice and this permission notice shall be included | |
| 14 # in all copies or substantial portions of the Software. | |
| 15 # | |
| 16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
| 17 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
| 18 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
| 19 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
| 20 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
| 21 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |
| 22 # THE SOFTWARE. | |
| 23 | |
| 24 """ Transformations of Seqs (alphabetic sequences). | |
| 25 | |
| 26 | |
| 27 | |
| 28 Classes : | |
| 29 - Transform -- Simple transforms of alphabetic strings. | |
| 30 - GeneticCode -- The genetic mapping of dna to protein. | |
| 31 | |
| 32 Functions : | |
| 33 - mask_low_complexity -- Implementation of Seg algorithm to remove low complexity | |
| 34 regions from protein sequences. | |
| 35 | |
| 36 | |
| 37 """ | |
| 38 | |
| 39 | |
| 40 from corebio.data import dna_extended_letters, dna_ambiguity | |
| 41 from corebio.seq import Seq, protein_alphabet, nucleic_alphabet, dna_alphabet | |
| 42 from string import maketrans | |
| 43 from corebio.moremath import log2 , entropy | |
| 44 | |
| 45 __all__ = [ | |
| 46 'Transform', | |
| 47 'mask_low_complexity', | |
| 48 'GeneticCode' | |
| 49 ] | |
| 50 | |
class Transform(object):
    """A character-for-character mapping between alphabetic strings.

    (The class is not called 'Translation', to avoid confusion with the
    biological translation of rna to protein.)

    A Transform is built from a `source` and a `target` sequence; the
    character at each position of `source` is mapped to the character at the
    same position of `target`.  Calling the transform on a sequence returns
    a new sequence of the target's class, carrying the target's alphabet and
    the name and description of the input sequence.

    Example:
        trans = Transform(
            Seq("ACGTRYSWKMBDHVN-acgtUuryswkmbdhvnXx?.~", dna_alphabet),
            Seq("ACGTRYSWKMNNNNN-acgtUuryswkmbnnnnXx?.~", reduced_nucleic_alphabet)
        )
        s0 = Seq("AAAAAV", nucleic_alphabet)
        s1 = trans(s0)
        assert s1.alphabet == reduced_nucleic_alphabet
        assert s1 == Seq("AAAAAN", reduced_nucleic_alphabet)

    Status : Beta
    """

    __slots__ = ["table", "source", "target"]

    def __init__(self, source, target):
        """Remember both sequences and build the translation table.

        Args:
            source -- sequence of characters to map from
            target -- sequence of characters to map to, position by position
        """
        self.source = source
        self.target = target
        self.table = maketrans(source, target)

    def __call__(self, seq):
        """Translate sequence.

        Raises:
            ValueError -- if `seq` is not alphabetic with respect to the
                          source alphabet.
        """
        compatible = self.source.alphabet.alphabetic(seq)
        if not compatible:
            raise ValueError("Incompatable alphabets")

        # str.translate is invoked unbound on purpose -- presumably so that
        # any `translate` method defined by the sequence class itself is
        # bypassed (NOTE(review): confirm against corebio.seq).
        mapped = str.translate(seq, self.table)
        result_class = self.target.__class__
        return result_class(mapped, self.target.alphabet,
                            seq.name, seq.description)
# End class Transform
| 85 | |
# FIXME: Test, document, add to seq.
# Transform that replaces each DNA symbol with its complement.  The IUPAC
# ambiguity codes are complemented as well (R<->Y, K<->M, B<->V, D<->H),
# 'U'/'u' map to 'A'/'a', and S, W, N, X, the gap '-' and the symbols
# '?', '.', '~' map to themselves.
dna_complement = Transform(
    source=Seq("ACGTRYSWKMBDHVN-acgtUuryswkmbdhvnXx?.~", dna_alphabet),
    target=Seq("TGCAYRSWMKVHDBN-tgcaAayrswmkvhdbnXx?.~", dna_alphabet),
)
| 91 | |
| 92 | |
| 93 | |
def _mask_windows(ords, ent, window_indices, width, trigger, extension,
                  mask_ord):
    """Sweep the windows in the given order, masking low complexity ones.

    A window is masked when its entropy is below `trigger`, or when the
    window visited immediately before it was masked and its entropy is below
    `extension`.  `ords` is modified in place.
    """
    prev_segged = False
    for i in window_indices:
        if ent[i] < trigger or (prev_segged and ent[i] < extension):
            for j in range(i, i + width):
                ords[j] = mask_ord
            prev_segged = True
        else:
            prev_segged = False


def mask_low_complexity(seq, width=12, trigger=1.8, extension=2.0, mask='X'):
    """ Mask low complexity regions in protein sequences.

    Uses the method of Seg [1] by Wootton & Federhen [2] to divide a sequence
    into regions of high and low complexity. The sequence is divided into
    overlapping windows. Low complexity windows either have a sequence entropy
    less than the trigger complexity, or have an entropy less than the
    extension complexity and neighbor other low-complexity windows. The
    sequence within low complexity regions is replaced with the mask
    character (default 'X'), and the masked alphabetic sequence is returned.

    The default parameters, width=12, trigger=1.8, extension=2.0, mask='X' are
    suitable for masking protein sequences before a database search. The
    standard default seg parameters are width=12, trigger=2.2, extension=2.5

    Arguments:
        Seq seq         -- An alphabetic sequence
        int width       -- Window width (must be at least 1)
        float trigger   -- Entropy in bits between 0 and 4.3.. ( =log_2(20) )
        float extension -- Entropy in bits between 0 and 4.3.. ( =log_2(20) ),
                           not smaller than trigger
        char mask       -- The mask character (default: 'X')
    Returns :
        Seq -- A masked alphabetic sequence.  If the sequence is shorter than
               the window width it is returned unchanged.
    Raises :
        ValueError -- On invalid arguments
    Refs:
        [1] seg man page:
            http://bioportal.weizmann.ac.il/education/materials/gcg/seg.html
        [2] Wootton & Federhen (Computers and Chemistry 17; 149-163, (1993))
    Authors:
        GEC 2005
    Future :
        - Option to lower case masked symbols.
        - Remove arbitrary restriction to protein.
    """
    lg20 = log2(20)
    if trigger < 0 or trigger > lg20:
        raise ValueError("Invalid trigger complexity: %f" % trigger)
    if extension < 0 or extension > lg20 or extension < trigger:
        raise ValueError("Invalid extension complexity: %f" % extension)
    # A window must contain at least one symbol; a zero-width window has no
    # composition to take the entropy of.
    if width < 1:
        raise ValueError("Invalid width: %d" % width)

    # Too short to hold even a single window: nothing to mask.
    if width > len(seq):
        return seq

    s = seq.ords()
    X = seq.alphabet.ord(mask)

    nwindows = len(seq) - width + 1
    ent = [0] * nwindows
    count = [0] * len(seq.alphabet)

    # Entropy of the first window, then slide one position at a time,
    # updating the symbol counts incrementally.
    for c in s[0:width]:
        count[c] += 1
    ent[0] = entropy(count, 2)

    for i in range(1, nwindows):
        count[s[i - 1]] -= 1
        count[s[i + width - 1]] += 1
        ent[i] = entropy(count, 2)

    # Forward sweep, then the same sweep backwards, so that low complexity
    # regions are extended in both directions.
    _mask_windows(s, ent, range(0, nwindows),
                  width, trigger, extension, X)
    _mask_windows(s, ent, range(nwindows - 1, -1, -1),
                  width, trigger, extension, X)

    return seq.alphabet.chrs(s)
# end mask_low_complexity()
| 181 | |
| 182 | |
class GeneticCode(object):
    """An encoding of amino acids by DNA triplets.

    Example :

        Genetic Code [1]: Standard
                 T         C         A         G
            +---------+---------+---------+---------+
          T | TTT F   | TCT S   | TAT Y   | TGT C   | T
          T | TTC F   | TCC S   | TAC Y   | TGC C   | C
          T | TTA L   | TCA S   | TAA Stop| TGA Stop| A
          T | TTG L(s)| TCG S   | TAG Stop| TGG W   | G
            +---------+---------+---------+---------+
          C | CTT L   | CCT P   | CAT H   | CGT R   | T
          C | CTC L   | CCC P   | CAC H   | CGC R   | C
          C | CTA L   | CCA P   | CAA Q   | CGA R   | A
          C | CTG L(s)| CCG P   | CAG Q   | CGG R   | G
            +---------+---------+---------+---------+
          A | ATT I   | ACT T   | AAT N   | AGT S   | T
          A | ATC I   | ACC T   | AAC N   | AGC S   | C
          A | ATA I   | ACA T   | AAA K   | AGA R   | A
          A | ATG M(s)| ACG T   | AAG K   | AGG R   | G
            +---------+---------+---------+---------+
          G | GTT V   | GCT A   | GAT D   | GGT G   | T
          G | GTC V   | GCC A   | GAC D   | GGC G   | C
          G | GTA V   | GCA A   | GAA E   | GGA G   | A
          G | GTG V   | GCG A   | GAG E   | GGG G   | G
            +---------+---------+---------+---------+

    Codons marked "(s)" are start codons; "Stop" marks stop codons.

    See Also :
        -- http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=c
        -- http://www.ncbi.nlm.nih.gov/projects/collab/FT/index.html#7.5
    Authors:
        JXG, GEC
    """
    # TODO: Explain use of '?' in translated sequence.
    # TODO: Does translate fail with an appropriate exception when fed gaps?
    # TODO: Can back_translate handle gaps?

    def __init__(self, ident, description,
                 amino_acid, start, base1, base2, base3):
        """Create a new GeneticCode.

        Args:
            -- ident - Standard identifier (Or zero). An integer
            -- description
            -- amino acid - A sequence of amino acids and stop codons. e.g.
                "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG"
            -- start - A sequence indicating start codons, e.g.,
                "---M---------------M---------------M----------------------------"
            -- base1 - The first base of each codon. e.g.,
                "TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG"
            -- base2 - The second base of each codon. e.g.,
                "TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG"
            -- base3 - The last base of each codon. e.g.,
                "TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG"

        The i'th codon is base1[i] + base2[i] + base3[i]; it encodes
        amino_acid[i] ('*' for a stop codon) and is a start codon when
        start[i] is 'M'.
        """
        self.ident = ident
        self.description = description

        self.amino_acid = amino_acid
        self.start = start
        self.base1 = base1
        self.base2 = base2
        self.base3 = base3

        stop_codons = []
        start_codons = []
        for i, a in enumerate(amino_acid):
            codon = base1[i] + base2[i] + base3[i]
            if a == '*':
                stop_codons.append(codon)
            if start[i] == 'M':
                start_codons.append(codon)

        self.stop_codons = tuple(stop_codons)
        self.start_codons = tuple(start_codons)

        # Building the full translation table is expensive,
        # so we avoid doing so until necessary.
        self._table = None
        self._back_table = None

    #@staticmethod
    def std_list():
        "Return the collection of standard genetic codes (_codon_tables)."
        return _codon_tables
    std_list = staticmethod(std_list)

    #@staticmethod
    def std():
        "The standard 'universal' genetic code."
        return _codon_tables[0]
    std = staticmethod(std)

    #@staticmethod
    def by_name(name):
        """Find a genetic code in the code list by name or identifier.

        Raises:
            ValueError -- if no table has `name` as ident or description.
        """
        for t in _codon_tables:
            if t.ident == name or t.description == name:
                return t
        raise ValueError("No such translation table: %s" % str(name))
    by_name = staticmethod(by_name)

    def _get_table(self):
        # Built lazily on first access, then cached.
        if self._table is None:
            self._create_table()
        return self._table
    table = property(_get_table, None, "A map between codons and amino acids")

    def _get_back_table(self):
        # Built lazily on first access, then cached.
        if self._back_table is None:
            self._create_table()
        return self._back_table
    back_table = property(_get_back_table, None,
                          "A map between amino acids and codons")

    def _create_table(self):
        """Build and cache both the codon table and the back table.

        Sets self._back_table (amino acid -> codon) and self._table
        (codon, possibly ambiguous or RNA -> amino acid code).
        """
        amino_acid = self.amino_acid
        base1 = self.base1
        base2 = self.base2
        base3 = self.base3

        # Construct a table of unambiguous codon translations
        table = {}
        for i, a in enumerate(amino_acid):
            table[base1[i] + base2[i] + base3[i]] = a

        # Build the back table.  Codons are visited in descending order so
        # that the last assignment for each amino acid -- the one that is
        # kept -- is its alphabetically first codon.
        # items() is copied into a list first because it cannot be sorted
        # in place when it is a dictionary view.
        back_table = {}
        items = list(table.items())
        items.sort()
        items.reverse()
        for codon, residue in items:
            back_table[residue] = codon
        # Unknown / ambiguous amino acid codes map to a fully ambiguous codon.
        back_table['X'] = 'NNN'
        back_table['B'] = 'NNN'
        back_table['Z'] = 'NNN'
        back_table['J'] = 'NNN'
        self._back_table = back_table

        letters = dna_extended_letters + 'U'  # include RNA in table

        # Create a list of all possible codons
        codons = [c1 + c2 + c3
                  for c1 in letters
                  for c2 in letters
                  for c3 in letters]

        # For each ambiguous codon, construct all compatible unambiguous
        # codons.  Translate and collect a set of all possible translated
        # amino acids.  If more than one translation, look for possible
        # amino acid ambiguity codes.
        ltable = {}
        for ambiguous in codons:
            dna = ambiguous.replace('U', 'T')  # Convert rna codon to dna
            seen = {}  # Use dict, because no set in py2.3
            for c1 in dna_ambiguity[dna[0]]:
                for c2 in dna_ambiguity[dna[1]]:
                    for c3 in dna_ambiguity[dna[2]]:
                        seen[table[c1 + c2 + c3]] = ''
            translated = list(seen.keys())
            translated.sort()

            if len(translated) == 1:
                trans = translated[0]
            elif translated == ['D', 'N']:
                trans = 'B'
            elif translated == ['E', 'Q']:
                trans = 'Z'
            elif translated == ['I', 'L']:
                trans = 'J'
            elif '*' in translated:
                trans = '?'
            else:
                trans = 'X'
            ltable[ambiguous] = trans

        self._table = ltable
    # End create tables

    def translate(self, seq, frame=0):
        """Translate a DNA sequence to a polypeptide using full
        IUPAC ambiguities in DNA/RNA and amino acid codes.

        Translation starts at offset `frame` and proceeds one codon at a
        time; an incomplete codon at the end of the sequence is ignored.
        A codon that is not in the table raises KeyError.

        Returns :
            -- Seq - A polypeptide sequence
        """
        # TODO: Optimize.
        # TODO: Insanity check alphabet.
        seq = str(seq)
        table = self.table
        trans = []
        L = len(seq)
        for i in range(frame, L - 2, 3):
            codon = seq[i:i + 3].upper()
            trans.append(table[codon])
        return Seq(''.join(trans), protein_alphabet)

    def back_translate(self, seq):
        """Convert protein back into coding DNA.

        Each amino acid is replaced by the single codon recorded for it in
        the back table.

        Args:
            -- seq - A polypeptide sequence.

        Returns :
            -- Seq - A dna sequence
        """
        # TODO: Optimize
        # TODO: Insanity check alphabet.
        table = self.back_table
        seq = str(seq)
        trans = [table[a] for a in seq]
        return Seq(''.join(trans), dna_alphabet)

    #TODO: translate_orf(self, seq, start) ?
    #TODO: translate_to_stop(self, seq, frame) ?
    #TODO: translate_all_frames(self,seq) -> 6 translations.

    def __repr__(self):
        parts = [
            'GeneticCode( %d, "' % self.ident,
            self.description,
            '", \n',
            ' amino_acid = "', self.amino_acid, '",\n',
            ' start = "', self.start, '",\n',
            ' base1 = "', self.base1, '",\n',
            ' base2 = "', self.base2, '",\n',
            ' base3 = "', self.base3, '" )',
        ]
        return ''.join(parts)

    def __str__(self):
        """Returns a text representation of this genetic code."""
        # Inspired by http://bugzilla.open-bio.org/show_bug.cgi?id=1963
        letters = "TCAG"  # Conventional ordering for codon tables.

        # Every cell is 10 characters wide (" XXX" + 5 characters + "|"),
        # matching a 9-dash rule plus its "+" corner, and the row prefix is
        # laid out so that "|" falls under the "+" of the rules.
        # NOTE(review): the column spacing was chosen here to make the table
        # align; confirm it against the upstream layout if exact output
        # matters.
        rule = "\n    +" + "+".join(["---------" for c2 in letters]) + "+ "

        out = []
        if self.ident:
            out.append('Genetic Code [%d]: ' % self.ident)
        else:
            out.append('Genetic Code: ')
        out.append(self.description or '')

        # Column header: second-base letters centred over their columns.
        out.append("\n     ")
        out.append(" ".join(["    %s    " % c2 for c2 in letters]))
        out.append(rule)

        table = self.table

        for c1 in letters:
            for c3 in letters:
                out.append('\n  ' + c1 + " |")
                for c2 in letters:
                    codon = c1 + c2 + c3
                    out.append(" " + codon)
                    if codon in self.stop_codons:
                        out.append(" Stop|")
                    else:
                        amino = table.get(codon, '?')
                        if codon in self.start_codons:
                            out.append(" %s(s)|" % amino)
                        else:
                            out.append(" %s   |" % amino)
                out.append(" " + c3)
            out.append(rule)
        out.append('\n')
        return ''.join(out)
# end class GeneticCode
| 472 | |
| 473 | |
# Genetic Code Tables
#
# Data from http://www.ncbi.nlm.nih.gov/projects/collab/FT/index.html#7.5
# (Aug. 2006)
#
# Authority    International Sequence Databank Collaboration
# Contact      NCBI
# Scope        /transl_table qualifier
# URL          http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=c
#
# The first entry is the standard code; it is the one returned by
# GeneticCode.std().
_codon_tables = (
    GeneticCode(
        1, "Standard",
        amino_acid="FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
        start="---M---------------M---------------M----------------------------",
        base1="TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG",
        base2="TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG",
        base3="TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG",
    ),

    GeneticCode(
        2, "Vertebrate Mitochondrial",
        amino_acid="FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG",
        start="--------------------------------MMMM---------------M------------",
        base1="TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG",
        base2="TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG",
        base3="TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG",
    ),

    GeneticCode(
        3, "Yeast Mitochondrial",
        amino_acid="FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
        start="----------------------------------MM----------------------------",
        base1="TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG",
        base2="TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG",
        base3="TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG",
    ),

    GeneticCode(
        4, "Mold, Protozoan, Coelenterate Mitochondrial & Mycoplasma/Spiroplasma",
        amino_acid="FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
        start="--MM---------------M------------MMMM---------------M------------",
        base1="TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG",
        base2="TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG",
        base3="TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG",
    ),

    GeneticCode(
        5, "Invertebrate Mitochondrial",
        amino_acid="FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG",
        start="---M----------------------------MMMM---------------M------------",
        base1="TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG",
        base2="TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG",
        base3="TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG",
    ),

    GeneticCode(
        6, "Ciliate, Dasycladacean and Hexamita Nuclear",
        amino_acid="FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
        start="-----------------------------------M----------------------------",
        base1="TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG",
        base2="TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG",
        base3="TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG",
    ),

    GeneticCode(
        9, "Echinoderm and Flatworm Mitochondrial",
        amino_acid="FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
        start="-----------------------------------M---------------M------------",
        base1="TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG",
        base2="TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG",
        base3="TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG",
    ),

    GeneticCode(
        10, "Euplotid Nuclear",
        amino_acid="FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
        start="-----------------------------------M----------------------------",
        base1="TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG",
        base2="TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG",
        base3="TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG",
    ),

    GeneticCode(
        11, "Bacterial and Plant Plastid",
        amino_acid="FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
        start="---M---------------M------------MMMM---------------M------------",
        base1="TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG",
        base2="TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG",
        base3="TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG",
    ),

    GeneticCode(
        12, "Alternative Yeast Nuclear",
        amino_acid="FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
        start="-------------------M---------------M----------------------------",
        base1="TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG",
        base2="TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG",
        base3="TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG",
    ),

    GeneticCode(
        13, "Ascidian Mitochondrial",
        amino_acid="FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG",
        start="-----------------------------------M----------------------------",
        base1="TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG",
        base2="TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG",
        base3="TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG",
    ),

    GeneticCode(
        14, "Alternative Flatworm Mitochondrial",
        amino_acid="FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
        start="-----------------------------------M----------------------------",
        base1="TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG",
        base2="TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG",
        base3="TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG",
    ),

    GeneticCode(
        15, "Blepharisma Nuclear",
        amino_acid="FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
        start="-----------------------------------M----------------------------",
        base1="TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG",
        base2="TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG",
        base3="TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG",
    ),

    GeneticCode(
        16, "Chlorophycean Mitochondrial",
        amino_acid="FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
        start="-----------------------------------M----------------------------",
        base1="TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG",
        base2="TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG",
        base3="TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG",
    ),

    GeneticCode(
        21, "Trematode Mitochondrial",
        amino_acid="FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
        start="-----------------------------------M---------------M------------",
        base1="TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG",
        base2="TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG",
        base3="TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG",
    ),

    GeneticCode(
        22, "Scenedesmus obliquus Mitochondrial",
        amino_acid="FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
        start="-----------------------------------M----------------------------",
        base1="TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG",
        base2="TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG",
        base3="TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG",
    ),

    GeneticCode(
        23, "Thraustochytrium Mitochondrial",
        amino_acid="FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
        start="--------------------------------M--M---------------M------------",
        base1="TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG",
        base2="TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG",
        base3="TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG",
    ),
)
| 602 | |
| 603 | |
| 604 | |
| 605 |
