Mercurial > repos > davidmurphy > codonlogo
diff corebio/transform.py @ 0:c55bdc2fb9fa
Uploaded
author | davidmurphy |
---|---|
date | Thu, 27 Oct 2011 12:09:09 -0400 |
parents | |
children |
line wrap: on
line diff
#  Copyright (c) 2006 John Gilman
#
#  This software is distributed under the MIT Open Source License.
#  <http://www.opensource.org/licenses/mit-license.html>
#
#  Permission is hereby granted, free of charge, to any person obtaining a
#  copy of this software and associated documentation files (the "Software"),
#  to deal in the Software without restriction, including without limitation
#  the rights to use, copy, modify, merge, publish, distribute, sublicense,
#  and/or sell copies of the Software, and to permit persons to whom the
#  Software is furnished to do so, subject to the following conditions:
#
#  The above copyright notice and this permission notice shall be included
#  in all copies or substantial portions of the Software.
#
#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
#  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
#  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
#  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
#  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
#  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
#  THE SOFTWARE.

""" Transformations of Seqs (alphabetic sequences).

Classes :
-   Transform   -- Simple transforms of alphabetic strings.
-   GeneticCode -- The genetic mapping of dna to protein.

Functions :
-   mask_low_complexity -- Implementation of Seg algorithm to remove low
        complexity regions from protein sequences.

Module data :
-   dna_complement -- A Transform mapping each DNA symbol to its complement.
"""


from corebio.data import dna_extended_letters, dna_ambiguity
from corebio.seq import Seq, protein_alphabet, nucleic_alphabet, dna_alphabet
from string import maketrans
from corebio.moremath import log2 , entropy

__all__ = [
    'Transform',
    'mask_low_complexity',
    'GeneticCode'
    ]


class Transform(object) :
    """A translation between alphabetic strings.
    (This class is not called 'Translation' to avoid confusion with the
    biological translation of rna to protein.)

    A Transform maps the i'th character of 'source' onto the i'th character
    of 'target'. Calling the transform on a sequence returns a new sequence
    in the target's alphabet.

    Example:
    (reduced_nucleic_alphabet stands for any alphabet containing the target
    letters; it is not defined in this module.)

    trans = Transform(
        Seq("ACGTRYSWKMBDHVN-acgtUuryswkmbdhvnXx?.~", dna_alphabet),
        Seq("ACGTRYSWKMNNNNN-acgtUuryswkmnnnnnXx?.~", reduced_nucleic_alphabet)
        )
    s0 = Seq("AAAAAV", nucleic_alphabet)
    s1 = trans(s0)
    assert(s1.alphabet == reduced_nucleic_alphabet)
    assert(s1 == Seq("AAAAAN", reduced_nucleic_alphabet))

    Status : Beta
    """

    __slots__ = ["table", "source", "target"]

    def __init__(self, source, target) :
        """Create a transform mapping source[i] to target[i].

        Args:
        -- source - The sequence of characters to translate from. Its
                    alphabet is used to validate the input of __call__.
        -- target - The sequence of replacement characters. Its class and
                    alphabet are used to build the result of __call__.

        The two sequences are handed directly to string.maketrans, which
        requires them to be of equal length.
        """
        self.table = maketrans(source, target)
        self.source = source
        self.target = target

    def __call__(self, seq) :
        """Translate sequence.

        Returns a new sequence of the same class as the target, in the
        target's alphabet, carrying over the name and description of 'seq'.

        Raises :
            ValueError -- If 'seq' is not alphabetic with respect to the
                          source alphabet.
        """
        if not self.source.alphabet.alphabetic(seq) :
            raise ValueError("Incompatable alphabets")
        translated = str.translate(seq, self.table)
        target_class = self.target.__class__
        return target_class(translated, self.target.alphabet,
                            seq.name, seq.description)
# End class Transform


# FIXME: Test, document, add to seq.
# Maps every DNA symbol (including ambiguity codes and U/u) to its complement;
# gap and unknown symbols map to themselves.
dna_complement = Transform(
        Seq("ACGTRYSWKMBDHVN-acgtUuryswkmbdhvnXx?.~", dna_alphabet),
        Seq("TGCAYRSWMKVHDBN-tgcaAayrswmkvhdbnXx?.~", dna_alphabet),
    )


def mask_low_complexity(seq, width =12, trigger=1.8, extension=2.0, mask='X') :
    """ Mask low complexity regions in protein sequences.

    Uses the method of Seg [1] by Wootton & Federhen [2] to divide a sequence
    into regions of high and low complexity. The sequence is divided into
    overlapping windows. Low complexity windows either have a sequence entropy
    less than the trigger complexity, or have an entropy less than the
    extension complexity and neighbor other low-complexity windows. The
    sequence within low complexity regions is replaced with the mask
    character (default 'X'), and the masked alphabetic sequence is returned.

    The default parameters, width=12, trigger=1.8, extension=2.0, mask='X' are
    suitable for masking protein sequences before a database search. The
    standard default seg parameters are width=12, trigger=2.2, extension=2.5

    Arguments:
        Seq seq         -- An alphabetic sequence
        int width       -- Window width (at least 1)
        float trigger   -- Entropy in bits between 0 and 4.3.. ( =log_2(20) )
        float extension -- Entropy in bits between 0 and 4.3.. ( =log_2(20) ),
                           and not less than trigger
        char mask       -- The mask character (default: 'X')
    Returns :
        Seq        -- A masked alphabetic sequence. If the window is wider
                      than the sequence, 'seq' itself is returned unchanged.
    Raises :
        ValueError -- On invalid arguments
    Refs:
        [1] seg man page:
            http://bioportal.weizmann.ac.il/education/materials/gcg/seg.html
        [2] Wootton & Federhen (Computers and Chemistry 17; 149-163, (1993))
    Authors:
        GEC 2005
    Future :
        - Option to lower case masked symbols.
        - Remove arbitrary restriction to protein.
    """

    lg20 = log2(20)
    if trigger<0 or trigger>lg20 :
        raise ValueError("Invalid trigger complexity: %f"% trigger)
    if extension<0 or extension>lg20 or extension<trigger:
        raise ValueError("Invalid extension complexity: %f"% extension)
    # A zero-width window contains no symbols, so its entropy is undefined;
    # reject it here rather than failing later inside the entropy calculation.
    if width<1 :
        raise ValueError("Invalid width: %d"% width)

    # Not even one full window fits: nothing can be masked.
    if width > len(seq) : return seq

    s = seq.ords()
    X = seq.alphabet.ord(mask)

    # Entropy of every window, computed incrementally: sliding the window one
    # position drops one symbol from the counts and adds one.
    nwindows = len(seq)- width +1
    ent = [ 0 for x in range(0, nwindows)]
    count = [ 0 for x in range(0, len(seq.alphabet) )]

    for c in s[0:width] : count[c] +=1
    ent[0] = entropy(count,2)

    for i in range(1, nwindows) :
        count[ s[i-1] ] -= 1
        count[ s[i+width-1] ] +=1
        ent[i] = entropy(count,2)

    # Scan the windows forwards and then backwards, so that a triggered
    # region can be extended in both directions. The entropies were computed
    # before any masking, so the two passes do not influence each other.
    forwards = range(0, nwindows)
    backwards = range(nwindows-1, -1, -1)
    for window_order in (forwards, backwards) :
        prev_segged = False
        for i in window_order :
            if ((prev_segged and ent[i]< extension) or
                ent[i]< trigger) :
                for j in range(0, width) : s[i+j]=X
                prev_segged=True
            else :
                prev_segged = False

    return seq.alphabet.chrs(s)
# end mask_low_complexity()


class GeneticCode(object):
    """An encoding of amino acids by DNA triplets.

    Example :

    Genetic Code [1]: Standard
            T         C         A         G
       +---------+---------+---------+---------+
     T | TTT F   | TCT S   | TAT Y   | TGT C   | T
     T | TTC F   | TCC S   | TAC Y   | TGC C   | C
     T | TTA L   | TCA S   | TAA Stop| TGA Stop| A
     T | TTG L(s)| TCG S   | TAG Stop| TGG W   | G
       +---------+---------+---------+---------+
     C | CTT L   | CCT P   | CAT H   | CGT R   | T
     C | CTC L   | CCC P   | CAC H   | CGC R   | C
     C | CTA L   | CCA P   | CAA Q   | CGA R   | A
     C | CTG L(s)| CCG P   | CAG Q   | CGG R   | G
       +---------+---------+---------+---------+
     A | ATT I   | ACT T   | AAT N   | AGT S   | T
     A | ATC I   | ACC T   | AAC N   | AGC S   | C
     A | ATA I   | ACA T   | AAA K   | AGA R   | A
     A | ATG M(s)| ACG T   | AAG K   | AGG R   | G
       +---------+---------+---------+---------+
     G | GTT V   | GCT A   | GAT D   | GGT G   | T
     G | GTC V   | GCC A   | GAC D   | GGC G   | C
     G | GTA V   | GCA A   | GAA E   | GGA G   | A
     G | GTG V   | GCG A   | GAG E   | GGG G   | G
       +---------+---------+---------+---------+

    A codon marked '(s)' is a start codon of the code.

    In translated sequences, an ambiguous codon whose possible translations
    include a stop codon together with something else is translated as '?'.
    Other ambiguous codons translate to 'B' (D or N), 'Z' (E or Q),
    'J' (I or L), or 'X' (anything else).

    See Also :
    -- http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=c
    -- http://www.ncbi.nlm.nih.gov/projects/collab/FT/index.html#7.5
    Authors:
        JXG, GEC
    """
    # TODO: Does translate fail with an appropriate exception when fed gaps?
    # TODO: Can back_translate handle gaps?

    def __init__(self, ident, description,
            amino_acid, start, base1, base2, base3):
        """Create a new GeneticCode.

        Args:
        -- ident - Standard identifier (Or zero). An integer
        -- description
        -- amino acid - A sequence of amino acids and stop codons. e.g.
            "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG"
        -- start - A sequence indicating start codons, e.g.,
            "---M---------------M---------------M----------------------------"
        -- base1 - The first base of each codon. e.g.,
            "TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG"
        -- base2 - The second base of each codon. e.g.,
            "TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG"
        -- base3 - The last base of each codon. e.g.,
            "TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG"

        The five strings are read in parallel: position i of base1, base2
        and base3 spells a codon, amino_acid[i] is its translation ('*' for
        stop) and start[i] is 'M' if the codon is a start codon.
        """
        self.ident = ident
        self.description = description

        self.amino_acid = amino_acid
        self.start = start
        self.base1 = base1
        self.base2 = base2
        self.base3 = base3

        stop_codons = []
        start_codons = []
        for i, a in enumerate(amino_acid) :
            codon = base1[i] + base2[i] + base3[i]
            if a=='*' : stop_codons.append(codon)
            if start[i] == 'M': start_codons.append(codon)

        self.stop_codons = tuple(stop_codons)
        self.start_codons = tuple(start_codons)

        # Building the full translation table is expensive,
        # so we avoid doing so until necessary.
        self._table = None
        self._back_table = None

    #@staticmethod
    def std_list():
        "Return a list of standard genetic codes."
        return _codon_tables
    std_list = staticmethod(std_list)

    #@staticmethod
    def std():
        "The standard 'universal' genetic code."
        return _codon_tables[0]
    std = staticmethod(std)

    #@staticmethod
    def by_name(name) :
        """Find a genetic code in the code list by name or identifier.

        Args:
        -- name - Either the integer identifier or the description string
                  of one of the standard genetic codes.

        Raises :
            ValueError -- If no standard code matches.
        """
        for t in _codon_tables :
            if t.ident == name or t.description == name :
                return t
        raise ValueError("No such translation table: %s" % str(name) )
    by_name = staticmethod(by_name)

    def _get_table(self) :
        # Built lazily on first access.
        if self._table is None : self._create_table()
        return self._table
    table = property(_get_table, None, "A map between codons and amino acids")

    def _get_back_table(self) :
        # Built lazily on first access, together with the forward table.
        if self._back_table is None :
            self._create_table()
        return self._back_table
    back_table = property(_get_back_table, None, "A map between amino acids and codons")

    def _create_table(self) :
        """Build both the codon -> amino acid table (including ambiguous
        and RNA codons) and the amino acid -> codon back table."""
        amino_acid = self.amino_acid
        base1 = self.base1
        base2 = self.base2
        base3 = self.base3

        # Construct a table of unambiguous codon translations
        table = {}
        for i, a in enumerate(amino_acid) :
            codon = base1[i] + base2[i] + base3[i]
            table[codon] = a

        # Build the back table. Walking the sorted codons in reverse means
        # the alphabetically first codon of each amino acid is written last
        # and therefore wins.
        back_table = {}
        items = list(table.items())
        items.sort()
        for codon, residue in items[::-1] :
            back_table[residue] = codon  # Use first codon, alphabetically.
        # Ambiguous amino acid codes cannot be back translated to one codon.
        back_table['X'] = 'NNN'
        back_table['B'] = 'NNN'
        back_table['Z'] = 'NNN'
        back_table['J'] = 'NNN'
        self._back_table = back_table

        ltable = {}
        letters = dna_extended_letters+'U' # include RNA in table

        # Create a list of all possible codons
        codons = []
        for c1 in letters:
            for c2 in letters:
                for c3 in letters :
                    codons.append( c1+c2+c3)

        # For each ambiguous codon, construct all compatible unambiguous
        # codons. Translate and collect a set of all possible translated
        # amino acids. If more than one translation look for possible amino
        # acid ambiguity codes.
        for C in codons :
            translated = dict() # Use dict, because no set in py2.3
            c = C.replace('U', 'T')  # Convert rna codon to dna
            for c1 in dna_ambiguity[c[0]]:
                for c2 in dna_ambiguity[c[1]]:
                    for c3 in dna_ambiguity[c[2]]:
                        translated[ table[ c1+c2+c3 ] ] = ''
            translated = list(translated.keys())
            translated.sort()
            if len(translated) ==1 :
                trans = translated[0]
            elif translated == ['D','N'] :
                trans = 'B'
            elif translated == ['E','Q'] :
                trans = 'Z'
            elif translated == ['I','L'] :
                trans = 'J'
            elif '*' in translated:
                # Could be a stop codon or an amino acid.
                trans = '?'
            else :
                trans = 'X'
            ltable[C] = trans

        self._table = ltable
    # End create tables

    def translate(self, seq, frame=0) :
        """Translate a DNA sequence to a polypeptide using full
        IUPAC ambiguities in DNA/RNA and amino acid codes.

        Args:
        -- seq - A DNA or RNA sequence. Case is ignored.
        -- frame - Offset of the first codon (default 0). Trailing bases
                   that do not complete a codon are ignored.

        Returns :
        -- Seq - A polypeptide sequence

        A codon that is not in the translation table (for example one
        containing a gap character) raises KeyError.
        """
        # TODO: Optimize.
        # TODO: Insanity check alphabet.
        seq = str(seq)
        table = self.table
        trans = []
        length = len(seq)
        for i in range(frame, length-2, 3) :
            codon = seq[i:i+3].upper()
            trans.append( table[codon])
        return Seq(''.join(trans), protein_alphabet)

    def back_translate(self, seq) :
        """Convert protein back into coding DNA.

        Each amino acid is replaced by its alphabetically first codon; the
        ambiguity codes X, B, Z and J become 'NNN'.

        Args:
        -- seq - A polypeptide sequence.

        Returns :
        -- Seq - A dna sequence
        """
        # TODO: Optimize
        # TODO: Insanity check alphabet.
        table = self.back_table
        seq = str(seq)
        trans = [ table[a] for a in seq]
        return Seq(''.join(trans), dna_alphabet)

    #TODO: translate_orf(self, seq, start) ?
    #TODO: translate_to_stop(self, seq, frame) ?
    #TODO: translate_all_frames(self,seq) -> 6 translations.

    def __repr__(self) :
        """Return the constructor call that recreates this genetic code."""
        parts = []
        parts.append('GeneticCode( %d, "' % self.ident)
        parts.append(self.description)
        parts.append('", \n')
        parts.append(' amino_acid = "')
        parts.append(self.amino_acid)
        parts.append('",\n')
        parts.append(' start = "')
        parts.append(self.start)
        parts.append('",\n')
        parts.append(' base1 = "')
        parts.append(self.base1)
        parts.append('",\n')
        parts.append(' base2 = "')
        parts.append(self.base2)
        parts.append('",\n')
        parts.append(' base3 = "')
        parts.append(self.base3)
        parts.append('" )')
        return ''.join(parts)

    def __str__(self) :
        """Returns a text representation of this genetic code."""
        # Inspired by http://bugzilla.open-bio.org/show_bug.cgi?id=1963
        letters = "TCAG" # Conventional ordering for codon tables.
        border = "\n   +" + "+".join(["---------" for c2 in letters]) + "+ "
        parts = []

        if self.ident :
            parts.append('Genetic Code [%d]: ' % self.ident)
        else :
            parts.append('Genetic Code: ')
        parts.append(self.description or '')

        # Column headers: one 9 character field per column, centred over
        # the 9 character wide cells below.
        parts.append("\n    ")
        parts.append(" ".join( ["    %s    " % c2 for c2 in letters] ))
        parts.append(border)

        table = self.table

        for c1 in letters :
            for c3 in letters :
                parts.append('\n ')
                parts.append(c1)
                parts.append(" |")
                for c2 in letters :
                    codon = c1+c2+c3
                    parts.append(" " + codon)
                    # Every cell is padded to the same width as the
                    # "---------" border segments.
                    if codon in self.stop_codons :
                        parts.append(" Stop|")
                    else :
                        amino = table.get(codon, '?')
                        if codon in self.start_codons :
                            parts.append(" %s(s)|" % amino)
                        else :
                            parts.append(" %s   |" % amino)
                parts.append(" " + c3)

            parts.append(border)
        parts.append('\n')
        return ''.join(parts)
# end class GeneticCode


# Data from http://www.ncbi.nlm.nih.gov/projects/collab/FT/index.html#7.5
# Aug. 2006
# Genetic Code Tables
#
# Authority   International Sequence Databank Collaboration
# Contact     NCBI
# Scope       /transl_table qualifier
# URL         http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=c

# The codon ordering (first, second and third base of each of the 64 codons)
# is the same for every table below, so it is spelled out only once.
_BASE1 = "TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG"
_BASE2 = "TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG"
_BASE3 = "TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG"

_codon_tables = (
    GeneticCode(1, "Standard",
        "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
        "---M---------------M---------------M----------------------------",
        _BASE1, _BASE2, _BASE3),

    GeneticCode(2, "Vertebrate Mitochondrial",
        "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG",
        "--------------------------------MMMM---------------M------------",
        _BASE1, _BASE2, _BASE3),

    GeneticCode(3, "Yeast Mitochondrial",
        "FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
        "----------------------------------MM----------------------------",
        _BASE1, _BASE2, _BASE3),

    GeneticCode(4, "Mold, Protozoan, Coelenterate Mitochondrial & Mycoplasma/Spiroplasma",
        "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
        "--MM---------------M------------MMMM---------------M------------",
        _BASE1, _BASE2, _BASE3),

    GeneticCode(5, "Invertebrate Mitochondrial",
        "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG",
        "---M----------------------------MMMM---------------M------------",
        _BASE1, _BASE2, _BASE3),

    GeneticCode(6, "Ciliate, Dasycladacean and Hexamita Nuclear",
        "FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
        "-----------------------------------M----------------------------",
        _BASE1, _BASE2, _BASE3),

    GeneticCode(9, "Echinoderm and Flatworm Mitochondrial",
        "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
        "-----------------------------------M---------------M------------",
        _BASE1, _BASE2, _BASE3),

    GeneticCode(10, "Euplotid Nuclear",
        "FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
        "-----------------------------------M----------------------------",
        _BASE1, _BASE2, _BASE3),

    GeneticCode(11, "Bacterial and Plant Plastid",
        "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
        "---M---------------M------------MMMM---------------M------------",
        _BASE1, _BASE2, _BASE3),

    GeneticCode(12, "Alternative Yeast Nuclear",
        "FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
        "-------------------M---------------M----------------------------",
        _BASE1, _BASE2, _BASE3),

    GeneticCode(13,"Ascidian Mitochondrial",
        "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG",
        "-----------------------------------M----------------------------",
        _BASE1, _BASE2, _BASE3),

    GeneticCode(14, "Alternative Flatworm Mitochondrial",
        "FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
        "-----------------------------------M----------------------------",
        _BASE1, _BASE2, _BASE3),

    GeneticCode(15, "Blepharisma Nuclear",
        "FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
        "-----------------------------------M----------------------------",
        _BASE1, _BASE2, _BASE3),

    GeneticCode(16, "Chlorophycean Mitochondrial",
        "FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
        "-----------------------------------M----------------------------",
        _BASE1, _BASE2, _BASE3),

    GeneticCode(21, "Trematode Mitochondrial",
        "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
        "-----------------------------------M---------------M------------",
        _BASE1, _BASE2, _BASE3),

    GeneticCode(22, "Scenedesmus obliquus Mitochondrial",
        "FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
        "-----------------------------------M----------------------------",
        _BASE1, _BASE2, _BASE3),

    GeneticCode(23,"Thraustochytrium Mitochondrial",
        "FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
        "--------------------------------M--M---------------M------------",
        _BASE1, _BASE2, _BASE3),
    )