# HG changeset patch
# User earlhaminst
# Date 1481817865 18000
# Node ID 794a6e864a96206f80099a9ad7c42ba9a480debd
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee commit 230ae552ddeb1bfdef3a09becaa5c6d373529a05-dirty
diff -r 000000000000 -r 794a6e864a96 filter_by_fasta_ids.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/filter_by_fasta_ids.py Thu Dec 15 11:04:25 2016 -0500
@@ -0,0 +1,108 @@
+#!/usr/bin/env python
+""" A script to build specific fasta databases """
+from __future__ import print_function
+
+
+import logging
+import sys
+
+
+# ===================================== Iterator ===============================
+class Sequence:
+ ''' Holds protein sequence information '''
+ def __init__(self):
+ self.header = ""
+ self.sequence_parts = []
+
+ def get_sequence(self):
+ return "".join([line.rstrip().replace('\n', '').replace('\r', '') for line in self.sequence_parts])
+
+
+class FASTAReader:
+ """
+ FASTA db iterator. Returns a single FASTA sequence object.
+ """
+ def __init__(self, fasta_name):
+ self.fasta_file = open(fasta_name)
+ self.next_line = self.fasta_file.readline()
+
+ def __iter__(self):
+ return self
+
+ def __next__(self):
+ ''' Iteration '''
+ # while True:
+ # line = self.fasta_file.readline()
+ # if not line:
+ # raise StopIteration
+ # if line[0] == '>':
+ # break
+ next_line = self.next_line
+ if not next_line:
+ raise StopIteration
+
+ seq = Sequence()
+ seq.header = next_line.rstrip().replace('\n', '').replace('\r', '')
+
+ next_line = self.fasta_file.readline()
+ while next_line and next_line[0] != '>':
+ # tail = self.fasta_file.tell()
+ # line = self.fasta_file.readline()
+ # if not line:
+ # break
+ # if line[0] == '>':
+ # self.fasta_file.seek(tail)
+ # break
+ seq.sequence_parts.append(next_line)
+ next_line = self.fasta_file.readline()
+ self.next_line = next_line
+ return seq
+
+ # Python 2/3 compat
+ next = __next__
+# ==============================================================================
+
+
+def target_match(target, search_entry):
+ ''' Matches '''
+ search_entry = search_entry.upper()
+ for atarget in target:
+ if search_entry.find(atarget) > -1:
+ return atarget
+ return None
+
+
+def main():
+ ''' the main function'''
+ logging.basicConfig(filename='filter_fasta_log',
+ level=logging.INFO,
+ format='%(asctime)s :: %(levelname)s :: %(message)s',)
+
+ used_sequences = set()
+ work_summary = {'wanted': 0, 'found': 0, 'duplicates': 0}
+ targets = []
+
+ f_target = open(sys.argv[1])
+ for line in f_target.readlines():
+ targets.append(">%s" % line.strip().upper())
+ f_target.close()
+
+ work_summary['wanted'] = len(targets)
+ homd_db = FASTAReader(sys.argv[2])
+
+ # output = open(sys.argv[3], "w")
+ for entry in homd_db:
+ target_matched_results = target_match(targets, entry.header)
+ if target_matched_results:
+ work_summary['found'] += 1
+ targets.remove(target_matched_results)
+ sequence = entry.get_sequence()
+ used_sequences.add(sequence)
+ print(entry.header)
+ print(sequence)
+ for parm, count in work_summary.items():
+ logging.info('%s ==> %d', parm, count)
+
+
+if __name__ == "__main__":
+ main()
diff -r 000000000000 -r 794a6e864a96 t_coffee.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/t_coffee.xml Thu Dec 15 11:04:25 2016 -0500
@@ -0,0 +1,210 @@
+
+ multiple sequence alignment
+
+ t_coffee
+
+
+
+
+
+ t_coffee -version | grep Version
+
+
+ '$cigar'
+ #end if
+]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 'cigar' in outputs
+
+
+ 'clustalw_aln' in outputs
+
+
+ 'dnd' in outputs
+
+
+ 'fasta_aln' in outputs
+
+
+ 'fasta_seq' in outputs
+
+
+ 'msf_aln' in outputs
+
+
+ 'phylip' in outputs
+
+
+ 'pir_aln' in outputs
+
+
+ 'pir_seq' in outputs
+
+
+ 'score_ascii' in outputs
+
+
+ 'score_html' in outputs
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**What it does**
+
+This tool is a wrapper for the T-Coffee multiple sequence alignment suite. The input is a set of sequences in FASTA format. Apart from running on the complete FASTA input, it can also run on a subset of sequences by providing a list of the FASTA IDs.
+
+This wrapper offers selected advanced T-Coffee options like the selection of the alignment methods to use: ''Pairwise Structural Method'', ''Multiple Sequence Alignment Methods'' or ''Pairwise Sequence Alignment Methods''.
+
+The T-Coffee documentation can be found at http://www.tcoffee.org/Projects/tcoffee/ .
+
+**Example**
+
+Suppose you have 5 sequences in FASTA format::
+
+ >1aboA
+ NLFVALYDFVASGDNTLSITKGEKLRVLGYNHNGEWCEAQTKNGQGWVPS
+ NYITPVN
+ >1ycsB
+ KGVIYALWDYEPQNDDELPMKEGDCMTIIHREDEDEIEWWWARLNDKEGY
+ VPRNLLGLYP
+ >1pht
+ GYQYRALYDYKKEREEDIDLHLGDILTVNKGSLVALGFSDGQEARPEEIG
+ WLNGYNETTGERGDFPGTYVEYIGRKKISP
+ >1vie
+ DRVRKKSGAAWQGQIVGWYCTNLTPEGYAVESEAHPGSVQIYPVAALERI
+ N
+ >1ihvA
+ NFRVYYRDSRDPVWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRD
+
+By selecting "Yes" in output fasta_aln in the wrapper, the user will obtain the multiple alignment in FASTA format::
+
+ >1aboA
+ NL-FVA---LYDFVASGDNTLSITKGEKLR-------VLGYN-------H
+ NGEWCEA--QTKN-GQGWVPSNYIT------PVN
+ >1ycsB
+ KGVIYA---LWDYEPQNDDELPMKEGDCMT-------IIHREDE-----D
+ EIEWWWA--RLND-KEGYVPRNLLG------LYP
+ >1pht
+ GYQYRA---LYDYKKEREEDIDLHLGDILTVNKGSLVALGFSDGQEARPE
+ EIGWLNGYNETTG-ERGDFPGTYVEYIGRKKISP
+ >1vie
+ DR-----------VRK--KSGAAWQGQIVGWYCTNLTPEGYAVE------
+ ------S--EAHPGSVQIYPVAALE------RIN
+ >1ihvA
+ NF-RVYYRDSRDPVWKGPA-KLLWKGEGAV-------VIQDN-------S
+ DI--------------KVVPRRKAK-----IIRD
+
+
+ 10.1006/jmbi.2000.4042
+
+
diff -r 000000000000 -r 794a6e864a96 t_coffee_to_cigar.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/t_coffee_to_cigar.pl Thu Dec 15 11:04:25 2016 -0500
@@ -0,0 +1,48 @@
+#!/usr/bin/perl
+#
+use strict;
+use warnings;
+
+# A simple Perl script to convert a FASTA sequence alignment into 2-column, tab-separated output: the first column is the FASTA id and the second is the CIGAR line
+# t_coffee_to_cigar.pl
+
+sub convert_and_print {
+    # Print "<header>\t<CIGAR>\n" for one aligned sequence.
+    #   $header   - FASTA id, without the leading '>'
+    #   $sequence - aligned sequence; '-' marks a gap column
+    my ($header, $sequence) = @_;
+    my $cigar = '';
+
+    # Visit each maximal run of gap or of non-gap characters in order
+    while ($sequence =~ /(-+|[^-]+)/g) {
+        my $run = $1;
+        my $count = length($run);
+        # Gap runs become D, everything else becomes M
+        my $op = (substr($run, 0, 1) eq '-') ? 'D' : 'M';
+
+        # A run of one is written without a count, e.g. "D" rather than "1D"
+        $cigar .= $count if $count > 1;
+        $cigar .= $op;
+    }
+
+    print "$header\t$cigar\n";
+}
+
+my $file1 = $ARGV[0];
+defined $file1 or die "Usage: $0 <alignment.fasta>\n";
+open my $fh1, '<', $file1 or die "Cannot open '$file1': $!\n";
+
+my ($header, $sequence) = ('', '');
+while (my $line = <$fh1>) {
+    $line =~ s/\r?\n\z//;    # strip LF or CRLF so a stray \r is not counted as a match
+    if (substr($line, 0, 1) eq '>') {
+        convert_and_print($header, $sequence) if length $header;
+        $header = substr($line, 1);
+        $sequence = '';
+    } else {
+        $sequence .= $line;
+    }
+}
+close $fh1;
+# Flush the last record; nothing is printed when the input held no header
+convert_and_print($header, $sequence) if length $header;
diff -r 000000000000 -r 794a6e864a96 test-data/cigar.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cigar.tabular Thu Dec 15 11:04:25 2016 -0500
@@ -0,0 +1,3 @@
+ENSMUST00000091291_musmusculus 41M3D64MD473MD7M2D375M3D74M3D771M2D13MD227MD13M7D226M3D58M3D6MD21M3D437M2D33MD76MD1017M12D34M10D14M3D22M4D10M4D107M
+ENSCAFT00000026349_canisfamiliaris 16D26M2D64MD422MD8M2D140M3D195M2D5M8D16M5D52M3D61M6D6M3D672M20D24MD311M8D25M2D58M2D11M2D103M5D17M6D18M5D22M2D10MD37M3D499M3D10M4D30M5D12M4D35M5D6M10D904M4D27M6D40M8D16M3D21M126D
+ENSRNOT00000019267_rattusnorvegicus 75MD14M6D112M6D317MD8M2D40MD7M2D273M4D23M3D16M5D794M3D6M18D315M2D7MD46M2D249MD335M3D142M2D8MD8M2D14MD35MD63M5D16M3D16M4D10M6D1152M3D
diff -r 000000000000 -r 794a6e864a96 test-data/ids.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ids.txt Thu Dec 15 11:04:25 2016 -0500
@@ -0,0 +1,3 @@
+ENSMUST00000091291_musmusculus
+ENSCAFT00000026349_canisfamiliaris
+ENSRNOT00000019267_rattusnorvegicus
diff -r 000000000000 -r 794a6e864a96 test-data/input.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input.fasta Thu Dec 15 11:04:25 2016 -0500
@@ -0,0 +1,14 @@
+>ENSMUST00000091291_musmusculus
+ATGGGCTTCGGGAGAGGATGTGAGACGACGGCTGTGCCATTGCTGGTGGCCGTGGCCGCGTTGCTGGTGGGCACAGCCGGCCACCTGTACCCTGGAGAGGTGTGCCCTGGTATGGACATCCGGAACAACCTGACCAGGCTACATGAGCTGGAGAACTGCTCAGTCATTGAGGGCCATCTGCAGATCCTCCTGATGTTCAAGACCAGACCCGAAGATTTCCGAGACCTCAGTTTCCCCAAACTCATCATGATCACAGATTACCTGCTTCTCTTCCGTGTCTATGGTCTGGAAAGTCTGAAAGACCTCTTCCCAAATCTCACAGTCATCCGAGGCTCCCGTCTCTTCTTCAACTATGCCCTGGTTATCTTCGAGATGGTCCACCTGAAGGAGCTGGGGCTTTATAACCTCATGAACATCACCCGGGGCTCTGTCCGCATCGAGAAGAATAATGAGCTCTGCTACCTGGCCACTATCGACTGGTCCCGTATCCTGGATTCTGTGGAGGACAACTACATTGTACTGAACAAAGATGACAACGAGGAATGTGGGGATGTCTGTCCAGGCACCGCCAAGGGCAAGACCAACTGTCCTGCCACTGTCATCAATGGGCAGTTTGTGGAACGGTGCTGGACACACAGTCATTGTCAGAAAGTTTGCCCAACCATCTGTAAGTCACATGGCTGCACAGCTGAAGGCCTGTGCTGCCACAAAGAGTGCCTGGGCAACTGTTCGGAACCTGATGACCCCACCAAGTGTGTGGCCTGTCGCAACTTCTATCTGGATGGTCAGTGTGTGGAGACCTGCCCGCCACCCTACTATCACTTCCAGGACTGGCGCTGTGTGAACTTCAGCTTCTGCCAAGACCTTCACTTCAAATGCAGGAACTCTCGGAAGCCTGGCTGCCACCAATACGTCATTCACAACAATAAGTGCATCCCCGAGTGCCCGTCTGGCTATACCATGAATTCCAGCAACTTGATGTGCACCCCATGTCTGGGACCCTGCCCTAAGGTCTGCCAAATCCTCGAAGGTGAGAAGACCATTGATTCTGTGACATCTGCCCAGGAGCTCCGAGGCTGCACTGTGATCAACGGTAGCCTGATCATCAACATCCGAGGGGGCAACAACCTGGCAGCTGAGCTGGAGGCTAACCTTGGCCTCATTGAAGAAATTTCGGGATTTCTAAAGATCCGCCGCTCCTATGCTCTGGTATCACTTTCTTTCTTCAGGAAGCTACATCTGATTCGAGGAGAGACCTTGGAAATTGGGAACTATTCTTTTTATGCCTTGGACAACCAGAACCTGAGGCAACTCTGGGACTGGAGCAAACACAACCTCACCATCACTCAGGGCAAGCTCTTCTTCCATTACAACCCGAAACTCTGCTTGTCTGAAATTCACAAGATGGAAGAAGTCTCCGGAACTAAGGGCCGTCAGGAGAGGAACGACATTGCCCTGAAGACCAATGGGGACCAGGCATCGTGTGAAAATGAATTGCTTAAATTTTCTTTCATTCGGACATCTTTTGACAAGATCCTGTTGAGGTGGGAACCCTACTGGCCCCCCGACTTCCGAGATCTCCTGGGATTCATGTTGTTCTACAAAGAGGCCCCTTATCAGAATGTGACAGAGTTTGATGGGCAGGATGCTTGTGGCTCCAACAGCTGGACTGTGGTGGATATTGACCCGCCCCAGAGGTCCAACGACCCCAAGTCTCAGACCCCAAGCCACCCTGGGTGGCTGATGCGGGGCCTCAAACCCTGGACCCAATACGCCATCTTTGTGAAGACCTTGGTTACCTTCTCTGATGAACGGCGGACCTATGGAGCCAAAAGTGATATCATCTATGTGCAAACAGATGCCACTAATCCTTCTGTCCCCCTGGATCCCATATCAGTTTCTAATTCCTCATCTCAGATTATCTTAAAGTGGAAGCCCCCCTCTGACCCCAATGGCAACATCACACACTACCTGGTGTACTGGGAGAGGCAAGCAGAGG
ACAGCGAGCTGTTTGAGCTGGATTATTGTCTCAAAGGGCTGAAGCTCCCTTCACGGACCTGGTCCCCACCCTTTGAGTCTGATGATTCTCAGAAGCACAATCAGAGTGAGTATGACGACTCGGCCAGTGAGTGCTGCTCATGCCCTAAGACTGACTCTCAGATCCTGAAGGAGCTGGAGGAGTCTTCATTCAGGAAGACCTTCGAGGATTACCTGCACAACGTGGTTTTTGTCCCCAGGCCATCCCGAAAGCGAAGATCCCTTGAAGAGGTGGGGAATGTGACAGCCACCACACTCACACTTCCAGATTTCCCCAACGTCTCCTCTACCATTGTGCCCACAAGTCAGGAGGAGCACAGGCCATTTGAGAAAGTGGTGAACAAGGAGTCACTTGTCATCTCTGGCCTGAGACACTTCACTGGGTACCGCATTGAGCTGCAGGCATGCAATCAAGATTCCCCAGATGAGAGGTGCAGTGTGGCTGCCTACGTCAGTGCCCGGACCATGCCTGAAGCTAAGGCAGATGACATCGTTGGCCCTGTGACTCATGAAATCTTTGAGAACAATGTTGTACACTTAATGTGGCAAGAGCCAAAGGAACCTAATGGTCTGATTGTGCTATATGAAGTGAGCTATCGCCGATATGGTGATGAGGAGCTGCACCTCTGTGTCTCCCGGAAGCATTTTGCCCTGGAGCGGGGCTGCAGGCTGCGAGGGCTCTCCCCAGGAAACTACAGTGTTCGAGTCCGGGCTACCTCTCTGGCAGGAAATGGCTCCTGGACAGAACCCACCTATTTTTATGTGACTGATTATTTAGATGTCCCATCAAATATTGCCAAAATTATCATTGGACCCCTCATCTTTGTCTTCCTCTTCAGTGTTGTGATTGGAAGTATTTATCTATTTCTGAGAAAGAGGCAGCCGGATGGGCCAATGGGACCACTGTATGCATCTTCAAACCCTGAGTACCTCAGTGCCAGTGATGTGTTTCCATCTTCTGTGTACGTGCCGGACGAGTGGGAGGTGCCTCGAGAGAAGATCACCCTTCTTCGAGAGCTGGGGCAGGGATCCTTTGGTATGGTGTATGAAGGCAATGCCAAGGATATCATCAAGGGTGAGGCAGAGACCCGTGTTGCGGTTAAGACTGTCAATGAGTCAGCCAGTCTTCGAGAACGGATCGAGTTCCTCAATGAGGCATCAGTCATGAAGGGATTCACCTGCCATCATGTGGTCCGCCTTCTTGGGGTGGTATCCAAAGGACAGCCAACGCTGGTAGTGATGGAATTGATGGCTCATGGAGACCTGAAAAGTCACCTCCGTTCTCTGAGGCCAGATGCTGAGAATAACCCAGGCCGCCCTCCCCCTACCTTGCAAGAAATGATTCAGATGACAGCAGAAATTGCTGATGGCATGGCATACTTGAACGCCAAGAAGTTTGTGCACCGGGACCTGGCAGCTCGAAACTGCATGGTTGCCCATGATTTTACTGTCAAAATTGGAGACTTTGGAATGACAAGGGACATCTACGAGACAGATTACTATCGGAAAGGGGGCAAGGGACTGCTTCCTGTGAGGTGGATGTCACCTGAGTCCCTGAAGGATGGAGTCTTTACTGCTTCTTCTGATATGTGGTCCTTTGGGGTGGTCCTTTGGGAAATCACTAGCCTGGCTGAGCAACCTTATCAAGGCCTGTCTAATGAACAGGTGTTGAAGTTTGTCATGGATGGAGGCTATCTGGATCCCCCTGATAACTGTCCAGAGAGACTCACTGACCTGATGCGCATGTGCTGGCAGTTCAACCCCAAGATGAGGCCAACCTTCCTGGAAATCGTCAACCTGCTCAAGGATGACCTCCACCCCAGCTTTCCAGAAGTTTCCTTCTTCTACAGCGAGGAGAACAAGGCTCCTGAGAGTGAGGAGCTGGAGATGGAGTTTGAAGACATGGAGAATGTCCCGTTGGATCGTTCCTCTCACTGTCAGAGAGAAGAGGCTGGGGGCCGG
GAGGGAGGGTCCTCACTGAGCATCAAACGGACCTATGATGAACACATCCCCTATACCCACATGAATGGGGGCAAGAAGAACGGACGTGTCCTTACCCTGCCAAGGTCAAACCCTTCCTAA
+
+>ENSCAFT00000026349_canisfamiliaris
+ATGGCAGTGCCCAGTCTGTGCCCATGGGTCGCATGCCTGCTGGTGATCCTCCTCTCCTTGGGATTTGGCCTGGACACACTAGAGGTGTGCCCCAGCCTGGACATCCGCTCAGAGGTGGCGGAGCTGCGCCGGCTGGAGAACTGCAGCGTGGTGGAGGGCCATCTGCAGATCCTGCTCATGTTCACGGCCACGGGCGAGGACTTCCGCGGCCTCAGCTTCCCGCGCCTCACTCAGGTCACTGACTACCTGCTGCTCTTCCGCGTGTATGGCCTGGAGAGCCTGCGGGACCTCTTCCCCAACCTCGCGGTCGTGCGTGGCGCCCGCCTCTTCCTGGGTTATGCGCTGGTGGTCTACGAGATGCCGCACCTGCGGGACGTGGGGCTGCCGGCGCTGGGGGCCGTGCTGCGTGGGGCCGTGCGGGTGGAGAAGAACCAGGAGCTCTGTCATCTCTCCACCATCGACTGGGGGCTGCTGCAGCCCTCGCCCAGCGCCAACCACATCGTGGGCAACAAGCTGCGTGAGGAGTGTGCCGACGTGTGCCCTGGTGTGCTGGGTGCCACCGGCCAGCCCTGCGCCAGGACCACCTTCAGTGGGCACACCGACTACAGATGCTGGACCTCCAGCCACTGCCAGAGAGTGTGTCCCTGCCCCCATGGGCTGGCCTGCACAGCTGGGGGTGAGTGCTGCCACACTGAATGCCTGGGGGGCTGCAGCCGGCCGGAAGACCCCCGCGCCTGCGTCGCCTGTCGCCACCTCTACTTCCAGGGTGCCTGCCACCGGGCCTGCCCTCCAGGCACCTACCAGCACGAGTCCTGGCGCTGTGTCACGGCAGAGCGCTGTGCCAGCCTGCGCTCTGTGCCCGGCCGCACCTCCATCTTTGGCATCCACGAGGGCAGCTGCCTGGCCCAGTGCCCTCCGGGCTTCACCCGCAACGGCAGCAGCATGTTCTGCCACAAGTGTGAGGGGCTGTGCCCCAAAGAGTGCAAGGTGGGTACCAAGACCATCGACTCCACGCAGGCGGCACAGGACCTGGCGGGCTGCACCCACGTGGAGGGGAGCCTCATCATCAACCTCCGCCAGGGCTACAACCTAGAGCTGGAGCTGCAACAGAGCCTGGGGCTGATAGAGACCATCACTGGCTTCCTCAAGATCAAGCACTCCTTTGCCCTCGTGTCCCTGGGCTTTTTCAAGAACCTCAAACTCATCCGAGGGGACGCCATGGTGGATGGGAACTACACCCTGTATGTGCTGGACAACCAGAACCTACAGCAGCTGGGGGCCTGGGTGGCTGCGGGGCTCACCATTCCCGTGGGCAAGATATACTTCGCTTTCAACCCTCGCCTCTGCTTGGAGCACATCTACCGCCTGGAAGAGGTGACCGGCACGCGGGGACGGCAAAACAAGGCTGAGATCAACCCCCGCACCAACGGAGACCGCGCCGCCTGCCAAACTCGCACCCTGCGCTTCGTGTCCAACGTGACGCAAGCTGACAGCATCTTGCTGCGCTGGGAGCGCTACGAGCCGCTGGAGGCTCGGGACCTGCTCAGCTTCATCGTGTACTACAAGGAGTCCCCATTCCAGAATGCCACAGAGCACACAGGTCCAGATGCCTGTGGAACCCAGAGCTGGAACCTGCTGGATGTGGAGCTGCCCTTAAGCCGCACCCAAGAACCCGGGGTAACTCTAGCACCCCTCAAGCCCTGGACACAATATGCAGTGTTTGTACGGGCCATCACACTGACCACTGCTGAGGACAGCCCCCACCAAGGAGCCCAGAGCCCCATCGTCTACCTCCGAACCCTGCCTGCGGCGCCCACTGTGCCCCAGGACGTCATCTCCACGTCCAATTCCTCGTCCCACCTGCTCGTGCGCTGGAAGCCACCGATTCAGCGCAACGGGAACATCACCTACTACCTGGTGCTGTGGCAGCGTCTGGCGGAGGACGGCGACCTCTATCTCAACGACTACTGCCACCGCGGCCTGCGGCTGCCCACCAGCA
ACAACGACCCGCGCTTCGACCGCGAGGACGGTGAACTCGAAGCCGAGATGGAGCCGGGCTGCTGCCCTTGCCAGCACCCACCGCCTGGGCAGGTCCTGCCGCCGCTGGAGGCGCAAGAGGCCTCGTTCCAGAAGAAGTTCGAAAACTTCCTGCACAACGCCATCACCATCCCCAAGTCCCCCTGGAAGGTGACGTCCGTCAATAAGAGCCCTCAAAGACATGCGGGGAGGCACCGCCGGGCGGCCGGGGCGCTCCGGCTTGGGGGCAACAGCTCGGATTTCGAGATCCAGGAGGACAAAGTGCCCCGGGAGCGAGCAGTGTTGAGTGGTCTGCGCCACTTTACGGAATATCGTATCGACATCCACGCCTGCAACCACGCGGCTCACACCGTGGGCTGCAGCGCGGCCACGTTCGTCTTCGCGCGCACCATGCCGCACAGAGAAGCTGATGGCATCCCAGGGAAGGTGGCCTGGGAGGCAGCCAGCAAAAGCAGTGTCCTCCTGCGCTGGCTGGAGCCACCTGACCCCAACGGACTCATCCTCAAGTATGAAATCAAGTACCGCCGCTTGGGAGAGGAGGCCACAGTGCTATGTGTGTCCCGCCTACGATATGCCAAATTTGGGGGTGTCCAGCTGGCCCTGCTGCCCCCTGGAAACTACTCCGCCAGAGTTCGGGCAACCTCGCTGGCTGGCAACGGCTCCTGGACAGAAAGTATCGCTTTCTACGTCCCAGGCCCAGAGGAGGAAGACTCCGGGGGGCTGCACGTCCTTCTCACTGTCACCCCCGTGGGGCTCATGCTGTTCATCATTCTTGCCGCCCTCGGTTTCTTCTACGGCAGGAAGAGAAACAGCACCCTCTATGCCTCAGTGAATCCGGAGTACTTCAGCGCCTCTGATATGTACATCCCTGATGAGTGGGAGGTGCCTCGGGAGCAGATCTCCATAATCCGAGAGCTGGGCCAGGGCTCCTTTGGGATGGTATATGAAGGCCTGGCACAAGGACTAGAGGCTGGAGAGGAGTCCACACCCGTGGCCCTGAAGACAGTGAATGAGTTGGCCAGCCCACGAGAACGCATTGAGTTCCTCAAGGAAGCTTCTGTCATGAAGGCATTCAAGTGTCACCATGTGGTACGTCTCCTGGGTGTTGTGTCTCAAGGCCAGCCAACTCTGGTCATCATGGAGTTAATGACTCGTGGGGACCTCAAGAGCCATCTTCGATCTCTGCGGCCTGAGGCAGAGAACAACCCTGGGCTCCCACGGCCAGCACTGGGAGATATGATCCAGATGGCTGGTGAGATTGCAGATGGCATGGCTTACCTCGCTGCCAACAAGTTTGTGCATCGAGACCTGGCAGCCAGAAACTGCATGGTGTCCCAGGACTTCACCGTCAAGATTGGGGACTTCGGGATGACTCGAGACGTGTACGAGACAGACTATTACCGCAAGGGCGGGAAAGGGCTGCTGCCCGTGCGCTGGATGGCCCCCGAGTCCCTCAAAGATGGAATCTTCACCACACATTCGGATGTTTGGTCCTTCGGGGTGGTGCTCTGGGAGATCGTGACCCTAGCTGAACAGCCCTACCAGGGTTTATCCAACGAGCAGGTGCTCAAGTTTGTCATGGATGGTGGGGTCCTGGAGGAGCTGGAGAGCTGTCCCCTTCAGCTGCAAGAACTGATGAGCCGCTGCTGGCAGCAGAACCCACGCCTGCGGCCCACCTTCACCCACATCCTGGACAGCATTCGGGGGGAACTGCGGCCCTCTTTCCGCCTCTTTTCCTTCTATTACAGCCCAGAGTGCCAGGGGAGCCAGGGGGCCCTGCTGCCTAATGCCAAACCCAACTCCCTACCAACCCCAGAAGGGGCTCCCTCAGACTGCATGCCCCAAAATGGGGGTCCAGGGCACTGA
+
+>ENSMUST00000005671_homosapiens
+ATGAAGTCTGGCTCCGGAGGAGGGTCCCCGACCTCGCTGTGGGGGCTCGTGTTTCTCTCCGCCGCGCTCTCTCTCTGGCCGACGAGTGGAGAAATCTGTGGGCCCGGCATTGACATCCGCAACGACTATCAGCAGCTGAAGCGCCTGGAAAACTGCACGGTGATCGAGGGCTTCCTCCACATCCTGCTCATCTCCAAGGCCGAGGACTACCGAAGCTACCGCTTCCCCAAGCTCACCGTCATCACTGAGTACTTGCTGCTCTTCCGAGTCGCTGGCCTCGAGAGCCTGGGAGACCTCTTCCCCAACCTCACAGTCATCCGTGGCTGGAAACTCTTCTACAACTACGCACTGGTCATCTTCGAGATGACCAATCTCAAGGATATTGGGCTTTATAATCTGAGGAACATTACTCGGGGGGCCATCAGGATTGAGAAGAACGCCGACCTCTGTTACCTCTCCACCATAGACTGGTCTCTCATCTTGGATGCGGTGTCCAATAACTACATTGTGGGGAACAAGCCCCCGAAGGAATGTGGGGACCTGTGTCCAGGGACATTGGAGGAGAAGCCCATGTGTGAGAAGACCACCATCAACAATGAGTACAACTACCGCTGCTGGACCACAAATCGCTGCCAGAAAATGTGCCCAAGTGTGTGCGGGAAGCGAGCCTGCACCGAGAACAACGAGTGCTGCCACCCGGAGTGCCTGGGCAGCTGCCACACACCGGACGACAACACAACCTGCGTGGCCTGCAGACACTACTACTACAAAGGCGTGTGTGTGCCTGCCTGCCCGCCTGGCACCTACAGGTTCGAGGGCTGGCGCTGTGTGGATCGCGATTTCTGCGCCAACATCCCCAACGCTGAGAGCAGTGACTCGGATGGCTTCGTTATCCACGACGATGAGTGCATGCAGGAGTGTCCCTCAGGCTTCATCCGCAACAGCACCCAGAGCATGTACTGTATCCCCTGCGAAGGCCCCTGCCCCAAAGTCTGCGGCGATGAAGAGAAGAAAACGAAAACCATCGATTCGGTGACTTCTGCTCAAATGCTCCAAGGATGCACCATCCTGAAGGGCAATCTGCTTATTAACATCCGGAGAGGCAATAACATTGCCTCGGAGTTGGAGAACTTCATGGGGCTCATCGAGGTGGTGACCGGCTACGTGAAGATCCGCCATTCTCATGCCTTGGTCTCCTTGTCCTTCCTGAAGAACCTTCGTCTCATCTTAGGAGAGGAGCAGCTGGAAGGGAACTACTCCTTCTATGTCCTAGACAACCAGAACTTGCAGCAGCTGTGGGACTGGAACCACCGGAACCTGACCGTCAGGTCCGGAAAGATGTACTTTGCTTTCAATCCCAAGCTGTGTGTCTCCGAAATTTACCGCATGGAGGAAGTGACCGGAACCAAGGGACGCCAGAGCAAAGGGGACATAAACACCAGGAACAACGGAGAGCGAGCTTCCTGTGAAAGTGATGTTCTCCGTTTCACCTCCACCACGACCTGGAAGAACCGAATCATCATAACGTGGCACCGGTACCGGCCGCCGGACTACCGGGATCTCATCAGCTTCACAGTTTACTACAAGGAGGCACCATTTAAAAACGTTACGGAATATGACGGGCAGGATGCCTGTGGCTCCAACAGCTGGAACATGGTGGATGTAGACCTGCCTCCGAACAAGGAGGGCGAGCCTGGCATTTTACTGCATGGGCTGAAGCCCTGGACCCAGTATGCTGTCTATGTCAAGGCTGTGACCCTCACCATGGTGGAAAACGACCATATCCGTGGGGCCAAAAGTGAAATCTTGTACATTCGCACCAATGCTTCAGTCCCTTCCATTCCCCTAGATGTCCTCTCAGCATCAAACTCTTCCTCTCAGCTGATTGTGAAGTGGAATCCTCCAACTCTGCCCAATGGTAACTTGAGTTACTACATTGTGAGGTGGCAGCGGCAGCCCCAGGATGGTTACCTGTACCGGCACAACTACTGCTCCAAAG
ACAAAATACCCATCAGAAAGTACGCCGATGGTACCATCGACGTGGAGGAGGTGACGGAAAATCCCAAGACAGAAGTGTGTGGTGGTGATAAAGGGCCATGCTGCGCTTGCCCTAAAACTGAAGCTGAGAAGCAGGCTGAGAAGGAGGAGGCTGAGTACCGTAAAGTCTTTGAGAATTTCCTTCACAATTCCATCTTTGTGCCCAGGCCCGAAAGGAGGCGGAGAGACGTCATGCAAGTGGCCAACACGACCATGTCCAGCCGAAGCAGGAACACCACGGTAGCTGACACCTACAATATCACAGACCCGGAGGAGTTCGAGACAGAGTACCCTTTCTTTGAGAGCAGAGTGGATAACAAGGAGAGGACTGTCATCTCCAACCTCCGGCCTTTCACTCTGTACCGCATCGATATCCACAGCTGCAACCACGAGGCTGAGAAGCTGGGCTGCAGCGCCTCCAACTTCGTCTTTGCGAGAACCATGCCAGCAGAAGGAGCAGATGATATCCCTGGTCCGGTGACCTGGGAGCCAAGACCCGAAAACTCCATCTTTTTAAAGTGGCCAGAACCCGAGAACCCCAACGGATTGATCCTAATGTATGAAATTAAATACGGGTCGCAAGTCGAGGATCAGCGGGAATGTGTGTCCAGACAGGAGTACAGGAAGTACGGAGGGGCCAAACTCAACCGTCTAAACCCAGGGAACTATACAGCCCGGATTCAGGCTACCTCCCTCTCTGGGAATGGGTCATGGACAGATCCTGTGTTCTTCTATGTCCCCGCCAAAACGACGTATGAGAACTTCATGCATCTGATCATTGCTCTGCCGGTTGCCATCCTGCTGATCGTTGGGGGGCTGGTTATCATGCTGTATGTCTTCCATAGAAAGAGAAATAACAGCAGGTTGGGCAATGGAGTGCTGTATGCTTCTGTGAACCCCGAGTATTTCAGCGCAGCTGATGTGTACGTGCCTGATGAATGGGAGGTAGCTCGAGAGAAGATCACCATGAACCGGGAGCTCGGACAAGGGTCCTTTGGGATGGTCTATGAAGGAGTGGCCAAGGGTGTGGTCAAGGATGAACCCGAAACCAGAGTGGCCATCAAGACGGTAAACGAGGCTGCAAGTATGCGTGAAAGAATCGAGTTTCTCAACGAGGCCTCGGTGATGAAGGAGTTCAATTGTCACCATGTGGTCCGGTTGCTGGGTGTGGTATCCCAAGGCCAGCCCACCCTGGTCATCATGGAACTAATGACACGCGGTGATCTCAAAAGTTATCTCCGGTCTCTGAGGCCAGAAGTGGAGCAGAATAATCTAGTCCTCATTCCTCCGAGCTTAAGCAAGATGATCCAGATGGCTGGAGAGATTGCAGATGGCATGGCCTACCTCAATGCCAACAAGTTCGTCCACAGAGACCTTGCTGCTAGGAACTGCATGGTAGCCGAAGATTTCACAGTCAAAATTGGAGATTTCGGTATGACACGAGACATCTACGAGACGGACTACTACCGGAAAGGCGGGAAGGGGTTGCTGCCTGTGCGCTGGATGTCTCCCGAGTCCCTCAAGGATGGTGTCTTCACTACTCATTCTGATGTCTGGTCCTTCGGGGTCGTCCTCTGGGAGATCGCCACGCTGGCTGAGCAGCCCTACCAGGGCTTGTCCAACGAGCAAGTTCTTCGTTTCGTCATGGAGGGTGGCCTTCTGGACAAGCCGGACAACTGCCCTGATATGCTGTTTGAACTTATGCGCATGTGCTGGCAGTATAACCCCAAGATGCGGCCCTCCTTCCTGGAGATCATCGGCAGCATCAAGGATGAGATGGAGCCCAGCTTCCAGGAGGTCTCCTTCTACTACAGCGAGGAGAACAAGCCTCCCGAGCCAGAGGAGCTGGAGATGGAGCCTGAGAACATGGAGAGCGTCCCACTGGACCCTTCGGCCTCCTCAGCCTCCCTGCCTCTGCCTGAAAGACACTCAGGACACAAGGCTGAGAATGGCCCGGGCCCT
GGCGTGCTCGTTCTCCGCGCCAGTTTTGATGAGAGACAGCCTTACGCTCACATGAACGGGGGACGCGCCAACGAGAGGGCCTTGCCTCTGCCCCAGTCCTCGACCTGCTGA
+
+>ENSRNOT00000019267_rattusnorvegicus
+ATGAAGTCTGGCTCCGGAGGAGGGTCCCCGACCTCGCTGTGGGGGCTCGTGTTTCTCTCCGCCGCGCTCTCGCTCTGGCCGACGAGTGGAGAAATTTGTGGGCCCGGCATTGACATCCGCAACGACTATCAGCAGCTGAAGCGCCTGGAAAACTGCACGGTGATCGAGGGCTTCCTCCACATCCTGCTCATCTCCAAGGCCGAGGACTACCGAAGCTACCGCTTCCCCAAGCTCACGGTCATCACCGAGTACTTGCTGCTGTTTCGAGTGGCCGGCCTCGAGAGCCTGGGAGACCTCTTCCCGAACCTCACAGTCATCCGTGGCTGGAAACTCTTCTACAATTACGCACTGGTCATCTTCGAGATGACCAATCTCAAGGATATTGGGCTTTATAATCTGAGGAACATTACTCGGGGGGCCATCAGGATTGAGAAAAACGCTGACCTCTGTTACCTCTCCACCATAGACTGGTCTCTCATCTTGGATGCGGTGTCCAATAACTACATTGTGGGGAACAAGCCCCCAAAGGAATGTGGGGACCTGTGTCCAGGGACCTTGGAGGAGAAGCCCATGTGTGAGAAGACCACCATCAACAATGAGTACAACTACCGCTGCTGGACCACAAATCGCTGCCAGAAAATGTGCCCAAGTGTGTGTGGGAAGCGAGCCTGCACCGAGAACAATGAGTGCTGCCACCCGGAGTGCCTAGGCAGCTGCCACACACCGGACGACAACACAACCTGCGTGGCCTGCCGACACTACTACTACAAAGGCGTGTGCGTGCCTGCCTGCCCGCCTGGCACCTACAGGTTCGAGGGCTGGCGCTGTGTGGACCGGGATTTCTGCGCCAACATCCCCAACGCCGAGAGCAGTGACTCAGATGGCTTCGTCATCCACGATGGCGAGTGCATGCAGGAGTGTCCATCAGGCTTCATCCGCAACAGCACCCAGAGCATGTACTGTATCCCCTGTGAAGGCCCCTGCCCCAAGGTCTGCGGCGATGAAGAAAAGAAAACGAAAACCATCGATTCTGTGACGTCTGCCCAGATGCTCCAAGGGTGCACCATTTTGAAGGGCAATCTGCTTATTAACATCCGGCGAGGCAATAACATTGCCTCGGAATTGGAGAACTTCATGGGGCTCATCGAGGTGGTGACTGGCTACGTGAAGATCCGCCATTCCCATGCCTTGGTCTCCTTGTCCTTCCTGAAGAACCTTCGTCTCATCTTAGGAGAGGAGCAGCTAGAAGGGAACTACTCCTTCTATGTCCTGGACAACCAGAACTTGCAGCAGCTGTGGGACTGGAACCACCGGAACCTGACCGTCAGGTCAGGGAAAATGTACTTCGCTTTCAATCCCAAGCTGTGTGTCTCTGAAATTTACCGGATGGAGGAGGTGACAGGAACAAAGGGACGGCAGAGCAAAGGAGACATAAACACCAGGAACAACGGAGAGCGAGCTTCCTGTGAAAGTGATGTTCTCCGTTTCACCTCCACCACCACCTGGAAGAACCGCATCATCATAACGTGGCACCGGTACCGGCCGCCGGACTACCGGGATCTCATCAGTTTCACAGTCTACTACAAGGAGGCACCCTTTAAAAACGTCACGGAATACGACGGGCAGGATGCCTGTGGCTCCAACAGCTGGAACATGGTGGACGTGGACCTGCCTCCGAACAAGGAGGGGGAGCCTGGCATTTTGCTGCATGGGCTGAAGCCCTGGACCCAGTATGCAGTCTATGTCAAGGCTGTGACCCTCACCATGGTGGAAAACGACCACATCCGTGGGGCCAAAAGTGAAATCTTGTACATTCGCACCAACGCTTCAGTTCCTTCCATTCCTCTAGATGTCCTCTCGGCATCAAACTCCTCCTCTCAGCTGATCGTGAAGTGGAACCCCCCAACTCTGCCCAATGGTAACTTGAGTTACTACATTGTGAGGTGGCAGCGGCAGCCGCAGGATGGCTATCTGTTCCGGCACAACTACTGCTCCAAAG
ACAAAATACCCATCAGAAAGTACGCCGATGGTACCATCGATGTGGAGGAGGTGACAGAAAATCCCAAGACAGAAGTGTGCGGTGGTGATAAAGGGCCGTGCTGTGCCTGTCCTAAAACCGAAGCTGAGAAGCAGGCTGAGAAGGAGGAGGCTGAGTACCGTAAAGTCTTTGAGAATTTCCTTCACAACTCCATCTTTGTGCCCAGACCTGAGAGGAGGCGGAGAGATGTCCTGCAGGTGGCTAACACCACCATGTCCAGCCGAAGCAGGAACACCACGGTAGCTGACACCTACAATATCACAGACCCGGAAGAGTTCGAGACAGAATACCCTTTCTTTGAGAGCAGAGTGGATAACAAGGAGAGGACTGTCATTTCCAACCTCCGGCCTTTCACTCTGTACCGTATCGATATCCACAGCTGCAACCACGAGGCTGAGAAGCTGGGCTGCAGCGCCTCCAACTTTGTCTTTGCAAGAACCATGCCAGCAGAAGGAGCAGATGACATTCCTGGCCCAGTGACCTGGGAGCCAAGACCTGAAAACTCCATCTTTTTAAAGTGGCCAGAACCCGAGAACCCCAACGGATTGATTCTAATGTATGAAATAAAATACGGATCGCAAGTCGAGGATCAGCGGGAATGTGTGTCCAGACAGGAGTACAGGAAGTATGGAGGGGCCAAACTTAACCGTCTAAACCCAGGGAACTATACGGCCCGGATTCAGGCTACCTCCCTCTCTGGGAATGGGTCGTGGACAGATCCTGTGTTCTTCTATGTCCCAGCCAAAACAACGTATGAGAATTTCATGCATCTGATCATTGCTCTGCCGGTTGCCATCCTGCTGATTGTGGGGGGCCTGGTAATCATGCTGTATGTCTTCCATAGAAAGAGGGTGTCTGGGCCAGCAGAAGGCAGTAGTTGGAAAGGGCCATTTCCATCCTGTCTGTTCCTAGTGTACGTGCCTGATGAATGGGAGGTAGCTCGGGAGAAGATCACCATGAACCGGGAGCTCGGACAAGGGTCCTTCGGGATGGTCTATGAAGGAGTGGCCAAGGGCGTGGTCAAGGACGAGCCTGAAACCAGAGTGGCCATCAAGACAGTGAATGAGGCTGCAAGTATGCGTGAGAGAATTGAGTTTCTCAACGAGGCCTCAGTGATGAAGGAGTTCAACTGTCACCATGTGGTCCGGTTGCTGGGTGTAGTATCCCAAGGCCAGCCCACCCTGGTCATCATGGAACTAATGACACGTGGCGATCTCAAAAGTTATCTCCGGTCTCTAAGGCCAGAGGTGGAGAATAATCTAGTCCTGATTCCTCCGAGCTTAAGCAAGATGATCCAGATGGCTGGAGAGATTGCAGATGGCATGGCCTACCTCAATGCCAACAAGTTCGTCCACAGAGACCTGGCTGCTCGGAACTGCATGGTAGCTGAAGATTTCACAGTCAAAATTGGAGATTTTGGTATGACACGAGACATCTACGAGACGGACTACTACCGGAAAGGCGGGAAGGGCTTGCTGCCTGTGCGCTGGATGTCTCCCGAGTCCCTCAAGGATGGCGTCTTCACCACTCATTCCGATGTCTGGTCCTTTGGGGTCGTCCTCTGGGAGATCGCCACTCTGGCTGAGCAGCCGTACCAGGGCCTGTCCAACGAGCAAGTTCTTCGTTTCGTCATGGAGGGCGGCCTTCTGGACAAGCCGGATAACTGCCCCGATATGCTGTTTGAACTTATGCGCATGTGCTGGCAGTACAACCCCAAGATGCGGCCCTCCTTCCTGGAGATCATCGGAAGCATCAAGGATGAGATGGAGCCCAGTTTCCAGGAGGTCTCCTTCTACTACAGCGAGGAGAACAAGCCTCCAGAGCCGGAGGAGCTGGAGATGGAGCTGGAGCTGGAGCCCGAGAACATGGAGAGCGTCCCGCTGGACCCTTCGGCCTCCTCAGCCTCCCTGCCTCTGCCTGAAAGACACTCAGGACACAAGGCTGAGAACGGCCCTGGCGTG
CTGGTTCTCCGTGCCAGTTTTGATGAGAGACAGCCTTACGCTCACATGAATGGGGGACGCGCCAACGAGAGGGCCTTGCCTCTGCCCCAGTCCTCAACCTGC
+
+>ENSPTRT00000013802_pantroglodytes
+ATGAAGTCTGGCTCCGGAGGAGGGTCCCCGACCTCGCTGTGGGGGCTCCTGTTTCTCTCCGCCGCGCTCTCGCTCTGGCCGACGAGTGGAGAAATCTGCGGGCCAGGCATCGACATCCGCAACGACTATCAGCAGCTGAAGCGCCTGGAGAACTGCACGGTGATCGAGGGCTACCTCCACATCCTGCTCATCTCCAAGGCCGAGGACTACCGCAGCTACCGCTTCCCCAAGCTCACGGTCATTACCGAGTACTTGCTGCTGTTCCGAGTGGCTGGCCTCGAGAGCCTCGGAGACCTCTTCCCCAACCTCACGGTCATCCGCGGCTGGAAACTCTTCTACAACTACGCCCTGGTCATCTTCGAGATGACCAATCTCAAGGATATTGGGCTTTACAACCTGAGGAACATTACTCGGGGGGCCATCAGGATTGAGAAAAATGCTGACCTCTGTTACCTCTCCACTGTGGACTGGTCCCTGATCCTGGATGCGGTGTCCAATAACTACATTGTGGGGAATAAGCCCCCAAAGGAATGTGGGGACCTGTGTCCAGGGACCATGGAGGAGAAGCCGATGTGTGAGAAGACCACCATCAACAATGAGTACAACTACCGCTGCTGGACCACAAACCGCTGCCAGAAAATGTGCCCGAGCACGTGTGGGAAGCGGGCGTGCACCGAGAACAACGAGTGCTGCCACCCCGAGTGCCTGGGCAGCTGCAGCGCGCCTGACAACGACACGGCCTGTGTAGCTTGCCGCCACTACTACTATGCCGGTGTCTGTGTGCCTGCCTGCCCGCCCAACACCTACAGGTTTGAGGGCTGGCGCTGTGTGGACCGTGACTTCTGCGCCAACATCCTCAGCGCCGAGAGCAGCGACTCCGAGGGGTTTGTGATCCACGACGGCGAGTGCATGCAGGAGTGCCCCTCGGGCTTCATCCGCAACGGCAGCCAGAGCATGTACTGCATCCCTTGTGAAGGTCCTTGCCCGAAGGTCTGTGAGGAAGAAAAGAAAACAAAGACCATTGATTCTGTTACTTCTGCTCAGATGCTCCAAGGATGCACCATCTTCAAGGGCAATTTGCTCATTAACATCCGACGGGGGAATAACATTGCTTCAGAGCTGGAGAACTTCATGGGGCTCATCGAGGTGGTGACGGGCTACGTGAAGATCCGCCATTCTCATGCCTTGGTCTCCTTGTCCTTCCTAAAAAACCTTCGCCTCATCTTAGGAGAGGAGCAGCTAGAAGGGAATTACTCCTTCTACGTCCTCGACAACCAGAACTTGCAGCAACTATGGGACTGGGACCACCGCAACCTGACCATCAAAGCAGGGAAAATGTACTTTGCTTTCAATCCCAAATTATGTGTTTCCGAAATTTACCGCATGGAGGAAGTGACGGGGACTAAAGGGCGCCAAAGCAAAGGGGACATAAACACCAGGAACAACGGGGAGAGAGCCTCCTGTGAAAGTGACGTCCTGCATTTCACCTCCACCACCACGTCGAAGAATCGCATCATCATAACCTGGCACCGGTACCGGCCCCCTGACTACAGGGATCTCATCAGCTTCACCGTTTACTACAAGGAAGCACCCTTTAAGAATGTCACGGAGTATGATGGGCAGGACGCCTGCGGCTCCAACAGCTGGAACATGGTGGACGTGGACCTCCCGCCCAACAAGGACGTGGAGCCCGGCATCTTACTACATGGGCTGAAGCCCTGGACTCAGTACGCCGTTTACGTCAAGGCTGTGACCCTCACCATGGTGGAGAATGACCATATCCGTGGGGCCAAGAGTGAGATCTTGTACATTCGCACCAATGCTTCAGTTCCTTCCATTCCCTTGGACGTTCTTTCAGCATCGAACTCCTCTTCTCAGTTAATCGTGAAGTGGAACCCTCCCTCTCTGCCCAACGGCAACCTGAGTTACTACATTGTGCGCTGGCAGCGGCAGCCTCAGGACGGCTACCTTTACCGGCACAATTACTGCTCCAAAGACA
AAATCCCCATCAGGAAGTATGCCGACGGCACCATCGACATTGAGGAGGTCACAGAGAACCCCAAGACTGAGGTGTGTGGTGGGGAGAAAGGGCCTTGCTGCGCCTGCCCCAAAACTGAAGCCGAGAAGCAGGCCGAGAAGGAGGAGGCTGAATACCGCAAAGTCTTTGAGAATTTCCTGCACAACTCCATCTTCGTGCCCAGACCTGAAAGGAAGCGGAGAGATGTCATGCAAGTGGCCAACACCACCATGTCCAGCCGAAGCAGGAACACCACGGCCGCAGACACCTACAACATCACCGACCCGGAAGAGCTGGAGACAGAGTACCCTTTCTTTGAGAGCAGAGTGGATAACAAGGAGAGAACTGTGATTTCTAACCTTCGGCCTTTCACATTGTACCGCATCGATATCCACAGCTGCAACCACGAGGCTGAGAAGCTGGGCTGCAGCGCCTCCAACTTTGTCTTTGCAAGGACTATGCCCGCAGAAGGAGCAGATGACATTCCTGGGCCAGTGACCTGGGAGCCAAGGCCTGAAAACTCCATCTTTTTAAAGTGGCCGGAACCTGAGAATCCCAATGGATTGATTCTAATGTATGAAATAAAATACGGATCACAAGTTGAGGATCAGCGAGAATGTGTGTCCAGACAGGAATACAGGAAGTATGGAGGGGCCAAGCTAAACCGGCTAAACCCGGGGAACTACACAGCCCGGATTCAGGCCACATCTCTCTCTGGGAATGGGTCGTGGACAGATCCTGTGTTCTTCTATGTCCAGGCCAAAACAGGATATGAAAACTTCATCCATCTGATCATCGCTCTGCCCGTCGCTGTCCTGTTGATCGTGGGAGGGTTGGTGATTATGCTGTACGTCTTCCATAGAAAGAGAAATAACAGCAGGCTGGGGAATGGAGTGCTGTATGCCTCTGTGAACCCGGAGTACTTCAGCGCTGCTGATGTGTACGTTCCCGATGAGTGGGAGGTGGCTCGGGAGAAGATCACCATGAGCCGGGAACTTGGGCAGGGGTCCTTTGGGATGGTCTATGAAGGAGTTGCCAAGGGTGTGGTGAAAGATGAACCTGAAACCAGAGTGGCCATTAAAACAGTGAACGAGGCCGCAAGCATGCGTGAAAGGATTGAGTTTCTCAACGAAGCTTCTGTGATGAAGGAGTTCAATTGTCACCATGTGGTGCGATTGCTGGGTGTGGTGTCCCAAGGCCAGCCAACACTGGTCATCATGGAACTGATGACACGGGGCGATCTCAAAAGTTATCTCCGGTCTCTGAGGCCAGAAATGGAGAATAATCCAGTCCTAGCACCTCCAAGCCTGAGCAAGATGATTCAGATGGCCGGAGAGATTGCAGACGGCATGGCATACCTCAACGCCAATAAGTTCGTCCACAGAGACCTTGCTGCCCGGAATTGCATGGTAGCCGAAGATTTCACAGTCAAAATCGGAGATTTTGGTATGACGCGAGATATCTATGAGACAGACTATTACCGGAAAGGAGGGAAAGGGCTGCTGCCCGTGCGCTGGATGTCTCCCGAGTCCCTCAAGGATGGAGTCTTCACCACTTACTCGGACGTCTGGTCCTTCGGGGTCGTCCTCTGGGAGATCGCCACACTGGCCGAGCAGCCCTACCAGGGCTTGTCCAACGAGCAAGTCCTTCGCTTCGTCATGGAGGGCGGCCTTCTGGACAAGCCAGACAACTGTCCCGACATGCTGTTTGAACTGATGCGCATGTGCTGGCAGTATAACCCCAAGATGAGGCCTTCCTTCCTGGAGATCATCAGCAGCATCAAAGAGGAGATGGAGCCTGGCTTCCGGGAGGTCTCCTTCTACTACAGCGAGGAGAACAAGCTGCCCGAGCCGGAGGAGCTGGACCTGGAGCCAGAGAACATGGAGAGCGTCCCCCTGGACCCCTCGGCCTCCCTGCCACTGCCCGACAGACACTCAGGACACAAGGCCGAGAACGGCCCCGGCCCTGGGGTGCTGGTCCTC
CGCGCCAGCTTCGACGAGAGACAGCCGTACGCACACATGAACGGGGGCCGCAAGAACGAGCGGGCCTTGCCGCTGCCCCAGTCTTCGACCTGCTGA
diff -r 000000000000 -r 794a6e864a96 test-data/output1.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output1.fasta Thu Dec 15 11:04:25 2016 -0500
@@ -0,0 +1,425 @@
+>ENSMUST00000091291_musmusculus
+ATGGGCTTCGGGAGAGGATGTGAGACGACGG-CTGTGCCATTGCTGGTGG
+CCGTGGCCGCGTTGCTGGTGGGCACAGCCGGCCACCTGTACCCTGGAGAG
+GTGTGC---CCTGGTATGGACATCCGGAACAACCTGACCAGGCTACATGA
+GCTGGAGAACTGCTCAGTCATTGAGGGCCATCTGCAGATCCTCCTGATGT
+TCAAGACCAGACCCGAAGATTTCCGAGACCTCAGTTTCCCCAAACTCATC
+ATGATCACAGATTACCTGCTTCTCTTCCGTGTCTATGGTCTGGAAAGTCT
+GAAAGACCTCTTCCCAAATCTCACAGTCATCCGAGGCTCCCGTCTCTTCT
+TCAACTATGCCCTGGTTATCTTCGAGATGGTCCACCTGAAGGAGCTGGGG
+CTTTATAACCTCATGAACATCACCCGGGGCTCTGTCCGCATCGAGAAGAA
+TAATGAGCTCTGCTACCTGGCCACTATCGACTGGTCCCGTATCCTGGATT
+CTGTGGAGGACAACTACATTGTACTGAACAAAGATGACAACGAGGAATGT
+GGGGATGTCTGTCCAGGCACCGCCAAGGGCAA-GACCA--ACTGTCCTGC
+CACTGTCATCAATGGGCAGTTTGTGGAACGGTGCTGGACACACAGTCATT
+GTCAGAAAGTTTGCCCAACCATCTGTAAGTCACATGGCTGCACAGCTGAA
+GGCCTGTGCTGCCACAAAGAGTGCCTGGGCAACTGTTCGGAACCTGATGA
+CCCCACCAAGTGTGTGGCCTGTCGCAACTTCTATCTGGATGGTCAGTGTG
+TGGAGACCTGCCCGCCACCCTACTATCACTTCCAGGACTGGCGCTGTGTG
+AACTTCAGCTTCTGCCAAGACCTTCACTTCAAATGCAGGAACTCTCGGAA
+GCCTGGCTGCCACCAATACGTCATTCACAACAATAAGTGCATCCCCGAGT
+GCCCGTCTGGCTATACCATGAATTCCAGC---AACTTGATGTGCACCCCA
+TGTCTGGGACCCTGCCCTAAGGTCTGCCA-AATCCTCGAAGGTGA--GAA
+GACCATTGATTCTGTGACATCTGCCCAGGAGCTCCGAGGCTGCACTGTGA
+TCAACGGTAGCCTGATCATCAACATCCGAGGGGGCAACAACCTGGCAGCT
+GAGCTGGAGGCTAACCTTGGCCTCATTGAAGAAATTTCGGGATTTCTAAA
+GATCCGCCGCTCCTATGCTCTGGTATCACTTTCTTTCTTCAGGAAGCTAC
+ATCTGATTCGAGGAGAGACCTTGGAAATTGGGAACTATTCTTTTTATGCC
+TTGGACAACCAGAACCTGAGGCAACTCTGGGACTGGAGCAAACACAACCT
+CACCATCACTCAGGGCAAGCTCTTCTTCCATTACAACCCGAAACTCTGCT
+TGTCTGAAATTCACAAGATGGAAGAAGTCTCCGGAACTAAGGGCCGTCAG
+GAGAGGAACGACATTGCCCTGAAGACCAATGGGGACCAGGCATCGTGTGA
+AAATGAATTGCTTAAATTTTCTTTCATTCGGACATCTTTTGACAAGATCC
+TGTTGAGGTGGGAACCCTACTGGCCCCCCGACTTCCGAGATCTCCTGGGA
+TTCATGTTGTTCTACAAAGAGGCCCCTTATCAGAATGTGACAGAGTTTGA
+TGGGCAGGATGCTTGTGGCTCCAACAGCTGGACTGTGGTGGATATTGACC
+CGCCCCAGAGGTCCAACGACCCCAAGTCTCAGACCCCAAGCCACCCTGGG
+TGGCTGATGCGGGGCCTCAAACCCTGGACCCAATACGCCATCTTTGTGAA
+GACCTTGGTTACCTTCTC--TGATGAACGGC-GGACCTATGGAGCCAAAA
+GTGATATCATCTATGTGCAAACAGATGCCACTAATCCTTCTGTCCCCCTG
+GATCCCATATCAGTTTCTAATTCCTCATCTCAGATTATCTTAAAGTGGAA
+GCCCCCCTCTGACCCCAATGGCAACATCACACACTACCTGGTGTACTGGG
+AGAGGCAAGCAGAGGACAGCGAGCTGTTTGAGCTGGATTATTGTCTCAAA
+GGGCTGAAGCT-CCCTTCACGGACCTGGTCCCCAC--CCTTTGAGTCTGA
+TGAT-TCTCAGAAG--CACAATCAGAGTGAGTATGACGACTCGGCCAGTG
+AGTGCTGCTCATGCCCTAAGACTGACTCTCAGATCCTGAAGGAGCTGGAG
+GAGTCTTCATTCAGGAAGACCTTCGAGGATTACCTGCACAACGTGGTTTT
+TGTCCCCAGGCCATCCCGAAAGCGAAGATCCCTTGAAGAGGTGGGGAATG
+TGAC---AGCCACCACACTCACACTTCCAGATTTCCCCAACGTCTCCTCT
+ACCATTGTGCCCACAAGTCAGGAG---GAGCACAGGCCATTTGAGAA---
+AGTGGTGAACAAGGAGTCACTTGTCATCTCTGGCCTGAGACACTTCACTG
+GGTACCGCATTGAGCTGCAGGCATGCAATCAAGATTCCCCAGATGAGAGG
+TGCAGTGTGGCTGCCTACGTCAGTGCCCGGACCATGCCTGAAGCTAAGGC
+AGATGACATCGTTGGCCCTGTGACTCATGAAATCTTTGAGAACAATGTTG
+TACACTTAATGTGGCAAGAGCCAAAGGAACCTAATGGTCTGATTGTGCTA
+TATGAAGTGAGCTATCGCCGATATGGTGATGAGGAGCTGCACCTCTGTGT
+CTCCCGGAAGCATTTTGCCCTGGAGCGGGGCTGCAGGCTGCGAGGGCTCT
+CCCCAGGAAACTACAGTGTTCGAGTCCGGGCTACCTCTCTGGCAGGAAAT
+GGCTCCTGGACAGAACCCACCTATTTTTATGTGACTGATTATTTAGATGT
+CCCATCAAATATTGCCAAAATTATCATTGGACCCCTCATCTTTGTCTTCC
+TCTTCAGTGTTGTGATTGGAAGTATT---TATCTATTTCTGAGAAAGAGG
+CA----GCCGGATGGGCCAATGGGACCACTGTATGCATCTTCAAACCCTG
+AGTACCTCAGTGCCAGTGATGTGTTTCCATCTTCTGTGTACGTGCCGGAC
+GAGTGGGAGGTGCCTCGAGAGAAGATCACCCTTCTTCGAGAGCTGGGGCA
+GGGATCCTTTGGTATGGTGTATGAAGGCAATGCCAAGGATATCATCAAGG
+GTGAGGCAGAGACCCGTGTTGCGGTTAAGACTGTCAATGAGTCAGCCAGT
+CTTCGAGAACGGATCGAGTTCCTCAATGAGGCATCAGTCATGAAGGGATT
+CACCTGCCATCATGTGGTCCGCCTTCTTGGGGTGGTATCCAAAGGACAGC
+CAACGCTGGTAGTGATGGAATTGATGGCTCATGGAGACCTGAAAAGTCAC
+CTCCGTTCTCTGAGGCCAGATGCTGAG---AATAACCCAGGCCGCCCTCC
+CCCTACCTTGCAAGAAATGATTCAGATGACAGCAGAAATTGCTGATGGCA
+TGGCATACTTGAACGCCAAGAAGTTTGTGCACCGGGACCTGGCAGCTCGA
+AACTGCATGGTTGCCCATGATTTTACTGTCAAAATTGGAGACTTTGGAAT
+GACAAGGGACATCTACGAGACAGATTACTATCGGAAAGGGGGCAAGGGAC
+TGCTTCCTGTGAGGTGGATGTCACCTGAGTCCCTGAAGGATGGAGTCTTT
+ACTGCTTCTTCTGATATGTGGTCCTTTGGGGTGGTCCTTTGGGAAATCAC
+TAGCCTGGCTGAGCAACCTTATCAAGGCCTGTCTAATGAACAGGTGTTGA
+AGTTTGTCATGGATGGAGGCTATCTGGATCCCCCTGATAACTGTCCAGAG
+AGACTCACTGACCTGATGCGCATGTGCTGGCAGTTCAACCCCAAGATGAG
+GCCAACCTTCCTGGAAATCGTCAACCTGCTCAAGGATGACCTCCACCCCA
+GCTTTCCAGAAGTTTCCTTCTTCTACAGCGAGGAGAACAAGGCTCCTGAG
+AGTGAGGAGCTGGAGATGG------------AGTTTGAAGACATGGAGAA
+TGTCCCGTTGGATCGTTC----------------CTCTCACTGTCAGAGA
+GAA----GAGGCTGGGGGCCGGGAGGGAGGG-------TCCTCACTGAGC
+ATCAAACGGACCTATGATGAACACATCCCCTATACCCACATGAATGGGGG
+CAAGAAGAACGGACGTGTCCTTACCCTGCCAAGGTCAAACCCTTCCTAA
+>ENSCAFT00000026349_canisfamiliaris
+----ATGGCAGTGCCCAGTCTGTGCCCATGGGTCGCATGCCTGCTGGTGA
+TC----CTCCTCTCCTTGGGATTTGGCCTGGACACACT--------AGAG
+GTGTGC---CCCAGCCTGGACATCCGCTCAGAGGTGGCGGAGCTGCGCCG
+GCTGGAGAACTGCAGCGTGGTGGAGGGCCATCTGCAGATCCTGCTCATGT
+TCACGGCCACGGGCGAGGACTTCCGCGGCCTCAGCTTCCCGCGCCTCACT
+CAGGTCACTGACTACCTGCTGCTCTTCCGCGTGTATGGCCTGGAGAGCCT
+GCGGGACCTCTTCCCCAACCTCGCGGTCGTGCGTGGCGCCCGCCTCTTCC
+TGGGTTATGCGCTGGTGGTCTACGAGATGCCGCACCTGCGGGACGTGGGG
+CTGCCGGCGCTGGGGGCCGTGCTGCGTGGGGCCGTGCGGGTGGAGAAGAA
+CCAGGAGCTCTGTCATCTCTCCACCATCGACTGGGGGCTGCTGCAGCCCT
+CGCCCAGCGCCAACCACATCGTGGGCAACAA-GCTGCGTG--AGGAGTGT
+GCCGACGTGTGCCCTGGTGTGCTGGGTGCCACCGGCCAGCCCTGCGCCAG
+GACCACCTTCAGTGGGCACACCGACTACAGATGCTGGACCTCCAGCCACT
+GCCAGAGAGTGTGTCCCTGCCCCCATGGG---CTGGCCTGCACAGCTGGG
+GGTGAGTGCTGCCACACTGAATGCCTGGGGGGCTGCAGCCGGCCGGAAGA
+CCCCCGCGCCTGCGTCGCCTGTCGCCACCTCTACTTCCAGGGTGCCTGCC
+ACCGGGCCTGCCCTCCAGGCACCTACCAGCACGAGTCCTGGCGCTGTGTC
+ACGGCAGAGCGCTGTGCCAGCCTGCGCTCTGTGCCCGGCCG---------
+----CACCTCCATC--TTTGGCATCCACGAGGGCAGCTGCCTGGCCCAGT
+GCCCTCCGGGCTTCACCCGCAACGGCAGC---AGCATGTTCTGCCACAAG
+TGTGAGGGGCTGTGCCCCAAAGAGTGCA---------AGGTGGGTACCAA
+GACCATCGACTCCACGCAGGCGGCACAGGACCTGGCGGGCTGCACCCACG
+TGGAGGGGAGCCTCATCATCAACCTCCGCCAGGGCTACAACCTAGAGCTG
+GAGCTGCAACAGAGCCTGGGGCTGATAGAGACCATCACTGGCTTCCTCAA
+GATCAAGCACTCCTTTGCCCTCGTGTCCCTGGGCTTTTTCAAGAACCTCA
+AACTCATCCGAGGGGACGCCATGGTGGATGGGAACTACACCCTGTATGTG
+CTGGACAACCAGAACCTACAGCAGCTGGGGGCCTGGGTGGCTGCGGGGCT
+CACCATTCCCGTGGGCAAGATATACTTCGCTTTCAACCCTCGCCTCTGCT
+TGGAGCACATCTACCGCCTGGAAGAGGTGACCGGCACGCGGGGACGGCAA
+AACAAGGCTGAGATCAACCCCCGCACCAACGGAGACCGCGCCGCCTGCCA
+AACTCGCACCCTGCGCTTCGTGTCCAACGTGACGCAAGCTGACAGCATCT
+TGCTGCGCTGGGAGCGCTACGAGCCGCTGGAGGCTCGGGACCTGCTCAGC
+TTCATCGTGTACTACAAGGAGTCCCCATTCCAGAATGCCACAGAGCACAC
+AGGTCCAGATGCCTGTGGAACCCAGAGCTGGAACCTGCTGGATGTGGAGC
+TGCCCTTAAG---CCGCACCCAAGAACCCGGGGTAA--------------
+----CTCTAGCACCCCTCAAGCCCTGGACACAATATGCAGTGTTTGTACG
+GGCCATCACACTGACCACTGCTGAGGACAGCCCCCACCAAGGAGCCCAGA
+GCCCCATCGTCTACCTCCGAACCCTGCCTGCGGCGCCCACTGTGCCCCAG
+GACGTCATCTCCACGTCCAATTCCTCGTCCCACCTGCTCGTGCGCTGGAA
+GCCACCGATTCAGCGCAACGGGAACATCACCTACTACCTGGTGCTGTGGC
+AGCGTCTGGCGGAGGACGGCGACCTCTATCTCAACGACTACTGCCACCGC
+GGCCTGCGGCTGCCCACCAGCAACAACGACCCGCG---CTTCGACCGCGA
+GGA-----CGGTGAACTCGAAGCCGAGATGGAGCCGGGCTGCTGCCCTTG
+CCAGCACCCACCGCCT--GGGCAGGTCCT--GCCGCCGCTGGAGGCGCAA
+GAGGCCTCGTTCCAGAAGAAGTTCGAAAACTTCCTGCACAACGCCATCAC
+CATCCCCAAGTCCCCCTGGAAGGTGACGTCCGTCA-ATAAGAGCCCTCAA
+AGAC--------ATGCGGGGAGGCACCGCCGGGCGGCCGGGGC--GCTCC
+GGCTTGGGGGCAACAGCTCG-------GATTTCGAGATCCAGGAGGACAA
+AGTGCC---CCGGGAGCGAGCAGTGTTGAGTGGTCTGCGCCACTTTACGG
+AATATCGTATCGACATCCACGCCTGCAACCACGCGGCTCACACCGTGGGC
+TGCAGCGCGGCCACGTTCGTCTTCGCGCGCACCATGCCGCACAGAGAAGC
+TGATGGCATCCCAGGGAAGGTGGCCTGGGAGGCAGCCAGCAAAAGCAGTG
+TCCTCCTGCGCTGGCTGGAGCCACCTGACCCCAACGGACTCATCCTCAAG
+TATGAAATCAAGTACCGCCGCTTGGGAGAGGAGGCCACAGTGCTATGTGT
+GTCCCGCCTACGATATGCCAAATTTGGGGGTGTCCAGCTGGCCCTGCTGC
+CCCCTGGAAACTACTCCGCCAGAGTTCGGGCAACCTCGCTGGCTGGCAAC
+GGCTCCTGGACAGAAAGTATCGCTTTCTACGTCCCAGGCCCAGAGGAGGA
+------AGACTCCGGGGGGCTGCACGTCCTTCTCACTGTCACCCCCGTGG
+GGCTCATGCTGTTCATCATTCTTGCCGC-CCTCGGTTTCTTCTACGG---
+--------CAGGAAGAGAAACAGCAC-CCTCTATGCCTCAGTGAATCCGG
+AGTACTTCAGCGCCTCTGAT---------------ATGTACATCCCTGAT
+GAGTGGGAGGTGCCTCGGGAGCAGATCTCCATAATCCGAGAGCTGGGCCA
+GGGCTCCTTTGGGATGGTATATGAAGGCCTGGCACAAGGACTAGAGGCTG
+GAGAGGAGTCCACACCCGTGGCCCTGAAGACAGTGAATGAGTTGGCCAGC
+CCACGAGAACGCATTGAGTTCCTCAAGGAAGCTTCTGTCATGAAGGCATT
+CAAGTGTCACCATGTGGTACGTCTCCTGGGTGTTGTGTCTCAAGGCCAGC
+CAACTCTGGTCATCATGGAGTTAATGACTCGTGGGGACCTCAAGAGCCAT
+CTTCGATCTCTGCGGCCTGAGGCAGAG---AACAACCCTGGGCTCCCACG
+GCCAGCACTGGGAGATATGATCCAGATGGCTGGTGAGATTGCAGATGGCA
+TGGCTTACCTCGCTGCCAACAAGTTTGTGCATCGAGACCTGGCAGCCAGA
+AACTGCATGGTGTCCCAGGACTTCACCGTCAAGATTGGGGACTTCGGGAT
+GACTCGAGACGTGTACGAGACAGACTATTACCGCAAGGGCGGGAAAGGGC
+TGCTGCCCGTGCGCTGGATGGCCCCCGAGTCCCTCAAAGATGGAATCTTC
+ACCACACATTCGGATGTTTGGTCCTTCGGGGTGGTGCTCTGGGAGATCGT
+GACCCTAGCTGAACAGCCCTACCAGGGTTTATCCAACGAGCAGGTGCTCA
+AGTTTGTCATGGATGGTGGGGTCCTGGAGGAGCTGGAGAGCTGTCCCCTT
+CAGCTGCAAGAACTGATGAGCCGCTGCTGGCAGCAGAACCCACGCCTGCG
+GCCCACCTTCACCCACATCCTGGACAGCATTCGGGGGGAACTGCGGCCCT
+CTTTCCGCCTCTTTTCCTTCTATTACAGCCCAGAGTGCCAGG----GGAG
+CCAGGGGGCCCTGCT-----------------GCCTAATGCCAAACCCAA
+CTCCCTACCAACCCCAGAAGGGGCT-------CCCTCAGACTGCATGCCC
+CAAAATGGGGGTCCAGGGCACTGA--------------------------
+--------------------------------------------------
+-------------------------------------------------
+>ENSMUST00000005671_homosapiens
+ATGAAGTCTGGCTCCGGAGGAGGGTCCCCGA-CCTCGC---TGTGGGGGC
+TCGTGTTTCTCTCCGCCGCGCTCTCTCTCTGGCCGACGAG---TGGAGAA
+ATCTGTGGGCCCGGCATTGACATCCGCAACGACTATCAGCAGCTGAAGCG
+CCTGGAAAACTGCACGGTGATCGAGGGCTTCCTCCACATCCTGCTCATCT
+CCAAGGCC------GAGGACTACCGAAGCTACCGCTTCCCCAAGCTCACC
+GTCATCACTGAGTACTTGCTGCTCTTCCGAGTCGCTGGCCTCGAGAGCCT
+GGGAGACCTCTTCCCCAACCTCACAGTCATCCGTGGCTGGAAACTCTTCT
+ACAACTACGCACTGGTCATCTTCGAGATGACCAATCTCAAGGATATTGGG
+CTTTATAATCTGAGGAACATTACTCGGGGGGCCATCAGGATTGAGAAGAA
+CGCCGACCTCTGTTACCTCTCCACCATAGACTGGTCTCTCATCTTGGATG
+CGGTGTCCAATAACTACATTGTGGGGAACAA-GCCCCCGA--AGGAATGT
+GGGGACCTGTGTCCAGGGACATTGGAGGAGAA-GCCCA--TGTGTGAGAA
+GACCACCATCAACAATGAGTACAACTACCGCTGCTGGACCACAAATCGCT
+GCCAGAAAATGTGCCCAAGTGTGTGCGGGAAGCGAGCCTGCACCGAGAAC
+AACGAGTGCTGCCACCCGGAGTGCCTGGGCAGCTGCCACACACCGGACGA
+CAACACAACCTGCGTGGCCTGCAGACACTACTACTACAAAGGCGTGTGTG
+TGCCTGCCTGCCCGCCTGGCACCTACAGGTTCGAGGGCTGGCGCTGTGTG
+GATCGCGATTTCTGC----GCCAACATCCCCAACGCTGAGA------GCA
+G--TGACTCGGATGGCTTCGTTATCCACGACGATGAGTGCATGCAGGAGT
+GTCCCTCAGGCTTCATCCGCAACAGCACCCAGAGCATGTACTGTATCCCC
+TGCGAAGGCCCCTGCCCCAAAGTCTGCGGCGATGAAGAGAAGAAAACGAA
+AACCATCGATTCGGTGACTTCTGCTCAAATGCTCCAAGGATGCACCATCC
+TGAAGGGCAATCTGCTTATTAACATCCGGAGAGGCAATAACATTGCCTCG
+GAGTTGGAGAACTTCATGGGGCTCATCGAGGTGGTGACCGGCTACGTGAA
+GATCCGCCATTCTCATGCCTTGGTCTCCTTGTCCTTCCTGAAGAACCTTC
+GTCTCATCTTAGGAGAGGAGCAGCTGGAAGGGAACTACTCCTTCTATGTC
+CTAGACAACCAGAACTTGCAGCAGCTGTGGGACTGGAACCACCGGAACCT
+GACCGTCAGGTCCGGAAAGATGTACTTTGCTTTCAATCCCAAGCTGTGTG
+TCTCCGAAATTTACCGCATGGAGGAAGTGACCGGAACCAAGGGACGCCAG
+AGCAAAGGGGACATAAACACCAGGAACAACGGAGAGCGAGCTTCCTGTGA
+AAGTGATGTTCTCCGTTTCACCTCCACCACGACCTGGAAGAACCGAATCA
+TCATAACGTGGCACCGGTACCGGCCGCCGGACTACCGGGATCTCATCAGC
+TTCACAGTTTACTACAAGGAGGCACCATTTAAAAACGTTACGGAATATGA
+CGGGCAGGATGCCTGTGGCTCCAACAGCTGGAACATGGTGGATGTAGACC
+TGCCTCCGAA---CAAGGAGGGCGAGCCTGGCATTT--------------
+----TACTGCATGGGCTGAAGCCCTGGACCCAGTATGCTGTCTATGTCAA
+GGCTGTGACCCTCACCATGGTGGAAAACGACCATATCCGTGGGGCCAAAA
+GTGAAATCTTGTACATTCGCACCAATGCTTCAGTCCCTTCCATTCCCCTA
+GATGTCCTCTCAGCATCAAACTCTTCCTCTCAGCTGATTGTGAAGTGGAA
+TCCTCCAACTCTGCCCAATGGTAACTTGAGTTACTACATTGTGAGGTGGC
+AGCGGCAGCCCCAGGATGGTTACCTGTACCGGCACAACTACTGCTCCAAA
+GA--CAAAATA-CCCATCAGAAAGTACGCCGATGGTACCATCGACGTGGA
+GGAGGTGACGGAAAATCCCAAGACAGAAGTGTGTGGTGGTGATAAAGGGC
+CATGCTGCGCTTGCCCTAAAACTGAAGCTGAGAAGCAGGCTGAGAAGGAG
+GAGGCTGAGTACCGTAAAGTCTTTGAGAATTTCCTTCACAATTCCATCTT
+TGTGCCCAGGCCCGAAAGGAGGCGGAGAGACGTCATGCAAGTGGCCAACA
+CGACCATGTCCAGCCGAAGCAGGAACACCACGGTAGCTGACACCTACAAT
+ATCACAGACCCGGAGGAGTTCGAGACAGAGTACCCTTTCTTTGAGAGCAG
+AGTGGATAACAAGGAGAGGACTGTCATCTCCAACCTCCGGCCTTTCACTC
+TGTACCGCATCGATATCCACAGCTGCAACCACGAGGCTGAGAAGCTGGGC
+TGCAGCGCCTCCAACTTCGTCTTTGCGAGAACCATGCCAGCAGAAGGAGC
+AGATGATATCCCTGGTCCGGTGACCTGGGAGCCAAGACCCGAAAACTCCA
+TCTTTTTAAAGTGGCCAGAACCCGAGAACCCCAACGGATTGATCCTAATG
+TATGAAATTAAATACGGGTCGCAAGTCGAGGATCAGCGGGAA---TGTGT
+GTCCAGACAGGAGTACAGGAAGTACGGAGGGGCCAAACTCAACCGTCTAA
+ACCCAGGGAACTATACAGCCCGGATTCAGGCTACCTCCCTCTCTGGGAAT
+GGGTCATGGACAGATCCTGTGTTCTTCTATGTC-CCCGCCAAAACGACGT
+ATGA--GAACTTCATGCATCTGATCATTGCTCTGCCGGTTGCCATCCTGC
+TGATCGTTGGGGGGCTGGTTATCATGCTGTATGTCTTCCATAGAAAGAGA
+AATAACAGCAGGTTGGGCAATGGAGT-GCTGTATGCTTCTGTGAACCCCG
+AGTATTTCAGCGCAGCTGAT---------------GTGTACGTGCCTGAT
+GAATGGGAGGTAGCTCGAGAGAAGATCACCATGAACCGGGAGCTCGGACA
+AGGGTCCTTTGGGATGGTCTATGAAGGAGTGGCCAAGGGTGTGGTCAAGG
+ATGAACCCGAAACCAGAGTGGCCATCAAGACGGTAAACGAGGCTGCAAGT
+ATGCGTGAAAGAATCGAGTTTCTCAACGAGGCCTCGGTGATGAAGGAGTT
+CAATTGTCACCATGTGGTCCGGTTGCTGGGTGTGGTATCCCAAGGCCAGC
+CCACCCTGGTCATCATGGAACTAATGACACGCGGTGATCTCAAAAGTTAT
+CTCCGGTCTCTGAGGCCAGAAGTGGAGCAGAATAATCTAGTCCTCATTCC
+TCCGAGCTTAAGCAAGATGATCCAGATGGCTGGAGAGATTGCAGATGGCA
+TGGCCTACCTCAATGCCAACAAGTTCGTCCACAGAGACCTTGCTGCTAGG
+AACTGCATGGTAGCCGAAGATTTCACAGTCAAAATTGGAGATTTCGGTAT
+GACACGAGACATCTACGAGACGGACTACTACCGGAAAGGCGGGAAGGGGT
+TGCTGCCTGTGCGCTGGATGTCTCCCGAGTCCCTCAAGGATGGTGTCTTC
+ACTACTCATTCTGATGTCTGGTCCTTCGGGGTCGTCCTCTGGGAGATCGC
+CACGCTGGCTGAGCAGCCCTACCAGGGCTTGTCCAACGAGCAAGTTCTTC
+GTTTCGTCATGGAGGGTGGCCTTCTGGACAAGCCGGACAACTGCCCTGAT
+ATGCTGTTTGAACTTATGCGCATGTGCTGGCAGTATAACCCCAAGATGCG
+GCCCTCCTTCCTGGAGATCATCGGCAGCATCAAGGATGAGATGGAGCCCA
+GCTTCCAGGAGGTCTCCTTCTACTACAGCGAGGAGAACAAGCCTCCCGAG
+CCAGAGGAGCTGGAGATGG------------AGCCTGAGAACATGGAGAG
+CGTCCCACTGGACCCTTCGGCCTCCTCAGCCTCCCTGCCTCTGCCTGAAA
+GACACTCAGGACACAAGGCTGAGAATGGCCCGGGCCCTGGCGTGCTCGTT
+CTCCGCGCCAGTTTTGATGAGAGACAGCCTTACGCTCACATGAACGGGGG
+ACGCGCCAACGAGAGGGCCTTGCCTCTGCCCCAGTCCTCGACCTGCTGA
+>ENSRNOT00000019267_rattusnorvegicus
+ATGAAGTCTGGCTCCGGAGGAGGGTCCCCGA-CCTCGC---TGTGGGGGC
+TCGTGTTTCTCTCCGCCGCGCTCTCGCTCTGGCCGACGAG---TGGAGAA
+ATTTGTGGGCCCGGCATTGACATCCGCAACGACTATCAGCAGCTGAAGCG
+CCTGGAAAACTGCACGGTGATCGAGGGCTTCCTCCACATCCTGCTCATCT
+CCAAGGCC------GAGGACTACCGAAGCTACCGCTTCCCCAAGCTCACG
+GTCATCACCGAGTACTTGCTGCTGTTTCGAGTGGCCGGCCTCGAGAGCCT
+GGGAGACCTCTTCCCGAACCTCACAGTCATCCGTGGCTGGAAACTCTTCT
+ACAATTACGCACTGGTCATCTTCGAGATGACCAATCTCAAGGATATTGGG
+CTTTATAATCTGAGGAACATTACTCGGGGGGCCATCAGGATTGAGAAAAA
+CGCTGACCTCTGTTACCTCTCCACCATAGACTGGTCTCTCATCTTGGATG
+CGGTGTCCAATAACTACATTGTGGGGAACAA-GCCCCCAA--AGGAATGT
+GGGGACCTGTGTCCAGGGACCTTGGAGGAGAA-GCCCA--TGTGTGAGAA
+GACCACCATCAACAATGAGTACAACTACCGCTGCTGGACCACAAATCGCT
+GCCAGAAAATGTGCCCAAGTGTGTGTGGGAAGCGAGCCTGCACCGAGAAC
+AATGAGTGCTGCCACCCGGAGTGCCTAGGCAGCTGCCACACACCGGACGA
+CAACACAACCTGCGTGGCCTGCCGACACTACTACTACAAAGGCGTGTGCG
+TGCCTGCCTGCCCGCCTGGCACCTACAGGTTCGAGGGCTGGCGCTGTGTG
+GACCGGGATTTCTGC----GCCAACATCCCCAACGCCGAGA------GCA
+G--TGACTCAGATGGCTTCGTCATCCACGATGGCGAGTGCATGCAGGAGT
+GTCCATCAGGCTTCATCCGCAACAGCACCCAGAGCATGTACTGTATCCCC
+TGTGAAGGCCCCTGCCCCAAGGTCTGCGGCGATGAAGAAAAGAAAACGAA
+AACCATCGATTCTGTGACGTCTGCCCAGATGCTCCAAGGGTGCACCATTT
+TGAAGGGCAATCTGCTTATTAACATCCGGCGAGGCAATAACATTGCCTCG
+GAATTGGAGAACTTCATGGGGCTCATCGAGGTGGTGACTGGCTACGTGAA
+GATCCGCCATTCCCATGCCTTGGTCTCCTTGTCCTTCCTGAAGAACCTTC
+GTCTCATCTTAGGAGAGGAGCAGCTAGAAGGGAACTACTCCTTCTATGTC
+CTGGACAACCAGAACTTGCAGCAGCTGTGGGACTGGAACCACCGGAACCT
+GACCGTCAGGTCAGGGAAAATGTACTTCGCTTTCAATCCCAAGCTGTGTG
+TCTCTGAAATTTACCGGATGGAGGAGGTGACAGGAACAAAGGGACGGCAG
+AGCAAAGGAGACATAAACACCAGGAACAACGGAGAGCGAGCTTCCTGTGA
+AAGTGATGTTCTCCGTTTCACCTCCACCACCACCTGGAAGAACCGCATCA
+TCATAACGTGGCACCGGTACCGGCCGCCGGACTACCGGGATCTCATCAGT
+TTCACAGTCTACTACAAGGAGGCACCCTTTAAAAACGTCACGGAATACGA
+CGGGCAGGATGCCTGTGGCTCCAACAGCTGGAACATGGTGGACGTGGACC
+TGCCTCCGAA---CAAGGAGGGGGAGCCTGGCATTT--------------
+----TGCTGCATGGGCTGAAGCCCTGGACCCAGTATGCAGTCTATGTCAA
+GGCTGTGACCCTCACCATGGTGGAAAACGACCACATCCGTGGGGCCAAAA
+GTGAAATCTTGTACATTCGCACCAACGCTTCAGTTCCTTCCATTCCTCTA
+GATGTCCTCTCGGCATCAAACTCCTCCTCTCAGCTGATCGTGAAGTGGAA
+CCCCCCAACTCTGCCCAATGGTAACTTGAGTTACTACATTGTGAGGTGGC
+AGCGGCAGCCGCAGGATGGCTATCTGTTCCGGCACAACTACTGCTCCAAA
+GA--CAAAATA-CCCATCAGAAAGTACGCCGATGGTACCATCGATGTGGA
+GGAGGTGACAGAAAATCCCAAGACAGAAGTGTGCGGTGGTGATAAAGGGC
+CGTGCTGTGCCTGTCCTAAAACCGAAGCTGAGAAGCAGGCTGAGAAGGAG
+GAGGCTGAGTACCGTAAAGTCTTTGAGAATTTCCTTCACAACTCCATCTT
+TGTGCCCAGACCTGAGAGGAGGCGGAGAGATGTCCTGCAGGTGGCTAACA
+CCACCATGTCCAGCCGAAGCAGGAACACCACGGTAGCTGACACCTACAAT
+ATCACAGACCCGGAAGAGTTCGAGACAGAATACCCTTTCTTTGAGAGCAG
+AGTGGATAACAAGGAGAGGACTGTCATTTCCAACCTCCGGCCTTTCACTC
+TGTACCGTATCGATATCCACAGCTGCAACCACGAGGCTGAGAAGCTGGGC
+TGCAGCGCCTCCAACTTTGTCTTTGCAAGAACCATGCCAGCAGAAGGAGC
+AGATGACATTCCTGGCCCAGTGACCTGGGAGCCAAGACCTGAAAACTCCA
+TCTTTTTAAAGTGGCCAGAACCCGAGAACCCCAACGGATTGATTCTAATG
+TATGAAATAAAATACGGATCGCAAGTCGAGGATCAGCGGGAA---TGTGT
+GTCCAGACAGGAGTACAGGAAGTATGGAGGGGCCAAACTTAACCGTCTAA
+ACCCAGGGAACTATACGGCCCGGATTCAGGCTACCTCCCTCTCTGGGAAT
+GGGTCGTGGACAGATCCTGTGTTCTTCTATGTC-CCAGCCAAAACAACGT
+ATGA--GAATTTCATGCATCTGATCATTGCTCTGCCGGTTGCCATCCTGC
+TGATTGTGGGGGGCCTGGTAATCATGCTGTATGTCTTCCATAGAAAGAGG
+-------GTGTCTGGGCCAGCAGAAG-GCAGTA-GTTGGAAAGGGCCATT
+TCCATCCTGTCTGTTCCTA----------------GTGTACGTGCCTGAT
+GAATGGGAGGTAGCTCGGGAGAAGATCACCATGAACCGGGAGCTCGGACA
+AGGGTCCTTCGGGATGGTCTATGAAGGAGTGGCCAAGGGCGTGGTCAAGG
+ACGAGCCTGAAACCAGAGTGGCCATCAAGACAGTGAATGAGGCTGCAAGT
+ATGCGTGAGAGAATTGAGTTTCTCAACGAGGCCTCAGTGATGAAGGAGTT
+CAACTGTCACCATGTGGTCCGGTTGCTGGGTGTAGTATCCCAAGGCCAGC
+CCACCCTGGTCATCATGGAACTAATGACACGTGGCGATCTCAAAAGTTAT
+CTCCGGTCTCTAAGGCCAGAGGTGGAG---AATAATCTAGTCCTGATTCC
+TCCGAGCTTAAGCAAGATGATCCAGATGGCTGGAGAGATTGCAGATGGCA
+TGGCCTACCTCAATGCCAACAAGTTCGTCCACAGAGACCTGGCTGCTCGG
+AACTGCATGGTAGCTGAAGATTTCACAGTCAAAATTGGAGATTTTGGTAT
+GACACGAGACATCTACGAGACGGACTACTACCGGAAAGGCGGGAAGGGCT
+TGCTGCCTGTGCGCTGGATGTCTCCCGAGTCCCTCAAGGATGGCGTCTTC
+ACCACTCATTCCGATGTCTGGTCCTTTGGGGTCGTCCTCTGGGAGATCGC
+CACTCTGGCTGAGCAGCCGTACCAGGGCCTGTCCAACGAGCAAGTTCTTC
+GTTTCGTCATGGAGGGCGGCCTTCTGGACAAGCCGGATAACTGCCCCGAT
+ATGCTGTTTGAACTTATGCGCATGTGCTGGCAGTACAACCCCAAGATGCG
+GCCCTCCTTCCTGGAGATCATCGGAAGCATCAAGGATGAGATGGAGCCCA
+GTTTCCAGGAGGTCTCCTTCTACTACAGCGAGGAGAACAAGCCTCCAGAG
+CCGGAGGAGCTGGAGATGGAGCTGGAGCTGGAGCCCGAGAACATGGAGAG
+CGTCCCGCTGGACCCTTCGGCCTCCTCAGCCTCCCTGCCTCTGCCTGAAA
+GACACTCAGGACACAAGGCTGAGAACGGCCC------TGGCGTGCTGGTT
+CTCCGTGCCAGTTTTGATGAGAGACAGCCTTACGCTCACATGAATGGGGG
+ACGCGCCAACGAGAGGGCCTTGCCTCTGCCCCAGTCCTCAACCTGC---
+>ENSPTRT00000013802_pantroglodytes
+ATGAAGTCTGGCTCCGGAGGAGGGTCCCCGA-CCTCGC---TGTGGGGGC
+TCCTGTTTCTCTCCGCCGCGCTCTCGCTCTGGCCGACGAG---TGGAGAA
+ATCTGCGGGCCAGGCATCGACATCCGCAACGACTATCAGCAGCTGAAGCG
+CCTGGAGAACTGCACGGTGATCGAGGGCTACCTCCACATCCTGCTCATCT
+CCAAGGCC------GAGGACTACCGCAGCTACCGCTTCCCCAAGCTCACG
+GTCATTACCGAGTACTTGCTGCTGTTCCGAGTGGCTGGCCTCGAGAGCCT
+CGGAGACCTCTTCCCCAACCTCACGGTCATCCGCGGCTGGAAACTCTTCT
+ACAACTACGCCCTGGTCATCTTCGAGATGACCAATCTCAAGGATATTGGG
+CTTTACAACCTGAGGAACATTACTCGGGGGGCCATCAGGATTGAGAAAAA
+TGCTGACCTCTGTTACCTCTCCACTGTGGACTGGTCCCTGATCCTGGATG
+CGGTGTCCAATAACTACATTGTGGGGAATAA-GCCCCCAA--AGGAATGT
+GGGGACCTGTGTCCAGGGACCATGGAGGAGAA-GCCGA--TGTGTGAGAA
+GACCACCATCAACAATGAGTACAACTACCGCTGCTGGACCACAAACCGCT
+GCCAGAAAATGTGCCCGAGCACGTGTGGGAAGCGGGCGTGCACCGAGAAC
+AACGAGTGCTGCCACCCCGAGTGCCTGGGCAGCTGCAGCGCGCCTGACAA
+CGACACGGCCTGTGTAGCTTGCCGCCACTACTACTATGCCGGTGTCTGTG
+TGCCTGCCTGCCCGCCCAACACCTACAGGTTTGAGGGCTGGCGCTGTGTG
+GACCGTGACTTCTGC----GCCAACATCCTCAGCGCCGAGA------GCA
+G--CGACTCCGAGGGGTTTGTGATCCACGACGGCGAGTGCATGCAGGAGT
+GCCCCTCGGGCTTCATCCGCAACGGCAGCCAGAGCATGTACTGCATCCCT
+TGTGAAGGTCCTTGCCCGAAGGTCTGTG---AGGAAGAAAAGAAAACAAA
+GACCATTGATTCTGTTACTTCTGCTCAGATGCTCCAAGGATGCACCATCT
+TCAAGGGCAATTTGCTCATTAACATCCGACGGGGGAATAACATTGCTTCA
+GAGCTGGAGAACTTCATGGGGCTCATCGAGGTGGTGACGGGCTACGTGAA
+GATCCGCCATTCTCATGCCTTGGTCTCCTTGTCCTTCCTAAAAAACCTTC
+GCCTCATCTTAGGAGAGGAGCAGCTAGAAGGGAATTACTCCTTCTACGTC
+CTCGACAACCAGAACTTGCAGCAACTATGGGACTGGGACCACCGCAACCT
+GACCATCAAAGCAGGGAAAATGTACTTTGCTTTCAATCCCAAATTATGTG
+TTTCCGAAATTTACCGCATGGAGGAAGTGACGGGGACTAAAGGGCGCCAA
+AGCAAAGGGGACATAAACACCAGGAACAACGGGGAGAGAGCCTCCTGTGA
+AAGTGACGTCCTGCATTTCACCTCCACCACCACGTCGAAGAATCGCATCA
+TCATAACCTGGCACCGGTACCGGCCCCCTGACTACAGGGATCTCATCAGC
+TTCACCGTTTACTACAAGGAAGCACCCTTTAAGAATGTCACGGAGTATGA
+TGGGCAGGACGCCTGCGGCTCCAACAGCTGGAACATGGTGGACGTGGACC
+TCCCGCCCAA---CAAGGACGTGGAGCCCGGCATCT--------------
+----TACTACATGGGCTGAAGCCCTGGACTCAGTACGCCGTTTACGTCAA
+GGCTGTGACCCTCACCATGGTGGAGAATGACCATATCCGTGGGGCCAAGA
+GTGAGATCTTGTACATTCGCACCAATGCTTCAGTTCCTTCCATTCCCTTG
+GACGTTCTTTCAGCATCGAACTCCTCTTCTCAGTTAATCGTGAAGTGGAA
+CCCTCCCTCTCTGCCCAACGGCAACCTGAGTTACTACATTGTGCGCTGGC
+AGCGGCAGCCTCAGGACGGCTACCTTTACCGGCACAATTACTGCTCCAAA
+GA--CAAAATC-CCCATCAGGAAGTATGCCGACGGCACCATCGACATTGA
+GGAGGTCACAGAGAACCCCAAGACTGAGGTGTGTGGTGGGGAGAAAGGGC
+CTTGCTGCGCCTGCCCCAAAACTGAAGCCGAGAAGCAGGCCGAGAAGGAG
+GAGGCTGAATACCGCAAAGTCTTTGAGAATTTCCTGCACAACTCCATCTT
+CGTGCCCAGACCTGAAAGGAAGCGGAGAGATGTCATGCAAGTGGCCAACA
+CCACCATGTCCAGCCGAAGCAGGAACACCACGGCCGCAGACACCTACAAC
+ATCACCGACCCGGAAGAGCTGGAGACAGAGTACCCTTTCTTTGAGAGCAG
+AGTGGATAACAAGGAGAGAACTGTGATTTCTAACCTTCGGCCTTTCACAT
+TGTACCGCATCGATATCCACAGCTGCAACCACGAGGCTGAGAAGCTGGGC
+TGCAGCGCCTCCAACTTTGTCTTTGCAAGGACTATGCCCGCAGAAGGAGC
+AGATGACATTCCTGGGCCAGTGACCTGGGAGCCAAGGCCTGAAAACTCCA
+TCTTTTTAAAGTGGCCGGAACCTGAGAATCCCAATGGATTGATTCTAATG
+TATGAAATAAAATACGGATCACAAGTTGAGGATCAGCGAGAA---TGTGT
+GTCCAGACAGGAATACAGGAAGTATGGAGGGGCCAAGCTAAACCGGCTAA
+ACCCGGGGAACTACACAGCCCGGATTCAGGCCACATCTCTCTCTGGGAAT
+GGGTCGTGGACAGATCCTGTGTTCTTCTATGTC-CAGGCCAAAACAGGAT
+ATGA--AAACTTCATCCATCTGATCATCGCTCTGCCCGTCGCTGTCCTGT
+TGATCGTGGGAGGGTTGGTGATTATGCTGTACGTCTTCCATAGAAAGAGA
+AATAACAGCAGGCTGGGGAATGGAGT-GCTGTATGCCTCTGTGAACCCGG
+AGTACTTCAGCGCTGCTGAT---------------GTGTACGTTCCCGAT
+GAGTGGGAGGTGGCTCGGGAGAAGATCACCATGAGCCGGGAACTTGGGCA
+GGGGTCCTTTGGGATGGTCTATGAAGGAGTTGCCAAGGGTGTGGTGAAAG
+ATGAACCTGAAACCAGAGTGGCCATTAAAACAGTGAACGAGGCCGCAAGC
+ATGCGTGAAAGGATTGAGTTTCTCAACGAAGCTTCTGTGATGAAGGAGTT
+CAATTGTCACCATGTGGTGCGATTGCTGGGTGTGGTGTCCCAAGGCCAGC
+CAACACTGGTCATCATGGAACTGATGACACGGGGCGATCTCAAAAGTTAT
+CTCCGGTCTCTGAGGCCAGAAATGGAG---AATAATCCAGTCCTAGCACC
+TCCAAGCCTGAGCAAGATGATTCAGATGGCCGGAGAGATTGCAGACGGCA
+TGGCATACCTCAACGCCAATAAGTTCGTCCACAGAGACCTTGCTGCCCGG
+AATTGCATGGTAGCCGAAGATTTCACAGTCAAAATCGGAGATTTTGGTAT
+GACGCGAGATATCTATGAGACAGACTATTACCGGAAAGGAGGGAAAGGGC
+TGCTGCCCGTGCGCTGGATGTCTCCCGAGTCCCTCAAGGATGGAGTCTTC
+ACCACTTACTCGGACGTCTGGTCCTTCGGGGTCGTCCTCTGGGAGATCGC
+CACACTGGCCGAGCAGCCCTACCAGGGCTTGTCCAACGAGCAAGTCCTTC
+GCTTCGTCATGGAGGGCGGCCTTCTGGACAAGCCAGACAACTGTCCCGAC
+ATGCTGTTTGAACTGATGCGCATGTGCTGGCAGTATAACCCCAAGATGAG
+GCCTTCCTTCCTGGAGATCATCAGCAGCATCAAAGAGGAGATGGAGCCTG
+GCTTCCGGGAGGTCTCCTTCTACTACAGCGAGGAGAACAAGCTGCCCGAG
+CCGGAGGAGCTGGACCTGG------------AGCCAGAGAACATGGAGAG
+CGTCCCCCTGGACCCCTCGGCCTCC---------CTGCCACTGCCCGACA
+GACACTCAGGACACAAGGCCGAGAACGGCCCCGGCCCTGGGGTGCTGGTC
+CTCCGCGCCAGCTTCGACGAGAGACAGCCGTACGCACACATGAACGGGGG
+CCGCAAGAACGAGCGGGCCTTGCCGCTGCCCCAGTCTTCGACCTGCTGA