changeset 3:78dd29aa7fc1 draft

planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee commit 81a1e79dda127d1afc16c7e456bbec16093a3c3f-dirty
author earlhaminst
date Mon, 20 Feb 2017 06:25:50 -0500
parents df6527887a18
children fa59d6fea7f5
files filter_by_fasta_ids.py t_coffee.xml t_coffee_to_cigar.py test-data/cigar.tabular test-data/output2.fasta
diffstat 5 files changed, 279 insertions(+), 129 deletions(-) [+]
line wrap: on
line diff
--- a/filter_by_fasta_ids.py	Mon Jan 09 14:26:41 2017 -0500
+++ b/filter_by_fasta_ids.py	Mon Feb 20 06:25:50 2017 -0500
@@ -2,65 +2,27 @@
 """ A script to build specific fasta databases """
 from __future__ import print_function
 
-
-import logging
+import collections
 import sys
 
-
-# ===================================== Iterator ===============================
-class Sequence:
-    ''' Holds protein sequence information '''
-    def __init__(self):
-        self.header = ""
-        self.sequence_parts = []
-
-    def get_sequence(self):
-        return "".join([line.rstrip().replace('\n', '').replace('\r', '') for line in self.sequence_parts])
+Sequence = collections.namedtuple('Sequence', ['header', 'sequence'])
 
 
-class FASTAReader:
-    """
-        FASTA db iterator. Returns a single FASTA sequence object.
-    """
-    def __init__(self, fasta_name):
-        self.fasta_file = open(fasta_name)
-        self.next_line = self.fasta_file.readline()
-
-    def __iter__(self):
-        return self
-
-    def __next__(self):
-        ''' Iteration '''
-        # while True:
-        #    line = self.fasta_file.readline()
-        #    if not line:
-        #        raise StopIteration
-        #    if line[0] == '>':
-        #        break
-        next_line = self.next_line
-        if not next_line:
-            raise StopIteration
-
-        seq = Sequence()
-        seq.header = next_line.rstrip().replace('\n', '').replace('\r', '')
-
-        next_line = self.fasta_file.readline()
-        while next_line and next_line[0] != '>':
-            # tail = self.fasta_file.tell()
-            # line = self.fasta_file.readline()
-            # if not line:
-            #   break
-            # if line[0] == '>':
-            #   self.fasta_file.seek(tail)
-            #   break
-            seq.sequence_parts.append(next_line)
-            next_line = self.fasta_file.readline()
-        self.next_line = next_line
-        return seq
-
-    # Python 2/3 compat
-    next = __next__
-# ==============================================================================
+def FASTAReader_gen(fasta_filename):
+    fasta_file = open(fasta_filename)
+    line = fasta_file.readline()
+    while True:
+        if not line:
+            return
+        assert line.startswith('>'), "FASTA headers must start with >"
+        header = line.rstrip()
+        sequence_parts = []
+        line = fasta_file.readline()
+        while line and line[0] != '>':
+            sequence_parts.append(line.rstrip())
+            line = fasta_file.readline()
+        sequence = "".join(sequence_parts)
+        yield Sequence(header, sequence)
 
 
 def target_match(target, search_entry):
@@ -74,9 +36,6 @@
 
 def main():
     ''' the main function'''
-    logging.basicConfig(filename='filter_fasta_log',
-                        level=logging.INFO,
-                        format='%(asctime)s :: %(levelname)s :: %(message)s',)
 
     used_sequences = set()
     work_summary = {'wanted': 0, 'found': 0, 'duplicates': 0}
@@ -87,20 +46,17 @@
             targets.append(">%s" % line.strip().upper())
 
     work_summary['wanted'] = len(targets)
-    homd_db = FASTAReader(sys.argv[2])
 
     # output = open(sys.argv[3], "w")
-    for entry in homd_db:
+    for entry in FASTAReader_gen(sys.argv[2]):
         target_matched_results = target_match(targets, entry.header)
         if target_matched_results:
             work_summary['found'] += 1
             targets.remove(target_matched_results)
-            sequence = entry.get_sequence()
+            sequence = entry.sequence
             used_sequences.add(sequence)
             print(entry.header)
             print(sequence)
-    for parm, count in work_summary.items():
-        logging.info('%s ==> %d', parm, count)
 
 
 if __name__ == "__main__":
--- a/t_coffee.xml	Mon Jan 09 14:26:41 2017 -0500
+++ b/t_coffee.xml	Mon Feb 20 06:25:50 2017 -0500
@@ -1,4 +1,4 @@
-<tool id="t_coffee" name="T-Coffee" version="11.0.8">
+<tool id="t_coffee" name="T-Coffee" version="11.0.8_1">
     <description>multiple sequence alignment</description>
     <requirements>
         <requirement type="package" version="11.0.8">t_coffee</requirement>
@@ -34,13 +34,10 @@
         #if $outputs
             #set $outputs_arr = str($outputs).split(',')
             #for $o in $outputs_arr
-                #if $o not in ['cigar', 'dnd']
+                #if $o != 'dnd'
                     #set $output_opt += $o + ','
                 #end if
             #end for
-            #if 'cigar' in $outputs_arr and 'fasta_aln' not in $outputs_arr
-                #set $output_opt += 'fasta_aln,'
-            #end if
         #else
             #set $outputs_arr = []
         #end if
@@ -49,10 +46,6 @@
         #end if
 
         t_coffee '$input' $method_opt $output_opt -n_core \${GALAXY_SLOTS:-1} -run_name t_coffee_out -quiet
-
-        #if 'cigar' in $outputs_arr
-            && python '$__tool_directory__/t_coffee_to_cigar.py' t_coffee_out.fasta_aln > '$cigar'
-        #end if
 ]]>
     </command>
     <inputs>
@@ -93,7 +86,6 @@
             <option value="proba_pair">proba_pair</option>
         </param>
         <param name="outputs" type="select" multiple="true" optional="false" display="checkboxes" label="Output formats">
-            <option value="cigar">CIGAR</option>
             <option value="clustalw_aln">clustalw_aln</option>
             <option value="dnd" selected="true">dnd</option>
             <option value="fasta_aln">fasta_aln</option>
@@ -107,9 +99,6 @@
         </param>
     </inputs>
     <outputs>
-        <data name="cigar" format="tabular" label="${tool.name} on ${on_string}: cigar" >
-            <filter>'cigar' in outputs</filter>
-        </data>
         <data name="clustalw_aln" format="clustalw" label="${tool.name} on ${on_string}: clustalw_aln" from_work_dir="t_coffee_out.clustalw_aln">
             <filter>'clustalw_aln' in outputs</filter>
         </data>
@@ -147,15 +136,15 @@
             <param name="input" value="input.fasta" ftype="fasta" />
             <param name="method02" value="clustalw_msa" />
             <param name="outputs" value="fasta_aln" />
-            <output name="fasta_aln" file="output1.fasta" />
+            <output name="fasta_aln" file="output1.fasta" ftype="fasta" />
         </test>
         <test>
             <param name="filter_fasta" value="yes" />
             <param name="fasta_input" value="input.fasta" ftype="fasta" />
             <param name="identifiers" value="ids.txt" ftype="txt" />
             <param name="method02" value="clustalw_msa" />
-            <param name="outputs" value="cigar" />
-            <output name="cigar" file="cigar.tabular" />
+            <param name="outputs" value="fasta_aln" />
+            <output name="fasta_aln" file="output2.fasta" ftype="fasta" />
         </test>
     </tests>
     <help>
--- a/t_coffee_to_cigar.py	Mon Jan 09 14:26:41 2017 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,47 +0,0 @@
-#!/usr/bin/env python
-""" A script to build specific fasta databases """
-from __future__ import print_function
-
-import re
-import sys
-
-
-FASTA_MATCH_RE = re.compile(r'[^-]')
-
-
-def convert_and_print(header, sequence):
-    # Converts each match into M and each gap into D
-    tmp_seq = FASTA_MATCH_RE.sub('M', sequence)
-    tmp_seq = tmp_seq.replace('-', 'D')
-    # Split the sequence in substrings composed by the same letter
-    tmp_seq = tmp_seq.replace('DM', 'D,M')
-    tmp_seq = tmp_seq.replace('MD', 'M,D')
-    cigar_list = tmp_seq.split(',')
-    # Condense each substring, e.g. DDDD in 4D, and concatenate them again
-    cigar = ''
-    for s in cigar_list:
-        if len(s) > 1:
-            cigar += str(len(s))
-        cigar += s[0]
-    print("%s\t%s" % (header, cigar))
-
-
-def main():
-    with open(sys.argv[1]) as fh:
-        header = None
-        sequence = None
-        for line in fh:
-            line = line.strip()
-            if line and line[0] == '>':
-                if header:
-                    convert_and_print(header, sequence)
-                header = line[1:]
-                sequence = ''
-            else:
-                sequence += line
-    if header:
-        convert_and_print(header, sequence)
-
-
-if __name__ == "__main__":
-    main()
--- a/test-data/cigar.tabular	Mon Jan 09 14:26:41 2017 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,3 +0,0 @@
-ENSMUST00000091291_musmusculus	41M3D64MD473MD7M2D375M3D74M3D771M2D13MD227MD13M7D226M3D58M3D6MD21M3D437M2D33MD76MD1017M12D34M10D14M3D22M4D10M4D107M
-ENSCAFT00000026349_canisfamiliaris	16D26M2D64MD422MD8M2D140M3D195M2D5M8D16M5D52M3D61M6D6M3D672M20D24MD311M8D25M2D58M2D11M2D103M5D17M6D18M5D22M2D10MD37M3D499M3D10M4D30M5D12M4D35M5D6M10D904M4D27M6D40M8D16M3D21M126D
-ENSRNOT00000019267_rattusnorvegicus	75MD14M6D112M6D317MD8M2D40MD7M2D273M4D23M3D16M5D794M3D6M18D315M2D7MD46M2D249MD335M3D142M2D8MD8M2D14MD35MD63M5D16M3D16M4D10M6D1152M3D
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output2.fasta	Mon Feb 20 06:25:50 2017 -0500
@@ -0,0 +1,255 @@
+>ENSMUST00000091291_musmusculus
+ATGGGCTTCGGGAGAGGATGTGAGACGACGGCTGTGCCATT---GCTGGT
+GGCCGTGGCCGCGTTGCTGGTGGGCACAGCCGGCCACCTGTACCCTGGAG
+AGGTGTGC-CCTGGTATGGACATCCGGAACAACCTGACCAGGCTACATGA
+GCTGGAGAACTGCTCAGTCATTGAGGGCCATCTGCAGATCCTCCTGATGT
+TCAAGACCAGACCCGAAGATTTCCGAGACCTCAGTTTCCCCAAACTCATC
+ATGATCACAGATTACCTGCTTCTCTTCCGTGTCTATGGTCTGGAAAGTCT
+GAAAGACCTCTTCCCAAATCTCACAGTCATCCGAGGCTCCCGTCTCTTCT
+TCAACTATGCCCTGGTTATCTTCGAGATGGTCCACCTGAAGGAGCTGGGG
+CTTTATAACCTCATGAACATCACCCGGGGCTCTGTCCGCATCGAGAAGAA
+TAATGAGCTCTGCTACCTGGCCACTATCGACTGGTCCCGTATCCTGGATT
+CTGTGGAGGACAACTACATTGTACTGAACAAAGATGACAACGAGGAATGT
+GGGGATGTCTGTCCAGGCACCGCCAAGGGCAA-GACCAAC--TGTCCTGC
+CACTGTCATCAATGGGCAGTTTGTGGAACGGTGCTGGACACACAGTCATT
+GTCAGAAAGTTTGCCCAACCATCTGTAAGTCACATGGCTGCACAGCTGAA
+GGCCTGTGCTGCCACAAAGAGTGCCTGGGCAACTGTTCGGAACCTGATGA
+CCCCACCAAGTGTGTGGCCTGTCGCAACTTCTATCTGGATGGTCAGTGTG
+TGGAGACCTGCCCGCCACCCTACTATCACTTCCAGGACTGGCGCTGTGTG
+AACTTCAGCTTCTGCCAAGACCTTCACTTCAAATGCAGGAACTCTCGGAA
+GCCTGGCTGCCACCAATACGTCATTCACAACAATAAGTGCATCCCCGAGT
+GCCCGTCTGGCTATACC---ATGAATTCCAGCAACTTGATGTGCACCCCA
+TGTCTGGGACCCTGCCCTAAGGTCTGCCAAATCCTCGAAGGTGA---GAA
+GACCATTGATTCTGTGACATCTGCCCAGGAGCTCCGAGGCTGCACTGTGA
+TCAACGGTAGCCTGATCATCAACATCCGAGGGGGCAACAACCTGGCAGCT
+GAGCTGGAGGCTAACCTTGGCCTCATTGAAGAAATTTCGGGATTTCTAAA
+GATCCGCCGCTCCTATGCTCTGGTATCACTTTCTTTCTTCAGGAAGCTAC
+ATCTGATTCGAGGAGAGACCTTGGAAATTGGGAACTATTCTTTTTATGCC
+TTGGACAACCAGAACCTGAGGCAACTCTGGGACTGGAGCAAACACAACCT
+CACCATCACTCAGGGCAAGCTCTTCTTCCATTACAACCCGAAACTCTGCT
+TGTCTGAAATTCACAAGATGGAAGAAGTCTCCGGAACTAAGGGCCGTCAG
+GAGAGGAACGACATTGCCCTGAAGACCAATGGGGACCAGGCATCGTGTGA
+AAATGAATTGCTTAAATTTTCTTTCATTCGGACATCTTTTGACAAGATCC
+TGTTGAGGTGGGAACCCTACTGGCCCCCCGACTTCCGAGATCTCCTGGGA
+TTCATGTTGTTCTACAAAGAGGCCCCTTATCAGAATGTGACAGAGTTTGA
+TGGGCAGGATGCTTGTGGCTCCAACAGCTGGACTGTGGTGGATATTGACC
+CGCCCCAGAGGTCCAACGACCCCAAGTCTCAGACCCCAAGCCACCCTGGG
+TGGCTGATGCGGGGCCTCAAACCCTGGACCCAATACGCCATCTTTGTGAA
+GACCTTGGTTACCTTCTC--TGATGAACGGCGG-ACCTATGGAGCCAAAA
+GTGATATCATCTATGTGCAAACAGATGCCACTAATCCTTCTGTCCCCCTG
+GATCCCATATCAGTTTCTAATTCCTCATCTCAGATTATCTTAAAGTGGAA
+GCCCCCCTCTGACCCCAATGGCAACATCACACACTACCTGGTGTACTGGG
+AGAGGCAAGCAGAGGACAGCGAGCTGTTTGAGCTGGATTATTGTCTCAAA
+GGGCTGAAGCT-CCCTTCACGGACC-------TGGTCCCCACCCTTTGAG
+TCTGATGATTCTCAGAAGCACAATCAGAGTGAGTATGACGACTCGGCCAG
+TGAGTGCTGCTCATGCCCTAAGACTGACTCTCAGATCCTGAAGGAGCTGG
+AGGAGTCTTCATTCAGGAAGACCTTCGAGGATTACCTGCACAACGTGGTT
+TTTGTCCCCAGGCCATCCCGAAAGCGAAGATCCCTTGAAGAGGTGGGGAA
+TGTGACAG---CCACCACACTCACACTTCCAGATTTCCCCAACGTCTCCT
+CTACCATTGTGCCCACAAG---TCAGGA-GGAGCACAGGCCATTTGAGAA
+---AGTGGTGAACAAGGAGTCACTTGTCATCTCTGGCCTGAGACACTTCA
+CTGGGTACCGCATTGAGCTGCAGGCATGCAATCAAGATTCCCCAGATGAG
+AGGTGCAGTGTGGCTGCCTACGTCAGTGCCCGGACCATGCCTGAAGCTAA
+GGCAGATGACATCGTTGGCCCTGTGACTCATGAAATCTTTGAGAACAATG
+TTGTACACTTAATGTGGCAAGAGCCAAAGGAACCTAATGGTCTGATTGTG
+CTATATGAAGTGAGCTATCGCCGATATGGTGATGAGGAGCTGCACCTCTG
+TGTCTCCCGGAAGCATTTTGCCCTGGAGCGGGGCTGCAGGCTGCGAGGGC
+TCTCCCCAGGAAACTACAGTGTTCGAGTCCGGGCTACCTCTCTGGCAGGA
+AATGGCTCCTGGACAGAACCCACCTATTTTTATGTGACTG--ATTATTTA
+GATGTCCCATCAAATATTGCCAAAA-TTATCATTGGACCCCTCATCTTTG
+TCTTCCTCTTCAGTGTTGTGATTGGAAGTATTTATCTATTTCTGAGAAAG
+AG-GCAGCCGGATGGGCCAATGGGACCACTGTATGCATCTTCAAACCCTG
+AGTACCTCAGTGCCAGTGATGTGTTTCCATCTTCTGTGTACGTGCCGGAC
+GAGTGGGAGGTGCCTCGAGAGAAGATCACCCTTCTTCGAGAGCTGGGGCA
+GGGATCCTTTGGTATGGTGTATGAAGGCAATGCCAAGGATATCATCAAGG
+GTGAGGCAGAGACCCGTGTTGCGGTTAAGACTGTCAATGAGTCAGCCAGT
+CTTCGAGAACGGATCGAGTTCCTCAATGAGGCATCAGTCATGAAGGGATT
+CACCTGCCATCATGTGGTCCGCCTTCTTGGGGTGGTATCCAAAGGACAGC
+CAACGCTGGTAGTGATGGAATTGATGGCTCATGGAGACCTGAAAAGTCAC
+CTCCGTTCTCTGAGGCCAGATGCTGAGAATAACCCAGGCCGCCCTCCCCC
+TACCTTGCAAGAAATGATTCAGATGACAGCAGAAATTGCTGATGGCATGG
+CATACTTGAACGCCAAGAAGTTTGTGCACCGGGACCTGGCAGCTCGAAAC
+TGCATGGTTGCCCATGATTTTACTGTCAAAATTGGAGACTTTGGAATGAC
+AAGGGACATCTACGAGACAGATTACTATCGGAAAGGGGGCAAGGGACTGC
+TTCCTGTGAGGTGGATGTCACCTGAGTCCCTGAAGGATGGAGTCTTTACT
+GCTTCTTCTGATATGTGGTCCTTTGGGGTGGTCCTTTGGGAAATCACTAG
+CCTGGCTGAGCAACCTTATCAAGGCCTGTCTAATGAACAGGTGTTGAAGT
+TTGTCATGGATGGAGGCTATCTGGATCCCCCTGATAACTGTCCAGAGAGA
+CTCACTGACCTGATGCGCATGTGCTGGCAGTTCAACCCCAAGATGAGGCC
+AACCTTCCTGGAAATCGTCAACCTGCTCAAGGATGACCTCCACCCCAGCT
+TTCCAGAAGTTTCCTTCTTCTACAGCGAGGAGAACAAGGCTCCTGAGAGT
+GAGGAGCTGGAGATGGAGTT------------TGAAGACATGGAGAATGT
+CCCGTTGGATCGTTCC----------TCTCACTGTCAGAG---AGAAGAG
+GCTGGGGGCCGGGAG----GGAGGGTCCT----CACTGAGCATCAAACGG
+ACCTATGATGAACACATCCCCTATACCCACATGAATGGGGGCAAGAAGAA
+CGGACGTGTCCTTACCCTGCCAAGGTCAAACCCTTCCTAA
+>ENSCAFT00000026349_canisfamiliaris
+----------------ATGGCAGTGCCCAGTCTGTGCCCATG--GGTCGC
+ATGCCTGCTGGTGATCCTCCTCTCCTTGGGATTTGGCCTGGACACACTAG
+AGGTGTGC-CCCAGCCTGGACATCCGCTCAGAGGTGGCGGAGCTGCGCCG
+GCTGGAGAACTGCAGCGTGGTGGAGGGCCATCTGCAGATCCTGCTCATGT
+TCACGGCCACGGGCGAGGACTTCCGCGGCCTCAGCTTCCCGCGCCTCACT
+CAGGTCACTGACTACCTGCTGCTCTTCCGCGTGTATGGCCTGGAGAGCCT
+GCGGGACCTCTTCCCCAACCTCGCGGTCGTGCGTGGCGCCCGCCTCTTCC
+TGGGTTATGCGCTGGTGGTCTACGAGATGCCGCACCTGCGGGACGTGGGG
+CTGCCGGCGCTGGGGGCCGTGCTGCGTGGGGCCGTGCGGGTGGAGAAGAA
+CCAGGAGCTCTGTCATCTCTCCACCATCGACTGGGGGCTGCTGCAGCCCT
+CGCCCAGCGCCAACCACATCGTGGGCAACAA-GCTGCGTG--AGGAGTGT
+GCCGACGTGTGCCCTGGTGTGCTGGGTGCCACCGGCCAGCCCTGCGCCAG
+GACCACCTTCAGTGGGCACACCGACTACAGATGCTGGACCTCCAGCCACT
+GCCAGAGAGTGTGTCCCTGCCCCCATGGGCTG---GCCTGCACAGCTGGG
+GGTGAGTGCTGCCACACTGAATGCCTGGGGGGCTGCAGCCGGCCGGAAGA
+CCCCCGCGCCTGCGTCGCCTGTCGCCACCTCTACTTCCAGGGTGCCTGCC
+ACCGGGCCTGCCCTCCAGGCACCTACCAGCACGAGTCCTGGCGCTGTGTC
+ACGGCAGAGCGCTGTGCCAGCCTGCGCTCT--GTGCC--------CGGCC
+GCACCTCCATC-----TTTGGCATCCACGAGGGCAGCTGCCTGGCCCAGT
+GCCCTCCGGGCTTCACCC---GCAACGGCAGCAGCATGTTCTGCCACAAG
+TGTGAGGGGCTGTGCCCCAAAGAGTGCAAGGT------GGGTAC---CAA
+GACCATCGACTCCACGCAGGCGGCACAGGACCTGGCGGGCTGCACCCACG
+TGGAGGGGAGCCTCATCATCAACCTCCGCCAGGGCTACAACCTAGAGCTG
+GAGCTGCAACAGAGCCTGGGGCTGATAGAGACCATCACTGGCTTCCTCAA
+GATCAAGCACTCCTTTGCCCTCGTGTCCCTGGGCTTTTTCAAGAACCTCA
+AACTCATCCGAGGGGACGCCATGGTGGATGGGAACTACACCCTGTATGTG
+CTGGACAACCAGAACCTACAGCAGCTGGGGGCCTGGGTGGCTGCGGGGCT
+CACCATTCCCGTGGGCAAGATATACTTCGCTTTCAACCCTCGCCTCTGCT
+TGGAGCACATCTACCGCCTGGAAGAGGTGACCGGCACGCGGGGACGGCAA
+AACAAGGCTGAGATCAACCCCCGCACCAACGGAGACCGCGCCGCCTGCCA
+AACTCGCACCCTGCGCTTCGTGTCCAACGTGACGCAAGCTGACAGCATCT
+TGCTGCGCTGGGAGCGCTACGAGCCGCTGGAGGCTCGGGACCTGCTCAGC
+TTCATCGTGTACTACAAGGAGTCCCCATTCCAGAATGCCACAGAGCACAC
+AGGTCCAGATGCCTGTGGAACCCAGAGCTGGAACCTGCTGGATGTGGAGC
+TGCCCTTAAGCCGCACCCA--------------------AGAACCCGGGG
+TAACTCTAGCACC-CCTCAAGCCCTGGACACAATATGCAGTGTTTGTACG
+GGCCATCACACTGACCACTGCTGAGGACAGCCCCCACCAAGGAGCCCAGA
+GCCCCATCGTCTACCTCCGAACCCTGCCTGCGGCGCCCACTGTGCCCCAG
+GACGTCATCTCCACGTCCAATTCCTCGTCCCACCTGCTCGTGCGCTGGAA
+GCCACCGATTCAGCGCAACGGGAACATCACCTACTACCTGGTGCTGTGGC
+AGCGTCTGGCGGAGGACGGCGACCTCTATCTCAACGACTACTGCCACCGC
+GGCCTGCGGCTGCCCACCAGCAACA--------ACGACCCGCGCTTCGAC
+CGCGAGGA--CGGTGAACTCGAAGCCGAGATGGAGCCGGGCTGCTGCCCT
+TGCCAGCACCCACCGCCT--GGGCAGGTCCT--GCCGCCGCTGGAGGCGC
+AAGAGGCCTCGTTCCAGAAGAAGTTCGAAAACTTCCTGCACAACGCCATC
+ACCATCCCCAAGTCCCCCTGGAAGGTGACGTCCGTC-----AATAAGAGC
+CCTCAAAG------ACATGCGGGGAGGCACCG-----CCGGGCGGCCGGG
+GCGCTCCGG--CTTGGGGGCA-ACAGCTCGGATTTCGAGATCCAGGAGGA
+CAAAGTGCC---CCGGGAGCGAGCAGTGTTGAGTGGTCTGCGCCACTTTA
+CGGAATATCGTATCGACATCCACGCCTGCAACCACGCGGCTCACACCGTG
+GGCTGCAGCGCGGCCACGTTCGTCTTCGCGCGCACCATGCCGCACAGAGA
+AGCTGATGGCATCCCAGGGAAGGTGGCCTGGGAGGCAGCCAGCAAAAGCA
+GTGTCCTCCTGCGCTGGCTGGAGCCACCTGACCCCAACGGACTCATCCTC
+AAGTATGAAATCAAGTACCGCCGCTTGGGAGAGGAGGCCACAGTGCTATG
+TGTGTCCCGCCTACGATATGCCAAATTTGGGGGTGTCCAGCTGGCCCTGC
+TGCCCCCTGGAAACTACTCCGCCAGAGTTCGGGCAACCTCGCTGGCTGGC
+AACGGCTCCTGGACAGAAAGTATCGCTTTCTACGTCCCAGGCCCAGAGGA
+GGAAGACTCCGGGGGGCTGCACGTCCTTCTCACTGTCACCCCCGTGGGGC
+TCATGCTGTTC---ATCATTCTTG----CCGCCCTCGGTTTCTTCTACGG
+CAGGAAGA-----GAAACAGCACCC----TCTATGCCTCAGTGAATCCGG
+AGTACTTCAGCGCC-----TCTGAT----------ATGTACATCCCTGAT
+GAGTGGGAGGTGCCTCGGGAGCAGATCTCCATAATCCGAGAGCTGGGCCA
+GGGCTCCTTTGGGATGGTATATGAAGGCCTGGCACAAGGACTAGAGGCTG
+GAGAGGAGTCCACACCCGTGGCCCTGAAGACAGTGAATGAGTTGGCCAGC
+CCACGAGAACGCATTGAGTTCCTCAAGGAAGCTTCTGTCATGAAGGCATT
+CAAGTGTCACCATGTGGTACGTCTCCTGGGTGTTGTGTCTCAAGGCCAGC
+CAACTCTGGTCATCATGGAGTTAATGACTCGTGGGGACCTCAAGAGCCAT
+CTTCGATCTCTGCGGCCTGAGGCAGAGAACAACCCTGGGCTCCCACGGCC
+AGCACTGGGAGATATGATCCAGATGGCTGGTGAGATTGCAGATGGCATGG
+CTTACCTCGCTGCCAACAAGTTTGTGCATCGAGACCTGGCAGCCAGAAAC
+TGCATGGTGTCCCAGGACTTCACCGTCAAGATTGGGGACTTCGGGATGAC
+TCGAGACGTGTACGAGACAGACTATTACCGCAAGGGCGGGAAAGGGCTGC
+TGCCCGTGCGCTGGATGGCCCCCGAGTCCCTCAAAGATGGAATCTTCACC
+ACACATTCGGATGTTTGGTCCTTCGGGGTGGTGCTCTGGGAGATCGTGAC
+CCTAGCTGAACAGCCCTACCAGGGTTTATCCAACGAGCAGGTGCTCAAGT
+TTGTCATGGATGGTGGGGTCCTGGAGGAGCTGGAGAGCTGTCCCCTTCAG
+CTGCAAGAACTGATGAGCCGCTGCTGGCAGCAGAACCCACGCCTGCGGCC
+CACCTTCACCCACATCCTGGACAGCATTCGGGGGGAACTGCGGCCCTCTT
+TCCGCCTCTTTTCCTTCTATTACAGCCCAGAGTGCCAGG----GGAGCCA
+GGGGGCCCTGCTGCCTAATG------CCAAACCCAACTCCCTACCAACCC
+CAGAAGGGGCTCCCTC--------AGACTGCATGCCCCAA---AATGGGG
+GTCCAGGGCACTGA------------------------------------
+--------------------------------------------------
+----------------------------------------
+>ENSRNOT00000019267_rattusnorvegicus
+ATGAAGTCTGGCTCCGGAGGAGGGTCCCCGACCTCGCTGTGGGGGCTCGT
+GTTTCTCTCCGCCGCGCTCTCGCTC-TGGCCGACGAGTGG------AGAA
+ATTTGTGGGCCCGGCATTGACATCCGCAACGACTATCAGCAGCTGAAGCG
+CCTGGAAAACTGCACGGTGATCGAGGGCTTCCTCCACATCCTGCTCATCT
+CCAAGGCC------GAGGACTACCGAAGCTACCGCTTCCCCAAGCTCACG
+GTCATCACCGAGTACTTGCTGCTGTTTCGAGTGGCCGGCCTCGAGAGCCT
+GGGAGACCTCTTCCCGAACCTCACAGTCATCCGTGGCTGGAAACTCTTCT
+ACAATTACGCACTGGTCATCTTCGAGATGACCAATCTCAAGGATATTGGG
+CTTTATAATCTGAGGAACATTACTCGGGGGGCCATCAGGATTGAGAAAAA
+CGCTGACCTCTGTTACCTCTCCACCATAGACTGGTCTCTCATCTTGGATG
+CGGTGTCCAATAACTACATTGTGGGGAACAA-GCCCCCAA--AGGAATGT
+GGGGACCTGTGTCCAGGGACCTTGGAGGAGAA-GCCCATG--TGTGAGAA
+GACCACCATCAACAATGAGTACAACTACCGCTGCTGGACCACAAATCGCT
+GCCAGAAAATGTGCCCAAGTGTGTGTGGGAAGCGAGCCTGCACCGAGAAC
+AATGAGTGCTGCCACCCGGAGTGCCTAGGCAGCTGCCACACACCGGACGA
+CAACACAACCTGCGTGGCCTGCCGACACTACTACTACAAAGGCGTGTGCG
+TGCCTGCCTGCCCGCCTGGCACCTACAGGTTCGAGGGCTGGCGCTGTGTG
+GACCGGGATTTCTGC----GCCAACATCCCCAACGCCGAGAG---CAGTG
+ACTCAGATGGC-----TTCGTCATCCACGATGGCGAGTGCATGCAGGAGT
+GTCCATCAGGCTTCATCCGCAACAGCACCCAGAGCATGTACTGTATCCCC
+TGTGAAGGCCCCTGCCCCAAGGTCTGCGGCGATGAAGAAAAGAAAACGAA
+AACCATCGATTCTGTGACGTCTGCCCAGATGCTCCAAGGGTGCACCATTT
+TGAAGGGCAATCTGCTTATTAACATCCGGCGAGGCAATAACATTGCCTCG
+GAATTGGAGAACTTCATGGGGCTCATCGAGGTGGTGACTGGCTACGTGAA
+GATCCGCCATTCCCATGCCTTGGTCTCCTTGTCCTTCCTGAAGAACCTTC
+GTCTCATCTTAGGAGAGGAGCAGCTAGAAGGGAACTACTCCTTCTATGTC
+CTGGACAACCAGAACTTGCAGCAGCTGTGGGACTGGAACCACCGGAACCT
+GACCGTCAGGTCAGGGAAAATGTACTTCGCTTTCAATCCCAAGCTGTGTG
+TCTCTGAAATTTACCGGATGGAGGAGGTGACAGGAACAAAGGGACGGCAG
+AGCAAAGGAGACATAAACACCAGGAACAACGGAGAGCGAGCTTCCTGTGA
+AAGTGATGTTCTCCGTTTCACCTCCACCACCACCTGGAAGAACCGCATCA
+TCATAACGTGGCACCGGTACCGGCCGCCGGACTACCGGGATCTCATCAGT
+TTCACAGTCTACTACAAGGAGGCACCCTTTAAAAACGTCACGGAATACGA
+CGGGCAGGATGCCTGTGGCTCCAACAGCTGGAACATGGTGGACGTGGACC
+TGCCTCCGAA---CAAGGA------------------GGGGGAGCCTGGC
+ATTTTGCTGCATGGGCTGAAGCCCTGGACCCAGTATGCAGTCTATGTCAA
+GGCTGTGACCCTCACCATGGTGGAAAACGACCACATCCGTGGGGCCAAAA
+GTGAAATCTTGTACATTCGCACCAACGCTTCAGTTCCTTCCATTCCTCTA
+GATGTCCTCTCGGCATCAAACTCCTCCTCTCAGCTGATCGTGAAGTGGAA
+CCCCCCAACTCTGCCCAATGGTAACTTGAGTTACTACATTGTGAGGTGGC
+AGCGGCAGCCGCAGGATGGCTATCTGTTCCGGCACAACTACTGCTCCAAA
+GA--CAAAATA-CCCATCAGAAAGTACGCCGATGGTACCATCGATGTGGA
+GGAGGTGA--CAGAAAATCCCAAGACAGAAGTGTGCGGTGGTGATAAAGG
+GCCGTGCTGTGCCTGTCCTAAAACCGAAGCTGAGAAGCAGGCTGAGAAGG
+AGGAGGCTGAGTACCGTAAAGTCTTTGAGAATTTCCTTCACAACTCCATC
+TTTGTGCCCAGACCTGAGAGGAGGCGGAGAGATGTCCTGCAGGTGGCTAA
+CACCACCATGTCCAGCCGAAGCAGGAACACCACGGTAGCTGACACCTACA
+ATATCACAG-ACCCGGAAGAGTTCGAGACAGAATACCCTTTCTTTGAGAG
+CAGAGTGGATAACAAGGAGAGGACTGTCATTTCCAACCTCCGGCCTTTCA
+CTCTGTACCGTATCGATATCCACAGCTGCAACCACGAGGCTGAGAAGCTG
+GGCTGCAGCGCCTCCAACTTTGTCTTTGCAAGAACCATGCCAGCAGAAGG
+AGCAGATGACATTCCTGGCCCAGTGACCTGGGAGCCAAGACCTGAAAACT
+CCATCTTTTTAAAGTGGCCAGAACCCGAGAACCCCAACGGATTGATTCTA
+ATGTATGAAATAAAATACGGATCGCAAGTCGAGGATCAGCGGGAA---TG
+TGTGTCCAGACAGGAGTACAGGAAGTATGGAGGGGCCAAACTTAACCGTC
+TAAACCCAGGGAACTATACGGCCCGGATTCAGGCTACCTCCCTCTCTGGG
+AATGGGTCGTGGACAGATCCTGTGTTCTTCTATGTCCCAG--CCAAAACA
+-ACGTATGA--GAATTTCATGCATC-TGATCATTGCTCTGCCGGTTGCCA
+TCCTGCTGATT-GTGGGGGGCCTGGTAATCATGCTGTATGTCTTCCATAG
+AAAGAGGGTGTCTGGGCCAGCAGAA-----GGCAGTAGTTGGAAAG---G
+GCCATTTCCATCCTG----TCTGTTCCTA------GTGTACGTGCCTGAT
+GAATGGGAGGTAGCTCGGGAGAAGATCACCATGAACCGGGAGCTCGGACA
+AGGGTCCTTCGGGATGGTCTATGAAGGAGTGGCCAAGGGCGTGGTCAAGG
+ACGAGCCTGAAACCAGAGTGGCCATCAAGACAGTGAATGAGGCTGCAAGT
+ATGCGTGAGAGAATTGAGTTTCTCAACGAGGCCTCAGTGATGAAGGAGTT
+CAACTGTCACCATGTGGTCCGGTTGCTGGGTGTAGTATCCCAAGGCCAGC
+CCACCCTGGTCATCATGGAACTAATGACACGTGGCGATCTCAAAAGTTAT
+CTCCGGTCTCTAAGGCCAGAGGTGGAGAATAATCTAGTCCTGATTCCTCC
+GAGCTTAAGCAAGATGATCCAGATGGCTGGAGAGATTGCAGATGGCATGG
+CCTACCTCAATGCCAACAAGTTCGTCCACAGAGACCTGGCTGCTCGGAAC
+TGCATGGTAGCTGAAGATTTCACAGTCAAAATTGGAGATTTTGGTATGAC
+ACGAGACATCTACGAGACGGACTACTACCGGAAAGGCGGGAAGGGCTTGC
+TGCCTGTGCGCTGGATGTCTCCCGAGTCCCTCAAGGATGGCGTCTTCACC
+ACTCATTCCGATGTCTGGTCCTTTGGGGTCGTCCTCTGGGAGATCGCCAC
+TCTGGCTGAGCAGCCGTACCAGGGCCTGTCCAACGAGCAAGTTCTTCGTT
+TCGTCATGGAGGGCGGCCTTCTGGACAAGCCGGATAACTGCCCCGATATG
+CTGTTTGAACTTATGCGCATGTGCTGGCAGTACAACCCCAAGATGCGGCC
+CTCCTTCCTGGAGATCATCGGAAGCATCAAGGATGAGATGGAGCCCAGTT
+TCCAGGAGGTCTCCTTCTACTACAGCGAGGAGAACAAGCCTCCAGAGCCG
+GAGGAGCTGGAGATGGAGCTGGAGCTGGAGCCCGAGAACATGGAGAGCGT
+CCCGCTGGACCCTTCGGCCTCCTCAGCCTCCCTGCCTCTGCCTGAAAGAC
+ACTCAGGACACAAGGCTGAGAACGGCCCTGGCGTGCTGGTTCTCCGTGCC
+AGTTTTGATGAGAGACAGCCTTACGCTCACATGAATGGGGGACGCGCCAA
+CGAGAGGGCCTTGCCTCTGCCCCAGTCCTCAACCTGC---