changeset 3:276f0f31ddb0 draft

Uploaded
author greg
date Mon, 30 Oct 2017 09:53:09 -0400
parents a322e5e668a3
children 8ef8997aaa0c
files .shed.yml gene_family_integrator.py gene_family_integrator.xml macros.xml test-data/3722.faa test-data/3722.fna test-data/3722_integrated.faa test-data/3722_integrated.fna test-data/38889.faa test-data/38889.fna test-data/38889_integrated.faa test-data/38889_integrated.fna test-data/39614.faa test-data/39614.fna test-data/39614_integrated.faa test-data/39614_integrated.fna test-data/tool-data/plant_tribes/scaffolds/README.txt utils.py
diffstat 18 files changed, 725 insertions(+), 220 deletions(-) [+]
line wrap: on
line diff
--- a/.shed.yml	Thu Aug 24 13:32:01 2017 -0400
+++ b/.shed.yml	Mon Oct 30 09:53:09 2017 -0400
@@ -8,7 +8,7 @@
   utilize objective classifications of complete protein sequences from sequenced plant genomes to perform
   comparative evolutionary studies.  This tool integrates classified post processed de novo transcriptome
   assembly sequences with the scaffold gene family sequences.
-remote_repository_url: https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/plant_tribes/gene_family_integrator
+remote_repository_url: https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/phylogenetics/plant_tribes/gene_family_integrator
 type: unrestricted
 categories:
 - Phylogenetics
--- a/gene_family_integrator.py	Thu Aug 24 13:32:01 2017 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,32 +0,0 @@
-#!/usr/bin/env python
-import argparse
-import os
-
-import utils
-
-OUTPUT_DIR = 'integratedGeneFamilies_dir'
-
-parser = argparse.ArgumentParser()
-parser.add_argument('--orthogroup_faa', dest='orthogroup_faa', help='Directory of input fasta datasets')
-parser.add_argument('--scaffold', dest='scaffold', help='Orthogroups or gene families proteins scaffold')
-parser.add_argument('--method', dest='method', help='Protein clustering method')
-parser.add_argument('--orthogroup_fna', dest='orthogroup_fna', default=None, help='Use correspong coding sequences')
-parser.add_argument('--output', dest='output', help='Output dataset')
-parser.add_argument('--output_dir', dest='output_dir', help='Output dataset file_path directory')
-
-args = parser.parse_args()
-
-# Build the command line.
-cmd = 'GeneFamilyIntegrator'
-cmd += ' --orthogroup_faa %s' % args.orthogroup_faa
-cmd += ' --scaffold %s' % args.scaffold
-cmd += ' --method %s' % args.method
-if args.orthogroup_fna is not None:
-    cmd += ' --orthogroup_fna'
-
-# Run the command.
-utils.run_command(cmd)
-
-# Handle outputs.
-utils.move_directory_files(os.path.join(OUTPUT_DIR, 'orthogroups_fasta'), args.output_dir)
-utils.write_html_output(args.output, 'Integrated gene family sequences', args.output_dir)
--- a/gene_family_integrator.xml	Thu Aug 24 13:32:01 2017 -0400
+++ b/gene_family_integrator.xml	Mon Oct 30 09:53:09 2017 -0400
@@ -1,70 +1,58 @@
-<tool id="plant_tribes_gene_family_integrator" name="GeneFamilyIntegrator" version="@WRAPPER_VERSION@.2">
+<tool id="plant_tribes_gene_family_integrator" name="GeneFamilyIntegrator" version="@WRAPPER_VERSION@.3.0">
     <description>integrates gene models in pre-computed orthologous gene family clusters with classified gene coding sequences</description>
     <macros>
         <import>macros.xml</import>
     </macros>
-    <expand macro="requirements_gene_family_integrator" />
+    <requirements>
+        <requirement type="package" version="1.0.3">plant_tribes_gene_family_integrator</requirement>
+    </requirements>
     <command detect_errors="exit_code"><![CDATA[
-#set input_format = $input_format_cond.input_format
-#set scaffold = $input_format_cond.scaffold
-#set method = $input_format_cond.method
-
-python '$__tool_directory__/gene_family_integrator.py'
+#set input_dir = 'input_dir'
+mkdir '$input_dir' &&
+#for $i in $input:
+    #set filename = $i.file_name
+    #set name = $i.name
+    ln -s '$filename' '$input_dir/$name' &&
+#end for
+GeneFamilyIntegrator
+--orthogroup_fasta '$input_dir'
 --scaffold '$scaffold.fields.path'
 --method $method
-#if str($input_format) == 'ptortho':
-    --orthogroup_faa '$input_format_cond.input_ptortho.extra_files_path'
-    --output '$output_ptortho'
-    --output_dir '$output_ptortho.files_path'
-#else:
-    ## str($input_format) == 'ptorthocs'
-    --orthogroup_faa '$input_format_cond.input_ptorthocs.extra_files_path'
-    #if str($input_format_cond.orthogroup_fna) == 'yes':
-        --orthogroup_fna true
-        --output '$output_ptorthocs'
-        --output_dir '$output_ptorthocs.files_path'
-    #else:
-        --output '$output_ptortho'
-        --output_dir '$output_ptortho.files_path'
-    #end if
-#end if
+&>proc.log
     ]]></command>
     <inputs>
-        <conditional name="input_format_cond">
-            <param name="input_format" type="select" label="Classified orthogroup fasta files">
-                <option value="ptortho">Proteins orthogroup fasta files</option>
-                <option value="ptorthocs">Protein and coding sequences orthogroup fasta files</option>
-            </param>
-            <when value="ptortho">
-                <param name="input_ptortho" format="ptortho" type="data" label="Proteins orthogroup fasta files">
-                    <validator type="empty_extra_files_path" />
-                </param>
-                <expand macro="param_scaffold" />
-                <expand macro="param_method" />
-            </when>
-            <when value="ptorthocs">
-                <param name="input_ptorthocs" format="ptorthocs" type="data" label="Protein and coding sequences orthogroup fasta files">
-                    <validator type="empty_extra_files_path" />
-                </param>
-                <expand macro="param_scaffold" />
-                <expand macro="param_method" />
-                <expand macro="param_orthogroup_fna" />
-            </when>
-        </conditional>
+        <param name="input" format="fasta" type="data_collection" collection_type="list" label="Classified orthogroup fasta files" />
+        <expand macro="param_scaffold" />
+        <expand macro="param_method" />
     </inputs>
     <outputs>
-        <data name="output_ptortho" format="ptortho" label="${tool.name} (integrated gene family clusters) on ${on_string}">
-            <filter>input_format_cond['input_format'] == 'ptortho' or (input_format_cond['input_format'] == 'ptorthocs' and input_format_cond['orthogroup_fna'] == 'no')</filter>
-        </data>
-        <data name="output_ptorthocs" format="ptorthocs" label="${tool.name} (integrated gene family clusters) on ${on_string}">
-            <filter>input_format_cond['input_format'] == 'ptorthocs' and input_format_cond['orthogroup_fna'] == 'yes'</filter>
-        </data>
+        <collection name="output" type="list" label="${tool.name} (integrated gene family clusters) on ${on_string}">
+            <discover_datasets pattern="__name__" directory="integratedGeneFamilies_dir" format="fasta" />
+        </collection>
     </outputs>
     <tests>
-        <!-- Test framework does not currently support inputs whose associated extra_files_path contains files to be analyzed.
         <test>
+            <param name="input">
+                <collection type="list">
+                    <element name="3722.faa" value="3722.faa"/>
+                    <element name="3722.fna" value="3722.fna"/>
+                    <element name="38889.faa" value="38889.faa"/>
+                    <element name="38889.fna" value="38889.fna"/>
+                    <element name="39614.faa" value="39614.faa"/>
+                    <element name="39614.fna" value="39614.fna"/>
+                </collection>
+            </param>
+            <param name="scaffold" value="22Gv1.1"/>
+            <param name="method" value="orthomcl"/>
+            <output_collection name="output" type="list">
+                <element name="3722.faa" file="3722_integrated.faa" ftype="fasta"/>
+                <element name="3722.fna" file="3722_integrated.fna" ftype="fasta"/>
+                <element name="38889.faa" file="38889_integrated.faa" ftype="fasta"/>
+                <element name="38889.fna" file="38889_integrated.fna" ftype="fasta"/>
+                <element name="39614.faa" file="39614_integrated.faa" ftype="fasta"/>
+                <element name="39614.fna" file="39614_integrated.fna" ftype="fasta"/>
+            </output_collection>
         </test>
-        -->
     </tests>
     <help>
 This tool is one of the PlantTribes collection of automated modular analysis pipelines for comparative and evolutionary
@@ -79,11 +67,6 @@
 
  * **Gene family scaffold** - one of the PlantTribes gene family scaffolds installed into Galaxy by the PlantTribes Scaffold Data Manager tool.
  * **Protein clustering method** - gene family scaffold protein clustering method as described in the AssemblyPostProcessor tool.
-
-**Other options**
-
- * **Orthogroups coding sequences** - Select 'Yes' to create corresponding coding sequences orthogroup fasta files for the classified protein sequences.
-
     </help>
     <citations>
         <expand macro="citation1" />
--- a/macros.xml	Thu Aug 24 13:32:01 2017 -0400
+++ b/macros.xml	Mon Oct 30 09:53:09 2017 -0400
@@ -1,47 +1,6 @@
 <?xml version='1.0' encoding='UTF-8'?>
 <macros>
     <token name="@WRAPPER_VERSION@">1.0</token>
-    <xml name="requirements_assembly_post_processor">
-        <requirements>
-            <requirement type="package" version="1.0.2">plant_tribes_assembly_post_processor</requirement>
-        </requirements>
-    </xml>
-    <xml name="requirements_gene_family_aligner">
-        <requirements>
-            <requirement type="package" version="1.0.2">plant_tribes_gene_family_aligner</requirement>
-        </requirements>
-    </xml>
-    <xml name="requirements_gene_family_classifier">
-        <requirements>
-            <requirement type="package" version="1.0.2">plant_tribes_gene_family_classifier</requirement>
-        </requirements>
-    </xml>
-    <xml name="requirements_gene_family_integrator">
-        <requirements>
-            <requirement type="package" version="1.0.2">plant_tribes_gene_family_integrator</requirement>
-        </requirements>
-    </xml>
-    <xml name="requirements_kaks_analysis">
-        <requirements>
-            <requirement type="package" version="1.0.2">plant_tribes_kaks_analysis</requirement>
-        </requirements>
-    </xml>
-    <xml name="requirements_ks_distribution">
-        <requirements>
-            <requirement type="package" version="1.3.2">r-optparse</requirement>
-        </requirements>
-    </xml>
-    <xml name="requirements_gene_family_phylogeny_builder">
-        <requirements>
-            <requirement type="package" version="1.0.2">plant_tribes_gene_family_phylogeny_builder</requirement>
-        </requirements>
-    </xml>
-    <xml name="param_codon_alignments">
-        <param name="codon_alignments" type="select" label="Codon alignments">
-            <option value="yes" selected="true">Yes</option>
-            <option value="no">No</option>
-        </param>
-    </xml>
     <xml name="param_method">
         <param name="method" type="select" label="Protein clustering method">
             <option value="gfam" selected="true">GFam</option>
@@ -49,74 +8,12 @@
             <option value="orthomcl">OrthoMCL</option>
         </param>
     </xml>
-    <xml name="param_options_type">
-        <param name="options_type" type="select" label="Options Configuration">
-            <option value="basic" selected="true">Basic</option>
-            <option value="advanced">Advanced</option>
-        </param>
-    </xml>
-    <xml name="param_orthogroup_fna">
-        <param name="orthogroup_fna" type="select" label="Orthogroups coding sequences">
-            <option value="yes" selected="true">Yes</option>
-            <option value="no">No</option>
-        </param>
-    </xml>
     <xml name="param_scaffold">
         <param name="scaffold" type="select" label="Gene family scaffold">
             <options from_data_table="plant_tribes_scaffolds" />
             <validator type="no_options" message="No PlantTribes scaffolds are available.  Use the PlantTribes Scaffolds Download Data Manager tool in Galaxy to install and populate the PlantTribes scaffolds data table." />
         </param>
     </xml>
-    <xml name="param_sequence_type">
-        <param name="sequence_type" type="select" label="Sequence type used in the phylogenetic inference (dna)">
-            <option value="protein" selected="true">Amino acid based</option>
-            <option value="dna">Nucleotide based</option>
-        </param>
-    </xml>
-    <xml name="cond_alignment_method">
-        <conditional name="alignment_method_cond">
-            <param name="alignment_method" type="select" force_select="true" label="Multiple sequence alignment method">
-                <option value="mafft" selected="true">MAFFT</option>
-                <option value="pasta">PASTA</option>
-            </param>
-            <when value="mafft" />
-            <when value="pasta">
-                <param name="pasta_iter_limit" type="integer" value="3" min="1" label="PASTA iteration limit" />
-            </when>
-        </conditional>
-    </xml>
-    <xml name="cond_remove_gappy_sequences">
-        <conditional name="remove_gappy_sequences_cond">
-            <param name="remove_gappy_sequences" type="select" label="Alignment post-processing configuration">
-                <option value="no" selected="true">No</option>
-                <option value="yes">Yes</option>
-            </param>
-            <when value="no" />
-            <when value="yes">
-                <conditional name="trim_type_cond">
-                    <param name="trim_type" type="select" label="Trimming method">
-                        <option value="gap_trimming" selected="true">Gap score based trimming</option>
-                        <option value="automated_trimming">Automated heuristic trimming</option>
-                    </param>
-                    <when value="gap_trimming">
-                        <param name="gap_trimming" type="float" optional="true" min="0" max="1.0" label="Gap score" />
-                    </when>
-                    <when value="automated_trimming" />
-                </conditional>
-                <conditional name="remove_sequences_with_gaps_cond">
-                    <param name="remove_sequences_with_gaps" type="select" label="Remove sequences">
-                        <option value="no" selected="true">No</option>
-                        <option value="yes">Yes</option>
-                    </param>
-                    <when value="no" />
-                    <when value="yes">
-                        <param name="remove_sequences_with_gaps_of" type="float" optional="true" min="0" max="1" label="Coverage score" />
-                        <param name="iterative_realignment" type="integer" optional="true" min="0" label="Realignment iteration limit" />
-                    </when>
-                </conditional>
-            </when>
-        </conditional>
-    </xml>
     <xml name="citation1">
         <citation type="bibtex">
             @misc{None,
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/3722.faa	Mon Oct 30 09:53:09 2017 -0400
@@ -0,0 +1,3 @@
+>contig_7
+ENEWSGAEFLNEMAAMMTQNKSNENGTGTFEELQQLFDEMFQSDIESFNGCSSSSNETCSNSNKRNSIESSSANFRPENG
+NESGEISGKKNTRKGKGDX
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/3722.fna	Mon Oct 30 09:53:09 2017 -0400
@@ -0,0 +1,5 @@
+>contig_7
+GAGAATGAGTGGTCTGGGGCTGAGTTTTTGAATGAAATGGCGGCAATGATGACTCAAAATAAATCCAATGAAAACGGAAC
+CGGAACTTTTGAAGAACTGCAACAATTGTTCGATGAAATGTTTCAGAGCGACATCGAGTCCTTCAATGGTTGTTCTTCAT
+CATCCAATGAAACATGTAGCAACTCGAACAAGAGGAATTCCATTGAGTCGAGCTCGGCTAATTTCAGACCCGAAAATGGA
+AACGAAAGCGGCGAGATTAGCGGGAAGAAGAATACTAGGAAAGGTAAAGGTGACGNN
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/3722_integrated.faa	Mon Oct 30 09:53:09 2017 -0400
@@ -0,0 +1,161 @@
+>gnl_Glyma1.01_PACid_16266208
+MANEGKKSNNFYSILGLSKECTELELKNAYRKLAKKWHPDRCSATGNLELVEEAKKKFQEIREAYSVLSDANKRLMYDVG
+VYDSDDDENGMGDFLDEMLTMMSHTKSNENGEESFEELQQLFEDMFQADIGLDGGPSLASSDSSTSSAYMTYSESSSSNK
+RNSSEMNFGKAENSSVFDASYQNFCFGVNQLQDIKKGKGGILGGGGRSRHRSGRKQKMFYGHDV
+>gnl_Glyma1.01_PACid_16266209
+MANEGKKSNNFYSILGLSKECTELELKNAYRKLAKKWHPDRCSATGNLELVEEAKKKFQEIREAYSVLSDANKRLMYDVG
+VYDSDDDENGMGDFLDEMLTMMSHTKSNENGEESFEELQQLFEDMFQADIGLDGGPSLASSDSSTSSAYMTYSESSSSNK
+RNSSEMNFGKAENSSVFDASYQNFCFGTGEPTPRYKEGKGGNSRRRR
+>gnl_Glyma1.01_PACid_16266210
+MANEGKKSNNFYSILGLSKECTELELKNAYRKLAKKWHPDRCSATGNLELVEEAKKKFQEIREAYSVLSDANKRLMYDVG
+VYDSDDDENGMGDFLDEMLTMMSHTKSNENGEESFEELQQLFEDMFQADIGLDGGPSLASSDSSTSSAYMTYSESSSSNK
+RNSSEMNFGKAENSSVFDASYQNFCFGVGHVNYHYQ
+>gnl_Glyma1.01_PACid_16301083
+MADEGNKSNNFYSILGLKKECTELELKNAYRKLAKKWHPDRCSATGNSELVEEAKKKFQEIREAYSVLSDANKRLMYDVG
+VYDSDDDENGMGDFLDEMLTMMSQTKSNENGEESFEELQQLFEDMFEADIGLDGGPSLASSDCSTSSAYMTYSESSSSNK
+HNSSEMNFGKAENSSVFDAGYQNFCFGVNQLQDIKKKKGGILGGGRSRHRNGRKQNMSYGHDVSSNDYPGISTK
+>gnl_Glyma1.01_PACid_16301085
+MADEGNKSNNFYSILGLKKECTELELKNAYRKLAKKWHPDRCSATGNSELVEEAKKKFQEIREAYSVLSDANKRLMYDVG
+VYDSDDDENGMGDFLDEMLTMMSQTKSNENGEESFEELQQLFEDMFEADIGLDGGPSLASSDCSTSSAYMTYSESSSSNK
+HNSSEMNFGKAENSSVFDAGYQNFCFGVGHVNYHYQ
+>gnl_Glyma1.01_PACid_16301084
+MADEGNKSNNFYSILGLKKECTELELKNAYRKLAKKWHPDRCSATGNSELVEEAKKKFQEIREAYSVLSDANKRLMYDVG
+VYDSDDDENGMGDFLDEMLTMMSQTKSNENGEESFEELQQLFEDMFEADIGLDGGPSLASSDCSTSSAYMTYSESSSSNK
+HNSSEMNFGKAENSSVFDAGYQNFCFGTGEPTPRYKEEKGGNSRRR
+>gnl_Medtr3.5_Medtr8g022310.1
+MANEGNKSNDFYAVLGLNKECSDSELRNAYKKLALKWHPDRCSASGNVKFVEEAKKKFQAIQEAYSVLSDSNKRLMYDVG
+VYDSDDDENGMGDFLNEMVTMMSQTKSNENGEESFEELQQLFDDMFQADIGLNGSTSLNASGCSTSSTFMTFSESSNSNK
+RNSTQMNFGKAEDSSSFGANYQNFCFGMKHLQEDVEKEKGGILEGGGSKKQRKGRKQKISCGHVSSNDHPGISAN
+>gnl_Medtr3.5_Medtr8g022310.2
+MANEGNKSNDFYAVLGLNKECSDSELRNAYKKLALKWHPDRCSASGNVKFVEEAKKKFQAIQEAYSVLSDSNKRLMYDVG
+VYDSDDDENGMGDFLNEMVTMMSQTKSNENGEESFEELQQLFDDMFQADIGLNGSTSLNASGCSTSSTFMTFSESSNSNK
+RNSTQMNFGKAEDSSSFGANYQNFCFGVNLVNYHYQ
+>gnl_Musac1.0_GSMUA_Achr6T31040_001
+MAAEEDKSGDFYAVLGLRKECSETELRNAYKKLAMRWHPDKCLASGNAQIVGEAKEKFQEIQKAYSVLSDSNKRFLYDVG
+VYDNDDDNDENGMGDFIGEMLEMMSQTKPNENSQDSFQELQELFVEMFQDDLDAGFGGSIFHDCPWAQPTNGQDCWTSSG
+LHFANGRSKCGNKRGNSAVNLGKVNLEELEHGTSDFYFGLNDAAQPSQGKGGSNNKRRNGRKQKVSSNHDVSS
+>gnl_Musac1.0_GSMUA_Achr9T18140_001
+MAAGEEKIGDFYTVLGLRKECSEAELRIAYKKLAMRWHPDKCSASGNHRRMEEAKEKFQEIQKAYSVLSDSSKRFLYDVG
+IYDNEDDNDEKGMGDFIGEIAQMMSQTKSGENGHDSFEELQRMFLDMFQDDLDAGFGDSSIHSGPQARPTDGLNCSMPSG
+LQFADGGNNGSNKRGNSEKAKLDGLENSSTGFCFGLNDAGQSSKGKGSANSKRRNGRKQKVSSKHDVSSSDAEVSF
+>gnl_Musac1.0_GSMUA_Achr8T23700_001
+MASDMDASGDFYSVLGLKKECSEAELRNAYKKLALKWHPDKCSASGNEIRMKEAKQQFQEIQKAYSVLSDSNKRFLYDVG
+AYDKDDDKDEEGMVEFLGEMAQMMRQTKCCGSGQESFEQLQQMFVEMFHDDLDAGFCGHSSATSGAASCGNKRDNSAMDS
+GKRKPDELDPAAIGFCLGTKDAGQSSKGRGSNSKRRNRRKQKASSKHDNSSHNAKVSA
+>gnl_Musac1.0_GSMUA_AchrUn_randomT02210_001
+MEGDEEKSGDFYAVLGLKKEGSMAELKNAYKKLAMKWHPDKCPASGNKIRMDKAKEKFQEIQKAYSVLSDSNKRFLYDVG
+VYDKDDEEDEEGMGDFIGEIAQMMSQSKPSGSGHESLEELHRQVVEMFLDELDAGDRFSSANQGASSCDGRDDGGGNKRG
+NWAVDWGKEKLNELGPGTGGFCFGVSRRVHSFDLMIDVVHLIHSDLTLE
+>gnl_Orysa6.0_PACid_16843526
+MADGGEKCRDAAGEGGGGGDLYAVLGLKKECSDADLKLAYRKLAMRWHPDKCSSSSSAKHMEEAKEKFQEIQGAYSVLSD
+SNKRFLYDVGVYDDDDNDDDNLQGMGDFIGEMAQMMSQARPTRQESFKELQQLFVDMFQADLDSGFCNGPSKCYHTQAQS
+QTRTSSTSPSMSPSPPPPVATEAESPSCNGINKRGSSAMDSGKPPRASEVGSGQSQSGFCFGKSDAKQAAKTRSGNTASR
+RRNGRKQKVSSKHDVSSEDEMPGSQWHGVA
+>gnl_Orysa6.0_PACid_16843528
+MADGGEKCRDAAGEGGGGGDLYAVLGLKKECSDADLKLAYRKLAMRWHPDKCSSSSSAKHMEEAKEKFQEIQGAYSVLSD
+SNKRFLYDVGVYDDDDNDDDNLQGMGDFIGEMAQMMSQARPTRQESFKELQQLFVDMFQADLDSGFCNGPSKCYHTQAQS
+QTRTSSTSPSMSPSPPPPVATEAESPSCNGINKRGSSAMDSGKPPRASEVGSGQSQSGFCFGQKSDAKQAAKTRSGNTAS
+RRRNGRKQKVSSKHDVSSEDEMPGSQWHGVA
+>gnl_Poptr2.2_PACid_18217800
+MANGGEDKWKSNDLYQVLGLNKECTDTELRSAYKKLALRWHPDRCSASGNSKFVEEAKKKFQAIQQAYSVLSDTNKRFLY
+DVGVDDSDDDENGMGDFLNEMAVMMSQTKPSENMEESLEELQELFDEMFQEDLHSFGIDSQAAPSCPPSYVSYSESSNSN
+NKRVSADMNLGKTKVDDSSSFNSHFEKFCLGTGGTAATFQEGEGGSKRRNSRRSQRQTKARQETKSFFGL
+>gnl_Poptr2.2_PACid_18234651
+MENGGEEKGKSNDFYQVLGLNKDCTATELRNAYKKLALKWHPDRCSASENSRFVDEAKKKFQTIQQAYSVLSDTNKRFLY
+DVGVYDSEDDENGMGGFMNEMAAMMSQTKPHENVEESFEELQGLFEEMFQEDLDSFGIACQATTCVSYSESSNSNDKRVS
+VDMNLKKTKVDDSSGFNSHVEKFCLGVSGTPAIFQEGEGSKRRSSRRNRR
+>gnl_Poptr2.2_PACid_18234649
+MLRMENGGEEKGKSNDFYQVLGLNKDCTATELRNAYKKLALKWHPDRCSASENSRFVDEAKKKFQTIQQAYSVLSDTNKR
+FLYDVGVYDSEDDENGMGGFMNEMAAMMSQTKPHENVEESFEELQGLFEEMFQEDLDSFGIACQATTCVSYSESSNSNDK
+RVSVDMNLKKTKVDDSSGFNSHVEKFCLGVEHQQSFKKGKGVRGGVQGGTGGRERKGRKQEVSSGYDVSSHDHGISAS
+>gnl_Poptr2.2_PACid_18234650
+MENGGEEKGKSNDFYQVLGLNKDCTATELRNAYKKLALKWHPDRCSASENSRFVDEAKKKFQTIQQAYSVLSDTNKRFLY
+DVGVYDSEDDENGMGGFMNEMAAMMSQTKPHENVEESFEELQGLFEEMFQEDLDSFGIACQATTCVSYSESSNSNDKRVS
+VDMNLKKTKVDDSSGFNSHVEKFCLGVEHQQSFKKGKGVRGGVQGGTGGRERKGRKQEVSSGYDVSSHDHGISAS
+>gnl_Soltu3.4_PGSC0003DMP400016105
+MGNDYYAVLGLKKECTETELRNAYKKLALKWHPDRCSASGNSKFVDEAKKKFQAIQEAYSVLSDANKRFLYDVGVYDSGD
+DDDENGMGDFLNEMAAMMSQNKSNENQEETFEELQDMFDEIFNSDNGMSSSSSSSSRTGTPSMCSTTSSTSSSETFFTFS
+NKRSSGEMKSGKGDSCQFQGFCEGTGGASGKSNERERSRRKNSKSGRKQ
+>gnl_Soltu3.4_PGSC0003DMP400016106
+MGNDYYAVLGLKKECTETELRNAYKKLALKWHPDRCSASGNSKFVDEAKKKFQAIQEAYSVLSDANKRFLYDVGVYDSGD
+DDDENGMGDFLNEMAAMMSQNKSNENQEETFEELQDMFDEIFNSDNGMSSSSSSSSRTGTPSMCSTTSSTSSSETFFTFS
+NKRSSGEMKSGKGDSCQFQGFCEGVEHLEKAMKENGVGGKIPRVDGSNRMDAKRQKVLS
+>gnl_Ambtr1.0.27_AmTr_v1.0_scaffold00007.329
+MAPRGEKDSDFYAILGLKKECSASDLRNAYKRLALRWHPDRCSASGNTKFVEECKKKFQAIQQAYSVLSDANKRFLYDVG
+AYGSDDDDQGMGEFLGEMAVMMSQTKPSEKGPESFEDLQNLFQEMFERDLDMFKSSTSHNNNNDNNNNNHRSSDNNNCSS
+VHCFSNTNKRNCSDMNAGEASEVGRFAFSCYATEFLHKQTFSVGADDVRSESSNKRRNGRKQKSTSSSRKS
+>gnl_Aquco1.0_PACid_18141277
+MASEEEASDFYKVLGLKNNCSSLELRNAYKKLALKWHPDRCAASGNSKFVEEAKKNFQAIQEAYSVLSDEQKRFMYDVGV
+YDKDDDDENEDMGDFLGEMMSMMKQENTSADGQQSFEDLQNLFQEMVQNDKEFYNPASQNSSIYNASNNMFSFSNNENLN
+NASNNTFSSFYNENLNSSNKKSCSSMSAENTKVDFNMESLDFRSFSIGLEGGTSFQNSKGRGVTGRRTGRKQKGSSCNDM
+SSHDSKILA
+>gnl_Arath10_AT3G14200.1
+MASSNSEKINENLYAVLGLKKECSKTELRSAYKKLALRWHPDRCSSMEFVEEAKKKFQAIQEAYSVLSDSNKRFLYDVGA
+YNTDDDDDQNGMGDFLNEMATMMNQSKPSDNNTGDSFEQLQDLFNEMFQGDAAAFPSSSSCSTSNFTSSRSFVFDTNSQR
+SSSFATSSMGMNNDPFGYDPRAHSFSLGVDHQQEFKKGKNNGGRRNRRKNNVPSAGHETSSSNNYGVPTS
+>gnl_Bradi1.2_Bradi3g60090.1
+MATGGDKCGGKPAAAGVGGGDLYSVLGVNKECSDADLKVAYRKLAMRWHPDRCSSSSSTKHMEEAKEKFQEIQGAYSVLS
+DANKRFLYDVGVYEEHEEEDDDTLQGMGDFLGEMAHMMSQTQPARQESFEELQQLFVDMFQSDIESGFCNGPAKDHDPVQ
+RQTRTFSTPPSPSPSPPPPLATVDEAASCNGINKRGSSAMGSGKPPRAGEVSGGHGQSEFCFGMSDAKQAPKARGGNASR
+RRNGQKQKLSSKHDVSSGDEMPRPHAAV
+>gnl_Carpa1.181_PACid_16420351
+MADGEDKNNSDLYAVLGLNKECTPAELRNAYKKLAMRWHPDRCSASGNSMFVEEAKKKFQAIQEAYSVLSDANKRFLYDV
+GAYESDDDENGMGDFLNEMAAMMSQTKPNENGNAQESFEELQELFQEMFQGDMGFNTFGSSSQPTTSSCSASSAYATCSE
+TSNPNNNKRNSSEMNYGKKKVDDSSGFHAHFQTFCLGVEQQQDFKKGKEARGGIRGKPGGSRRQGRKQKVSSRHNVSSND
+LGISAS
+>gnl_Frave2.0_gene05408
+MAGGKWVPPPLSQFHLHIKRFRRRQKVSGSSGNTEPSGYTDCKNISNRRMEEKGNDFYAVMGLKKECSDSELRNAYKKLA
+LIWHPDRCSASGNSKFVEEAKKKFQDIQQAYSVLSDANKRFLYDVGAYESDDDENGMGDFLNEMAVMMSQTKPNENGGES
+FEQLQELFEEMFQGDIEGFSSCSQPPTSCSTSSSSYALYCENSTPSNKRNSSAMNYGNATLDSSGFDAHFHNFCVGTGGK
+PAKDREGDARKRKDSRRSNR
+>gnl_Mimgu1.0_PACid_17694730
+MAADEEKSSDFYGVLGLRKECTAAELRVAYKKLAMKWHPDRCSASGNLKYVEEAKNKFQAVQQAYSVLSDANKRFLYDVG
+IYDSEDDADENGMGDFLNEMVAMMGQSKPNENKNESFQELQDLFEEIFNNDAEEVFKIPPPHFPYQDSCSETRTASNKRN
+AREMGSVNFSNIEATPFEGFCIGENVIFGGERIQTRPGGGSRRTKPKISTSIDGLIS
+>gnl_Nelnu1.0_NNU_010544-RA
+MNLLLQKWHPDRCSSSGNSKFVEDSKKKFQAIQEAYSVLSDENKRFLYDVGVYDCDDDDDDENGMGEFLGEMATMMSQIK
+PSENGPESLEKLQELFEEMFQRDMDDGFFSPSPQCASFSSSCSSSSSSTTYFSYNNNKHDNKRNCSDISSMDDFYTFGTD
+SIQFSNFCIGVEGGEDSKVRGGKSRRKSNRRQKVSSSKHDPSCR
+>gnl_Solly2.3_Solyc03g123560.2.1
+MEDKSNDYYAVLGLKKECTDTELRNAYKKLALKWHPDRCSASGNLKFVDEAKKQFQAIQEAYSVLSDANKKFLYDVGVYD
+SGDDDDENGMGDFLNEMAAMMSQNKSNENQGEETFEELQDMFNEMFNSDNGTFSSSSSSSSSWTGTPSMCSTTSSTSSSE
+TFLTFPNKRSSGEMKSGSSVRGDSCQFQGFCVGAGGTSGKCNERERSWRKNSKSGRKH
+>gnl_Sorbi1.4_PACid_1968370
+MDAGGEKFSDAAAAEGGEGGGDLYAVLGLKKECSDADLKVAYRKLAKKWHPDKCSSSSSVKHMEEAKEKFQEIQGAYSVL
+SDANKRLLYDVGVYDDEDDEDSMQGMGDFIGEMAQMMSQVRPTRQESFEELQQLFVDMFQSDIDSGFCNGSAKDQVQGQA
+KSRTCSTSPSSSPSPPPPPTIVKEAEVSSCNGFNKRGSSAMDSGKPPRPVEGGAGQAGFCFGVSDTKQTPKPRGPNTSRR
+RNGRKQKLSSKHDVSSEDETAGS
+>gnl_Thepa2.0_Tp3g12470
+MASNNSEKGNDDLYGVLGLKKECTTTELRTAYKKLALRWHPDRCSSMGTPEFVDEAKKKFQAIQEAYSVLSDSNKRFLYD
+VGAYNSDDEDQNGMGDFLNEMAAMMNQSKPSENNSGDSFEQLQDLFNEMFQGDAAAFSSSSSSSCSASTFTSSCSFVFDT
+NSQRSPFETSSMGTNDLFGFDHSAHTFSLGVEHQQDFKKGKNSGGRRNRRKNNAQSAAHETASSNNYGVPTS
+>gnl_Theca1.0_Tc06_g010450
+MANGEEKNNDFYAVLGLNKECTPTELRTAYKKLALRWHPDRCSASGNSKFVEEAKKKFQAIQQAYSVLSDSNKRFLYDVG
+AYDSDDDENGMGDFLNEMAGMMSQTKSNENGGESFEELQELFEEMFQADIDSFESTGQSTPSCSASSSFGSYGESSSSNK
+RNSSEMSSVETRLESSSSFDAQFHSFCLGVEHRQDIKQHRGARGGMRGAAGGSRRRNGRKQKVSSGHDVTSNDCGISAS
+>gnl_Vitvi12X_PACid_17827068
+MAAGEEKSNDFYAVLGLKKECTASELRNAYKRLALMWHPDRCSSSGNSKFVEEAKKKFQAIQEAYSVLSDANKRFLYDVG
+AYDSDDDENGMGDFLNEMAVMMSQTKSNENGKESFEELQELFEDMFQRDVDAFNSASHHPMNSFPSSTSTSSYCESSNAN
+NKRNSAEMGSGRMMSAGESSAFDAHFQSFCFGTGGTPGRFQEGERSKRRNSRRSQR
+>gnl_Selmo1.0_PACid_15401289
+MEKRKEDPYTVLGVQKSSSSSEIRSAYRKLAMKWHPDKQHSLEDQAKAKFQGIQEAYSVLSDDKKRVLYDSGLYDEGDDE
+VS
+>gnl_Orysa6.0_PACid_16864430
+MARGGGGGGGADADLYAVLGLSRECTDADLRLAYRKLAMIWHPDRCSVAGGSASAAGVDEAKERFQEIQGAYSVLSDSNK
+RFLYDVGVYDGNDGDDDDDEADLSGMGDFLGEMAQMMSQATPAESFEELQQLFVDMFQDDIDAGLCQSTPPPPSWPSPPA
+AANARSPAAAATSRKGVNKRCSPAAMDMDSGLSSLLGISGFCFEAPWTSQDASTAAGGGGGKRRKQRPPPASHNV
+>gnl_Sorbi1.4_PACid_1982925
+MAATSHCGNIQDQDEEASAPGAADLYAVLGLNRECTDAELRVAYRRLAMIWHPDRCSASGSSPARMEEAKERFQEIQGAY
+SVLSDSNKRLLYDVGVYDSDDDEADLSGMGDFLGEMADMMSQATPTETFEELQQVFVDMFQDDLDDAGFFGGLPTTGRRA
+QAPSTSLPPSVSSSPLRPTPAAGRSKGPQATPSSSFKGVERRGSTSTAKRPRPNGSAGLESDLGLSGFCFMVSKEMSKSK
+ERQAVWASDDGDRSTDGKQRLSTSRDVSGGGMSRSLQGQSSKNLLQCMASKS
+>gnl_Medtr3.5_Medtr8g022310.3
+MANEGNKSNDFYAVLGLNKECSDSELRNAYKKLALKWHPDRCSASGNVKFVEEAKKKFQAIQEAYSVLSDSNKRLMYDVG
+VYDSDDDENVRHLFHTIHELGTLFCVMFCFFISLRGEKRSNLNLTFSLSH
+>gnl_Nelnu1.0_NNU_000115-RA
+MEVDSHRSSPSYYTILGVDQNSSASEIRNAYRKLAMQWHPDKWTKTPSLLEKAKSKFQQIQEAYSGGLLVFMLSDQGKRT
+LYDVGLYDPDDETNDEVGLRRFHAGDDISHERCEETGEEIQLGGTTGDVSGNVTRAGVEDGECWWCVVVRWSCSLKEELK
+EGQMGIISESDDAGHDTPSLPHLHGSELELLGRTGCCN
+>contig_7
+ENEWSGAEFLNEMAAMMTQNKSNENGTGTFEELQQLFDEMFQSDIESFNGCSSSSNETCSNSNKRNSIESSSANFRPENG
+NESGEISGKKNTRKGKGDX
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/3722_integrated.fna	Mon Oct 30 09:53:09 2017 -0400
@@ -0,0 +1,378 @@
+>gnl_Glyma1.01_PACid_16266208
+ATGGCTAATGAAGGAAAGAAAAGCAATAACTTCTATTCGATCTTGGGCTTGAGCAAGGAGTGCACTGAATTGGAGCTAAA
+GAATGCTTATAGGAAACTTGCAAAGAAATGGCACCCAGATCGTTGTTCAGCCACCGGGAATTTAGAGTTAGTGGAAGAAG
+CTAAGAAAAAATTTCAGGAAATTCGGGAAGCCTATTCTGTTTTATCTGACGCCAACAAAAGGTTAATGTACGACGTGGGA
+GTCTACGACAGTGATGACGACGAAAACGGCATGGGGGACTTCTTGGACGAAATGTTAACAATGATGAGTCATACCAAATC
+AAATGAAAATGGAGAGGAGAGCTTTGAGGAGTTGCAACAGCTTTTTGAAGACATGTTTCAAGCGGATATTGGATTGGATG
+GAGGCCCTTCTCTTGCTTCTTCTGATTCCTCAACTTCATCTGCTTACATGACTTACAGTGAAAGTTCTAGTTCAAATAAA
+CGCAATTCCTCTGAGATGAATTTCGGGAAGGCAGAGAATTCTTCTGTCTTTGATGCCAGTTACCAGAATTTCTGTTTTGG
+GGTGAACCAACTCCAAGATATAAAGAAGGGAAAGGGGGGAATTCTAGGAGGAGGAGGTAGAAGTAGACACAGAAGTGGCA
+GAAAGCAAAAAATGTTCTATGGCCATGATGTT
+>gnl_Glyma1.01_PACid_16266209
+ATGGCTAATGAAGGAAAGAAAAGCAATAACTTCTATTCGATCTTGGGCTTGAGCAAGGAGTGCACTGAATTGGAGCTAAA
+GAATGCTTATAGGAAACTTGCAAAGAAATGGCACCCAGATCGTTGTTCAGCCACCGGGAATTTAGAGTTAGTGGAAGAAG
+CTAAGAAAAAATTTCAGGAAATTCGGGAAGCCTATTCTGTTTTATCTGACGCCAACAAAAGGTTAATGTACGACGTGGGA
+GTCTACGACAGTGATGACGACGAAAACGGCATGGGGGACTTCTTGGACGAAATGTTAACAATGATGAGTCATACCAAATC
+AAATGAAAATGGAGAGGAGAGCTTTGAGGAGTTGCAACAGCTTTTTGAAGACATGTTTCAAGCGGATATTGGATTGGATG
+GAGGCCCTTCTCTTGCTTCTTCTGATTCCTCAACTTCATCTGCTTACATGACTTACAGTGAAAGTTCTAGTTCAAATAAA
+CGCAATTCCTCTGAGATGAATTTCGGGAAGGCAGAGAATTCTTCTGTCTTTGATGCCAGTTACCAGAATTTCTGTTTTGG
+GACAGGTGAACCAACTCCAAGATATAAAGAAGGGAAAGGGGGGAATTCTAGGAGGAGGAGG
+>gnl_Glyma1.01_PACid_16266210
+ATGGCTAATGAAGGAAAGAAAAGCAATAACTTCTATTCGATCTTGGGCTTGAGCAAGGAGTGCACTGAATTGGAGCTAAA
+GAATGCTTATAGGAAACTTGCAAAGAAATGGCACCCAGATCGTTGTTCAGCCACCGGGAATTTAGAGTTAGTGGAAGAAG
+CTAAGAAAAAATTTCAGGAAATTCGGGAAGCCTATTCTGTTTTATCTGACGCCAACAAAAGGTTAATGTACGACGTGGGA
+GTCTACGACAGTGATGACGACGAAAACGGCATGGGGGACTTCTTGGACGAAATGTTAACAATGATGAGTCATACCAAATC
+AAATGAAAATGGAGAGGAGAGCTTTGAGGAGTTGCAACAGCTTTTTGAAGACATGTTTCAAGCGGATATTGGATTGGATG
+GAGGCCCTTCTCTTGCTTCTTCTGATTCCTCAACTTCATCTGCTTACATGACTTACAGTGAAAGTTCTAGTTCAAATAAA
+CGCAATTCCTCTGAGATGAATTTCGGGAAGGCAGAGAATTCTTCTGTCTTTGATGCCAGTTACCAGAATTTCTGTTTTGG
+GGTCGGTCATGTAAACTATCATTACCAA
+>gnl_Glyma1.01_PACid_16301083
+ATGGCCGATGAAGGAAACAAAAGCAATAACTTCTATTCGATCTTGGGGTTGAAGAAGGAGTGCACTGAATTGGAGCTAAA
+GAATGCTTATAGGAAACTTGCAAAGAAATGGCACCCAGATCGTTGTTCAGCGACCGGGAATTCAGAGTTAGTGGAAGAAG
+CTAAGAAAAAATTTCAGGAAATTCGGGAAGCCTATTCAGTTTTATCTGACGCCAACAAAAGGTTAATGTACGACGTGGGA
+GTCTACGACAGTGATGACGACGAGAACGGCATGGGGGACTTCTTGGACGAAATGCTAACAATGATGAGTCAGACCAAATC
+GAATGAAAACGGAGAGGAGAGCTTTGAGGAGTTGCAACAGTTGTTTGAAGACATGTTTGAAGCAGATATTGGATTGGACG
+GAGGCCCTTCTCTTGCTTCTTCTGATTGCTCAACTTCATCTGCTTACATGACTTATAGTGAAAGTTCTAGTTCAAATAAA
+CACAATTCCTCTGAGATGAATTTCGGGAAGGCAGAGAATTCTTCTGTCTTTGATGCTGGTTACCAGAATTTCTGTTTTGG
+GGTGAACCAACTCCAAGATATAAAGAAGAAAAAGGGGGGAATTCTAGGAGGAGGTAGAAGTAGACACAGAAATGGCAGAA
+AGCAAAATATGTCCTATGGCCATGATGTTTCATCGAATGACTACCCTGGAATTTCCACAAAG
+>gnl_Glyma1.01_PACid_16301085
+ATGGCCGATGAAGGAAACAAAAGCAATAACTTCTATTCGATCTTGGGGTTGAAGAAGGAGTGCACTGAATTGGAGCTAAA
+GAATGCTTATAGGAAACTTGCAAAGAAATGGCACCCAGATCGTTGTTCAGCGACCGGGAATTCAGAGTTAGTGGAAGAAG
+CTAAGAAAAAATTTCAGGAAATTCGGGAAGCCTATTCAGTTTTATCTGACGCCAACAAAAGGTTAATGTACGACGTGGGA
+GTCTACGACAGTGATGACGACGAGAACGGCATGGGGGACTTCTTGGACGAAATGCTAACAATGATGAGTCAGACCAAATC
+GAATGAAAACGGAGAGGAGAGCTTTGAGGAGTTGCAACAGTTGTTTGAAGACATGTTTGAAGCAGATATTGGATTGGACG
+GAGGCCCTTCTCTTGCTTCTTCTGATTGCTCAACTTCATCTGCTTACATGACTTATAGTGAAAGTTCTAGTTCAAATAAA
+CACAATTCCTCTGAGATGAATTTCGGGAAGGCAGAGAATTCTTCTGTCTTTGATGCTGGTTACCAGAATTTCTGTTTTGG
+GGTAGGTCATGTAAACTATCATTACCAA
+>gnl_Glyma1.01_PACid_16301084
+ATGGCCGATGAAGGAAACAAAAGCAATAACTTCTATTCGATCTTGGGGTTGAAGAAGGAGTGCACTGAATTGGAGCTAAA
+GAATGCTTATAGGAAACTTGCAAAGAAATGGCACCCAGATCGTTGTTCAGCGACCGGGAATTCAGAGTTAGTGGAAGAAG
+CTAAGAAAAAATTTCAGGAAATTCGGGAAGCCTATTCAGTTTTATCTGACGCCAACAAAAGGTTAATGTACGACGTGGGA
+GTCTACGACAGTGATGACGACGAGAACGGCATGGGGGACTTCTTGGACGAAATGCTAACAATGATGAGTCAGACCAAATC
+GAATGAAAACGGAGAGGAGAGCTTTGAGGAGTTGCAACAGTTGTTTGAAGACATGTTTGAAGCAGATATTGGATTGGACG
+GAGGCCCTTCTCTTGCTTCTTCTGATTGCTCAACTTCATCTGCTTACATGACTTATAGTGAAAGTTCTAGTTCAAATAAA
+CACAATTCCTCTGAGATGAATTTCGGGAAGGCAGAGAATTCTTCTGTCTTTGATGCTGGTTACCAGAATTTCTGTTTTGG
+GACAGGTGAACCAACTCCAAGATATAAAGAAGAAAAAGGGGGGAATTCTAGGAGGAGG
+>gnl_Medtr3.5_Medtr8g022310.1
+ATGGCTAACGAAGGAAACAAAAGCAATGATTTCTATGCAGTTTTGGGATTGAATAAGGAATGCTCTGATTCAGAGCTAAG
+GAATGCTTATAAGAAACTTGCACTGAAATGGCATCCAGATCGTTGTTCAGCTTCAGGGAATGTGAAGTTTGTGGAAGAAG
+CTAAGAAGAAATTTCAGGCAATTCAAGAAGCCTATTCTGTTTTATCTGACTCGAACAAGAGATTAATGTACGACGTTGGA
+GTTTACGACAGTGATGATGACGAAAATGGTATGGGAGACTTTCTGAATGAAATGGTTACAATGATGAGCCAAACTAAATC
+AAATGAAAATGGAGAGGAGAGCTTCGAGGAGTTACAACAGTTGTTTGATGATATGTTTCAAGCGGATATCGGATTAAATG
+GAAGCACCTCTCTTAATGCTTCGGGTTGCTCCACTTCATCGACTTTCATGACGTTCAGTGAAAGCTCGAATTCAAATAAG
+CGCAATTCCACTCAAATGAATTTTGGGAAGGCAGAGGATTCTTCTAGTTTTGGTGCAAATTACCAGAACTTCTGTTTTGG
+GATGAAGCACCTTCAAGAAGATGTGGAGAAGGAAAAAGGGGGAATTCTAGAAGGAGGAGGTAGCAAAAAACAAAGAAAAG
+GAAGAAAACAAAAAATTTCATGTGGACATGTTTCCTCTAATGACCATCCTGGTATTTCTGCTAAT
+>gnl_Medtr3.5_Medtr8g022310.2
+ATGGCTAACGAAGGAAACAAAAGCAATGATTTCTATGCAGTTTTGGGATTGAATAAGGAATGCTCTGATTCAGAGCTAAG
+GAATGCTTATAAGAAACTTGCACTGAAATGGCATCCAGATCGTTGTTCAGCTTCAGGGAATGTGAAGTTTGTGGAAGAAG
+CTAAGAAGAAATTTCAGGCAATTCAAGAAGCCTATTCTGTTTTATCTGACTCGAACAAGAGATTAATGTACGACGTTGGA
+GTTTACGACAGTGATGATGACGAAAATGGTATGGGAGACTTTCTGAATGAAATGGTTACAATGATGAGCCAAACTAAATC
+AAATGAAAATGGAGAGGAGAGCTTCGAGGAGTTACAACAGTTGTTTGATGATATGTTTCAAGCGGATATCGGATTAAATG
+GAAGCACCTCTCTTAATGCTTCGGGTTGCTCCACTTCATCGACTTTCATGACGTTCAGTGAAAGCTCGAATTCAAATAAG
+CGCAATTCCACTCAAATGAATTTTGGGAAGGCAGAGGATTCTTCTAGTTTTGGTGCAAATTACCAGAACTTCTGTTTTGG
+GGTCAATCTTGTAAATTATCATTACCAA
+>gnl_Musac1.0_GSMUA_Achr6T31040_001
+ATGGCGGCCGAAGAGGACAAAAGCGGCGACTTCTACGCCGTGTTGGGGCTCAGGAAGGAGTGCTCCGAAACCGAGCTGAG
+GAATGCGTACAAGAAGCTTGCCATGAGGTGGCATCCGGATAAGTGCTTGGCTTCGGGAAATGCTCAAATCGTGGGGGAAG
+CCAAGGAGAAGTTTCAGGAGATCCAGAAAGCCTACTCTGTTCTCTCAGACTCCAATAAGAGATTCCTGTACGATGTGGGA
+GTCTACGACAACGATGATGACAATGACGAAAACGGTATGGGAGACTTTATAGGGGAGATGTTGGAGATGATGAGCCAAAC
+GAAACCCAATGAGAACAGCCAAGATAGCTTCCAGGAGCTGCAGGAGCTGTTTGTGGAGATGTTCCAGGACGACCTGGATG
+CCGGATTTGGTGGTTCCATCTTCCACGATTGCCCCTGGGCTCAACCGACCAATGGCCAAGATTGCTGGACTTCATCGGGA
+CTGCACTTTGCTAATGGAAGGAGTAAGTGTGGCAACAAGCGGGGCAACTCAGCTGTGAACTTGGGAAAGGTCAATCTTGA
+AGAGTTGGAACATGGTACCAGCGACTTCTATTTTGGGCTAAATGATGCAGCACAGCCATCACAAGGGAAAGGAGGCAGTA
+ATAACAAGAGAAGGAATGGAAGAAAGCAAAAGGTTTCATCCAATCATGATGTCTCATCC
+>gnl_Musac1.0_GSMUA_Achr9T18140_001
+ATGGCCGCCGGGGAGGAAAAGATCGGCGATTTCTACACGGTGCTGGGGCTCAGGAAGGAGTGCTCGGAGGCGGAGCTGAG
+GATCGCGTACAAGAAGCTGGCTATGAGATGGCATCCGGATAAGTGCTCGGCTTCGGGAAACCATCGAAGGATGGAGGAAG
+CGAAGGAGAAGTTCCAGGAAATCCAAAAAGCCTACTCTGTTCTCTCGGACTCCAGCAAGAGATTTCTGTATGATGTGGGA
+ATCTACGATAACGAGGATGATAATGACGAAAAAGGAATGGGGGATTTTATTGGGGAGATAGCTCAGATGATGAGCCAAAC
+GAAATCTGGGGAGAATGGTCATGATAGCTTTGAGGAGCTGCAGCGGATGTTCCTGGATATGTTCCAGGACGACCTGGACG
+CCGGATTCGGTGATTCTTCCATCCACAGTGGCCCCCAAGCTCGGCCAACCGACGGTCTCAATTGCTCGATGCCATCAGGA
+CTGCAGTTTGCTGATGGAGGGAACAATGGCAGCAACAAGAGAGGCAACTCGGAGAAGGCAAAGCTGGATGGGTTGGAAAA
+CAGTTCCACTGGCTTCTGCTTCGGGTTGAATGATGCAGGGCAGTCATCAAAAGGAAAAGGAAGCGCTAATAGCAAGAGAA
+GGAATGGAAGAAAGCAGAAGGTCTCATCCAAACATGATGTCTCATCCAGTGATGCTGAGGTCTCATTT
+>gnl_Musac1.0_GSMUA_Achr8T23700_001
+ATGGCGAGCGACATGGATGCAAGCGGCGATTTCTACTCGGTGCTGGGGCTGAAGAAGGAGTGCTCCGAGGCGGAGCTCAG
+GAATGCGTACAAGAAGCTCGCTTTGAAGTGGCATCCCGATAAGTGCTCGGCGTCGGGTAATGAGATTCGCATGAAGGAAG
+CGAAGCAGCAGTTCCAGGAGATCCAGAAAGCCTACTCTGTTCTCTCCGACTCCAACAAGAGATTTCTGTACGATGTTGGA
+GCCTACGACAAAGACGACGACAAAGACGAAGAGGGGATGGTGGAGTTTCTTGGGGAGATGGCGCAAATGATGAGGCAAAC
+CAAATGCTGTGGGAGCGGCCAGGAGAGCTTCGAGCAGCTGCAGCAGATGTTCGTGGAGATGTTCCACGACGATCTGGACG
+CGGGATTCTGCGGCCACTCCTCGGCCACCTCGGGCGCGGCGTCCTGCGGCAACAAACGGGACAACTCGGCGATGGACTCG
+GGCAAGCGGAAGCCGGACGAGTTGGACCCGGCCGCCATTGGGTTCTGCCTCGGGACAAAGGATGCAGGGCAATCCTCAAA
+AGGAAGAGGTAGCAACAGCAAGAGAAGGAACAGAAGAAAGCAAAAGGCATCATCCAAGCATGACAACTCATCTCACAATG
+CTAAGGTCTCAGCT
+>gnl_Musac1.0_GSMUA_AchrUn_randomT02210_001
+ATGGAGGGGGACGAGGAGAAGAGCGGGGATTTCTACGCGGTGCTGGGGCTGAAGAAAGAGGGCTCCATGGCGGAGCTCAA
+GAATGCGTACAAGAAGCTGGCGATGAAGTGGCATCCCGATAAGTGTCCTGCGTCAGGCAATAAGATACGCATGGATAAAG
+CGAAGGAGAAGTTCCAGGAGATCCAAAAAGCCTACTCTGTTCTCTCCGACTCCAACAAGCGATTCCTGTACGATGTCGGA
+GTTTACGACAAAGACGATGAGGAAGATGAAGAGGGGATGGGGGACTTCATTGGGGAGATCGCGCAAATGATGAGCCAGTC
+CAAACCCAGCGGGAGCGGCCACGAGAGCTTGGAGGAGCTGCATCGGCAGGTCGTGGAGATGTTCCTCGACGAACTGGACG
+CCGGAGATCGCTTCTCCTCGGCCAACCAAGGCGCGTCGTCCTGCGACGGCAGGGACGACGGCGGCGGTAACAAGCGTGGC
+AACTGGGCGGTGGACTGGGGCAAGGAGAAGCTGAACGAGTTGGGCCCGGGCACCGGCGGGTTCTGCTTCGGGGTGAGTCG
+CCGAGTCCACTCCTTTGATCTTATGATAGACGTAGTCCACCTCATCCATTCTGATCTGACTCTGGAA
+>gnl_Orysa6.0_PACid_16843526
+ATGGCCGACGGGGGAGAGAAGTGCCGGGACGCGGCCGGCGAGGGCGGCGGCGGCGGCGACCTGTACGCCGTGCTCGGGCT
+CAAGAAGGAGTGCTCCGACGCCGACCTCAAGCTCGCGTACCGGAAGCTCGCCATGAGATGGCATCCGGACAAATGCTCAT
+CCTCCAGCAGTGCAAAGCACATGGAGGAAGCCAAGGAGAAGTTCCAGGAGATCCAGGGCGCCTATTCCGTCCTCTCAGAC
+TCAAACAAGCGGTTCCTCTACGACGTGGGGGTATATGATGATGACGACAATGACGATGACAACCTGCAGGGGATGGGGGA
+CTTCATTGGTGAGATGGCCCAGATGATGAGCCAGGCACGGCCAACGAGGCAGGAGAGCTTTAAAGAACTGCAGCAGCTAT
+TCGTAGACATGTTCCAAGCTGATCTTGATTCGGGTTTCTGCAATGGACCCTCAAAGTGCTACCATACCCAGGCCCAAAGC
+CAGACTCGAACATCCTCAACCTCCCCTTCGATGTCACCGTCTCCACCGCCTCCAGTAGCTACTGAGGCAGAATCGCCATC
+ATGTAATGGTATTAATAAGCGTGGTTCATCAGCAATGGACTCTGGGAAGCCTCCAAGAGCCAGCGAAGTCGGTTCTGGAC
+AGAGTCAATCAGGGTTTTGTTTCGGGAAGAGTGATGCTAAACAAGCGGCGAAGACGCGAAGCGGGAACACGGCCAGCCGG
+AGGAGGAACGGCCGGAAGCAGAAGGTGTCGTCGAAGCACGACGTCTCGTCTGAGGACGAGATGCCAGGTTCGCAGTGGCA
+CGGCGTGGCC
+>gnl_Orysa6.0_PACid_16843528
+ATGGCCGACGGGGGAGAGAAGTGCCGGGACGCGGCCGGCGAGGGCGGCGGCGGCGGCGACCTGTACGCCGTGCTCGGGCT
+CAAGAAGGAGTGCTCCGACGCCGACCTCAAGCTCGCGTACCGGAAGCTCGCCATGAGATGGCATCCGGACAAATGCTCAT
+CCTCCAGCAGTGCAAAGCACATGGAGGAAGCCAAGGAGAAGTTCCAGGAGATCCAGGGCGCCTATTCCGTCCTCTCAGAC
+TCAAACAAGCGGTTCCTCTACGACGTGGGGGTATATGATGATGACGACAATGACGATGACAACCTGCAGGGGATGGGGGA
+CTTCATTGGTGAGATGGCCCAGATGATGAGCCAGGCACGGCCAACGAGGCAGGAGAGCTTTAAAGAACTGCAGCAGCTAT
+TCGTAGACATGTTCCAAGCTGATCTTGATTCGGGTTTCTGCAATGGACCCTCAAAGTGCTACCATACCCAGGCCCAAAGC
+CAGACTCGAACATCCTCAACCTCCCCTTCGATGTCACCGTCTCCACCGCCTCCAGTAGCTACTGAGGCAGAATCGCCATC
+ATGTAATGGTATTAATAAGCGTGGTTCATCAGCAATGGACTCTGGGAAGCCTCCAAGAGCCAGCGAAGTCGGTTCTGGAC
+AGAGTCAATCAGGGTTTTGTTTCGGGCAGAAGAGTGATGCTAAACAAGCGGCGAAGACGCGAAGCGGGAACACGGCCAGC
+CGGAGGAGGAACGGCCGGAAGCAGAAGGTGTCGTCGAAGCACGACGTCTCGTCTGAGGACGAGATGCCAGGTTCGCAGTG
+GCACGGCGTGGCC
+>gnl_Poptr2.2_PACid_18217800
+ATGGCAAACGGAGGAGAAGATAAATGGAAAAGCAATGACTTATATCAAGTCTTGGGGTTGAATAAGGAATGCACTGATAC
+AGAGCTCAGGAGTGCTTATAAGAAACTTGCACTGAGATGGCATCCAGATCGATGTTCAGCTTCAGGAAATTCTAAGTTCG
+TTGAAGAAGCCAAAAAGAAGTTTCAGGCAATTCAACAGGCCTATTCTGTTCTTTCTGACACCAACAAGAGGTTTCTTTAC
+GACGTTGGTGTTGATGACAGTGATGACGACGAAAATGGAATGGGTGATTTTCTGAATGAAATGGCTGTCATGATGAGCCA
+AACGAAGCCTAGCGAAAACATGGAGGAGAGCCTAGAGGAACTGCAAGAATTATTTGACGAGATGTTCCAAGAGGATCTGC
+ATTCGTTTGGGATTGACAGTCAGGCTGCTCCTTCATGTCCTCCTTCTTATGTATCCTACAGCGAAAGTTCCAACTCAAAT
+AACAAACGTGTTTCTGCTGATATGAACTTGGGGAAGACTAAAGTGGATGATTCTTCTAGCTTCAACTCTCACTTTGAGAA
+ATTCTGTTTAGGGACAGGTGGAACAGCAGCAACCTTTCAAGAAGGTGAAGGTGGGAGTAAGAGGAGGAATTCAAGGAGGA
+GCCAGCGGCAGACGAAGGCAAGACAAGAAACAAAGAGTTTCTTCGGGCTA
+>gnl_Poptr2.2_PACid_18234651
+ATGGAAAATGGAGGAGAAGAGAAAGGGAAAAGCAATGACTTTTATCAGGTTTTGGGGTTGAATAAGGATTGCACTGCCAC
+AGAGCTGAGGAATGCTTATAAGAAACTTGCACTGAAATGGCATCCAGATCGATGTTCAGCTTCGGAAAATTCCAGGTTCG
+TTGACGAAGCCAAAAAGAAGTTTCAGACAATTCAACAGGCCTATTCTGTTCTTTCTGACACCAACAAGAGGTTTCTTTAC
+GACGTTGGTGTTTATGACAGTGAAGACGACGAAAATGGAATGGGCGGATTTATGAATGAAATGGCTGCTATGATGAGCCA
+AACAAAGCCCCACGAAAACGTGGAGGAGAGCTTTGAGGAATTGCAAGGATTGTTTGAGGAGATGTTCCAAGAGGATTTGG
+ATTCGTTTGGGATTGCCTGTCAGGCTACTACCTGTGTGTCATACAGCGAAAGCTCCAACTCAAATGATAAACGTGTTTCT
+GTCGATATGAACTTGAAGAAGACAAAGGTGGATGATTCTTCTGGCTTCAATTCTCACGTCGAGAAGTTCTGTTTAGGGGT
+AAGTGGAACACCAGCAATCTTTCAAGAAGGGGAAGGGAGTAAGAGGAGGAGTTCAAGGAGGAACCGGCGG
+>gnl_Poptr2.2_PACid_18234649
+ATGCTAAGAATGGAAAATGGAGGAGAAGAGAAAGGGAAAAGCAATGACTTTTATCAGGTTTTGGGGTTGAATAAGGATTG
+CACTGCCACAGAGCTGAGGAATGCTTATAAGAAACTTGCACTGAAATGGCATCCAGATCGATGTTCAGCTTCGGAAAATT
+CCAGGTTCGTTGACGAAGCCAAAAAGAAGTTTCAGACAATTCAACAGGCCTATTCTGTTCTTTCTGACACCAACAAGAGG
+TTTCTTTACGACGTTGGTGTTTATGACAGTGAAGACGACGAAAATGGAATGGGCGGATTTATGAATGAAATGGCTGCTAT
+GATGAGCCAAACAAAGCCCCACGAAAACGTGGAGGAGAGCTTTGAGGAATTGCAAGGATTGTTTGAGGAGATGTTCCAAG
+AGGATTTGGATTCGTTTGGGATTGCCTGTCAGGCTACTACCTGTGTGTCATACAGCGAAAGCTCCAACTCAAATGATAAA
+CGTGTTTCTGTCGATATGAACTTGAAGAAGACAAAGGTGGATGATTCTTCTGGCTTCAATTCTCACGTCGAGAAGTTCTG
+TTTAGGGGTGGAACACCAGCAATCTTTCAAGAAGGGGAAGGGAGTAAGAGGAGGAGTTCAAGGAGGAACCGGCGGTAGAG
+AGAGGAAAGGCAGGAAACAAGAAGTTTCATCTGGCTATGATGTCTCCTCCCATGACCATGGTATTTCTGCTTCA
+>gnl_Poptr2.2_PACid_18234650
+ATGGAAAATGGAGGAGAAGAGAAAGGGAAAAGCAATGACTTTTATCAGGTTTTGGGGTTGAATAAGGATTGCACTGCCAC
+AGAGCTGAGGAATGCTTATAAGAAACTTGCACTGAAATGGCATCCAGATCGATGTTCAGCTTCGGAAAATTCCAGGTTCG
+TTGACGAAGCCAAAAAGAAGTTTCAGACAATTCAACAGGCCTATTCTGTTCTTTCTGACACCAACAAGAGGTTTCTTTAC
+GACGTTGGTGTTTATGACAGTGAAGACGACGAAAATGGAATGGGCGGATTTATGAATGAAATGGCTGCTATGATGAGCCA
+AACAAAGCCCCACGAAAACGTGGAGGAGAGCTTTGAGGAATTGCAAGGATTGTTTGAGGAGATGTTCCAAGAGGATTTGG
+ATTCGTTTGGGATTGCCTGTCAGGCTACTACCTGTGTGTCATACAGCGAAAGCTCCAACTCAAATGATAAACGTGTTTCT
+GTCGATATGAACTTGAAGAAGACAAAGGTGGATGATTCTTCTGGCTTCAATTCTCACGTCGAGAAGTTCTGTTTAGGGGT
+GGAACACCAGCAATCTTTCAAGAAGGGGAAGGGAGTAAGAGGAGGAGTTCAAGGAGGAACCGGCGGTAGAGAGAGGAAAG
+GCAGGAAACAAGAAGTTTCATCTGGCTATGATGTCTCCTCCCATGACCATGGTATTTCTGCTTCA
+>gnl_Soltu3.4_PGSC0003DMP400016105
+ATGGGCAATGATTATTATGCAGTTTTGGGATTGAAAAAGGAATGCACTGAAACAGAGCTTAGGAATGCTTATAAGAAGCT
+TGCACTGAAATGGCACCCAGATCGCTGTTCAGCATCGGGGAATTCGAAGTTTGTAGATGAAGCAAAGAAGAAATTTCAGG
+CAATTCAAGAAGCATATTCTGTGTTATCGGATGCAAACAAAAGGTTTCTGTACGATGTAGGAGTTTATGACTCTGGTGAT
+GATGACGACGAAAATGGCATGGGTGATTTCCTGAATGAAATGGCAGCTATGATGAGCCAAAATAAGTCCAATGAAAATCA
+GGAAGAAACCTTTGAGGAATTGCAGGATATGTTTGACGAAATTTTCAATAGTGATAATGGGATGTCTTCTTCTTCTTCTT
+CTTCTTCTCGGACTGGAACTCCTTCAATGTGTTCTACTACATCGTCTACATCTTCCAGTGAGACCTTTTTTACCTTTTCC
+AACAAAAGAAGTTCAGGTGAAATGAAGTCGGGTAAAGGCGATTCTTGCCAATTCCAAGGATTTTGTGAAGGGACAGGTGG
+AGCATCTGGAAAAAGCAATGAAAGAGAACGGAGTCGGAGGAAAAATTCCAAGAGTGGACGGAAGCAA
+>gnl_Soltu3.4_PGSC0003DMP400016106
+ATGGGCAATGATTATTATGCAGTTTTGGGATTGAAAAAGGAATGCACTGAAACAGAGCTTAGGAATGCTTATAAGAAGCT
+TGCACTGAAATGGCACCCAGATCGCTGTTCAGCATCGGGGAATTCGAAGTTTGTAGATGAAGCAAAGAAGAAATTTCAGG
+CAATTCAAGAAGCATATTCTGTGTTATCGGATGCAAACAAAAGGTTTCTGTACGATGTAGGAGTTTATGACTCTGGTGAT
+GATGACGACGAAAATGGCATGGGTGATTTCCTGAATGAAATGGCAGCTATGATGAGCCAAAATAAGTCCAATGAAAATCA
+GGAAGAAACCTTTGAGGAATTGCAGGATATGTTTGACGAAATTTTCAATAGTGATAATGGGATGTCTTCTTCTTCTTCTT
+CTTCTTCTCGGACTGGAACTCCTTCAATGTGTTCTACTACATCGTCTACATCTTCCAGTGAGACCTTTTTTACCTTTTCC
+AACAAAAGAAGTTCAGGTGAAATGAAGTCGGGTAAAGGCGATTCTTGCCAATTCCAAGGATTTTGTGAAGGGGTGGAGCA
+TCTGGAAAAAGCAATGAAAGAGAACGGAGTCGGAGGAAAAATTCCAAGAGTGGACGGAAGCAATAGGATGGATGCTAAAA
+GGCAAAAGGTTCTATCA
+>gnl_Ambtr1.0.27_AmTr_v1.0_scaffold00007.329
+ATGGCACCCCGAGGAGAGAAAGACAGTGATTTTTATGCAATTTTAGGGTTGAAGAAGGAGTGCTCTGCTTCAGATCTCAG
+AAATGCGTACAAAAGGCTCGCACTTCGGTGGCATCCAGATAGGTGCTCTGCCTCAGGGAACACAAAGTTTGTGGAGGAAT
+GCAAGAAAAAGTTCCAGGCCATTCAGCAGGCTTATTCCGTGCTCTCGGATGCAAATAAGAGGTTTTTGTACGATGTTGGA
+GCATATGGAAGTGACGATGACGATCAGGGAATGGGTGAATTTCTTGGGGAGATGGCGGTAATGATGAGCCAGACAAAGCC
+CAGTGAAAAAGGGCCGGAGAGCTTTGAGGATCTACAGAACTTGTTTCAGGAGATGTTCGAAAGGGATCTGGACATGTTTA
+AGTCATCGACCTCCCACAACAACAACAATGATAACAACAATAATAATCATAGGAGTAGTGACAATAATAATTGTAGTAGT
+GTTCATTGTTTTAGTAACACCAACAAGAGGAATTGCTCGGATATGAATGCCGGAGAAGCCTCGGAGGTCGGGCGCTTTGC
+TTTCTCATGCTATGCGACAGAGTTCCTGCACAAGCAAACTTTCTCCGTCGGGGCGGATGATGTGCGATCGGAGTCGAGCA
+ACAAGAGACGAAACGGGAGGAAACAGAAATCCACCTCCTCTTCAAGGAAAAGT
+>gnl_Aquco1.0_PACid_18141277
+ATGGCTAGTGAAGAAGAAGCAAGTGATTTCTATAAAGTTTTGGGATTGAAAAATAACTGTTCTTCCTTGGAGCTCAGGAA
+CGCTTATAAGAAGCTTGCACTGAAATGGCATCCGGATCGTTGTGCTGCTTCAGGAAACTCAAAGTTTGTTGAAGAAGCAA
+AGAAGAATTTTCAAGCAATACAAGAAGCTTATTCTGTTCTTTCAGATGAGCAGAAACGATTTATGTATGACGTTGGTGTC
+TACGATAAAGATGATGATGACGAAAATGAAGATATGGGCGATTTTTTAGGTGAAATGATGTCTATGATGAAGCAAGAAAA
+TACTAGTGCGGATGGACAACAAAGTTTTGAAGACCTTCAAAACCTATTCCAGGAAATGGTTCAAAATGACAAAGAATTTT
+ATAATCCAGCCTCTCAAAACTCGTCGATATATAATGCTAGTAACAACATGTTCTCCTTCTCTAATAACGAGAACTTAAAC
+AATGCTAGCAACAACACATTCTCCTCTTTTTATAATGAGAACCTAAACAGCAGCAACAAGAAAAGTTGCTCAAGTATGAG
+CGCAGAAAATACCAAGGTGGACTTTAACATGGAATCATTGGACTTCCGCAGCTTTTCTATTGGGTTAGAAGGCGGTACAT
+CATTTCAAAACTCCAAAGGGAGAGGAGTAACGGGTAGGCGGACAGGAAGGAAACAGAAGGGGTCGTCCTGTAATGATATG
+TCTTCCCATGATTCTAAGATTTTGGCG
+>gnl_Arath10_AT3G14200.1
+ATGGCGTCCAGTAATAGCGAGAAGATCAACGAGAATCTGTACGCTGTTCTGGGTTTGAAGAAGGAATGTTCTAAGACGGA
+GCTCCGTTCTGCTTATAAGAAGCTTGCTCTCAGATGGCATCCAGATCGTTGTTCGTCAATGGAGTTTGTAGAAGAAGCAA
+AGAAGAAATTTCAGGCAATCCAAGAAGCCTACTCTGTTCTGTCTGACTCCAACAAGAGGTTCCTGTATGATGTTGGTGCT
+TATAATACTGATGATGATGATGACCAAAACGGAATGGGAGATTTCTTGAACGAAATGGCGACTATGATGAATCAATCCAA
+GCCTAGTGATAATAACACAGGGGACAGTTTTGAACAACTACAAGATCTGTTTAATGAGATGTTTCAAGGAGACGCTGCAG
+CATTCCCATCATCATCGTCCTGCTCCACTTCAAATTTCACTTCATCTCGTAGTTTTGTATTCGATACAAATTCTCAGCGG
+TCATCTTCGTTTGCGACAAGTTCGATGGGGATGAATAATGATCCTTTCGGATATGACCCGAGAGCTCATTCCTTCTCTTT
+AGGGGTGGACCATCAGCAAGAGTTCAAGAAAGGGAAAAACAATGGCGGAAGAAGAAACAGGAGAAAGAACAATGTTCCAT
+CGGCTGGTCACGAAACGTCGTCGTCAAACAACTATGGAGTCCCCACCTCA
+>gnl_Bradi1.2_Bradi3g60090.1
+ATGGCCACCGGGGGCGACAAGTGCGGCGGAAAGCCGGCGGCCGCGGGGGTGGGCGGCGGTGACCTGTACTCTGTGCTGGG
+CGTCAACAAGGAGTGCTCCGACGCCGACCTCAAGGTCGCCTACCGGAAGCTCGCCATGAGATGGCATCCGGATAGATGCT
+CCTCCTCCAGCAGCACCAAGCACATGGAGGAAGCAAAAGAGAAGTTCCAGGAGATCCAGGGCGCCTATTCCGTCCTCTCC
+GATGCCAACAAGCGCTTCCTCTATGACGTGGGGGTATATGAAGAACATGAAGAAGAAGATGATGACACTCTGCAGGGGAT
+GGGGGACTTCCTTGGTGAGATGGCCCATATGATGAGCCAGACGCAGCCAGCGAGACAGGAAAGCTTTGAGGAGCTCCAGC
+AGCTCTTCGTGGACATGTTCCAGTCTGATATTGAATCGGGATTCTGCAACGGACCTGCCAAGGACCATGACCCAGTCCAA
+AGACAGACGCGAACATTCTCGACCCCTCCTTCGCCATCGCCATCTCCACCGCCTCCACTAGCTACAGTGGACGAAGCGGC
+ATCATGTAATGGCATCAATAAGCGTGGCTCATCAGCAATGGGCTCTGGGAAGCCTCCAAGAGCTGGTGAAGTGAGTGGGG
+GTCACGGCCAGTCTGAGTTCTGTTTCGGGATGAGCGACGCCAAGCAAGCGCCGAAGGCGCGAGGCGGGAACGCTAGCAGG
+AGAAGGAACGGCCAGAAGCAGAAACTGTCGTCGAAGCACGACGTCTCCTCCGGCGATGAGATGCCGAGACCACATGCAGC
+AGTA
+>gnl_Carpa1.181_PACid_16420351
+ATGGCGGATGGAGAAGACAAGAACAACAGTGATTTGTATGCGGTTCTTGGATTGAATAAGGAATGTACTCCAGCAGAGCT
+CAGGAACGCTTATAAGAAACTTGCAATGAGATGGCATCCAGATCGCTGTTCCGCGTCGGGGAATTCAATGTTTGTGGAAG
+AAGCAAAGAAGAAATTTCAGGCAATCCAAGAAGCCTACTCTGTTCTTTCTGACGCAAACAAGAGGTTTCTGTACGACGTC
+GGAGCTTACGAAAGTGATGACGACGAAAATGGAATGGGTGATTTTTTAAACGAAATGGCAGCCATGATGAGCCAAACAAA
+GCCTAATGAGAATGGGAATGCACAAGAGAGCTTTGAAGAATTGCAAGAGTTGTTTCAAGAGATGTTTCAAGGGGATATGG
+GATTCAACACATTTGGATCTAGTTCTCAGCCTACTACTTCTTCGTGTTCTGCTTCCTCTGCATATGCAACCTGTAGCGAA
+ACCTCCAATCCTAACAACAACAAGCGCAATTCATCAGAAATGAATTATGGCAAGAAAAAGGTAGATGATTCTTCAGGGTT
+TCATGCTCATTTCCAAACCTTTTGTTTAGGGGTGGAACAGCAGCAAGATTTCAAGAAGGGGAAGGAAGCAAGAGGAGGAA
+TTCGAGGAAAACCCGGAGGTAGTAGGAGGCAGGGAAGGAAACAGAAGGTTTCATCTCGCCACAATGTCTCATCCAATGAC
+TTGGGCATTTCTGCTTCC
+>gnl_Frave2.0_gene05408
+ATGGCGGGGGGAAAGTGGGTCCCCCCACCCCTGTCCCAGTTTCATCTTCACATAAAGAGGTTCCGTCGACGTCAGAAAGT
+CTCTGGATCCAGTGGAAACACAGAACCTTCTGGATACACAGACTGCAAAAATATCTCCAATCGAAGAATGGAAGAGAAAG
+GCAATGACTTTTATGCTGTTATGGGGTTGAAGAAGGAATGCTCTGACTCGGAGCTCAGGAATGCTTATAAGAAACTTGCA
+CTGATATGGCACCCAGATCGTTGCTCTGCCTCAGGAAATTCAAAGTTCGTGGAAGAAGCCAAGAAGAAGTTTCAGGACAT
+TCAACAAGCCTATTCTGTTCTGTCCGACGCCAACAAGAGGTTTCTGTACGATGTAGGAGCTTATGAAAGTGATGATGACG
+AAAATGGAATGGGTGATTTTTTAAACGAGATGGCGGTGATGATGAGCCAGACTAAGCCGAATGAAAATGGAGGAGAGAGC
+TTCGAACAATTGCAGGAGCTCTTTGAAGAAATGTTTCAGGGGGATATTGAGGGCTTTAGCTCCTGCTCTCAGCCTCCTAC
+TTCCTGTTCTACTTCCTCATCTTCATACGCATTGTACTGTGAAAATTCTACTCCCAGTAACAAACGTAATTCCTCCGCAA
+TGAATTATGGCAACGCAACCCTGGACAGTTCTGGTTTTGATGCTCATTTTCACAATTTCTGTGTAGGGACAGGCGGGAAG
+CCAGCAAAGGATCGGGAAGGGGATGCCAGGAAGAGAAAGGATTCCAGGAGGAGTAACCGG
+>gnl_Mimgu1.0_PACid_17694730
+ATGGCTGCTGATGAAGAGAAAAGCAGCGATTTTTACGGCGTTCTGGGGCTGAGGAAAGAATGTACGGCGGCGGAGCTCAG
+GGTTGCCTACAAGAAACTTGCAATGAAATGGCATCCAGATCGTTGCTCTGCTTCTGGGAATTTAAAGTATGTGGAGGAAG
+CAAAGAACAAGTTTCAAGCTGTCCAACAGGCCTATTCTGTGCTTTCCGATGCCAACAAAAGGTTTCTCTACGACGTAGGA
+ATCTACGATTCTGAAGACGATGCTGACGAAAACGGTATGGGTGATTTCTTGAATGAAATGGTAGCAATGATGGGCCAAAG
+TAAACCAAATGAAAATAAAAACGAGAGCTTCCAAGAATTGCAAGATCTATTCGAGGAAATATTCAACAATGACGCGGAAG
+AGGTTTTCAAGATTCCTCCTCCGCACTTTCCGTACCAAGATTCTTGCAGCGAGACCCGCACCGCATCGAACAAGAGGAAC
+GCCCGCGAAATGGGCTCCGTAAATTTCAGTAATATCGAAGCCACACCATTTGAAGGGTTCTGCATAGGGGAAAATGTAAT
+TTTTGGGGGAGAGAGAATACAAACGAGGCCCGGAGGAGGTAGTAGGAGGACGAAGCCGAAGATTTCGACATCGATCGATG
+GTTTAATTAGT
+>gnl_Nelnu1.0_NNU_010544-RA
+ATGAACCTGTTGTTGCAGAAATGGCATCCGGATCGATGCTCCTCGTCGGGAAACTCTAAGTTCGTGGAAGATTCAAAGAA
+GAAATTTCAGGCAATTCAAGAGGCTTATTCTGTTCTATCCGACGAGAATAAGCGATTTCTTTACGACGTTGGAGTTTACG
+ACTGCGACGACGATGACGATGACGAAAACGGAATGGGAGAATTTTTGGGGGAAATGGCGACTATGATGAGCCAAATTAAA
+CCCAGCGAGAACGGGCCGGAGAGTTTGGAGAAGCTGCAGGAACTGTTCGAGGAAATGTTCCAAAGGGACATGGATGATGG
+TTTCTTCTCCCCCTCCCCCCAATGCGCTTCTTTTTCTTCGTCTTGCTCATCTTCTTCGTCGTCGACGACTTATTTTTCAT
+ATAATAACAACAAGCACGACAATAAAAGGAATTGCTCCGACATCAGTTCTATGGACGATTTCTACACATTTGGCACGGAT
+TCTATACAATTCAGCAATTTCTGCATTGGGGTGGAAGGAGGAGAAGATTCAAAAGTAAGAGGAGGAAAGTCAAGGCGGAA
+GAGCAACAGGAGACAAAAAGTTTCATCGTCTAAACACGATCCGTCGTGCCGT
+>gnl_Solly2.3_Solyc03g123560.2.1
+ATGGAAGACAAAAGCAATGATTATTATGCAGTTTTGGGGTTGAAGAAGGAATGCACTGACACAGAACTTAGGAATGCCTA
+TAAGAAGCTTGCACTGAAATGGCACCCAGATCGCTGTTCAGCATCGGGGAATTTGAAGTTTGTAGATGAAGCAAAGAAGC
+AATTTCAGGCAATTCAAGAAGCATATTCTGTGTTATCGGATGCAAACAAAAAGTTTTTGTACGATGTAGGAGTTTATGAC
+TCTGGTGATGATGACGACGAAAATGGCATGGGTGATTTCCTGAATGAAATGGCAGCTATGATGAGCCAAAATAAGTCCAA
+TGAAAATCAGGGAGAAGAAACCTTTGAGGAATTGCAGGATATGTTTAATGAAATGTTCAACAGTGATAATGGAACGTTTT
+CTTCTTCTTCTTCTTCTTCTTCTTCTTGGACTGGAACTCCTTCAATGTGCTCTACTACATCATCTACATCTTCAAGTGAG
+ACTTTTTTAACCTTTCCCAACAAGAGAAGTTCAGGTGAAATGAAGTCGGGTAGTAGTGTAAGAGGCGATTCTTGCCAATT
+CCAAGGATTTTGTGTAGGGGCAGGTGGAACTTCTGGAAAATGCAATGAAAGAGAACGAAGTTGGAGGAAAAATTCCAAGA
+GTGGACGGAAGCAT
+>gnl_Sorbi1.4_PACid_1968370
+ATGGACGCCGGGGGAGAGAAGTTCAGCGACGCGGCGGCGGCGGAGGGCGGTGAGGGCGGCGGCGACCTCTACGCCGTCCT
+CGGGCTCAAGAAGGAGTGCTCCGACGCCGACCTCAAGGTCGCTTACCGGAAGCTCGCCAAGAAATGGCACCCGGACAAAT
+GCTCCTCCTCCAGCAGCGTGAAACACATGGAGGAAGCCAAGGAGAAGTTCCAAGAGATCCAGGGCGCCTATTCCGTACTC
+TCTGACGCCAATAAACGGCTCCTCTACGATGTTGGAGTATACGACGATGAGGACGACGAGGATAGCATGCAGGGGATGGG
+TGACTTCATTGGTGAGATGGCCCAGATGATGAGCCAGGTGCGGCCGACGAGGCAGGAAAGCTTTGAGGAGCTGCAGCAGC
+TTTTTGTGGACATGTTCCAGTCTGATATTGATTCAGGATTCTGCAACGGGTCTGCTAAGGATCAAGTTCAGGGGCAAGCC
+AAAAGTAGAACATGCTCGACCTCACCTTCATCATCACCGTCCCCACCTCCTCCTCCTACTATAGTAAAGGAGGCAGAGGT
+GTCATCATGTAATGGCTTCAATAAGCGGGGTTCATCAGCAATGGACTCAGGGAAGCCTCCAAGGCCTGTTGAAGGCGGTG
+CTGGTCAGGCTGGATTTTGTTTTGGGGTGAGCGATACGAAGCAAACGCCGAAGCCGAGAGGTCCGAACACCAGCCGGAGG
+AGGAACGGCCGGAAACAGAAGCTGTCATCCAAGCACGATGTTTCATCTGAAGATGAAACGGCCGGTTCC
+>gnl_Thepa2.0_Tp3g12470
+ATGGCGTCGAACAATAGCGAGAAAGGAAACGATGATTTGTATGGTGTTCTGGGCTTGAAGAAGGAATGTACGACGACGGA
+GCTCCGTACTGCTTATAAGAAGCTTGCTCTAAGATGGCATCCAGATCGTTGTTCGTCAATGGGGACTCCAGAGTTTGTAG
+ACGAAGCAAAGAAGAAGTTTCAGGCAATCCAAGAGGCCTATTCTGTTCTGTCTGACTCCAACAAGAGGTTCCTCTATGAT
+GTTGGAGCTTATAACAGTGATGATGAAGACCAAAACGGTATGGGAGATTTCTTGAACGAAATGGCGGCAATGATGAATCA
+GTCCAAGCCTAGTGAGAATAACTCAGGGGACAGTTTTGAGCAGCTACAAGATCTGTTTAATGAGATGTTTCAAGGAGACG
+CTGCAGCATTCTCATCATCATCATCATCATCTTGCTCTGCTTCGACTTTTACTTCCTCTTGTAGCTTTGTCTTTGACACA
+AATAGTCAGCGGTCACCGTTTGAGACAAGCTCAATGGGGACTAATGATCTTTTTGGATTCGATCACAGTGCTCACACCTT
+CTCTTTAGGGGTGGAACATCAGCAGGATTTCAAGAAGGGGAAGAACAGTGGTGGAAGAAGAAACAGAAGGAAGAACAATG
+CTCAATCGGCTGCTCACGAGACGGCGTCGTCCAACAACTATGGAGTCCCCACTTCA
+>gnl_Theca1.0_Tc06_g010450
+ATGGCAAATGGAGAAGAGAAAAACAATGATTTTTATGCAGTTTTGGGGTTGAATAAAGAATGCACTCCGACAGAGCTCAG
+GACTGCTTATAAGAAACTTGCCCTGAGATGGCACCCTGATCGTTGCTCAGCTTCGGGAAATTCAAAGTTCGTGGAAGAAG
+CCAAGAAGAAATTTCAGGCCATTCAACAAGCCTATTCTGTTCTGTCTGACTCAAACAAGAGGTTTCTGTACGACGTAGGA
+GCTTATGACAGTGATGATGACGAAAATGGAATGGGAGATTTTTTGAACGAAATGGCAGGGATGATGAGCCAGACAAAATC
+TAATGAAAATGGAGGGGAAAGCTTCGAGGAACTACAGGAATTGTTTGAAGAAATGTTCCAAGCGGACATTGATTCATTTG
+AGTCTACTGGTCAGTCCACTCCTTCCTGCTCTGCTTCATCTTCGTTTGGGTCATATGGTGAAAGTTCCAGCTCCAACAAG
+CGGAATTCCTCTGAAATGAGTTCTGTGGAGACTAGGCTGGAGAGTTCTTCTAGCTTCGATGCACAATTTCACAGTTTTTG
+TCTCGGGGTGGAACACAGGCAAGATATCAAGCAACACAGAGGAGCCAGAGGAGGAATGCGAGGAGCAGCCGGCGGTAGTA
+GACGGAGAAATGGCAGGAAACAAAAGGTTTCATCTGGCCATGATGTTACTTCCAACGACTGTGGCATTTCTGCTTCA
+>gnl_Vitvi12X_PACid_17827068
+ATGGCCGCCGGAGAAGAGAAGAGCAATGATTTTTATGCCGTTCTAGGGTTGAAAAAGGAATGCACCGCCTCCGAGCTCAG
+AAATGCGTACAAGAGACTTGCCCTGATGTGGCACCCAGATCGTTGCTCCTCGTCGGGAAACTCGAAATTCGTGGAAGAAG
+CGAAGAAGAAATTTCAGGCCATACAAGAAGCCTATTCAGTTCTCTCTGATGCGAATAAAAGGTTTCTGTACGACGTTGGA
+GCCTACGACAGCGATGATGACGAAAACGGAATGGGGGATTTTTTGAATGAGATGGCGGTTATGATGAGCCAAACCAAGTC
+CAATGAAAATGGGAAGGAGAGCTTTGAGGAGTTGCAGGAGCTCTTTGAGGATATGTTCCAAAGGGATGTCGACGCATTCA
+ACTCTGCCTCTCATCACCCCATGAACTCTTTCCCCAGTTCTACTTCCACTTCTTCCTACTGCGAAAGCTCCAATGCCAAC
+AACAAGCGGAATTCGGCTGAAATGGGCTCTGGAAGGATGATGAGTGCAGGGGAGTCCTCTGCTTTTGATGCCCACTTTCA
+GAGCTTCTGCTTTGGGACAGGCGGCACGCCAGGGAGATTTCAGGAGGGGGAAAGGAGCAAGAGGAGGAATTCCAGGAGGA
+GCCAACGG
+>gnl_Selmo1.0_PACid_15401289
+ATGGAGAAGAGGAAAGAGGATCCCTACACTGTTCTTGGTGTCCAAAAGTCGAGTTCTAGCTCGGAAATTCGCTCCGCTTA
+TCGGAAGCTCGCCATGAAATGGCATCCAGATAAGCAACACTCTTTAGAGGATCAAGCAAAAGCGAAGTTCCAGGGCATTC
+AAGAAGCTTATTCAGTGCTATCCGACGACAAAAAAAGAGTTCTTTATGATTCGGGACTTTATGACGAGGGAGATGACGAG
+GTGAGT
+>gnl_Orysa6.0_PACid_16864430
+ATGGCCCGCGGCGGCGGCGGCGGCGGCGGCGCGGACGCCGACCTGTACGCCGTCCTCGGCCTCAGCAGGGAGTGCACCGA
+CGCCGACCTCAGGCTCGCCTACCGCAAGCTCGCCATGATATGGCATCCGGACAGGTGCTCGGTGGCCGGCGGCAGCGCGA
+GCGCGGCGGGCGTCGACGAGGCCAAGGAGCGATTCCAGGAGATCCAGGGCGCCTACTCCGTGCTCTCCGACTCCAACAAG
+CGCTTCCTCTACGACGTCGGCGTCTACGACGGCAACGACGGCGACGACGACGACGACGAAGCAGATCTGTCGGGGATGGG
+CGATTTCCTCGGCGAGATGGCGCAGATGATGAGCCAGGCGACGCCTGCGGAGAGCTTCGAGGAGTTGCAGCAGCTGTTCG
+TGGACATGTTCCAGGACGACATCGACGCCGGCCTCTGCCAGTCGACGCCGCCGCCGCCGTCATGGCCGTCGCCTCCGGCG
+GCCGCCAATGCACGATCGCCGGCGGCGGCGGCGACTTCACGCAAGGGCGTGAACAAGCGGTGCTCACCGGCGGCGATGGA
+CATGGACTCCGGTTTGAGCAGCCTGCTGGGCATTTCGGGCTTCTGTTTCGAGGCGCCATGGACGTCGCAGGACGCGAGCA
+CTGCCGCCGGCGGTGGCGGCGGCAAGAGGAGAAAGCAGAGGCCGCCGCCGGCGAGCCACAACGTG
+>gnl_Sorbi1.4_PACid_1982925
+ATGGCTGCTACAAGTCACTGCGGCAACATCCAGGACCAGGACGAAGAAGCTTCGGCTCCTGGCGCCGCCGACCTCTACGC
+CGTGCTCGGGCTCAACAGGGAGTGCACCGACGCCGAGCTCAGGGTCGCGTACCGGCGGCTCGCCATGATATGGCATCCGG
+ACAGGTGCTCGGCGTCCGGCAGCTCGCCGGCGCGCATGGAGGAGGCCAAGGAGCGGTTCCAGGAGATCCAGGGCGCCTAC
+TCCGTGCTCTCCGACTCCAACAAGCGGCTCCTCTACGACGTCGGCGTCTACGACAGCGACGACGACGAGGCTGACCTGTC
+GGGGATGGGCGACTTCCTCGGAGAGATGGCCGACATGATGAGCCAGGCCACGCCAACGGAGACCTTCGAGGAGCTGCAGC
+AGGTGTTCGTGGACATGTTCCAGGACGACCTGGACGACGCCGGCTTCTTCGGCGGGCTTCCGACGACGGGCCGCAGGGCC
+CAGGCACCCAGCACCTCGCTGCCGCCGTCGGTGTCGTCGTCGCCGTTGCGGCCGACGCCTGCCGCTGGAAGAAGCAAGGG
+TCCGCAAGCGACGCCGTCGTCGTCGTTTAAAGGCGTCGAGAGGCGGGGTTCGACGTCGACGGCGAAACGGCCGAGGCCCA
+ACGGGTCGGCGGGCCTGGAATCGGACCTGGGCCTCTCCGGATTCTGCTTCATGGTGAGTAAGGAGATGAGCAAGTCGAAG
+GAGAGGCAAGCGGTATGGGCCAGTGACGACGGTGACAGGAGCACCGATGGCAAGCAGAGGTTGTCGACGAGCCGCGATGT
+CTCCGGTGGTGGGATGTCACGCTCACTGCAGGGCCAAAGCAGCAAAAACTTGTTGCAGTGTATGGCCTCTAAGTCT
+>gnl_Medtr3.5_Medtr8g022310.3
+ATGGCTAACGAAGGAAACAAAAGCAATGATTTCTATGCAGTTTTGGGATTGAATAAGGAATGCTCTGATTCAGAGCTAAG
+GAATGCTTATAAGAAACTTGCACTGAAATGGCATCCAGATCGTTGTTCAGCTTCAGGGAATGTGAAGTTTGTGGAAGAAG
+CTAAGAAGAAATTTCAGGCAATTCAAGAAGCCTATTCTGTTTTATCTGACTCGAACAAGAGATTAATGTACGACGTTGGA
+GTTTACGACAGTGATGATGACGAAAATGTAAGGCACTTGTTTCACACCATTCATGAGTTGGGGACCCTCTTTTGCGTTAT
+GTTTTGTTTCTTCATTTCCTTGAGGGGAGAGAAGAGAAGCAACCTTAATTTAACCTTTTCACTTTCACAT
+>gnl_Nelnu1.0_NNU_000115-RA
+ATGGAGGTGGACTCCCATCGATCATCTCCATCTTACTACACCATCCTTGGTGTAGATCAGAATTCCTCCGCTTCCGAGAT
+ACGCAATGCTTACAGGAAGCTCGCGATGCAATGGCATCCAGACAAATGGACGAAAACTCCGTCGCTCTTAGAGAAAGCCA
+AGAGTAAATTCCAGCAAATCCAGGAGGCTTATTCGGGTGGGTTACTCGTTTTCATGTTATCGGATCAGGGGAAGAGAACA
+CTGTATGATGTCGGTCTGTATGACCCGGACGATGAAACGAATGACGAGGTGGGGCTTCGCAGATTTCATGCAGGAGATGA
+TATCTCTCATGAACGATGTGAAGAAACAGGAGAAGAAATACAGCTTGGAGGAACTACAGGAGATGTTAGTGGAAATGTCA
+CAAGGGCTGGAGTTGAAGATGGAGAGTGCTGGTGGTGTGTGGTTGTTAGATGGAGCTGCAGCCTCAAGGAGGAGCTCAAA
+GAGGGCCAGATGGGAATCATCAGCGAGTCCGACGACGCTGGACACGACACACCTTCTCTCCCCCACTTGCACGGTTCAGA
+GCTGGAATTGTTAGGAAGAACCGGCTGTTGCAAT
+>contig_7
+GAGAATGAGTGGTCTGGGGCTGAGTTTTTGAATGAAATGGCGGCAATGATGACTCAAAATAAATCCAATGAAAACGGAAC
+CGGAACTTTTGAAGAACTGCAACAATTGTTCGATGAAATGTTTCAGAGCGACATCGAGTCCTTCAATGGTTGTTCTTCAT
+CATCCAATGAAACATGTAGCAACTCGAACAAGAGGAATTCCATTGAGTCGAGCTCGGCTAATTTCAGACCCGAAAATGGA
+AACGAAAGCGGCGAGATTAGCGGGAAGAAGAATACTAGGAAAGGTAAAGGTGACGNN
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/38889.faa	Mon Oct 30 09:53:09 2017 -0400
@@ -0,0 +1,4 @@
+>contig_2
+XLSKVPIPSNNIYAINDKKSPEDAADDYENRLKELVSEKIIPVSTISGFPKFDLMLLGMGPDGHVASLFPSHMQRYEKEK
+WVTFITDSPKPPPSRITFTFPMINSASEIAMVVTGADLAGTTKIALGTTGNVKPGETPLPCTEVSAEGEVTWFLDKDAAS
+QLLNYVRFDD
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/38889.fna	Mon Oct 30 09:53:09 2017 -0400
@@ -0,0 +1,8 @@
+>contig_2
+NNCCTTTCCAAGGTACCCATCCCATCCAACAACATATATGCTATAAATGATAAGAAGTCCCCGGAAGATGCAGCAGACGA
+CTACGAAAACCGCCTCAAGGAACTCGTCTCCGAAAAAATCATACCCGTTTCAACCATTAGCGGGTTCCCGAAGTTCGACC
+TCATGTTGCTTGGAATGGGGCCCGACGGCCATGTGGCCTCTCTTTTCCCTTCTCACATGCAACGCTATGAGAAGGAGAAA
+TGGGTCACTTTCATAACTGACTCTCCCAAACCGCCTCCGTCGAGAATCACTTTTACGTTTCCGATGATCAACTCGGCTTC
+GGAGATCGCTATGGTGGTTACCGGGGCTGATTTGGCTGGTACAACTAAGATAGCATTGGGTACTACGGGCAATGTTAAGC
+CGGGTGAGACTCCTTTGCCTTGTACTGAAGTTTCGGCTGAGGGAGAGGTTACTTGGTTCTTGGACAAAGATGCTGCTTCA
+CAACTGTTAAATTATGTGCGCTTTGATGAT
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/38889_integrated.faa	Mon Oct 30 09:53:09 2017 -0400
@@ -0,0 +1,14 @@
+>gnl_Mimgu1.0_PACid_17675619
+MAETKTKVLKFDAEEDVAVALAKYTAYLSEKYIKEKNSFSVVLSGGTLIDTLRKLVEFPYKDSVDWSKWLIFWVDERVVP
+LDHEDSNYLLAYRGFLSKVPIPPSNIYAINDKKSPEGAADDYEERIKNLVEEKTLPISDSGFPKFDLMLLGMGPDGHVAS
+LFPSHNQRYEKKRWVTFITDSPKPPPPRITFTFPVINSASDIAMVVTGAELADTTKKALGNEKHTLPPLPCTEVSAEREL
+TWFLDKDAASKL
+>gnl_Solly2.3_Solyc06g053200.2.1
+MATQKGKKTVLKFDSEEDVSKALAKYTAELSEKFIKQKGSFTVVLSGGSLIDTMRKLVEPPYKDSIDWSKWWIFWVDERV
+VPLGHDDSNYKLASDGFLSKVPIPSSNIYAINDKESPEGAAADYEARLKQLIESKVLPLSAITGFPKFDLMLLGMGPDGH
+VASLFPLHPHRHEKERLVTFITDSPKPPPPRITFTFPVINSASEIAMVVTGAELAHMVDVALGNAPPPDGIPPPCTEVSA
+EEELTWFLDKDAASELQTSR
+>contig_2
+XLSKVPIPSNNIYAINDKKSPEDAADDYENRLKELVSEKIIPVSTISGFPKFDLMLLGMGPDGHVASLFPSHMQRYEKEK
+WVTFITDSPKPPPSRITFTFPMINSASEIAMVVTGADLAGTTKIALGTTGNVKPGETPLPCTEVSAEGEVTWFLDKDAAS
+QLLNYVRFDD
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/38889_integrated.fna	Mon Oct 30 09:53:09 2017 -0400
@@ -0,0 +1,30 @@
+>gnl_Mimgu1.0_PACid_17675619
+ATGGCCGAAACCAAAACAAAAGTACTGAAATTCGACGCGGAGGAAGATGTGGCCGTCGCTCTAGCAAAGTACACCGCCTA
+TCTCTCCGAAAAGTACATCAAGGAAAAGAATTCTTTCTCGGTGGTTCTCTCCGGCGGCACCCTAATAGATACACTCAGGA
+AACTAGTAGAGTTTCCATACAAGGATTCTGTGGATTGGTCGAAATGGCTGATATTTTGGGTTGACGAGAGAGTGGTTCCT
+CTTGATCATGAAGACAGCAACTACTTACTTGCATACCGTGGTTTTCTTTCAAAGGTACCTATTCCTCCAAGCAACATTTA
+CGCAATCAACGACAAGAAGTCTCCGGAAGGTGCAGCCGATGATTACGAGGAGCGTATCAAGAATCTGGTCGAGGAAAAAA
+CCCTACCTATTTCAGACAGTGGCTTCCCTAAATTCGACCTTATGCTTCTCGGAATGGGGCCCGATGGCCACGTGGCGTCT
+CTTTTCCCCTCTCACAATCAACGGTACGAAAAGAAACGGTGGGTGACATTCATAACTGACTCTCCCAAACCGCCGCCACC
+TAGGATCACTTTCACATTCCCAGTCATCAACTCTGCTTCGGACATTGCAATGGTGGTCACTGGTGCTGAGCTGGCGGATA
+CTACGAAGAAAGCATTGGGAAACGAGAAGCATACTCTTCCTCCTCTTCCTTGTACTGAAGTTTCGGCTGAGAGAGAGCTC
+ACTTGGTTCTTGGACAAAGATGCTGCTTCTAAACTG
+>gnl_Solly2.3_Solyc06g053200.2.1
+ATGGCAACCCAGAAAGGGAAGAAGACGGTGCTAAAATTCGACTCCGAAGAAGATGTATCAAAGGCACTTGCTAAATACAC
+TGCTGAGCTATCGGAAAAATTCATCAAACAAAAAGGTTCTTTCACTGTTGTGCTCTCTGGTGGTTCTCTTATCGATACCA
+TGAGGAAATTGGTAGAGCCGCCGTACAAAGACTCAATTGATTGGTCGAAATGGTGGATTTTTTGGGTAGACGAAAGAGTG
+GTTCCTCTAGGTCACGATGATAGCAATTATAAACTTGCTTCGGATGGGTTTCTTTCTAAGGTTCCGATCCCCTCTTCTAA
+CATTTATGCGATTAATGACAAGGAGTCACCTGAGGGTGCAGCTGCTGATTACGAAGCTCGTCTGAAACAATTGATTGAGA
+GCAAAGTTCTTCCGTTATCAGCAATTACTGGATTCCCCAAATTTGATCTTATGCTATTAGGTATGGGGCCAGATGGACAT
+GTAGCGTCTTTGTTTCCTTTGCATCCTCACCGCCACGAGAAGGAGCGGCTGGTCACCTTCATTACAGACTCACCAAAACC
+TCCTCCACCAAGGATTACTTTCACCTTTCCGGTAATTAATTCGGCTTCAGAGATAGCAATGGTGGTCACAGGAGCAGAGT
+TAGCTCATATGGTTGATGTCGCTTTGGGTAATGCGCCTCCTCCTGATGGAATTCCTCCCCCTTGTACTGAGGTTTCAGCT
+GAAGAGGAACTGACCTGGTTTTTAGACAAGGATGCTGCATCAGAACTACAGACCTCTAGA
+>contig_2
+NNCCTTTCCAAGGTACCCATCCCATCCAACAACATATATGCTATAAATGATAAGAAGTCCCCGGAAGATGCAGCAGACGA
+CTACGAAAACCGCCTCAAGGAACTCGTCTCCGAAAAAATCATACCCGTTTCAACCATTAGCGGGTTCCCGAAGTTCGACC
+TCATGTTGCTTGGAATGGGGCCCGACGGCCATGTGGCCTCTCTTTTCCCTTCTCACATGCAACGCTATGAGAAGGAGAAA
+TGGGTCACTTTCATAACTGACTCTCCCAAACCGCCTCCGTCGAGAATCACTTTTACGTTTCCGATGATCAACTCGGCTTC
+GGAGATCGCTATGGTGGTTACCGGGGCTGATTTGGCTGGTACAACTAAGATAGCATTGGGTACTACGGGCAATGTTAAGC
+CGGGTGAGACTCCTTTGCCTTGTACTGAAGTTTCGGCTGAGGGAGAGGTTACTTGGTTCTTGGACAAAGATGCTGCTTCA
+CAACTGTTAAATTATGTGCGCTTTGATGAT
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/39614.faa	Mon Oct 30 09:53:09 2017 -0400
@@ -0,0 +1,3 @@
+>contig_3
+XVDEGVVVAGLSEQEKASVSEILTTARAHSETIENLKRDHSQQVSCIEQHTNDTFRQKYMDYEPTGSTPVRSEPDIPSKG
+TIESLRAMPIDALEEEFRENHSYESAVTGKELMPSVTTRAPFSQIN
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/39614.fna	Mon Oct 30 09:53:09 2017 -0400
@@ -0,0 +1,6 @@
+>contig_3
+NNTGTGGATGAAGGAGTTGTTGTTGCTGGCTTGTCAGAGCAGGAGAAGGCATCTGTTTCTGAAATTCTGACAACTGCTAG
+AGCTCATTCAGAAACAATTGAGAACCTTAAGAGAGATCATTCCCAGCAGGTATCCTGTATCGAACAGCACACGAATGATA
+CTTTCAGGCAAAAATACATGGATTACGAGCCTACAGGGTCCACGCCAGTTAGGAGCGAGCCGGATATTCCCAGCAAAGGC
+ACAATAGAGTCACTTCGTGCCATGCCTATAGATGCACTTGAAGAAGAATTTCGAGAAAACCATTCATACGAATCTGCTGT
+TACAGGAAAGGAACTAATGCCGTCTGTTACGACTCGTGCACCATTTTCACAGATCAAC
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/39614_integrated.faa	Mon Oct 30 09:53:09 2017 -0400
@@ -0,0 +1,19 @@
+>gnl_Orysa6.0_PACid_16878968
+MSHMEAFQNVVLLHKANSNSTLEDISSLSAASCCSLDQLLACVEGEAQKIFGDIQNLLADHRSEVAHFTQELRESFRISL
+DRTKDMSSFILGLFDKYVEETSKLQSHSNHTHEAQVKSLEDFQKAYEEQSKSEEQKLLADITSLVSKHVTRQRELVGGRL
+NSLGDAARGNKAFLDEHTSAMEVVTKDAKRKWEMFAEQAENDCKVGSNFSAAKHCRMETILQECACTVDTAAQQWKASHA
+TVNDLCRKQIAEVEALVRSAIETNEQHEAEIASSRATAEEHASNSSKDLLQDVDNMLQEARNSSSRVVSTVEAHLGESQH
+LQESHSSHTAGINTHADNAFQSSYKDYEPTGETPVRSEPEVPSKDAIESLRAMPMESLMDEFRENHPYEPSKDRRPSLIP
+RSPLATINN
+>gnl_Phoda3.0_PDK_30s1023721g001
+VNQKMMKCTLIKDLYGEIERLKAEVYAAREKVGVYIPKERYHQEESERKAMAEQIEQMGVLLENNQKQIEDLQERYNTQL
+QQSDDLSKKLDATEILCVSLSKKLDATEKSLEHTSKLLAAAREDLKQAQYTLKEKDFVISEQRKAAREDKLNTANRSIVN
+NFRADLATRVGTLCNTVVASLDRQNEHLQSVEKLCQSSLDFHDKAVSELKRKVSASRALYTSHMEALQNVVRLHKASSNA
+SLEEMSSMISANTCSLDQLLALGQSEADLIFSDLQSILSIHRGEIANFTRELREKFQVNLDRTKEMSNFILELLEKIGKG
+TKEFQNDSTLVHEAQVKSIGDFQKAYEVEVRLTGLGDAARDSKAIMDNHASSMDIVTTDAKRKWEEYSKQAEQDSEDGSN
+FSAAKHCRMELMLQQCVNSVDATSQQWKKTHASVSEMSSKHVAEIEALVRSAIESNDQHDAEVASARMAAEEDVAKNSKD
+VLQHFDTVIDHERNSAAGVMAAVEAHSATLHKLQEEQSSQATEINSHAEDTFQNTYMDYEPTGETPTRSEPDIPSRGTIE
+SLRAMPIEALLEEFRENHPYESKEPKPSLIPRSPLVQLN
+>contig_3
+XVDEGVVVAGLSEQEKASVSEILTTARAHSETIENLKRDHSQQVSCIEQHTNDTFRQKYMDYEPTGSTPVRSEPDIPSKG
+TIESLRAMPIDALEEEFRENHSYESAVTGKELMPSVTTRAPFSQIN
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/39614_integrated.fna	Mon Oct 30 09:53:09 2017 -0400
@@ -0,0 +1,47 @@
+>gnl_Orysa6.0_PACid_16878968
+ATGTCACATATGGAAGCCTTCCAAAATGTTGTGCTCCTGCATAAAGCAAATTCAAATTCTACACTAGAGGATATATCATC
+CCTATCTGCTGCAAGCTGTTGCAGCCTTGATCAGCTTCTAGCTTGTGTCGAGGGAGAGGCACAGAAGATATTTGGTGATA
+TCCAGAATTTGCTAGCTGATCATCGAAGCGAAGTGGCACATTTCACTCAAGAGTTGCGGGAGAGTTTCCGCATTAGCTTG
+GATAGGACGAAGGACATGTCTAGTTTCATCCTTGGGTTGTTCGATAAGTATGTGGAGGAAACTTCGAAGTTGCAGAGCCA
+CTCCAATCACACACATGAAGCACAAGTCAAAAGCCTTGAAGATTTCCAGAAGGCTTATGAGGAGCAATCAAAATCAGAAG
+AACAAAAGCTTCTGGCGGACATCACCAGTTTGGTTTCTAAACACGTTACTCGACAACGAGAACTGGTGGGTGGTAGACTA
+AACTCTCTTGGTGACGCCGCTCGTGGAAACAAAGCATTTTTGGATGAGCACACGTCCGCCATGGAGGTGGTCACGAAGGA
+CGCCAAGAGAAAGTGGGAAATGTTTGCAGAGCAGGCAGAGAATGACTGCAAAGTTGGGTCCAACTTCTCTGCAGCTAAGC
+ATTGTCGCATGGAAACCATTCTGCAGGAATGTGCATGCACCGTCGACACTGCTGCTCAACAATGGAAAGCATCACATGCA
+ACTGTTAACGATCTATGCAGAAAACAAATAGCTGAAGTTGAAGCACTCGTCAGGAGTGCAATCGAAACCAACGAGCAGCA
+CGAAGCAGAGATTGCATCTTCCCGTGCCACGGCCGAGGAGCATGCGTCCAACAGCAGCAAGGACCTACTCCAAGATGTTG
+ACAATATGCTGCAGGAGGCGCGCAATTCGTCGTCGAGAGTGGTGTCGACGGTGGAAGCTCATTTGGGAGAGAGCCAGCAT
+CTACAGGAGAGCCACTCCAGCCATACCGCCGGCATCAACACCCACGCCGACAACGCTTTCCAGAGCAGCTACAAGGACTA
+CGAGCCGACCGGCGAAACTCCGGTGAGGTCGGAGCCGGAGGTGCCGAGCAAAGACGCGATCGAGTCGCTGCGAGCGATGC
+CGATGGAGTCCCTGATGGACGAGTTCCGCGAGAACCACCCCTACGAGCCGAGCAAGGACCGCAGGCCATCGCTCATCCCT
+CGCTCGCCGCTCGCCACCATCAACAAC
+>gnl_Phoda3.0_PDK_30s1023721g001
+GTAAACCAAAAAATGATGAAATGTACATTAATCAAAGATCTCTATGGAGAAATTGAGCGTCTAAAAGCAGAGGTGTATGC
+TGCTCGTGAGAAAGTTGGAGTTTACATACCAAAAGAACGCTACCATCAAGAAGAGAGCGAACGGAAGGCAATGGCAGAAC
+AAATTGAACAAATGGGGGTCTTGCTCGAAAACAATCAAAAGCAAATTGAGGATCTACAAGAAAGGTATAATACTCAACTT
+CAACAGTCTGATGACCTGAGCAAAAAGCTTGATGCCACCGAGATTCTCTGTGTTTCTCTGAGCAAAAAGCTTGATGCCAC
+CGAGAAAAGTTTGGAGCACACTAGCAAGTTATTGGCTGCTGCCAGAGAAGATCTGAAGCAAGCTCAGTATACTCTGAAGG
+AGAAAGATTTTGTTATATCAGAGCAGAGGAAAGCAGCTAGAGAAGACAAACTGAATACTGCCAACAGATCTATTGTGAAC
+AATTTTCGGGCTGATCTTGCAACAAGGGTTGGAACACTTTGTAATACTGTTGTTGCATCCTTGGATCGGCAAAATGAACA
+CCTTCAGTCTGTTGAGAAACTATGTCAATCTAGCCTTGATTTCCATGACAAGGCAGTATCAGAGCTGAAAAGGAAAGTGT
+CAGCTTCAAGAGCTTTGTATACTTCCCATATGGAAGCACTACAAAATGTAGTGCGTTTGCATAAGGCAAGCAGCAATGCC
+AGCTTAGAAGAGATGTCATCCATGATTTCTGCCAATACCTGCTCTCTTGATCAGTTACTTGCCTTGGGGCAAAGCGAAGC
+AGATCTGATTTTTAGTGATCTGCAAAGCATATTGTCAATTCACCGAGGAGAGATTGCAAATTTCACCCGTGAACTTCGTG
+AGAAATTTCAAGTTAATTTGGATCGGACAAAGGAGATGTCCAATTTTATTCTTGAGCTGCTTGAAAAGATAGGGAAGGGA
+ACAAAAGAATTTCAGAATGACTCAACTTTGGTACATGAGGCTCAGGTGAAGAGCATTGGTGATTTCCAAAAGGCATATGA
+GGTGGAAGTGAGGCTCACTGGACTGGGAGATGCTGCTCGAGATAGCAAAGCAATTATGGATAACCATGCATCATCAATGG
+ACATCGTCACAACTGATGCTAAGAGGAAGTGGGAAGAATATTCCAAGCAGGCAGAGCAAGATTCAGAGGACGGTTCTAAC
+TTTTCAGCAGCAAAACATTGTCGCATGGAACTCATGCTCCAACAGTGTGTAAACTCTGTTGATGCTACTTCTCAACAGTG
+GAAGAAGACACATGCATCTGTTAGTGAGATGAGCAGCAAACACGTTGCTGAAATTGAAGCACTTGTAAGGAGTGCCATTG
+AGAGCAATGATCAGCATGATGCTGAGGTTGCTTCAGCAAGAATGGCAGCAGAAGAGGATGTAGCGAAAAATAGCAAAGAT
+GTTCTTCAGCATTTTGATACTGTGATTGATCATGAGCGCAACTCAGCCGCTGGAGTGATGGCAGCAGTCGAAGCTCACTC
+AGCAACCCTGCATAAACTGCAAGAGGAACAATCAAGCCAGGCAACAGAGATTAATAGCCATGCGGAGGACACATTCCAAA
+ACACCTACATGGACTATGAACCAACGGGAGAAACCCCAACAAGGTCGGAACCAGATATACCAAGCAGGGGAACAATCGAA
+TCTCTTCGAGCCATGCCGATAGAAGCCCTCCTTGAAGAGTTCCGGGAGAACCATCCATACGAGTCCAAGGAGCCCAAACC
+GTCTCTCATACCACGCTCTCCACTCGTCCAGCTCAAC
+>contig_3
+NNTGTGGATGAAGGAGTTGTTGTTGCTGGCTTGTCAGAGCAGGAGAAGGCATCTGTTTCTGAAATTCTGACAACTGCTAG
+AGCTCATTCAGAAACAATTGAGAACCTTAAGAGAGATCATTCCCAGCAGGTATCCTGTATCGAACAGCACACGAATGATA
+CTTTCAGGCAAAAATACATGGATTACGAGCCTACAGGGTCCACGCCAGTTAGGAGCGAGCCGGATATTCCCAGCAAAGGC
+ACAATAGAGTCACTTCGTGCCATGCCTATAGATGCACTTGAAGAAGAATTTCGAGAAAACCATTCATACGAATCTGCTGT
+TACAGGAAAGGAACTAATGCCGTCTGTTACGACTCGTGCACCATTTTCACAGATCAAC
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tool-data/plant_tribes/scaffolds/README.txt	Mon Oct 30 09:53:09 2017 -0400
@@ -0,0 +1,3 @@
+For functional tests to work, this directory must contain symlinks to the scaffolds data
+installed into the Galaxy instance to which planemo points via the --galaxy_root parameter.
+This would typically be something like ~/galaxy/tool-data/plant_tribes/scaffolds/22Gv1.1.
--- a/utils.py	Thu Aug 24 13:32:01 2017 -0400
+++ b/utils.py	Mon Oct 30 09:53:09 2017 -0400
@@ -27,7 +27,7 @@
     return fstderr, fherr, fstdout, fhout
 
 
-def move_directory_files(source_dir, destination_dir, copy=False):
+def move_directory_files(source_dir, destination_dir, copy=False, remove_source_dir=False):
     source_directory = os.path.abspath(source_dir)
     destination_directory = os.path.abspath(destination_dir)
     if not os.path.isdir(destination_directory):
@@ -38,6 +38,8 @@
             shutil.copy(source_entry, destination_directory)
         else:
             shutil.move(source_entry, destination_directory)
+    if remove_source_dir:
+        os.rmdir(source_directory)
 
 
 def run_command(cmd):
@@ -52,29 +54,3 @@
 
 def stop_err(msg):
     sys.exit(msg)
-
-
-def write_html_output(output, title, dir):
-    with open(output, 'w') as fh:
-        dir_items = sorted(os.listdir(dir))
-        # Directories can only contain either files or directories,
-        # but not both.
-        if len(dir_items) > 0:
-            item_path = os.path.join(dir, dir_items[0])
-            if os.path.isdir(item_path):
-                header = 'Directories'
-            else:
-                header = 'Datasets'
-        else:
-            header = ''
-        fh.write('<html><head><h3>%s: %d items</h3></head>\n' % (title, len(dir_items)))
-        fh.write('<body><p/><table cellpadding="2">\n')
-        fh.write('<tr><b>%s</th></b>\n' % header)
-        for index, fname in enumerate(dir_items):
-            if index % 2 == 0:
-                bgcolor = '#D8D8D8'
-            else:
-                bgcolor = '#FFFFFF'
-            link = '<a href="%s" type="text/plain">%s</a>\n' % (fname, fname)
-            fh.write('<tr bgcolor="%s"><td>%s</td></tr>\n' % (bgcolor, link))
-        fh.write('</table></body></html>\n')