changeset 1:2ed5c0795f99 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/idba_ud commit 61e1699e65d6fd9f4f73650ed8463b37cd701344
author iuc
date Mon, 05 Aug 2019 15:59:42 -0400
parents fdaf2375d405
children 694b0f55b744
files idba_ud.xml macros.xml test-data/all_fasta.loc test-data/reference.fa tool-data/all_fasta.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test
diffstat 7 files changed, 592 insertions(+), 85 deletions(-) [+]
line wrap: on
line diff
--- a/idba_ud.xml	Fri Sep 21 15:25:56 2018 -0400
+++ b/idba_ud.xml	Mon Aug 05 15:59:42 2019 -0400
@@ -1,107 +1,137 @@
-<tool id="idba_ud" name="IDBA-UD" version="1.1.3">
+<tool id="idba_ud" name="IDBA-UD" version="@IDBA_VERSION@+galaxy1">
     <description>
-        Iterative de Bruijn Graph Assembler <!--for sequencing data with highly uneven depth-->
+        Iterative de Bruijn Graph Assembler for data with highly uneven depth
     </description>
     <macros>
         <import>macros.xml</import>
     </macros>
     <expand macro="requirements"/>
-    <command><![CDATA[
-
+    <command detect_errors="aggressive"><![CDATA[
     idba_ud
 
-    --read '$read'
-    #if $read_level_2:
-        --read_level_2 '$read_level_2'
-    #end if
-    #if $read_level_3:
-        --read_level_3 '$read_level_3'
-    #end if
-    #if $read_level_4:
-        --read_level_4 '$read_level_4'
-    #end if
-    #if $read_level_5:
-        --read_level_5 '$read_level_5'
-    #end if
-    #if $long_read:
-        --long_read '$long_read'
-    #end if
-    --mink $mink
-    --maxk $maxk
-    --step $step
-    --inner_mink $inner_mink
-    --inner_step $inner_step
-    --prefix $prefix
-    --min_count $min_count
-    --min_support $min_support
-    --num_threads \${GALAXY_SLOTS:-1}
-    --seed_kmer $seed_kmer
-    --min_contig $min_contig
-    --similar $similar
-    --max_mismatch $max_mismatch
-    --min_pairs $min_pairs
-    #if $other:
-        ${" ".join(str($other).split(","))}
-    #end if
+    @MAIN_INPUT@
+    @LEVELS_INPUT@
+    @KMER_OPTIONS@
+    @FILTER_OPTIONS@
+    @MIN_PAIRS_FILTER@
+    @OTHER_OPTIONS@
+    @THREADS@
     ]]></command>
 
     <inputs>
-        <param argument="--read" type="data" format="fasta" label="Fasta read file. Lower or equal to 600b"/>
-        <param argument="--long_read" type="data" format="fasta" optional="true" label="Fasta long read file. More than 600b"/>
-
-        <param argument="--read_level_2" type="data" format="fasta" optional="true" label="Fasta Paired-end reads for second level scaffolds"/>
-        <param argument="--read_level_3" type="data" format="fasta" optional="true" label="Fasta Paired-end reads for third level scaffolds"/>
-        <param argument="--read_level_4" type="data" format="fasta" optional="true" label="Fasta Paired-end reads for fourth level scaffolds"/>
-        <param argument="--read_level_5" type="data" format="fasta" optional="true" label="Fasta Paired-end reads for fifth level scaffolds"/>
-
-        <param argument="--mink" type="integer" value="20" max="312" label="Minimum k value. Lower or equal to 312"/>
-        <param argument="--maxk" type="integer" value="100" max="312" label="Maximum k value. Lower or equal to 312"/>
-        <param argument="--step" type="integer" value="20" label="Increment of k-mer of each iteration"/>
-        <param argument="--inner_mink" type="integer" value="10" label="Inner minimum k value"/>
-        <param argument="--inner_step" type="integer" value="5" label="Inner increment of k-mer"/>
-        <param argument="--prefix" type="integer" value="3" label="Prefix length used to build sub k-mer table"/>
-        <param argument="--min_count" type="integer" value="2" label="Minimum multiplicity for filtering k-mer when building the graph"/>
-        <param argument="--min_support" type="integer" value="1" label="Minimum supoort in each iteration"/>
-        <param argument="--seed_kmer" type="integer" value="30" label="Seed kmer size for alignment"/>
-        <param argument="--min_contig" type="integer" value="200" label="Minimum size of contig"/>
-        <param argument="--similar" type="float" value="0.95" label="Similarity for alignment"/>
-        <param argument="--max_mismatch" type="integer" value="3" label="Max mismatch of error correction"/>
-        <param argument="--min_pairs" type="integer" value="3" label="Minimum number of pairs"/>
-
-        <param name="other" type="select" display="checkboxes" multiple="true" label="Other options">
+        <expand macro="main_input"/>
+        <expand macro="levels_input"/>
+        <expand macro="kmer_options" maxk_default="100" step_default="20"/>
+        <expand macro="filter_options"/>
+        <expand macro="min_pairs_filter"/>
+        <expand macro="other_options">
             <option value="--no_bubble">Do not merge bubble (--no_bubble)</option>
-            <option value="--no_local">Do not use local assembly (--no_local)</option>
-            <option value="--no_coverage">Do not iterate on coverage (--no_coverage)</option>
-            <option value="--no_correct">Do not do correction (--no_correct)</option>
-            <option value="--pre_correction">Perform pre-correction before assembly (--pre_correction)</option>
-        </param>
+        </expand>
     </inputs>
     <outputs>
         <data name="output" from_work_dir="out/scaffold.fa" format="fasta"/>
     </outputs>
     <tests>
+        <!-- basic test + check of defaults -->
+        <test>
+            <param name="read" value="merged.fa" ftype="fasta"/>
+            <assert_command>
+                <has_text text="--mink 20" />
+                <has_text text="--maxk 100" />
+                <has_text text="--step 20" />
+                <has_text text="--inner_mink 10" />
+                <has_text text="--inner_step 5" />
+                <has_text text="--prefix 3" />
+                <has_text text="--min_count 2" />
+                <has_text text="--min_support 1" />
+                <has_text text="--seed_kmer 30" />
+                <has_text text="--min_contig 200" />
+                <has_text text="--similar 0.95" />
+                <has_text text="--max_mismatch 3" />
+                <has_text text="--min_pairs 3" />
+                <not_has_text text="--read_level_2" />
+                <not_has_text text="--read_level_3" />
+                <not_has_text text="--read_level_4" />
+                <not_has_text text="--read_level_5" />
+                <not_has_text text="--no_bubble" />
+                <not_has_text text="--no_local" />
+                <not_has_text text="--no_coverage" />
+                <not_has_text text="--no_correct" />
+                <not_has_text text="--pre_correction" />
+            </assert_command>
+            <output name="output" file="out/scaffold.fa" ftype="fasta"/>
+        </test>
+        <!-- read levels test -->
+        <test>
+            <param name="read" value="merged.fa" ftype="fasta"/>
+            <param name="read_level_2" ftype="fasta" value="merged.fa"/>
+            <param name="read_level_3" ftype="fasta" value="merged.fa"/>
+            <param name="read_level_4" ftype="fasta" value="merged.fa"/>
+            <param name="read_level_5" ftype="fasta" value="merged.fa"/>
+            <assert_command>
+                <has_text text="--read_level_2" />
+                <has_text text="--read_level_3" />
+                <has_text text="--read_level_4" />
+                <has_text text="--read_level_5" />
+            </assert_command>
+            <output name="output" file="out/scaffold.fa" ftype="fasta" compare="sim_size"/>
+        </test>
+        <!-- k-mer options -->
         <test>
             <param name="read" value="merged.fa" ftype="fasta"/>
-            <output name="output" file="out/scaffold.fa" ftype="fasta"/>
+            <param name="mink" value="19"/>
+            <param name="maxk" value="99"/>
+            <param name="step" value="19"/>
+            <param name="inner_mink" value="9"/>
+            <param name="inner_step" value="4"/>
+            <param name="prefix" value="2"/>
+            <param name="min_count" value="1"/>
+            <param name="min_support" value="2"/>
+            <param name="seed_kmer" value="29"/>
+            <assert_command>
+                <has_text text="--mink 19" />
+                <has_text text="--maxk 99" />
+                <has_text text="--step 19" />
+                <has_text text="--inner_mink 9" />
+                <has_text text="--inner_step 4" />
+                <has_text text="--prefix 2" />
+                <has_text text="--min_count 1" />
+                <has_text text="--min_support 2" />
+                <has_text text="--seed_kmer 29" />
+            </assert_command>
+            <output name="output" file="out/scaffold.fa" compare="sim_size"/>
+        </test>
+        <!-- filter options -->
+        <test>
+            <param name="read" value="merged.fa" ftype="fasta"/>
+            <param name="min_contig" value="199"/>
+            <param name="similar" value="0.96"/>
+            <param name="max_mismatch" value="2"/>
+            <assert_command>
+                <has_text text="--min_contig 199" />
+                <has_text text="--similar 0.96" />
+                <has_text text="--max_mismatch 2" />
+            </assert_command>
+            <output name="output" file="out/scaffold.fa" compare="sim_size"/>
+        </test>
+        <!-- min-pairs and other options -->
+        <test>
+            <param name="read" value="merged.fa" ftype="fasta"/>
+            <param name="min_pairs" value="2"/>
+            <param name="other" value="--no_bubble,--no_local,--no_coverage,--no_correct,--pre_correction"/>
+            <assert_command>
+                <has_text text="--min_pairs 2" />
+                <has_text text="--no_bubble" />
+                <has_text text="--no_local" />
+                <has_text text="--no_coverage" />
+                <has_text text="--no_correct" />
+                <has_text text="--pre_correction" />
+            </assert_command>
+            <output name="output" file="out/scaffold.fa" compare="sim_size"/>
         </test>
     </tests>
-    <help><![CDATA[
-        IDBA-UD is a iterative De Bruijn Graph De Novo Assembler for Short Reads Sequencing data with Highly Uneven Sequencing Depth. It is an extension of IDBA algorithm. IDBA-UD also iterates from small k to a large k. In each iteration, short and low-depth contigs are removed iteratively with cutoff threshold from low to high to reduce the errors in low-depth and high-depth regions. Paired-end reads are aligned to contigs and assembled locally to generate some missing k-mers in low-depth regions. With these technologies, IDBA-UD can iterate k value of de Bruijn graph to a very large value with less gaps and less branches to form long contigs in both low-depth and high-depth regions.
-
-
-Input: IDBA-UD takes interleaved paired end data in the FASTA format as input, 
-i.e. paired-end reads need to be stored in the same FASTA file suc h that a pair 
-of reads should be in two consecutive lines. 
-In Galaxy paired reads in separate FASTQ files can be converted into interleaved 
-FASTA using the tools:  
-
-* `FASTQ interlacer on paired end read <https://toolshed.g2.bx.psu.edu/view/devteam/fastq_paired_end_interlacer>`_
-* `Samtools extract FASTA or FASTQ from a SAM file <https://toolshed.g2.bx.psu.edu/view/devteam/fastq_to_fasta>`_
-
-Note that, IDBA-UD assumes that the paired-end reads are in order (->,<-). 
-If your data is in reverse order (<-,->), please convert it by yourself.
-    ]]></help>
-    <citations>
+    <expand macro="help" more_help="IDBA-UD is an extension of the IDBA algorithm for Short Reads Sequencing data with Highly Uneven Sequencing Depth. IDBA-UD also iterates from a small k to a large k. In each iteration, short and low-depth contigs are removed iteratively with a cutoff threshold from low to high to reduce the errors in low-depth and high-depth regions. Paired-end reads are aligned to contigs and assembled locally to generate some missing k-mers in low-depth regions. With these technologies, IDBA-UD can iterate the k value of the de Bruijn graph to a very large value with fewer gaps and fewer branches to form long contigs in both low-depth and high-depth regions."/>
+    <expand macro="citations">
         <citation type="doi">10.1093/bioinformatics/bts174</citation>
-    </citations>
+    </expand>
 </tool>
--- a/macros.xml	Fri Sep 21 15:25:56 2018 -0400
+++ b/macros.xml	Mon Aug 05 15:59:42 2019 -0400
@@ -1,8 +1,128 @@
 <macros>
+    <token name="@IDBA_VERSION@">1.1.3</token>
     <xml name="requirements">
         <requirements>
-            <requirement type="package" version="1.1.3">idba</requirement>
+            <requirement type="package" version="@IDBA_VERSION@">idba</requirement>
             <yield/>
         </requirements>
     </xml>
+
+    <xml name="main_input">
+        <param argument="--read" type="data" format="fasta" label="Fasta read file. Lower or equal to 600b"/>
+        <param argument="--long_read" type="data" format="fasta" optional="true" label="Fasta long read file. More than 600b"/>
+    </xml>
+
+    <token name="@MAIN_INPUT@">
+        --read '$read'
+        #if $long_read:
+           --long_read '$long_read'
+        #end if
+    </token>
+
+    <xml name="levels_input">
+        <param argument="--read_level_2" type="data" format="fasta" optional="true" label="Fasta Paired-end reads for second level scaffolds"/>
+        <param argument="--read_level_3" type="data" format="fasta" optional="true" label="Fasta Paired-end reads for third level scaffolds"/>
+        <param argument="--read_level_4" type="data" format="fasta" optional="true" label="Fasta Paired-end reads for fourth level scaffolds"/>
+        <param argument="--read_level_5" type="data" format="fasta" optional="true" label="Fasta Paired-end reads for fifth level scaffolds"/>
+    </xml>
+    <token name="@LEVELS_INPUT@">
+    #if $read_level_2:
+        --read_level_2 '$read_level_2'
+    #end if
+    #if $read_level_3:
+        --read_level_3 '$read_level_3'
+    #end if
+    #if $read_level_4:
+        --read_level_4 '$read_level_4'
+    #end if
+    #if $read_level_5:
+        --read_level_5 '$read_level_5'
+    #end if
+    </token>
+
+    <xml name="kmer_options" token_maxk_default="" token_step_default="">
+        <param argument="--mink" type="integer" value="20" max="312" label="Minimum k value. Lower or equal to 312"/>
+        <param argument="--maxk" type="integer" value="@MAXK_DEFAULT@" max="312" label="Maximum k value. Lower or equal to 312"/>
+        <param argument="--step" type="integer" value="@STEP_DEFAULT@" label="Increment of k-mer of each iteration"/>
+        <param argument="--inner_mink" type="integer" value="10" label="Inner minimum k value"/>
+        <param argument="--inner_step" type="integer" value="5" label="Inner increment of k-mer"/>
+        <param argument="--prefix" type="integer" value="3" label="Prefix length used to build sub k-mer table"/>
+        <param argument="--min_count" type="integer" value="2" label="Minimum multiplicity for filtering k-mer when building the graph"/>
+        <param argument="--min_support" type="integer" value="1" label="Minimum support in each iteration"/>
+        <param argument="--seed_kmer" type="integer" value="30" label="Seed kmer size for alignment"/>
+    </xml>
+    <token name="@KMER_OPTIONS@">
+        --mink $mink
+        --maxk $maxk
+        --step $step
+        --inner_mink $inner_mink
+        --inner_step $inner_step
+        --prefix $prefix
+        --min_count $min_count
+        --min_support $min_support
+        --seed_kmer $seed_kmer
+    </token>
+
+    <xml name="filter_options">
+        <param argument="--min_contig" type="integer" value="200" label="Minimum size of contig"/>
+        <param argument="--similar" type="float" value="0.95" label="Similarity for alignment"/>
+        <param argument="--max_mismatch" type="integer" value="3" label="Max mismatch of error correction"/>
+    </xml>
+    <token name="@FILTER_OPTIONS@">
+        --min_contig $min_contig
+        --similar $similar
+        --max_mismatch $max_mismatch
+    </token>
+
+    <xml name="min_pairs_filter">
+        <param argument="--min_pairs" type="integer" value="3" label="Minimum number of pairs"/>
+    </xml>
+    <token name="@MIN_PAIRS_FILTER@">
+        --min_pairs $min_pairs
+    </token>
+    <xml name="other_options">
+        <param name="other" type="select" display="checkboxes" multiple="true" label="Other options">
+            <yield/>
+            <option value="--no_local">Do not use local assembly (--no_local)</option>
+            <option value="--no_coverage">Do not iterate on coverage (--no_coverage)</option>
+            <option value="--no_correct">Do not do correction (--no_correct)</option>
+            <option value="--pre_correction">Perform pre-correction before assembly (--pre_correction)</option>
+        </param>
+    </xml>
+    <token name="@OTHER_OPTIONS@">
+        #if $other:
+            ${" ".join(str($other).split(","))}
+        #end if
+    </token>
+
+    <token name="@THREADS@">
+        --num_threads \${GALAXY_SLOTS:-1}
+    </token>
+
+    <xml name="help" token_more_help="">
+        <help><![CDATA[
+IDBA is an iterative De Bruijn Graph De Novo Assembler for sequence assembly. Most assemblers based on de Bruijn graph build a de Bruijn graph with a specific k-mer size to perform the assembling task. For all of them, it is very crucial to find a specific value of k. If k is too large, there will be a lot of gap problems in the graph. If k is too small, there will be a lot of branch problems. IDBA uses not only one specific k but a range of k values to build the iterative de Bruijn graph. It can keep all the information in graphs with different k values. 
+
+@MORE_HELP@
+
+Input: IDBA-* take interleaved paired end data in the FASTA format as input, 
+i.e. paired-end reads need to be stored in the same FASTA file such that a pair 
+of reads should be in two consecutive lines.
+In Galaxy paired reads in separate FASTQ files can be converted into interleaved 
+FASTA using the tools:  
+
+* `FASTQ interlacer on paired end read <https://toolshed.g2.bx.psu.edu/view/devteam/fastq_paired_end_interlacer>`_
+* `Samtools extract FASTA or FASTQ from a SAM file <https://toolshed.g2.bx.psu.edu/view/devteam/fastq_to_fasta>`_
+
+Note that IDBA-* assume that the paired-end reads are in order (->,<-). 
+If your data is in reverse order (<-,->), please convert it by yourself.
+        ]]></help>
+    </xml>
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1007/978-3-642-12683-3_28</citation>
+            <yield/>
+        </citations>
+    </xml>
+
 </macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/all_fasta.loc	Mon Aug 05 15:59:42 2019 -0400
@@ -0,0 +1,20 @@
+#This file lists the locations and dbkeys of all the fasta files
+#under the "genome" directory (a directory that contains a directory
+#for each build). The script extract_fasta.py will generate the file
+#all_fasta.loc. This file has the format (white space characters are
+#TAB characters):
+#
+#<unique_build_id>	<dbkey>	<display_name>	<file_path>
+#
+#So, all_fasta.loc could look something like this:
+#
+#apiMel3	apiMel3	Honeybee (Apis mellifera): apiMel3	/path/to/genome/apiMel3/apiMel3.fa
+#hg19canon	hg19	Human (Homo sapiens): hg19 Canonical	/path/to/genome/hg19/hg19canon.fa
+#hg19full	hg19	Human (Homo sapiens): hg19 Full	/path/to/genome/hg19/hg19full.fa
+#
+#Your all_fasta.loc file should contain an entry for each individual
+#fasta file. So there will be multiple fasta files for each build,
+#such as with hg19 above.
+#
+test_id	test_dbkey	test display name	${__HERE__}/merged.fa
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/reference.fa	Mon Aug 05 15:59:42 2019 -0400
@@ -0,0 +1,303 @@
+>reference
+GAAAAACGCGCTTGCAGATGGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTT
+GCCAATTTCCAGCAAAGCTTTATCCTGTACCCGAATAACCCCGGCAAACCCCAGAGGGAATTCTCCGCTGATTGTC
+AAAAACGCGCTTGCAGATGGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTA
+TTAGCGTAGCGCCACTGCCAATTTCCAGCAAAGCTTTATCCTGTACCCGAATAACCCCGGCAAACCCCAGAGGGAA
+AAAACGCGCTTGCAGATGGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAG
+ATGTCTGGAAATATAGGGGCAAATCCAGGGTTCTTGTCCCATGATGTTCAAAACTGTCAATATCCTGCATTGTTAG
+AAACGCGCTTGCAGATGGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGT
+CAATATCCTGCATTGTTAGCGTAGCGCCACTGCCAATTTCCAGCAAAGCTTTATCCTGTACCCGAATAACCCCGGC
+AACGCGCTTGCAGATGGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTG
+CCTGCATTGTTAGCGTAGCGCCACTGCCAATTTCCAGCAAAGCTTTATCCTGTACCCGAATAACCCCGGCAAACCC
+ACGCGCTTGCAGATGGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGG
+CCCGAATAACCCCGGCAAACCCCAGAGGGAATTCTCCGCTGATTGTCGTATCTCCATTCAGATATACCGACGACTC
+CGCGCTTGCAGATGGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGT
+AAATATAGGGGCAAATCCAGGGTTCTTGTCCCATGATGTTCAAAACTGTCAATATCCTGCATTGTTAGCGTAGCGC
+GCGCTTGCAGATGGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTG
+TGCCAATTTCCAGCAAAGCTTTATCCTGTACCCGAATAACCCCGGCAAACCCCAGAGGGAATTCTCCGCTGATTGT
+CGCTTGCAGATGGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGA
+GCCAATTTCCAGCAAAGCTTTATCCTGTACCCGAATAACCCCGGCAAACCCCAGAGGGAATTCTCCGCTGATTGTC
+GCTTGCAGATGGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAG
+GGGAATATCTGCGGCAGCGGGCTTGTAATGGGTTAAGTGATAACAGATGTCTGGAAATATAGGGGCAAATCCAGGG
+CTTGCAGATGGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGT
+TTGTTAGCGTAGCGCCACTGCCAATTTCCAGCAAAGCTTTATCCTGTACCCGAATAACCCCGGCAAACCCCAGAGG
+TTGCAGATGGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGTC
+CTTTATCCTGTACCCGAATAACCCCGGCAAACCCCAGAGGGAATTCTCCGCTGATTGTCGTATCTCCATTCAGATA
+TGCAGATGGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGTCG
+CAGGGTTCTTGTCCCATGATGTTCAAAACTGTCAATATCCTGCATTGTTAGCGTAGCGCCACTGCCAATTTCCAGC
+GCAGATGGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGTCGT
+CCACTGCCAATTTCCAGCAAAGCTTTATCCTGTACCCGAATAACCCCGGCAAACCCCAGAGGGAATTCTCCGCTGA
+CAGATGGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGTCGTC
+CTGGAAATATAGGGGCAAATCCAGGGTTCTTGTCCCATGATGTTCAAAACTGTCAATATCCTGCATTGTTAGCGTA
+AGATGGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGTCGTCG
+GTCTGGAAATATAGGGGCAAATCCAGGGTTCTTGTCCCATGATGTTCAAAACTGTCAATATCCTGCATTGTTAGCG
+GATGGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGTCGTCGG
+CATTGTTAGCGTAGCGCCACTGCCAATTTCCAGCAAAGCTTTATCCTGTACCCGAATAACCCCGGCAAACCCCAGA
+ATGGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGTCGTCGGT
+AGGGGCAAATCCAGGGTTCTTGTCCCATGATGTTCAAAACTGTCAATATCCTGCATTGTTAGCGTAGCGCCACTGC
+TGGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGTCGTCGGTA
+CAGCGGGCTTGTAATGGGTTAAGTGATAACAGATGTCTGGAAATATAGGGGCAAATCCAGGGTTCTTGTCCCATGA
+GGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGTCGTCGGTAT
+AGCTTTATCCTGTACCCGAATAACCCCGGCAAACCCCAGAGGGAATTCTCCGCTGATTGTCGTATCTCCATTCAGA
+GGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGTCGTCGGTATA
+TGTCAATATCCTGCATTGTTAGCGTAGCGCCACTGCCAATTTCCAGCAAAGCTTTATCCTGTACCCGAATAACCCC
+GTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGTCGTCGGTATAT
+GTTCAAAACTGTCAATATCCTGCATTGTTAGCGTAGCGCCACTGCCAATTTCCAGCAAAGCTTTATCCTGTACCCG
+TCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGTCGTCGGTATATC
+CAAAGCTTTATCCTGTACCCGAATAACCCCGGCAAACCCCAGAGGGAATTCTCCGCTGATTGTCGTATCTCCATTC
+CGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGTCGTCGGTATATCT
+CATAGTGCGCTGGGTTATCGCTCGCCACGGGAATATCTGCGGCAGCGGGCTTGTAATGGGTTAAGTGATAACAGAT
+GCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGTCGTCGGTATATCTG
+AGCAAAGCTTTATCCTGTACCCGAATAACCCCGGCAAACCCCAGAGGGAATTCTCCGCTGATTGTCGTATCTCCAT
+CAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGTCGTCGGTATATCTGA
+GATGTTCAAAACTGTCAATATCCTGCATTGTTAGCGTAGCGCCACTGCCAATTTCCAGCAAAGCTTTATCCTGTAC
+AGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGTCGTCGGTATATCTGAA
+CCAGCAAAGCTTTATCCTGTACCCGAATAACCCCGGCAAACCCCAGAGGGAATTCTCCGCTGATTGTCGTATCTCC
+GGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGTCGTCGGTATATCTGAAT
+AGCGGGCTTGTAATGGGTTAAGTGATAACAGATGTCTGGAAATATAGGGGCAAATCCAGGGTTCTTGTCCCATGAT
+GTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGTCGTCGGTATATCTGAATG
+TAACAGATGTCTGGAAATATAGGGGCAAATCCAGGGTTCTTGTCCCATGATGTTCAAAACTGTCAATATCCTGCAT
+TTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGTCGTCGGTATATCTGAATGG
+ATGGCATCCGCATAGTGCGCTGGGTTATCGCTCGCCACGGGAATATCTGCGGCAGCGGGCTTGTAATGGGTTAAGT
+TACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGTCGTCGGTATATCTGAATGGA
+AGTGATAACAGATGTCTGGAAATATAGGGGCAAATCCAGGGTTCTTGTCCCATGATGTTCAAAACTGTCAATATCC
+ACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGTCGTCGGTATATCTGAATGGAG
+GCCACGGGAATATCTGCGGCAGCGGGCTTGTAATGGGTTAAGTGATAACAGATGTCTGGAAATATAGGGGCAAATC
+CTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGTCGTCGGTATATCTGAATGGAGA
+TCGCCACGGGAATATCTGCGGCAGCGGGCTTGTAATGGGTTAAGTGATAACAGATGTCTGGAAATATAGGGGCAAA
+TTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGTCGTCGGTATATCTGAATGGAGAT
+TCCTGCATTGTTAGCGTAGCGCCACTGCCAATTTCCAGCAAAGCTTTATCCTGTACCCGAATAACCCCGGCAAACC
+TCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGTCGTCGGTATATCTGAATGGAGATA
+CGCCACGGGAATATCTGCGGCAGCGGGCTTGTAATGGGTTAAGTGATAACAGATGTCTGGAAATATAGGGGCAAAT
+CCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGTCGTCGGTATATCTGAATGGAGATAC
+AATATAGGGGCAAATCCAGGGTTCTTGTCCCATGATGTTCAAAACTGTCAATATCCTGCATTGTTAGCGTAGCGCC
+CTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGTCGTCGGTATATCTGAATGGAGATACG
+TTCAAAACTGTCAATATCCTGCATTGTTAGCGTAGCGCCACTGCCAATTTCCAGCAAAGCTTTATCCTGTACCCGA
+TTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGTCGTCGGTATATCTGAATGGAGATACGA
+AATGGGTTAAGTGATAACAGATGTCTGGAAATATAGGGGCAAATCCAGGGTTCTTGTCCCATGATGTTCAAAACTG
+TGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGTCGTCGGTATATCTGAATGGAGATACGAC
+TTGTCCCATGATGTTCAAAACTGTCAATATCCTGCATTGTTAGCGTAGCGCCACTGCCAATTTCCAGCAAAGCTTT
+GCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGTCGTCGGTATATCTGAATGGAGATACGACA
+GGGTTCTTGTCCCATGATGTTCAAAACTGTCAATATCCTGCATTGTTAGCGTAGCGCCACTGCCAATTTCCAGCAA
+CGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGTCGTCGGTATATCTGAATGGAGATACGACAA
+GCTGGGTTATCGCTCGCCACGGGAATATCTGCGGCAGCGGGCTTGTAATGGGTTAAGTGATAACAGATGTCTGGAA
+GCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTGGTGAGTCGTCGGTATATCTGAATGGAGATACGACAAT
+CGCCACTGCCAATTTCCAGCAAAGCTTTATCCTGTACCCGAATAACCCCGGCAAACCCCAGAGGGAATTCTCCGCT
+AGTGGTGAGTCGTCGGTATATCTGAATGGAGATACGACAATCAGCGGAGAATTCCCTCTGGGGTTTGCCGGGGTTA
+TTCAAAACTGTCAATATCCTGCATTGTTAGCGTAGCGCCACTGCCAATTTCCAGCAAAGCTTTATCCTGTACCCGA
+GTGGTGAGTCGTCGGTATATCTGAATGGAGATACGACAATCAGCGGAGAATTCCCTCTGGGGTTTGCCGGGGTTAT
+GTTAACGGCAGCAAAGAACCTTGCAGAGGCGTTCGAGCATTATAACGAATGGCATCCGCATAGTGCGCTGGGTTAT
+TGGTGAGTCGTCGGTATATCTGAATGGAGATACGACAATCAGCGGAGAATTCCCTCTGGGGTTTGCCGGGGTTATT
+GGCAGCGGGCTTGTAATGGGTTAAGTGATAACAGATGTCTGGAAATATAGGGGCAAATCCAGGGTTCTTGTCCCAT
+GGTGAGTCGTCGGTATATCTGAATGGAGATACGACAATCAGCGGAGAATTCCCTCTGGGGTTTGCCGGGGTTATTC
+TAATGGGTTAAGTGATAACAGATGTCTGGAAATATAGGGGCAAATCCAGGGTTCTTGTCCCATGATGTTCAAAACT
+GTGAGTCGTCGGTATATCTGAATGGAGATACGACAATCAGCGGAGAATTCCCTCTGGGGTTTGCCGGGGTTATTCG
+GCGGCAGCGGGCTTGTAATGGGTTAAGTGATAACAGATGTCTGGAAATATAGGGGCAAATCCAGGGTTCTTGTCCC
+TGAGTCGTCGGTATATCTGAATGGAGATACGACAATCAGCGGAGAATTCCCTCTGGGGTTTGCCGGGGTTATTCGG
+CGGGAATATCTGCGGCAGCGGGCTTGTAATGGGTTAAGTGATAACAGATGTCTGGAAATATAGGGGCAAATCCAGG
+AGTCGTCGGTATATCTGAATGGAGATACGACAATCAGCGGAGAATTCCCTCTGGGGTTTGCCGGGGTTATTCGGGT
+CAGCGGGCTTGTAATGGGTTAAGTGATAACAGATGTCTGGAAATATAGGGGCAAATCCAGGGTTCTTGTCCCATGA
+GTCGTCGGTATATCTGAATGGAGATACGACAATCAGCGGAGAATTCCCTCTGGGGTTTGCCGGGGTTATTCGGGTA
+CGTTCGAGCATTATAACGAATGGCATCCGCATAGTGCGCTGGGTTATCGCTCGCCACGGGAATATCTGCGGCAGCG
+TCGTCGGTATATCTGAATGGAGATACGACAATCAGCGGAGAATTCCCTCTGGGGTTTGCCGGGGTTATTCGGGTAC
+TGCGCTGGGTTATCGCTCGCCACGGGAATATCTGCGGCAGCGGGCTTGTAATGGGTTAAGTGATAACAGATGTCTG
+CGTCGGTATATCTGAATGGAGATACGACAATCAGCGGAGAATTCCCTCTGGGGTTTGCCGGGGTTATTCGGGTACA
+TCCCATGATGTTCAAAACTGTCAATATCCTGCATTGTTAGCGTAGCGCCACTGCCAATTTCCAGCAAAGCTTTATC
+GTCGGTATATCTGAATGGAGATACGACAATCAGCGGAGAATTCCCTCTGGGGTTTGCCGGGGTTATTCGGGTACAG
+AATATAGGGGCAAATCCAGGGTTCTTGTCCCATGATGTTCAAAACTGTCAATATCCTGCATTGTTAGCGTAGCGCC
+TCGGTATATCTGAATGGAGATACGACAATCAGCGGAGAATTCCCTCTGGGGTTTGCCGGGGTTATTCGGGTACAGG
+TGTCCCATGATGTTCAAAACTGTCAATATCCTGCATTGTTAGCGTAGCGCCACTGCCAATTTCCAGCAAAGCTTTA
+CGGTATATCTGAATGGAGATACGACAATCAGCGGAGAATTCCCTCTGGGGTTTGCCGGGGTTATTCGGGTACAGGA
+ACGAATGGCATCCGCATAGTGCGCTGGGTTATCGCTCGCCACGGGAATATCTGCGGCAGCGGGCTTGTAATGGGTT
+GGTATATCTGAATGGAGATACGACAATCAGCGGAGAATTCCCTCTGGGGTTTGCCGGGGTTATTCGGGTACAGGAT
+CTTGTCCCATGATGTTCAAAACTGTCAATATCCTGCATTGTTAGCGTAGCGCCACTGCCAATTTCCAGCAAAGCTT
+GTATATCTGAATGGAGATACGACAATCAGCGGAGAATTCCCTCTGGGGTTTGCCGGGGTTATTCGGGTACAGGATA
+TCTTGTCCCATGATGTTCAAAACTGTCAATATCCTGCATTGTTAGCGTAGCGCCACTGCCAATTTCCAGCAAAGCT
+TATATCTGAATGGAGATACGACAATCAGCGGAGAATTCCCTCTGGGGTTTGCCGGGGTTATTCGGGTACAGGATAA
+CGCATAGTGCGCTGGGTTATCGCTCGCCACGGGAATATCTGCGGCAGCGGGCTTGTAATGGGTTAAGTGATAACAG
+ATATCTGAATGGAGATACGACAATCAGCGGAGAATTCCCTCTGGGGTTTGCCGGGGTTATTCGGGTACAGGATAAA
+ATATAGGGGCAAATCCAGGGTTCTTGTCCCATGATGTTCAAAACTGTCAATATCCTGCATTGTTAGCGTAGCGCCA
+ATCTGAATGGAGATACGACAATCAGCGGAGAATTCCCTCTGGGGTTTGCCGGGGTTATTCGGGTACAGGATAAAGC
+TGCGGCAGCGGGCTTGTAATGGGTTAAGTGATAACAGATGTCTGGAAATATAGGGGCAAATCCAGGGTTCTTGTCC
+GGGTACAGGATAAAGCTTTGCTGGAAATTGGCAGTGGCGCTACGCTAACAATGCAGGATATTGACAGTTTTGAACA
+CATTATAACGAATGGCATCCGCATAGTGCGCTGGGTTATCGCTCGCCACGGGAATATCTGCGGCAGCGGGCTTGTA
+GGTACAGGATAAAGCTTTGCTGGAAATTGGCAGTGGCGCTACGCTAACAATGCAGGATATTGACAGTTTTGAACAT
+TTAACGGCAGCAAAGAACCTTGCAGAGGCGTTCGAGCATTATAACGAATGGCATCCGCATAGTGCGCTGGGTTATC
+TACAGGATAAAGCTTTGCTGGAAATTGGCAGTGGCGCTACGCTAACAATGCAGGATATTGACAGTTTTGAACATCA
+AGAGGCGTTCGAGCATTATAACGAATGGCATCCGCATAGTGCGCTGGGTTATCGCTCGCCACGGGAATATCTGCGG
+ACAGGATAAAGCTTTGCTGGAAATTGGCAGTGGCGCTACGCTAACAATGCAGGATATTGACAGTTTTGAACATCAT
+CATCCGCATAGTGCGCTGGGTTATCGCTCGCCACGGGAATATCTGCGGCAGCGGGCTTGTAATGGGTTAAGTGATA
+CAGGATAAAGCTTTGCTGGAAATTGGCAGTGGCGCTACGCTAACAATGCAGGATATTGACAGTTTTGAACATCATG
+CATTATAACGAATGGCATCCGCATAGTGCGCTGGGTTATCGCTCGCCACGGGAATATCTGCGGCAGCGGGCTTGTA
+AGGATAAAGCTTTGCTGGAAATTGGCAGTGGCGCTACGCTAACAATGCAGGATATTGACAGTTTTGAACATCATGG
+ACCTTGCAGAGGCGTTCGAGCATTATAACGAATGGCATCCGCATAGTGCGCTGGGTTATCGCTCGCCACGGGAATA
+GGATAAAGCTTTGCTGGAAATTGGCAGTGGCGCTACGCTAACAATGCAGGATATTGACAGTTTTGAACATCATGGG
+AGCATTATAACGAATGGCATCCGCATAGTGCGCTGGGTTATCGCTCGCCACGGGAATATCTGCGGCAGCGGGCTTG
+ATAAAGCTTTGCTGGAAATTGGCAGTGGCGCTACGCTAACAATGCAGGATATTGACAGTTTTGAACATCATGGGAC
+ATATCTGCGGCAGCGGGCTTGTAATGGGTTAAGTGATAACAGATGTCTGGAAATATAGGGGCAAATCCAGGGTTCT
+GCTTTGCTGGAAATTGGCAGTGGCGCTACGCTAACAATGCAGGATATTGACAGTTTTGAACATCATGGGACAAGAA
+CAGAGAGCTTCGTGAAAACGATAAAGCGTGACTACATCAGTATCATGCCCAAACCAGACGGGTTAACGGCAGCAAA
+CTTTGCTGGAAATTGGCAGTGGCGCTACGCTAACAATGCAGGATATTGACAGTTTTGAACATCATGGGACAAGAAC
+TATCGCTCGCCACGGGAATATCTGCGGCAGCGGGCTTGTAATGGGTTAAGTGATAACAGATGTCTGGAAATATAGG
+TGCAGGATATTGACAGTTTTGAACATCATGGGACAAGAACCCTGGATTTGCCCCTATATTTCCAGACATCTGTTAT
+ATGCCCAAACCAGACGGGTTAACGGCAGCAAAGAACCTTGCAGAGGCGTTCGAGCATTATAACGAATGGCATCCGC
+ATATTGACAGTTTTGAACATCATGGGACAAGAACCCTGGATTTGCCCCTATATTTCCAGACATCTGTTATCACTTA
+TAACGAATGGCATCCGCATAGTGCGCTGGGTTATCGCTCGCCACGGGAATATCTGCGGCAGCGGGCTTGTAATGGG
+GTTTTGAACATCATGGGACAAGAACCCTGGATTTGCCCCTATATTTCCAGACATCTGTTATCACTTAACCCATTAC
+CGAGCATTATAACGAATGGCATCCGCATAGTGCGCTGGGTTATCGCTCGCCACGGGAATATCTGCGGCAGCGGGCT
+CATCATGGGACAAGAACCCTGGATTTGCCCCTATATTTCCAGACATCTGTTATCACTTAACCCATTACAAGCCCGC
+AGTAACGGAATAGCAGAGAGCTTCGTGAAAACGATAAAGCGTGACTACATCAGTATCATGCCCAAACCAGACGGGT
+CATGGGACAAGAACCCTGGATTTGCCCCTATATTTCCAGACATCTGTTATCACTTAACCCATTACAAGCCCGCTGC
+CTTGCAGAGGCGTTCGAGCATTATAACGAATGGCATCCGCATAGTGCGCTGGGTTATCGCTCGCCACGGGAATATC
+TGGGACAAGAACCCTGGATTTGCCCCTATATTTCCAGACATCTGTTATCACTTAACCCATTACAAGCCCGCTGCCG
+CATCAGTATCATGCCCAAACCAGACGGGTTAACGGCAGCAAAGAACCTTGCAGAGGCGTTCGAGCATTATAACGAA
+GACAAGAACCCTGGATTTGCCCCTATATTTCCAGACATCTGTTATCACTTAACCCATTACAAGCCCGCTGCCGCAG
+GATAAAGCGTGACTACATCAGTATCATGCCCAAACCAGACGGGTTAACGGCAGCAAAGAACCTTGCAGAGGCGTTC
+ACAAGAACCCTGGATTTGCCCCTATATTTCCAGACATCTGTTATCACTTAACCCATTACAAGCCCGCTGCCGCAGA
+CGCCCGGATGTTGGGACTTGAACCGAAGAACACGGCGGTGCGGAGTCCGGAGAGTAACGGAATAGCAGAGAGCTTC
+CAAGAACCCTGGATTTGCCCCTATATTTCCAGACATCTGTTATCACTTAACCCATTACAAGCCCGCTGCCGCAGAT
+GGAGAGTAACGGAATAGCAGAGAGCTTCGTGAAAACGATAAAGCGTGACTACATCAGTATCATGCCCAAACCAGAC
+AAGAACCCTGGATTTGCCCCTATATTTCCAGACATCTGTTATCACTTAACCCATTACAAGCCCGCTGCCGCAGATA
+CGGTGCGGAGTCCGGAGAGTAACGGAATAGCAGAGAGCTTCGTGAAAACGATAAAGCGTGACTACATCAGTATCAT
+TGCCGCAGATATTCCCGTGGCGAGCGATAACCCAGCGCACTATGCGGATGCCATTCGTTATAATGCTCGAACGCCT
+CGCCGCTTCGGCAACGATCTTCCGTCGTCTCCAGTGGAGTGGCTGACGGATAATGGTTCATGCTACCGGGCTAATG
+GCCGCAGATATTCCCGTGGCGAGCGATAACCCAGCGCACTATGCGGATGCCATTCGTTATAATGCTCGAACGCCTC
+TCGTGAAAACGATAAAGCGTGACTACATCAGTATCATGCCCAAACCAGACGGGTTAACGGCAGCAAAGAACCTTGC
+CCGCAGATATTCCCGTGGCGAGCGATAACCCAGCGCACTATGCGGATGCCATTCGTTATAATGCTCGAACGCCTCT
+CGGAGAGTAACGGAATAGCAGAGAGCTTCGTGAAAACGATAAAGCGTGACTACATCAGTATCATGCCCAAACCAGA
+CGCAGATATTCCCGTGGCGAGCGATAACCCAGCGCACTATGCGGATGCCATTCGTTATAATGCTCGAACGCCTCTG
+GGCGGTGCGGAGTCCGGAGAGTAACGGAATAGCAGAGAGCTTCGTGAAAACGATAAAGCGTGACTACATCAGTATC
+GCAGATATTCCCGTGGCGAGCGATAACCCAGCGCACTATGCGGATGCCATTCGTTATAATGCTCGAACGCCTCTGC
+GGCTAATGAAACACGCCAGTTCGCCCGGATGTTGGGACTTGAACCGAAGAACACGGCGGTGCGGAGTCCGGAGAGT
+CAGATATTCCCGTGGCGAGCGATAACCCAGCGCACTATGCGGATGCCATTCGTTATAATGCTCGAACGCCTCTGCA
+GGAGAGTAACGGAATAGCAGAGAGCTTCGTGAAAACGATAAAGCGTGACTACATCAGTATCATGCCCAAACCAGAC
+AGATATTCCCGTGGCGAGCGATAACCCAGCGCACTATGCGGATGCCATTCGTTATAATGCTCGAACGCCTCTGCAA
+CCGTCGTCTCCAGTGGAGTGGCTGACGGATAATGGTTCATGCTACCGGGCTAATGAAACACGCCAGTTCGCCCGGA
+ATATTCCCGTGGCGAGCGATAACCCAGCGCACTATGCGGATGCCATTCGTTATAATGCTCGAACGCCTCTGCAAGG
+CACGGCGGTGCGGAGTCCGGAGAGTAACGGAATAGCAGAGAGCTTCGTGAAAACGATAAAGCGTGACTACATCAGT
+ATTCCCGTGGCGAGCGATAACCCAGCGCACTATGCGGATGCCATTCGTTATAATGCTCGAACGCCTCTGCAAGGTT
+TCCAGTGGAGTGGCTGACGGATAATGGTTCATGCTACCGGGCTAATGAAACACGCCAGTTCGCCCGGATGTTGGGA
+TTCCCGTGGCGAGCGATAACCCAGCGCACTATGCGGATGCCATTCGTTATAATGCTCGAACGCCTCTGCAAGGTTC
+GCAGAGAGCTTCGTGAAAACGATAAAGCGTGACTACATCAGTATCATGCCCAAACCAGACGGGTTAACGGCAGCAA
+TCCCGTGGCGAGCGATAACCCAGCGCACTATGCGGATGCCATTCGTTATAATGCTCGAACGCCTCTGCAAGGTTCT
+GTCCGGAGAGTAACGGAATAGCAGAGAGCTTCGTGAAAACGATAAAGCGTGACTACATCAGTATCATGCCCAAACC
+GTGGCGAGCGATAACCCAGCGCACTATGCGGATGCCATTCGTTATAATGCTCGAACGCCTCTGCAAGGTTCTTTGC
+GAAGAACACGGCGGTGCGGAGTCCGGAGAGTAACGGAATAGCAGAGAGCTTCGTGAAAACGATAAAGCGTGACTAC
+TGGCGAGCGATAACCCAGCGCACTATGCGGATGCCATTCGTTATAATGCTCGAACGCCTCTGCAAGGTTCTTTGCT
+TCGCCCGGATGTTGGGACTTGAACCGAAGAACACGGCGGTGCGGAGTCCGGAGAGTAACGGAATAGCAGAGAGCTT
+GAGCGATAACCCAGCGCACTATGCGGATGCCATTCGTTATAATGCTCGAACGCCTCTGCAAGGTTCTTTGCTGCCG
+CCGCTTCGGCAACGATCTTCCGTCGTCTCCAGTGGAGTGGCTGACGGATAATGGTTCATGCTACCGGGCTAATGAA
+ACCCAGCGCACTATGCGGATGCCATTCGTTATAATGCTCGAACGCCTCTGCAAGGTTCTTTGCTGCCGTTAACCCG
+ACGGCGGTGCGGAGTCCGGAGAGTAACGGAATAGCAGAGAGCTTCGTGAAAACGATAAAGCGTGACTACATCAGTA
+GCACTATGCGGATGCCATTCGTTATAATGCTCGAACGCCTCTGCAAGGTTCTTTGCTGCCGTTAACCCGTCTGGTT
+GAACACGGCGGTGCGGAGTCCGGAGAGTAACGGAATAGCAGAGAGCTTCGTGAAAACGATAAAGCGTGACTACATC
+CGGATGCCATTCGTTATAATGCTCGAACGCCTCTGCAAGGTTCTTTGCTGCCGTTAACCCGTCTGGTTTGGGCATG
+CTTGAACCGAAGAACACGGCGGTGCGGAGTCCGGAGAGTAACGGAATAGCAGAGAGCTTCGTGAAAACGATAAAGC
+GATGCCATTCGTTATAATGCTCGAACGCCTCTGCAAGGTTCTTTGCTGCCGTTAACCCGTCTGGTTTGGGCATGAT
+GCTAATGAAACACGCCAGTTCGCCCGGATGTTGGGACTTGAACCGAAGAACACGGCGGTGCGGAGTCCGGAGAGTA
+TCGTTATAATGCTCGAACGCCTCTGCAAGGTTCTTTGCTGCCGTTAACCCGTCTGGTTTGGGCATGATACTGATGT
+TACCGGCGGCTTCAACAGTGAAACAGTACAGGACGTCATGCTGGGAGCGGTGGAACGCCGCTTCGGCAACGATCTT
+TATAATGCTCGAACGCCTCTGCAAGGTTCTTTGCTGCCGTTAACCCGTCTGGTTTGGGCATGATACTGATGTAGTC
+TAATGGTTCATGCTACCGGGCTAATGAAACACGCCAGTTCGCCCGGATGTTGGGACTTGAACCGAAGAACACGGCG
+TAATGCTCGAACGCCTCTGCAAGGTTCTTTGCTGCCGTTAACCCGTCTGGTTTGGGCATGATACTGATGTAGTCAC
+CTGGGCGGTCACTACCGGCGGCTTCAACAGTGAAACAGTACAGGACGTCATGCTGGGAGCGGTGGAACGCCGCTTC
+CTCGAACGCCTCTGCAAGGTTCTTTGCTGCCGTTAACCCGTCTGGTTTGGGCATGATACTGATGTAGTCACGCTTT
+TCGTCTCCAGTGGAGTGGCTGACGGATAATGGTTCATGCTACCGGGCTAATGAAACACGCCAGTTCGCCCGGATGT
+CTCGAACGCCTCTGCAAGGTTCTTTGCTGCCGTTAACCCGTCTGGTTTGGGCATGATACTGATGTAGTCACGCTTT
+GTGGAACGCCGCTTCGGCAACGATCTTCCGTCGTCTCCAGTGGAGTGGCTGACGGATAATGGTTCATGCTACCGGG
+AACGCCTCTGCAAGGTTCTTTGCTGCCGTTAACCCGTCTGGTTTGGGCATGATACTGATGTAGTCACGCTTTATCG
+GATGTTGGGACTTGAACCGAAGAACACGGCGGTGCGGAGTCCGGAGAGTAACGGAATAGCAGAGAGCTTCGTGAAA
+CCTCTGCAAGGTTCTTTGCTGCCGTTAACCCGTCTGGTTTGGGCATGATACTGATGTAGTCACGCTTTATCGTTTT
+GCCGCTTCGGCAACGATCTTCCGTCGTCTCCAGTGGAGTGGCTGACGGATAATGGTTCATGCTACCGGGCTAATGA
+CTGCAAGGTTCTTTGCTGCCGTTAACCCGTCTGGTTTGGGCATGATACTGATGTAGTCACGCTTTATCGTTTTCAC
+CTACCGGGCTAATGAAACACGCCAGTTCGCCCGGATGTTGGGACTTGAACCGAAGAACACGGCGGTGCGGAGTCCG
+TGCAAGGTTCTTTGCTGCCGTTAACCCGTCTGGTTTGGGCATGATACTGATGTAGTCACGCTTTATCGTTTTCACG
+CCAGTGGAGTGGCTGACGGATAATGGTTCATGCTACCGGGCTAATGAAACACGCCAGTTCGCCCGGATGTTGGGAC
+GCAAGGTTCTTTGCTGCCGTTAACCCGTCTGGTTTGGGCATGATACTGATGTAGTCACGCTTTATCGTTTTCACGA
+GTTCGCCCGGATGTTGGGACTTGAACCGAAGAACACGGCGGTGCGGAGTCCGGAGAGTAACGGAATAGCAGAGAGC
+CAAGGTTCTTTGCTGCCGTTAACCCGTCTGGTTTGGGCATGATACTGATGTAGTCACGCTTTATCGTTTTCACGAA
+CGGTCACTACCGGCGGCTTCAACAGTGAAACAGTACAGGACGTCATGCTGGGAGCGGTGGAACGCCGCTTCGGCAA
+AAGGTTCTTTGCTGCCGTTAACCCGTCTGGTTTGGGCATGATACTGATGTAGTCACGCTTTATCGTTTTCACGAAG
+GAGTGGCTGACGGATAATGGTTCATGCTACCGGGCTAATGAAACACGCCAGTTCGCCCGGATGTTGGGACTTGAAC
+AGGTTCTTTGCTGCCGTTAACCCGTCTGGTTTGGGCATGATACTGATGTAGTCACGCTTTATCGTTTTCACGAAGC
+GACGGATAATGGTTCATGCTACCGGGCTAATGAAACACGCCAGTTCGCCCGGATGTTGGGACTTGAACCGAAGAAC
+GGTTCTTTGCTGCCGTTAACCCGTCTGGTTTGGGCATGATACTGATGTAGTCACGCTTTATCGTTTTCACGAAGCT
+CTTCCGTCGTCTCCAGTGGAGTGGCTGACGGATAATGGTTCATGCTACCGGGCTAATGAAACACGCCAGTTCGCCC
+GTTCTTTGCTGCCGTTAACCCGTCTGGTTTGGGCATGATACTGATGTAGTCACGCTTTATCGTTTTCACGAAGCTC
+GGCTGACGGATAATGGTTCATGCTACCGGGCTAATGAAACACGCCAGTTCGCCCGGATGTTGGGACTTGAACCGAA
+TTCTTTGCTGCCGTTAACCCGTCTGGTTTGGGCATGATACTGATGTAGTCACGCTTTATCGTTTTCACGAAGCTCT
+ACGCCAGTTCGCCCGGATGTTGGGACTTGAACCGAAGAACACGGCGGTGCGGAGTCCGGAGAGTAACGGAATAGCA
+TCTTTGCTGCCGTTAACCCGTCTGGTTTGGGCATGATACTGATGTAGTCACGCTTTATCGTTTTCACGAAGCTCTC
+GATCTTCCGTCGTCTCCAGTGGAGTGGCTGACGGATAATGGTTCATGCTACCGGGCTAATGAAACACGCCAGTTCG
+TAGTCACGCTTTATCGTTTTCACGAAGCTCTCTGCTATTCCGTTACTCTCCGGACTCCGCACCGCCGTGTTCTTCG
+CTACCGGCGGCTTCAACAGTGAAACAGTACAGGACGTCATGCTGGGAGCGGTGGAACGCCGCTTCGGCAACGATCT
+AGTCACGCTTTATCGTTTTCACGAAGCTCTCTGCTATTCCGTTACTCTCCGGACTCCGCACCGCCGTGTTCTTCGG
+CGTTCGCGCTGGACTGCTGTGATCGTGAGGCACTGCACTGGGCGGTCACTACCGGCGGCTTCAACAGTGAAACAGT
+ACGCTTTATCGTTTTCACGAAGCTCTCTGCTATTCCGTTACTCTCCGGACTCCGCACCGCCGTGTTCTTCGGTTCA
+CGTGTCACGTTCGCGCTGGACTGCTGTGATCGTGAGGCACTGCACTGGGCGGTCACTACCGGCGGCTTCAACAGTG
+ATCGTTTTCACGAAGCTCTCTGCTATTCCGTTACTCTCCGGACTCCGCACCGCCGTGTTCTTCGGTTCAAGTCCCA
+GTCGTCTCCAGTGGAGTGGCTGACGGATAATGGTTCATGCTACCGGGCTAATGAAACACGCCAGTTCGCCCGGATG
+ATCGTTTTCACGAAGCTCTCTGCTATTCCGTTACTCTCCGGACTCCGCACCGCCGTGTTCTTCGGTTCAAGTCCCA
+TGATCGTGAGGCACTGCACTGGGCGGTCACTACCGGCGGCTTCAACAGTGAAACAGTACAGGACGTCATGCTGGGA
+GTTTTCACGAAGCTCTCTGCTATTCCGTTACTCTCCGGACTCCGCACCGCCGTGTTCTTCGGTTCAAGTCCCAACA
+TCCGTCGTCTCCAGTGGAGTGGCTGACGGATAATGGTTCATGCTACCGGGCTAATGAAACACGCCAGTTCGCCCGG
+TTTTCACGAAGCTCTCTGCTATTCCGTTACTCTCCGGACTCCGCACCGCCGTGTTCTTCGGTTCAAGTCCCAACAT
+CGGTCACTACCGGCGGCTTCAACAGTGAAACAGTACAGGACGTCATGCTGGGAGCGGTGGAACGCCGCTTCGGCAA
+TTTTCACGAAGCTCTCTGCTATTCCGTTACTCTCCGGACTCCGCACCGCCGTGTTCTTCGGTTCAAGTCCCAACAT
+CTTCAACAGTGAAACAGTACAGGACGTCATGCTGGGAGCGGTGGAACGCCGCTTCGGCAACGATCTTCCGTCGTCT
+TTTTCACGAAGCTCTCTGCTATTCCGTTACTCTCCGGACTCCGCACCGCCGTGTTCTTCGGTTCAAGTCCCAACAT
+CTGCTGTGATCGTGAGGCACTGCACTGGGCGGTCACTACCGGCGGCTTCAACAGTGAAACAGTACAGGACGTCATG
+CACGAAGCTCTCTGCTATTCCGTTACTCTCCGGACTCCGCACCGCCGTGTTCTTCGGTTCAAGTCCCAACATCCGG
+CTGGGAGCGGTGGAACGCCGCTTCGGCAACGATCTTCCGTCGTCTCCAGTGGAGTGGCTGACGGATAATGGTTCAT
+AGCTCTCTGCTATTCCGTTACTCTCCGGACTCCGCACCGCCGTGTTCTTCGGTTCAAGTCCCAACATCCGGGCGAA
+CGGTGGAACGCCGCTTCGGCAACGATCTTCCGTCGTCTCCAGTGGAGTGGCTGACGGATAATGGTTCATGCTACCG
+AGCTCTCTGCTATTCCGTTACTCTCCGGACTCCGCACCGCCGTGTTCTTCGGTTCAAGTCCCAACATCCGGGCGAA
+ACTGCACTGGGCGGTCACTACCGGCGGCTTCAACAGTGAAACAGTACAGGACGTCATGCTGGGAGCGGTGGAACGC
+CTCTCTGCTATTCCGTTACTCTCCGGACTCCGCACCGCCGTGTTCTTCGGTTCAAGTCCCAACATCCGGGCGAACT
+CGGCAACGATCTTCCGTCGTCTCCAGTGGAGTGGCTGACGGATAATGGTTCATGCTACCGGGCTAATGAAACACGC
+TCTCTGCTATTCCGTTACTCTCCGGACTCCGCACCGCCGTGTTCTTCGGTTCAAGTCCCAACATCCGGGCGAACTG
+TCGGCAACGATCTTCCGTCGTCTCCAGTGGAGTGGCTGACGGATAATGGTTCATGCTACCGGGCTAATGAAACACG
+CTCTGCTATTCCGTTACTCTCCGGACTCCGCACCGCCGTGTTCTTCGGTTCAAGTCCCAACATCCGGGCGAACTGG
+CCGGCGGCTTCAACAGTGAAACAGTACAGGACGTCATGCTGGGAGCGGTGGAACGCCGCTTCGGCAACGATCTTCC
+TCTGCTATTCCGTTACTCTCCGGACTCCGCACCGCCGTGTTCTTCGGTTCAAGTCCCAACATCCGGGCGAACTGGC
+GTACAGGACGTCATGCTGGGAGCGGTGGAACGCCGCTTCGGCAACGATCTTCCGTCGTCTCCAGTGGAGTGGCTGA
+CTGCTATTCCGTTACTCTCCGGACTCCGCACCGCCGTGTTCTTCGGTTCAAGTCCCAACATCCGGGCGAACTGGCG
+AACAGTGAAACAGTACAGGACGTCATGCTGGGAGCGGTGGAACGCCGCTTCGGCAACGATCTTCCGTCGTCTCCAG
+GGACTCCGCACCGCCGTGTTCTTCGGTTCAAGTCCCAACATCCGGGCGAACTGGCGTGTTTCATTAGCCCGGTAGC
+CGGCGGCTTCAACAGTGAAACAGTACAGGACGTCATGCTGGGAGCGGTGGAACGCCGCTTCGGCAACGATCTTCCG
+GGACTCCGCACCGCCGTGTTCTTCGGTTCAAGTCCCAACATCCGGGCGAACTGGCGTGTTTCATTAGCCCGGTAGC
+AACAGTGAAACAGTACAGGACGTCATGCTGGGAGCGGTGGAACGCCGCTTCGGCAACGATCTTCCGTCGTCTCCAG
+GACTCCGCACCGCCGTGTTCTTCGGTTCAAGTCCCAACATCCGGGCGAACTGGCGTGTTTCATTAGCCCGGTAGCA
+ACTGCTGTGATCGTGAGGCACTGCACTGGGCGGTCACTACCGGCGGCTTCAACAGTGAAACAGTACAGGACGTCAT
+CCGTGTTCTTCGGTTCAAGTCCCAACATCCGGGCGAACTGGCGTGTTTCATTAGCCCGGTAGCATGAACCATTATC
+TTCGCGCTGGACTGCTGTGATCGTGAGGCACTGCACTGGGCGGTCACTACCGGCGGCTTCAACAGTGAAACAGTAC
+GTGTTCTTCGGTTCAAGTCCCAACATCCGGGCGAACTGGCGTGTTTCATTAGCCCGGTAGCATGAACCATTATCCG
+GTGAAACAGTACAGGACGTCATGCTGGGAGCGGTGGAACGCCGCTTCGGCAACGATCTTCCGTCGTCTCCAGTGGA
+TTCTTCGGTTCAAGTCCCAACATCCGGGCGAACTGGCGTGTTTCATTAGCCCGGTAGCATGAACCATTATCCGTCA
+CTACCGGCGGCTTCAACAGTGAAACAGTACAGGACGTCATGCTGGGAGCGGTGGAACGCCGCTTCGGCAACGATCT
+GGTTCAAGTCCCAACATCCGGGCGAACTGGCGTGTTTCATTAGCCCGGTAGCATGAACCATTATCCGTCAGCCACT
+GCTGTGATCGTGAGGCACTGCACTGGGCGGTCACTACCGGCGGCTTCAACAGTGAAACAGTACAGGACGTCATGCT
+TTCAAGTCCCAACATCCGGGCGAACTGGCGTGTTTCATTAGCCCGGTAGCATGAACCATTATCCGTCAGCCACTCC
+CGTGAGGCACTGCACTGGGCGGTCACTACCGGCGGCTTCAACAGTGAAACAGTACAGGACGTCATGCTGGGAGCGG
+CCGGGCGAACTGGCGTGTTTCATTAGCCCGGTAGCATGAACCATTATCCGTCAGCCACTCCACTGGAGACGACGGA
+GAGACTGCGTGTCACGTTCGCGCTGGACTGCTGTGATCGTGAGGCACTGCACTGGGCGGTCACTACCGGCGGCTTC
+CGGGCGAACTGGCGTGTTTCATTAGCCCGGTAGCATGAACCATTATCCGTCAGCCACTCCACTGGAGACGACGGAA
+TGAGGCACTGCACTGGGCGGTCACTACCGGCGGCTTCAACAGTGAAACAGTACAGGACGTCATGCTGGGAGCGGTG
+AACTGGCGTGTTTCATTAGCCCGGTAGCATGAACCATTATCCGTCAGCCACTCCACTGGAGACGACGGAAGATCGT
+GATCGTGAGGCACTGCACTGGGCGGTCACTACCGGCGGCTTCAACAGTGAAACAGTACAGGACGTCATGCTGGGAG
+ACTGGCGTGTTTCATTAGCCCGGTAGCATGAACCATTATCCGTCAGCCACTCCACTGGAGACGACGGAAGATCGTT
+AGAGAGACTGCGTGTCACGTTCGCGCTGGACTGCTGTGATCGTGAGGCACTGCACTGGGCGGTCACTACCGGCGGC
+CTGGCGTGTTTCATTAGCCCGGTAGCATGAACCATTATCCGTCAGCCACTCCACTGGAGACGACGGAAGATCGTTG
+TGGGCGGTCACTACCGGCGGCTTCAACAGTGAAACAGTACAGGACGTCATGCTGGGAGCGGTGGAACGCCGCTTCG
+CTGGCGTGTTTCATTAGCCCGGTAGCATGAACCATTATCCGTCAGCCACTCCACTGGAGACGACGGAAGATCGTTG
+TGTCACGTTCGCGCTGGACTGCTGTGATCGTGAGGCACTGCACTGGGCGGTCACTACCGGCGGCTTCAACAGTGAA
+TGGCGTGTTTCATTAGCCCGGTAGCATGAACCATTATCCGTCAGCCACTCCACTGGAGACGACGGAAGATCGTTGC
+CTGCACTGGGCGGTCACTACCGGCGGCTTCAACAGTGAAACAGTACAGGACGTCATGCTGGGAGCGGTGGAACGCC
+CCGGTAGCATGAACCATTATCCGTCAGCCACTCCACTGGAGACGACGGAAGATCGTTGCCGAAGCGGCGTTCCACC
+GATCGTGAGGCACTGCACTGGGCGGTCACTACCGGCGGCTTCAACAGTGAAACAGTACAGGACGTCATGCTGGGAG
+CATGAACCATTATCCGTCAGCCACTCCACTGGAGACGACGGAAGATCGTTGCCGAAGCGGCGTTCCACCGCTCCCA
+CTGCTGTGATCGTGAGGCACTGCACTGGGCGGTCACTACCGGCGGCTTCAACAGTGAAACAGTACAGGACGTCATG
+ATGAACCATTATCCGTCAGCCACTCCACTGGAGACGACGGAAGATCGTTGCCGAAGCGGCGTTCCACCGCTCCCAG
+TAACGGAGAGAGACTGCGTGTCACGTTCGCGCTGGACTGCTGTGATCGTGAGGCACTGCACTGGGCGGTCACTACC
+TGAACCATTATCCGTCAGCCACTCCACTGGAGACGACGGAAGATCGTTGCCGAAGCGGCGTTCCACCGCTCCCAGC
+AACGGAGAGAGACTGCGTGTCACGTTCGCGCTGGACTGCTGTGATCGTGAGGCACTGCACTGGGCGGTCACTACCG
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/all_fasta.loc.sample	Mon Aug 05 15:59:42 2019 -0400
@@ -0,0 +1,18 @@
+#This file lists the locations and dbkeys of all the fasta files
+#under the "genome" directory (a directory that contains a directory
+#for each build). The script extract_fasta.py will generate the file
+#all_fasta.loc. This file has the format (white space characters are
+#TAB characters):
+#
+#<unique_build_id>	<dbkey>	<display_name>	<file_path>
+#
+#So, all_fasta.loc could look something like this:
+#
+#apiMel3	apiMel3	Honeybee (Apis mellifera): apiMel3	/path/to/genome/apiMel3/apiMel3.fa
+#hg19canon	hg19	Human (Homo sapiens): hg19 Canonical	/path/to/genome/hg19/hg19canon.fa
+#hg19full	hg19	Human (Homo sapiens): hg19 Full	/path/to/genome/hg19/hg19full.fa
+#
+#Your all_fasta.loc file should contain an entry for each individual
+#fasta file. So there may be multiple fasta files for each build,
+#such as with hg19 above.
+#
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Mon Aug 05 15:59:42 2019 -0400
@@ -0,0 +1,8 @@
+<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc. -->
+<tables>
+    <!-- all_fasta: locations of all fasta files under the genome directory, with columns value, dbkey, name and path, read from tool-data/all_fasta.loc -->
+    <table name="all_fasta" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/all_fasta.loc" />
+    </table>
+</tables>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test	Mon Aug 05 15:59:42 2019 -0400
@@ -0,0 +1,8 @@
+<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc. -->
+<tables>
+    <!-- all_fasta (test configuration): locations of all fasta files, with columns value, dbkey, name and path, read from the test-data copy of all_fasta.loc -->
+    <table name="all_fasta" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, dbkey, name, path</columns>
+        <file path="${__HERE__}/test-data/all_fasta.loc" />
+    </table>
+</tables>