changeset 3:ec3c4654eacc draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/qiime/ commit a831282140ce160035a4ce984f48cc20198ed0a1
author iuc
date Thu, 22 Jun 2017 06:57:54 -0400
parents 70206002b220
children 01151a09513f
files assign_taxonomy.xml generate_test_data.sh macros.xml test-data/assign_taxonomy/mothur_id_to_taxonomy.txt test-data/assign_taxonomy/mothur_repr_set_seqs.fasta test-data/assign_taxonomy/sortmerna_input_seqs.fasta test-data/assign_taxonomy/sortmerna_map.blast test-data/assign_taxonomy/sortmerna_taxonomic_assignation.txt test-data/assign_taxonomy/uclust_taxonomic_assignation.txt
diffstat 9 files changed, 174 insertions(+), 104 deletions(-) [+]
line wrap: on
line diff
--- a/assign_taxonomy.xml	Fri May 19 04:09:30 2017 -0400
+++ b/assign_taxonomy.xml	Thu Jun 22 06:57:54 2017 -0400
@@ -4,7 +4,7 @@
         <import>macros.xml</import>
     </macros>
     <expand macro="requirements">
-        <requirement type="package" version="2.0.2">rdptools</requirement>
+        <!--<requirement type="package" version="2.0.2">rdptools</requirement>-->
         <requirement type="package" version="2.2.22">blast-legacy</requirement>
         <requirement type="package" version="2.3.4">vsearch</requirement>
         <requirement type="package" version="1.36.1">mothur</requirement>
@@ -13,8 +13,10 @@
     <command detect_errors="aggressive"><![CDATA[
         assign_taxonomy.py
             --input_fasta_fp '$input_fasta_fp'
-            #if $id_to_taxonomy_fp
-                --id_to_taxonomy_fp '$id_to_taxonomy_fp'
+            #if $id_to_taxonomy_condition.source_selector == 'history'
+                --id_to_taxonomy_fp '$id_to_taxonomy_condition.id_to_taxonomy_fp'
+            #else if $id_to_taxonomy_condition.source_selector == 'cached'
+                --id_to_taxonomy_fp '$id_to_taxonomy_condition.id_to_taxonomy_fp.fields.path'
             #end if
             --assignment_method '$methodcond.assignment_method'
             #if $methodcond.assignment_method == "uclust"
@@ -22,12 +24,18 @@
                 --similarity '$methodcond.similarity'
                 --uclust_max_accepts '$methodcond.uclust_max_accepts'
             #else if $methodcond.assignment_method == "rdp"
-                #if $methodcond.reference_seqs_fp
-                    --reference_seqs_fp '$methodcond.reference_seqs_fp'
+                #if $methodcond.references.source_selector == 'history'
+                    --reference_seqs_fp '$methodcond.references.reference_seqs_fp'
+                #else if $methodcond.references.source_selector == 'cached'
+                    --reference_seqs_fp '$methodcond.references.reference_seqs_fp.fields.path'
                 #end if
                 --confidence '$methodcond.confidence'
             #else if $methodcond.assignment_method == "blast"
-                --reference_seqs_fp '$methodcond.reference_seqs_fp'
+                #if $methodcond.references.source_selector == 'history'
+                    --reference_seqs_fp '$methodcond.references.reference_seqs_fp'
+                #else if $methodcond.references.source_selector == 'cached'
+                    --reference_seqs_fp '$methodcond.references.reference_seqs_fp.fields.path'
+                #end if
                 --blast_e_value '$methodcond.blast_e_value'
             #else if $methodcond.assignment_method == "rtax"
                 --read_1_seqs_fp '$methodcond.read_1_seqs_fp'
@@ -38,6 +46,11 @@
                 --amplicon_id_regex '$methodcond.amplicon_id_regex'
                 --header_id_rege '$methodcond.header_id_regex'
             #else if $methodcond.assignment_method == "mothur"
+                #if $methodcond.references.source_selector == 'history'
+                    --reference_seqs_fp '$methodcond.references.reference_seqs_fp'
+                #else if $methodcond.references.source_selector == 'cached'
+                    --reference_seqs_fp '$methodcond.references.reference_seqs_fp.fields.path'
+                #end if
                 --confidence '$methodcond.confidence'
             #else if $methodcond.assignment_method == "sortmerna"
                 --sortmerna_threads \${GALAXY_SLOTS:-1}
@@ -54,15 +67,32 @@
     ]]></command>
     <inputs>
         <param argument="--input_fasta_fp" type="data" format="fasta" label="Input fasta file" />
-        <param argument="--id_to_taxonomy_fp" label="Tab-delimited file mapping sequences to assigned taxonomy"  type="data" format="tabular" help="Each assigned taxonomy is provided as a semicolon-separated list. For assignment with rdp, each assigned taxonomy must be exactly 6 levels deep" optional="True"/>
+
+        <conditional name="id_to_taxonomy_condition">
+            <param name="source_selector" type="select" label="Do you want to use a taxonomy reference ?">
+                <option value="cached">Yes (from the local cache)</option>
+                <option value="history">Yes (from the active history)</option>
+                <option value="void" selected="true">No</option>
+            </param>
+            <when value="cached">
+                <param argument="--id_to_taxonomy_fp" label="Tab-delimited file mapping sequences to assigned taxonomy" type="select" help="Each assigned taxonomy is provided as a semicolon-separated list. For assignment with rdp, each assigned taxonomy must be exactly 6 levels deep">
+                    <options from_data_table="qiime_taxonomy"/>
+                </param>
+            </when>
+            <when value="history">
+                <param argument="--id_to_taxonomy_fp" label="Tab-delimited file mapping sequences to assigned taxonomy"  type="data" format="tabular" help="Each assigned taxonomy is provided as a semicolon-separated list. For assignment with rdp, each assigned taxonomy must be exactly 6 levels deep"/>
+            </when>
+            <when value="void"/>
+        </conditional>
+
         <conditional name="methodcond">
             <param argument="--assignment_method" label="Taxon assignment method" type="select">
                 <option selected="True" value="uclust">uclust</option>
-                <!--<option value="rdp">rdp</option>
+                <!--<option value="rdp">rdp</option>-->
                 <option value="blast">blast</option>
-                <option value="rtax">rtax</option>
+                <!--<option value="rtax">rtax</option>-->
                 <option value="mothur">mothur</option>
-                <option value="sortmerna">sortmerna</option>-->
+                <option value="sortmerna">sortmerna</option>
             </param>
             <when value="uclust">
                 <param argument="--min_consensus_fraction" type="float" value="0.51" label="Minimum fraction of database hits that must have a specific taxonomic assignment to assign that taxonomy to a query"/>
@@ -70,11 +100,11 @@
                 <param argument="uclust_max_accepts" type="integer" value="3" label="Number of database hits to consider when making an assignment"/>
             </when>
             <when value="rdp">
-                <param argument="--reference_seqs_fp" label="Reference sequences used as training sequences for the classifier" type="data" format="fasta" optional="True"/>
+                <expand macro="assign_taxonomy_reference_source"/>
                 <param argument="--confidence" type="float" value="0.5" label="Minimum confidence to record an assignment"/>
             </when>
             <when value="blast">
-                <param argument="--reference_seqs_fp" label="Reference sequences used to generate a blast database" type="data" format="fasta" optional="True"/>
+                <expand macro="assign_taxonomy_reference_source"/>
                 <param argument="--blast_e_value" type="float" value="0.001" label="Maximum e-value to record an assignment"/>
             </when>
             <when value="rtax">
@@ -87,10 +117,11 @@
                 <param argument="--header_id_regex" type="text" value="\S+\s+(\S+?)\/" label="Regex used to parse the result of split_libraries, to get the portion of the header that RTAX uses to match mate pairs" help="The default uses the amplicon ID, not including /1 or /3, as the primary key for the query sequences. Typically this regex will be the same as amplicon_id_regex, except that only the second group is captured" />
             </when>
             <when value="mothur">
+                <expand macro="assign_taxonomy_reference_source"/>
                 <param argument="--confidence" type="float" value="0.5" label="Minimum confidence to record an assignment"/>
             </when>
             <when value="sortmerna">
-                <param argument="--sortmerna_db" type="data" format="fasta" label="Pre-existing database to search against" optional="True"/>
+                <!--<param argument="- -sortmerna_db" type="data" format="fasta" label="Pre-existing database to search against" optional="True"/>-->
                 <param argument="--min_consensus_fraction" type="float" value="0.51" label="Minimum fraction of database hits that must have a specific taxonomic assignment to assign that taxonomy to a query"/>
                 <param argument="--similarity" type="float" value="0.9" label="Minimum percent similarity (expressed as a fraction between 0 and 1) to consider a database match a hit"/>
                 <param argument="--sortmerna_e_value" type="float" value="1.0" label="Maximum E-value when clustering"/>
@@ -100,20 +131,28 @@
         </conditional>
     </inputs>
     <outputs>
-        <data name="log" format="txt" from_work_dir="assign_taxonomy/*_assignments.log" label="${tool.name} on ${on_string}: Log"/>
+        <data name="log" format="txt" from_work_dir="assign_taxonomy/*_assignments.log" label="${tool.name} on ${on_string}: Log">
+            <filter>methodcond['assignment_method']!="mothur"</filter>
+        </data>
         <data name="tax_assignments" format="txt" from_work_dir="assign_taxonomy/*.txt" label="${tool.name} on ${on_string}: Taxonomic assignment"/>
         <data name="sortmerna_map" format="tabular" from_work_dir="assign_taxonomy/sortmerna_map.blast" label="${tool.name} on ${on_string}: SortMeRNA Blast">
             <filter>methodcond['assignment_method']=="sortmerna"</filter>
         </data>
     </outputs>
     <tests>
+        <!-- Uclust assignment method -->
         <test>
             <param name="input_fasta_fp" value="assign_taxonomy/uclust_input_seqs.fasta"/>
-            <param name="assignment_method" value="uclust"/>
-            <param name="min_consensus_fraction" value="0.51"/>
-            <param name="similarity" value="0.9"/>
-            <param name="uclust_max_accepts" value="3" />
-            <output name="tax_assignments" value="assign_taxonomy/uclust_taxonomic_assignation.txt"/>
+            <conditional name="id_to_taxonomy_condition">
+                <param name="source_selector" value="void" />
+            </conditional>
+            <conditional name="methodcond">
+                <param name="assignment_method" value="uclust"/>
+                <param name="min_consensus_fraction" value="0.51"/>
+                <param name="similarity" value="0.9"/>
+                <param name="uclust_max_accepts" value="3" />
+            </conditional>
+            <output name="tax_assignments" md5="57b0cf51fc0142f369134ea923d78d99"/>
             <output name="log">
                 <assert_contents>
                     <has_text text="UclustConsensusTaxonAssigner" />
@@ -121,22 +160,73 @@
                 </assert_contents>
             </output>
         </test>
+        <!-- Mothur assignment method -->
+        <!-- Note: there is variability in the assignment results with this method so the md5 checksum comparison is not possible -->
+        <test>
+            <param name="input_fasta_fp" value="assign_taxonomy/mothur_repr_set_seqs.fasta"/>
+            <conditional name="id_to_taxonomy_condition">
+                <param name="source_selector" value="history" />
+                <param name="id_to_taxonomy_fp" value="assign_taxonomy/mothur_id_to_taxonomy.txt"/>
+            </conditional>
+            <conditional name="methodcond">
+                <param name="assignment_method" value="mothur"/>
+                <conditional name="references">
+                    <param name="source_selector" value="history" />
+                    <param name="reference_seqs_fp" value="assign_taxonomy/mothur_ref_seq_set.fna" />
+                </conditional>
+                <param name="confidence" value="0.5"/>
+            </conditional>
+            <output name="tax_assignments">
+                <assert_contents>
+                    <has_text text="X67228" />
+                    <has_text text="Rhizobium" />
+                    <has_text text="EF503697" />
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Blast assignment method -->
+        <test>
+            <param name="input_fasta_fp" value="assign_taxonomy/mothur_repr_set_seqs.fasta"/>
+            <conditional name="id_to_taxonomy_condition">
+                <param name="source_selector" value="history" />
+                <param name="id_to_taxonomy_fp" value="assign_taxonomy/mothur_id_to_taxonomy.txt"/>
+            </conditional>
+            <conditional name="methodcond">
+                <param name="assignment_method" value="blast"/>
+                <conditional name="references">
+                    <param name="source_selector" value="history" />
+                    <param name="reference_seqs_fp" value="assign_taxonomy/mothur_ref_seq_set.fna" />
+                </conditional>
+                <param name="blast_e_value" value="0.001"/>
+            </conditional>
+            <output name="tax_assignments" md5="5ab8d28f67bcbf828937d222b2ab9c6e"/>
+            <output name="log">
+                <assert_contents>
+                    <has_text text="BlastTaxonAssigner" />
+                    <has_text text="inspected: 2" />
+                </assert_contents>
+            </output>
+        </test>
+        <!-- SortMeRNA assignment method -->
+        <!-- Note: the input file has been reduced to a single sequence, but this test still takes more than 10 minutes to run -->
         <!--<test>
-            <param name="input_fasta_fp" value="assign_taxonomy/mothur_ref_seq_set.fna"/>
-            <param name="assignment_method" value="sortmerna"/>
-            <param name="min_consensus_fraction" value="0.51" />
-            <param name="similarity" value="0.9" />
-            <param name="sortmerna_e_value" value="1.0" />
-            <param name="sortmerna_coverage" value="0.9" />
-            <param name="sortmerna_best_N_alignments" value="5" />
+            <param name="input_fasta_fp" value="assign_taxonomy/sortmerna_input_seqs.fasta"/>
+            <conditional name="methodcond">
+                <param name="assignment_method" value="sortmerna"/>
+                <param name="min_consensus_fraction" value="0.51" />
+                <param name="similarity" value="0.9" />
+                <param name="sortmerna_e_value" value="1.0" />
+                <param name="sortmerna_coverage" value="0.9" />
+                <param name="sortmerna_best_N_alignments" value="5" />
+            </conditional>
             <output name="log">
                 <assert_contents>
                     <has_text text="Application:SortMeRNA" />
                     <has_text text="min_consensus_fraction" />
                 </assert_contents>
             </output>
-            <output name="tax_assignments" value="assign_taxonomy/sortmerna_taxonomic_assignation.txt"/>
-            <output name="sortmerna_map" value="assign_taxonomy/sortmerna_map.blast"/>
+            <output name="tax_assignments" md5="0da68ab9762b677a00f34051eadad68c"/>
+            <output name="sortmerna_map" md5="16e349be29f121fca741d6294f79ce7c"/>
         </test>-->
     </tests>
     <help><![CDATA[
--- a/generate_test_data.sh	Fri May 19 04:09:30 2017 -0400
+++ b/generate_test_data.sh	Thu Jun 22 06:57:54 2017 -0400
@@ -92,9 +92,32 @@
     --similarity '0.9' \
     --uclust_max_accepts '3' \
     -o assign_taxonomy_uclust
-cp assign_taxonomy_uclust/uclust_input_seqs_tax_assignments.txt 'test-data/assign_taxonomy/uclust_taxonomic_assignation.txt'
+ls assign_taxonomy_uclust
+md5sum 'assign_taxonomy_uclust/uclust_input_seqs_tax_assignments.txt'
 rm -rf assign_taxonomy_uclust
 
+assign_taxonomy.py \
+    --input_fasta_fp 'test-data/assign_taxonomy/mothur_repr_set_seqs.fasta' \
+    --id_to_taxonomy_fp 'test-data/assign_taxonomy/mothur_id_to_taxonomy.txt' \
+    --assignment_method 'mothur' \
+    --reference_seqs_fp 'test-data/assign_taxonomy/mothur_ref_seq_set.fna' \
+    --confidence '0.5' \
+    -o assign_taxonomy_mothur
+ls assign_taxonomy_mothur
+md5sum 'assign_taxonomy_mothur/mothur_repr_set_seqs_tax_assignments.txt'
+rm -rf assign_taxonomy_mothur
+
+assign_taxonomy.py \
+    --input_fasta_fp 'test-data/assign_taxonomy/mothur_repr_set_seqs.fasta' \
+    --id_to_taxonomy_fp 'test-data/assign_taxonomy/mothur_id_to_taxonomy.txt' \
+    --assignment_method 'blast' \
+    --reference_seqs_fp 'test-data/assign_taxonomy/mothur_ref_seq_set.fna' \
+    --blast_e_value '0.001' \
+    -o assign_taxonomy_blast
+ls assign_taxonomy_blast
+md5sum 'assign_taxonomy_blast/mothur_repr_set_seqs_tax_assignments.txt'
+rm -rf assign_taxonomy_blast
+
 #assign_taxonomy.py \
 #    --input_fasta_fp 'test-data/assign_taxonomy/rdp_input_seqs.fasta' \
 #    --id_to_taxonomy_fp 'test-data/assign_taxonomy/rdp_id_to_taxonomy.txt' \
@@ -116,14 +139,6 @@
 #    -o assign_taxonomy_rtax
 #ls assign_taxonomy_rtax
 
-#assign_taxonomy.py \
-#    --input_fasta_fp 'test-data/assign_taxonomy/mothur_ref_seq_set.fna' \
-#    --id_to_taxonomy_fp 'test-data/assign_taxonomy/mothur_id_to_taxonomy.txt' \
-#    --assignment_method 'mothur' \
-#    --confidence 0.5  \
-#    -o assign_taxonomy_mothur
-#ls assign_taxonomy_mothur
-
 assign_taxonomy.py \
     --input_fasta_fp 'test-data/assign_taxonomy/mothur_ref_seq_set.fna' \
     --assignment_method 'sortmerna' \
@@ -133,8 +148,9 @@
     --sortmerna_coverage "0.9" \
     --sortmerna_best_N_alignments "5" \
     -o assign_taxonomy_sortmerna
-cp assign_taxonomy_sortmerna/sortmerna_map.blast 'test-data/assign_taxonomy/sortmerna_map.blast'
-cp assign_taxonomy_sortmerna/mothur_ref_seq_set_tax_assignments.txt 'test-data/assign_taxonomy/sortmerna_taxonomic_assignation.txt'
+ls assign_taxonomy_sortmerna
+md5sum 'assign_taxonomy_sortmerna/mothur_ref_seq_set_tax_assignments.txt'
+md5sum 'assign_taxonomy_sortmerna/sortmerna_map.blast'
 rm -rf assign_taxonomy_sortmerna
 
 #beta_diversity
@@ -1105,22 +1121,3 @@
 cp validate_mapping_file_output/*.log 'test-data/validate_mapping_file/map.tsv.log'
 cp validate_mapping_file_output/*corrected.txt 'test-data/validate_mapping_file/map.tsv_corrected.txt'
 rm -rf validate_mapping_file_output
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
--- a/macros.xml	Fri May 19 04:09:30 2017 -0400
+++ b/macros.xml	Thu Jun 22 06:57:54 2017 -0400
@@ -29,6 +29,22 @@
             </when>
         </conditional>
     </xml>
+    <xml name="assign_taxonomy_reference_source">
+        <conditional name="references">
+            <param name="source_selector" type="select" label="Select a reference sequence file from">
+                <option value="cached">The local cache</option>
+                <option value="history">The active history</option>
+            </param>
+            <when value="cached">
+                <param argument="--reference_seqs_fp" label="Reference sequences either used to generate a blast database (Blast) or used as training sequences for the selected classifier (RDP, Mothur)" type="select">
+                    <options from_data_table="qiime_rep_set"/>
+                </param>
+            </when>
+            <when value="history">
+                <param argument="--reference_seqs_fp" type="data" format="fasta" label="Reference sequences to search against"/>
+            </when>
+        </conditional>
+    </xml>
     <xml name="pick_otus_similarity">
         <param argument="--similarity" type="float" value="0.97" label="Sequence similarity threshold"/>
     </xml>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/assign_taxonomy/mothur_id_to_taxonomy.txt	Thu Jun 22 06:57:54 2017 -0400
@@ -0,0 +1,7 @@
+X67228	Bacteria;Proteobacteria;Alphaproteobacteria;Rhizobiales;Rhizobiaceae;Rhizobium
+X73443	Bacteria;Firmicutes;Clostridia;Clostridiales;Clostridiaceae;Clostridium
+AB004750	Bacteria;Proteobacteria;Gammaproteobacteria;Enterobacteriales;Enterobacteriaceae;Enterobacter
+xxxxxx	Bacteria;Proteobacteria;Gammaproteobacteria;Pseudomonadales;Pseudomonadaceae;Pseudomonas
+AB004748	Bacteria;Proteobacteria;Gammaproteobacteria;Enterobacteriales;Enterobacteriaceae;Enterobacter
+AB000278	Bacteria;Proteobacteria;Gammaproteobacteria;Vibrionales;Vibrionaceae;Photobacterium
+AB000390	Bacteria;Proteobacteria;Gammaproteobacteria;Vibrionales;Vibrionaceae;Vibrio
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/assign_taxonomy/mothur_repr_set_seqs.fasta	Thu Jun 22 06:57:54 2017 -0400
@@ -0,0 +1,4 @@
+>X67228 some description
+aacgaacgctggcggcaggcttaacacatgcaagtcgaacgctccgcaaggagagtggcagacgggtgagtaacgcgtgggaatctacccaaccctgcggaatagctctgggaaactggaattaataccgcatacgccctacgggggaaagatttatcggggatggatgagcccgcgttggattagctagttggtggggtaaaggcctaccaaggcgacgatccatagctggtctgagaggatgatcagccacattgggactgagacacggcccaaa
+>EF503697
+TAAAATGACTAGCCTGCGAGTCACGCCGTAAGGCGTGGCATACAGGCTCAGTAACACGTAGTCAACATGCCCAAAGGACGTGGATAACCTCGGGAAACTGAGGATAAACCGCGATAGGCCAAGGTTTCTGGAATGAGCTATGGCCGAAATCTATATGGCCTTTGGATTGGACTGCGGCCGATCAGGCTGTTGGTGAGGTAATGGCCCACCAAACCTGTAACCGGTACGGGCTTTGAGAGAAGTAGCCCGGAGATGGGCACTGAGACAAGGGCCCAGGCCCTATGGGGCGCAGCAGGCGCGAAACCTCTGCAATAGGCGAAAGCCTGACAGGGTTACTCTGAGTGATGCCCGCTAAGGGTATCTTTTGGCACCTCTAAAAATGGTGCAGAATAAGGGGTGGGCAAGTCTGGTGTCAGCCGCCGCGGTAATACCAGCACCCCGAGTTGTCGGGACGATTATTGGGCCTAAAGCATCCGTAGCCTGTTCTGCAAGTCCTCCGTTAAATCCACCTGCTCAACGGATGGGCTGCGGAGGATACCGCAGAGCTAGGAGGCGGGAGAGGCAAACGGTACTCAGTGGGTAGGGGTAAAATCCATTGATCTACTGAAGACCACCAGTGGCGAAGGCGGTTTGCCAGAACGCGCTCGACGGTGAGGGATGAAAGCTGGGGGAGCAAACCGGATTAGATACCCGGGGTAGTCCCAGCTGTAAACGGATGCAGACTCGGGTGATGGGGTTGGCTTCCGGCCCAACCCCAATTGCCCCCAGGCGAAGCCCGTTAAGATCTTGCCGCCCTGTCAGATGTCAGGGCCGCCAATACTCGAAACCTTAAAAGGAAATTGGGCGCGGGAAAAGTCACCAAAAGGGGGTTGAAACCCTGCGGGTTATATATTGTAAACC
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/assign_taxonomy/sortmerna_input_seqs.fasta	Thu Jun 22 06:57:54 2017 -0400
@@ -0,0 +1,2 @@
+>X67228
+aacgaacgctggcggcaggcttaacacatgcaagtcgaacgctccgcaaggagagtggcagacgggtgagtaacgcgtgggaatctacccaaccctgcggaatagctctgggaaactggaattaataccgcatacgccctacgggggaaagatttatcggggatggatgagcccgcgttggattagctagttggtggggtaaaggcctaccaaggcgacgatccatagctggtctgagaggatgatcagccacattgggactgagacacggcccaaa
--- a/test-data/assign_taxonomy/sortmerna_map.blast	Fri May 19 04:09:30 2017 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,28 +0,0 @@
-X67228	152350	98.6	277	4	0	1	277	22	298	5.76e-129	464	277M	100	
-X67228	558499	97.1	275	8	0	1	275	2	276	1.05e-122	443	275M2S	99.3	
-X67228	553706	97.5	277	7	0	1	277	1	277	4.7e-125	451	277M	100	
-X67228	553981	95.7	277	12	0	1	277	2	278	1.55e-118	429	277M	100	
-X67228	4423084	98.6	277	4	0	1	277	21	297	5.76e-129	464	277M	100	
-X73443	179865	96.3	269	2	8	8	276	2	268	2.31e-114	415	7S3M1I28M1I3M1I7M1D8M1D20M1I26M1I115M1D54M	97.5	
-X73443	181718	96	269	3	8	8	276	2	268	4.66e-113	411	7S3M1I28M1I3M1I7M1D8M1D20M1I26M1I115M1D54M	97.5	
-X73443	193551	96.3	269	2	8	8	276	2	268	2.31e-114	415	7S3M1I28M1I3M1I7M1D8M1D21M1I26M1I114M1D54M	97.5	
-X73443	212341	96.3	269	2	8	8	276	2	268	2.31e-114	415	7S3M1I28M1I3M1I7M1D8M1D21M1I26M1I114M1D54M	97.5	
-X73443	175883	96	269	3	8	8	276	2	268	4.66e-113	411	7S3M1I28M1I3M1I7M1D8M1D21M1I26M1I114M1D54M	97.5	
-AB004750	3888577	100	339	0	0	1	339	26	364	1.61e-166	588	339M	100	
-AB004750	581782	97.6	339	8	0	1	339	27	365	4.36e-156	554	339M	100	
-AB004750	1108679	97.9	339	7	0	1	339	26	364	2.16e-157	558	339M	100	
-AB004750	1109844	97.9	339	7	0	1	339	26	364	2.16e-157	558	339M	100	
-AB004750	4418165	99.7	339	1	0	1	339	28	366	3.25e-165	584	339M	100	
-xxxxxx	1102995	97.5	361	8	1	1	361	22	383	2.94e-166	588	174M1D187M	100	
-xxxxxx	340031	95.6	361	13	3	1	361	23	386	1.07e-158	562	169M3D192M	100	
-xxxxxx	340031	95.6	361	13	3	1	361	23	386	1.07e-158	562	169M3D192M	100	
-AB004748	581782	98	396	8	0	1	396	27	422	8.13e-186	653	396M	100	
-AB004748	1108679	98.2	396	7	0	1	396	26	421	4.04e-187	657	396M	100	
-AB004748	1109844	98.2	396	7	0	1	396	26	421	4.04e-187	657	396M	100	
-AB004748	3888577	100	396	0	0	1	396	26	421	3.01e-196	687	396M	100	
-AB004748	561327	97.5	396	10	0	1	396	1	396	3.3e-183	644	396M	100	
-AB000278	554346	98.6	368	5	0	1	368	6	373	4e-175	617	368M	100	
-AB000278	160928	97	368	7	4	1	368	33	400	2.94e-166	588	33M1D5M1I8M1I2M1D318M	100	
-AB000390	4433053	98.1	317	6	0	1	317	13	329	3.2e-147	524	317M	100	
-AB000390	19456	94.4	317	14	4	1	317	12	328	4.28e-132	474	77M2D4M2I234M	100	
-AB000390	4432126	94.4	317	14	4	1	317	13	329	4.28e-132	474	77M2D4M2I234M	100	
--- a/test-data/assign_taxonomy/sortmerna_taxonomic_assignation.txt	Fri May 19 04:09:30 2017 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,8 +0,0 @@
-#OTU ID	taxonomy	confidence	num hits
-AB004750	k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Enterobacteriales; f__Enterobacteriaceae; g__; s__	0.60	5
-AB000390	k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Vibrionales; f__Vibrionaceae	1.00	3
-xxxxxx	k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Alteromonadales; f__Alteromonadaceae; g__Marinobacter; s__	1.00	3
-X67228	k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Rhizobiales; f__Rhizobiaceae	0.60	5
-AB000278	k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Vibrionales; f__Vibrionaceae; g__Photobacterium	1.00	2
-AB004748	k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Enterobacteriales; f__Enterobacteriaceae; g__; s__	0.60	5
-X73443	k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Lachnospiraceae	1.00	5
--- a/test-data/assign_taxonomy/uclust_taxonomic_assignation.txt	Fri May 19 04:09:30 2017 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,10 +0,0 @@
-11469739	k__Bacteria; p__OP9; c__JS1; o__SB-45; f__; g__; s__	1.00	3
-11480235	k__Bacteria; p__OD1; c__; o__; f__; g__; s__	1.00	1
-11460543	k__Bacteria; p__OP9; c__JS1; o__SB-45; f__; g__; s__	1.00	3
-11460523	k__Bacteria; p__Proteobacteria; c__Deltaproteobacteria; o__Desulfobacterales; f__Desulfobulbaceae; g__; s__	1.00	3
-11472286	k__Bacteria; p__WS5; c__; o__; f__; g__; s__	1.00	1
-11458037	k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Peptococcaceae; g__Desulfosporosinus; s__meridiei	1.00	3
-11472384	k__Bacteria; p__Proteobacteria; c__Betaproteobacteria; o__Burkholderiales; f__Burkholderiaceae; g__Burkholderia; s__	0.67	3
-11469752	k__Bacteria; p__TM7; c__TM7-1; o__; f__; g__; s__	1.00	3
-11480408	k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__; g__; s__	1.00	3
-11468680	k__Bacteria; p__Proteobacteria; c__Betaproteobacteria; o__Burkholderiales; f__Burkholderiaceae; g__Burkholderia; s__	1.00	3