Mercurial > repos > iuc > qiime_assign_taxonomy

diff assign_taxonomy.xml @ 3:ec3c4654eacc draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/qiime/ commit a831282140ce160035a4ce984f48cc20198ed0a1
author: iuc
date: Thu, 22 Jun 2017 06:57:54 -0400
parents: fa330c61c0a5
children: b4170e1a3b85
--- a/assign_taxonomy.xml	Fri May 19 04:09:30 2017 -0400
+++ b/assign_taxonomy.xml	Thu Jun 22 06:57:54 2017 -0400
@@ -4,7 +4,7 @@
         <import>macros.xml</import>
     </macros>
     <expand macro="requirements">
-        <requirement type="package" version="2.0.2">rdptools</requirement>
+        <!--<requirement type="package" version="2.0.2">rdptools</requirement>-->
         <requirement type="package" version="2.2.22">blast-legacy</requirement>
         <requirement type="package" version="2.3.4">vsearch</requirement>
         <requirement type="package" version="1.36.1">mothur</requirement>
@@ -13,8 +13,10 @@
     <command detect_errors="aggressive"><![CDATA[
         assign_taxonomy.py
             --input_fasta_fp '$input_fasta_fp'
-            #if $id_to_taxonomy_fp
-                --id_to_taxonomy_fp '$id_to_taxonomy_fp'
+            #if $id_to_taxonomy_condition.source_selector == 'history'
+                --id_to_taxonomy_fp '$id_to_taxonomy_condition.id_to_taxonomy_fp'
+            #else if $id_to_taxonomy_condition.source_selector == 'cached'
+                --id_to_taxonomy_fp '$id_to_taxonomy_condition.id_to_taxonomy_fp.fields.path'
             #end if
             --assignment_method '$methodcond.assignment_method'
             #if $methodcond.assignment_method == "uclust"
@@ -22,12 +24,18 @@
                 --similarity '$methodcond.similarity'
                 --uclust_max_accepts '$methodcond.uclust_max_accepts'
             #else if $methodcond.assignment_method == "rdp"
-                #if $methodcond.reference_seqs_fp
-                    --reference_seqs_fp '$methodcond.reference_seqs_fp'
+                #if $methodcond.references.source_selector == 'history'
+                    --reference_seqs_fp '$methodcond.references.reference_seqs_fp'
+                #else if $methodcond.references.source_selector == 'cached'
+                    --reference_seqs_fp '$methodcond.references.reference_seqs_fp.fields.path'
                 #end if
                 --confidence '$methodcond.confidence'
             #else if $methodcond.assignment_method == "blast"
-                --reference_seqs_fp '$methodcond.reference_seqs_fp'
+                #if $methodcond.references.source_selector == 'history'
+                    --reference_seqs_fp '$methodcond.references.reference_seqs_fp'
+                #else if $methodcond.references.source_selector == 'cached'
+                    --reference_seqs_fp '$methodcond.references.reference_seqs_fp.fields.path'
+                #end if
                 --blast_e_value '$methodcond.blast_e_value'
             #else if $methodcond.assignment_method == "rtax"
                 --read_1_seqs_fp '$methodcond.read_1_seqs_fp'
@@ -38,6 +46,11 @@
                 --amplicon_id_regex '$methodcond.amplicon_id_regex'
                 --header_id_rege '$methodcond.header_id_regex'
             #else if $methodcond.assignment_method == "mothur"
+                #if $methodcond.references.source_selector == 'history'
+                    --reference_seqs_fp '$methodcond.references.reference_seqs_fp'
+                #else if $methodcond.references.source_selector == 'cached'
+                    --reference_seqs_fp '$methodcond.references.reference_seqs_fp.fields.path'
+                #end if
                 --confidence '$methodcond.confidence'
             #else if $methodcond.assignment_method == "sortmerna"
                 --sortmerna_threads \${GALAXY_SLOTS:-1}
@@ -54,15 +67,32 @@
     ]]></command>
     <inputs>
         <param argument="--input_fasta_fp" type="data" format="fasta" label="Input fasta file" />
-        <param argument="--id_to_taxonomy_fp" label="Tab-delimited file mapping sequences to assigned taxonomy"  type="data" format="tabular" help="Each assigned taxonomy is provided as a semicolon-separated list. For assignment with rdp, each assigned taxonomy must be exactly 6 levels deep" optional="True"/>
+
+        <conditional name="id_to_taxonomy_condition">
+            <param name="source_selector" type="select" label="Do you want to use a taxonomy reference ?">
+                <option value="cached">Yes (from the local cache)</option>
+                <option value="history">Yes (from the active history)</option>
+                <option value="void" selected="true">No</option>
+            </param>
+            <when value="cached">
+                <param argument="--id_to_taxonomy_fp" label="Tab-delimited file mapping sequences to assigned taxonomy" type="select" help="Each assigned taxonomy is provided as a semicolon-separated list. For assignment with rdp, each assigned taxonomy must be exactly 6 levels deep">
+                    <options from_data_table="qiime_taxonomy"/>
+                </param>
+            </when>
+            <when value="history">
+                <param argument="--id_to_taxonomy_fp" label="Tab-delimited file mapping sequences to assigned taxonomy"  type="data" format="tabular" help="Each assigned taxonomy is provided as a semicolon-separated list. For assignment with rdp, each assigned taxonomy must be exactly 6 levels deep"/>
+            </when>
+            <when value="void"/>
+        </conditional>
+
         <conditional name="methodcond">
             <param argument="--assignment_method" label="Taxon assignment method" type="select">
                 <option selected="True" value="uclust">uclust</option>
-                <!--<option value="rdp">rdp</option>
+                <!--<option value="rdp">rdp</option>-->
                 <option value="blast">blast</option>
-                <option value="rtax">rtax</option>
+                <!--<option value="rtax">rtax</option>-->
                 <option value="mothur">mothur</option>
-                <option value="sortmerna">sortmerna</option>-->
+                <option value="sortmerna">sortmerna</option>
             </param>
             <when value="uclust">
                 <param argument="--min_consensus_fraction" type="float" value="0.51" label="Minimum fraction of database hits that must have a specific taxonomic assignment to assign that taxonomy to a query"/>
@@ -70,11 +100,11 @@
                 <param argument="uclust_max_accepts" type="integer" value="3" label="Number of database hits to consider when making an assignment"/>
             </when>
             <when value="rdp">
-                <param argument="--reference_seqs_fp" label="Reference sequences used as training sequences for the classifier" type="data" format="fasta" optional="True"/>
+                <expand macro="assign_taxonomy_reference_source"/>
                 <param argument="--confidence" type="float" value="0.5" label="Minimum confidence to record an assignment"/>
             </when>
             <when value="blast">
-                <param argument="--reference_seqs_fp" label="Reference sequences used to generate a blast database" type="data" format="fasta" optional="True"/>
+                <expand macro="assign_taxonomy_reference_source"/>
                 <param argument="--blast_e_value" type="float" value="0.001" label="Maximum e-value to record an assignment"/>
             </when>
             <when value="rtax">
@@ -87,10 +117,11 @@
                 <param argument="--header_id_regex" type="text" value="\S+\s+(\S+?)\/" label="Regex used to parse the result of split_libraries, to get the portion of the header that RTAX uses to match mate pairs" help="The default uses the amplicon ID, not including /1 or /3, as the primary key for the query sequences. Typically this regex will be the same as amplicon_id_regex, except that only the second group is captured" />
             </when>
             <when value="mothur">
+                <expand macro="assign_taxonomy_reference_source"/>
                 <param argument="--confidence" type="float" value="0.5" label="Minimum confidence to record an assignment"/>
             </when>
             <when value="sortmerna">
-                <param argument="--sortmerna_db" type="data" format="fasta" label="Pre-existing database to search against" optional="True"/>
+                <!--<param argument="- -sortmerna_db" type="data" format="fasta" label="Pre-existing database to search against" optional="True"/>-->
                 <param argument="--min_consensus_fraction" type="float" value="0.51" label="Minimum fraction of database hits that must have a specific taxonomic assignment to assign that taxonomy to a query"/>
                 <param argument="--similarity" type="float" value="0.9" label="Minimum percent similarity (expressed as a fraction between 0 and 1) to consider a database match a hit"/>
                 <param argument="--sortmerna_e_value" type="float" value="1.0" label="Maximum E-value when clustering"/>
@@ -100,20 +131,28 @@
         </conditional>
     </inputs>
     <outputs>
-        <data name="log" format="txt" from_work_dir="assign_taxonomy/*_assignments.log" label="${tool.name} on ${on_string}: Log"/>
+        <data name="log" format="txt" from_work_dir="assign_taxonomy/*_assignments.log" label="${tool.name} on ${on_string}: Log">
+            <filter>methodcond['assignment_method']!="mothur"</filter>
+        </data>
         <data name="tax_assignments" format="txt" from_work_dir="assign_taxonomy/*.txt" label="${tool.name} on ${on_string}: Taxonomic assignment"/>
         <data name="sortmerna_map" format="tabular" from_work_dir="assign_taxonomy/sortmerna_map.blast" label="${tool.name} on ${on_string}: SortMeRNA Blast">
             <filter>methodcond['assignment_method']=="sortmerna"</filter>
         </data>
     </outputs>
     <tests>
+        <!-- Uclust assignment method -->
         <test>
             <param name="input_fasta_fp" value="assign_taxonomy/uclust_input_seqs.fasta"/>
-            <param name="assignment_method" value="uclust"/>
-            <param name="min_consensus_fraction" value="0.51"/>
-            <param name="similarity" value="0.9"/>
-            <param name="uclust_max_accepts" value="3" />
-            <output name="tax_assignments" value="assign_taxonomy/uclust_taxonomic_assignation.txt"/>
+            <conditional name="id_to_taxonomy_condition">
+                <param name="source_selector" value="void" />
+            </conditional>
+            <conditional name="methodcond">
+                <param name="assignment_method" value="uclust"/>
+                <param name="min_consensus_fraction" value="0.51"/>
+                <param name="similarity" value="0.9"/>
+                <param name="uclust_max_accepts" value="3" />
+            </conditional>
+            <output name="tax_assignments" md5="57b0cf51fc0142f369134ea923d78d99"/>
             <output name="log">
                 <assert_contents>
                     <has_text text="UclustConsensusTaxonAssigner" />
@@ -121,22 +160,73 @@
                 </assert_contents>
             </output>
         </test>
+        <!-- Mothur assignment method -->
+        <!-- Note: the assignment results of this method are variable, so an md5 checksum comparison is not possible -->
+        <test>
+            <param name="input_fasta_fp" value="assign_taxonomy/mothur_repr_set_seqs.fasta"/>
+            <conditional name="id_to_taxonomy_condition">
+                <param name="source_selector" value="history" />
+                <param name="id_to_taxonomy_fp" value="assign_taxonomy/mothur_id_to_taxonomy.txt"/>
+            </conditional>
+            <conditional name="methodcond">
+                <param name="assignment_method" value="mothur"/>
+                <conditional name="references">
+                    <param name="source_selector" value="history" />
+                    <param name="reference_seqs_fp" value="assign_taxonomy/mothur_ref_seq_set.fna" />
+                </conditional>
+                <param name="confidence" value="0.5"/>
+            </conditional>
+            <output name="tax_assignments">
+                <assert_contents>
+                    <has_text text="X67228" />
+                    <has_text text="Rhizobium" />
+                    <has_text text="EF503697" />
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Blast assignment method -->
+        <test>
+            <param name="input_fasta_fp" value="assign_taxonomy/mothur_repr_set_seqs.fasta"/>
+            <conditional name="id_to_taxonomy_condition">
+                <param name="source_selector" value="history" />
+                <param name="id_to_taxonomy_fp" value="assign_taxonomy/mothur_id_to_taxonomy.txt"/>
+            </conditional>
+            <conditional name="methodcond">
+                <param name="assignment_method" value="blast"/>
+                <conditional name="references">
+                    <param name="source_selector" value="history" />
+                    <param name="reference_seqs_fp" value="assign_taxonomy/mothur_ref_seq_set.fna" />
+                </conditional>
+                <param name="blast_e_value" value="0.001"/>
+            </conditional>
+            <output name="tax_assignments" md5="5ab8d28f67bcbf828937d222b2ab9c6e"/>
+            <output name="log">
+                <assert_contents>
+                    <has_text text="BlastTaxonAssigner" />
+                    <has_text text="inspected: 2" />
+                </assert_contents>
+            </output>
+        </test>
+        <!-- SortMeRNA assignment method -->
+        <!-- Note: The input file has been reduced to only 1 sequence, but this test still takes a long time to execute (more than 10 min) -->
         <!--<test>
-            <param name="input_fasta_fp" value="assign_taxonomy/mothur_ref_seq_set.fna"/>
-            <param name="assignment_method" value="sortmerna"/>
-            <param name="min_consensus_fraction" value="0.51" />
-            <param name="similarity" value="0.9" />
-            <param name="sortmerna_e_value" value="1.0" />
-            <param name="sortmerna_coverage" value="0.9" />
-            <param name="sortmerna_best_N_alignments" value="5" />
+            <param name="input_fasta_fp" value="assign_taxonomy/sortmerna_input_seqs.fasta"/>
+            <conditional name="methodcond">
+                <param name="assignment_method" value="sortmerna"/>
+                <param name="min_consensus_fraction" value="0.51" />
+                <param name="similarity" value="0.9" />
+                <param name="sortmerna_e_value" value="1.0" />
+                <param name="sortmerna_coverage" value="0.9" />
+                <param name="sortmerna_best_N_alignments" value="5" />
+            </conditional>
             <output name="log">
                 <assert_contents>
                     <has_text text="Application:SortMeRNA" />
                     <has_text text="min_consensus_fraction" />
                 </assert_contents>
             </output>
-            <output name="tax_assignments" value="assign_taxonomy/sortmerna_taxonomic_assignation.txt"/>
-            <output name="sortmerna_map" value="assign_taxonomy/sortmerna_map.blast"/>
+            <output name="tax_assignments" md5="0da68ab9762b677a00f34051eadad68c"/>
+            <output name="sortmerna_map" md5="16e349be29f121fca741d6294f79ce7c"/>
         </test>-->
     </tests>
     <help><![CDATA[
author	iuc
date	Thu, 22 Jun 2017 06:57:54 -0400
parents	fa330c61c0a5
children	b4170e1a3b85