comparison dada2_assignTaxonomyAddspecies.xml @ 0:18517edb4733 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f8b6b6e72914ad6bcca8423dfa03f59bde80992e"
author iuc
date Fri, 08 Nov 2019 18:50:24 -0500
parents
children 1c9715cef808
comparison
equal deleted inserted replaced
-1:000000000000 0:18517edb4733
1 <tool id="dada2_assignTaxonomyAddspecies" name="dada2: assignTaxonomy and addSpecies" version="@DADA2_VERSION@+galaxy@WRAPPER_VERSION@" profile="19.09">
2 <description>Assign taxonomy and species</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="requirements"/>
7 <expand macro="stdio"/>
8 <expand macro="version_command"/>
9 <command detect_errors="exit_code"><![CDATA[
10 Rscript '$dada2_script' \${GALAXY_SLOTS:-1} ## run the generated R script; its only argument is the slot count, defaulting to 1
11 ]]></command>
12 <configfiles>
13 <configfile name="dada2_script"><![CDATA[
14 @READ_FOO@
15 ## TRUE/FALSE are spelled out below because T and F are ordinary, reassignable variables in R
16 library(dada2, quietly=TRUE)
17 ## the only trailing command line argument is the number of threads
18 args <- commandArgs(trailingOnly = TRUE)
19 nthreads <- as.integer(args[1])
20 ## read_data is not defined in this file; presumably it is supplied by the macro token expanded at the top of this script (TODO confirm)
21 seqs <- $read_data($seqs)
22 ## taxonomy reference and rank names come either from the history or from the selected built-in data table entry
23 #if $reference_cond.reference_select == "history"
24 ref <- '$reference_cond.refFasta'
25 tl <- '$reference_cond.taxLevels'
26 #else
27 ref <- '$reference_cond.refFasta.fields.path'
28 tl <- '$reference_cond.refFasta.fields.taxlevels'
29 #end if
30 tl <- strsplit(tl, ",")[[1]]
31 ## tl is now a character vector with one entry per comma separated rank name
32 taxa <- assignTaxonomy(seqs, ref, minBoot = $minBoot, tryRC = $tryRC,
33 outputBootstraps = $outputBootstraps,
34 taxLevels = tl, multithread = nthreads, verbose=TRUE)
35 ## with bootstraps requested the result is split into its boot and tax components
36 #if $outputBootstraps
37 boot <- taxa\$boot
38 taxa <- taxa\$tax
39 #end if
40 ## optional species step: ref is re-used for the species reference, aM is either a logical or the user supplied number
41 #if $addSpecies_cond.addSpecies_select == "TRUE"
42 #if $addSpecies_cond.allowMultiple_cond.allowMultiple == "num"
43 aM <- $addSpecies_cond.allowMultiple_cond.num
44 #else
45 aM <- $addSpecies_cond.allowMultiple_cond.allowMultiple
46 #end if
47 #if $addSpecies_cond.speciesreference_cond.speciesreference_select == "history"
48 ref <- '$addSpecies_cond.speciesreference_cond.speciesrefFasta'
49 #else
50 ref <- '$addSpecies_cond.speciesreference_cond.speciesrefFasta.fields.path'
51 #end if
52 taxa <- addSpecies(taxa, ref, allowMultiple = aM, tryRC = $addSpecies_cond.tryRC)
53 #end if
54 write.table(taxa, file = '$output', quote = FALSE, sep = "\t", row.names = TRUE, col.names = NA)
55 ## the bootstrap table is written only when it was requested above
56 #if $outputBootstraps
57 write.table(boot, file = '$bootstraps', quote = FALSE, sep = "\t", row.names = TRUE, col.names = NA)
58 #end if
59 ]]></configfile>
60 </configfiles>
61 <inputs> <!-- parameters below are referenced by name in the dada2_script configfile -->
62 <param name="seqs" type="data" format="@DADA_UNIQUES@,dada2_sequencetable,dada2_uniques" label="sequences to be assigned" help=""/>
63 <conditional name="reference_cond">
64 <param name="reference_select" type="select" label="Select a reference dataset from your history or use a built-in?">
65 <option value="builtin">Use a built-in reference</option>
66 <option value="history">Use reference data from the history</option>
67 </param>
68 <when value="builtin">
69 <param name="refFasta" type="select" label="Select reference data set" help="If a reference data set of interest is not listed, contact the Galaxy administrators">
70 <options from_data_table="dada2_taxonomy">
71 <filter type="sort_by" column="2"/>
72 <validator type="no_options" message="No reference data sets are available"/>
73 </options>
74 </param>
75 </when>
76 <when value="history">
77 <param name="refFasta" type="data" format="fasta,fasta.gz" label="Reference data set" />
78 <param argument="taxLevels" type="text" label="Names of the taxonomic levels in the data set" help="comma separated list" />
79 </when>
80 </conditional>
81 <param argument="minBoot" type="integer" value="50" min="0" label="Minimum bootstrap confidence" help="for assigning a
82 taxonomic level"/>
83 <param argument="tryRC" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Try reverse complement" help="the reverse-complement of each sequence will be used for classification if it is a better match to the reference sequences than the forward sequence"/>
84 <param argument="outputBootstraps" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Output bootstrap values"/>
85 <!-- optional species assignment; the nested options only exist when addSpecies_select is TRUE -->
86 <conditional name="addSpecies_cond">
87 <param name="addSpecies_select" type="select" label="Add genus-species binomials to the taxonomic table">
88 <option value="FALSE">No</option>
89 <option value="TRUE">Yes</option>
90 </param>
91 <when value="FALSE"/>
92 <when value="TRUE">
93 <conditional name="speciesreference_cond">
94 <param name="speciesreference_select" type="select" label="Select a reference dataset from your history or use a built-in?">
95 <option value="builtin">Use a built-in reference</option>
96 <option value="history">Use reference data from the history</option>
97 </param>
98 <when value="builtin">
99 <param name="speciesrefFasta" argument="refFasta" type="select" label="Select reference data set" help="If a reference data set of interest is not listed, contact the Galaxy administrators">
100 <options from_data_table="dada2_species">
101 <filter type="sort_by" column="2"/>
102 <validator type="no_options" message="No reference data sets are available"/>
103 </options>
104 </param>
105 </when>
106 <when value="history">
107 <param name="speciesrefFasta" argument="refFasta" type="data" format="fasta,fasta.gz" label="Reference data set" />
108 </when>
109 </conditional>
110 <conditional name="allowMultiple_cond">
111 <param argument="allowMultiple" type="select" label="reporting options">
112 <option value="FALSE">only unambiguous identifications</option>
113 <option value="TRUE">all exactly matched species</option>
114 <option value="num">specify the maximal number of reported exactly matched species</option>
115 </param>
116 <when value="FALSE"/>
117 <when value="TRUE"/>
118 <when value="num">
119 <param name="num" type="integer" value="" min="1" label="Number of matched species"/>
120 </when>
121 </conditional>
122 <param argument="tryRC" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Try reverse complement" help="the reverse-complement of each sequence will be used for classification if it is a better match to the reference sequences than the forward sequence"/>
123 </when>
124 </conditional>
125 </inputs>
126 <outputs>
127 <data name="output" format="tabular" label="${tool.name} on ${on_string}"/> <!-- taxonomy table written by the dada2_script configfile -->
128 <data name="bootstraps" format="tabular" label="${tool.name} on ${on_string}: bootstraps">
129 <filter>outputBootstraps</filter> <!-- this output is only produced when the outputBootstraps parameter is set -->
130 </data>
131 </outputs>
132 <tests>
133 <!-- test w default params -->
134 <test expect_num_outputs="1">
135 <param name="seqs" ftype="dada2_sequencetable" value="removeBimeraDenovo_F3D0.tab"/>
136 <param name="reference_cond|reference_select" value="history"/>
137 <param name="reference_cond|refFasta" ftype="fasta" value="reference.fa"/>
138 <param name="reference_cond|taxLevels" value="Level1,Level2,Level3,Level4,Level5" />
139 <param name="addSpecies_cond|addSpecies_select" value="TRUE"/>
140 <param name="addSpecies_cond|speciesreference_cond|speciesreference_select" value="history"/>
141 <param name="addSpecies_cond|speciesreference_cond|speciesrefFasta" ftype="fasta" value="reference_species.fa" />
142 <output name="output" value="assignTaxonomyAddspecies_F3D0.tab" ftype="tabular" />
143 </test>
144 <!-- test w default params, built-in reference -->
145 <test expect_num_outputs="1">
146 <param name="seqs" ftype="dada2_sequencetable" value="removeBimeraDenovo_F3D0.tab"/>
147 <param name="reference_cond|reference_select" value="builtin"/>
148 <param name="reference_cond|refFasta" value="test_buildid"/>
149 <param name="addSpecies_cond|addSpecies_select" value="TRUE"/>
150 <param name="addSpecies_cond|speciesreference_cond|speciesreference_select" value="builtin"/>
151 <param name="addSpecies_cond|speciesreference_cond|speciesrefFasta" value="test_buildid" />
152 <output name="output" value="assignTaxonomyAddspecies_F3D0.tab" ftype="tabular" />
153 </test>
154 <!-- test w output bootstraps, minBoot, tryRC; note: sim_size is used for the outputs due to the probabilistic bootstrapping -->
155 <test expect_num_outputs="2">
156 <param name="seqs" ftype="dada2_sequencetable" value="removeBimeraDenovo_F3D0.tab"/>
157 <param name="reference_cond|reference_select" value="history"/>
158 <param name="reference_cond|refFasta" ftype="fasta" value="reference.fa"/>
159 <param name="reference_cond|taxLevels" value="Level1,Level2,Level3,Level4,Level5" />
160 <param name="minBoot" value="42" />
161 <param name="tryRC" value="TRUE" />
162 <param name="outputBootstraps" value="TRUE" />
163 <param name="addSpecies_cond|addSpecies_select" value="TRUE"/>
164 <param name="addSpecies_cond|speciesreference_cond|speciesreference_select" value="history"/>
165 <param name="addSpecies_cond|speciesreference_cond|speciesrefFasta" ftype="fasta" value="reference_species.fa" />
166 <param name="addSpecies_cond|allowMultiple_cond|allowMultiple" value="TRUE"/> <!-- full path through the enclosing allowMultiple_cond conditional -->
167 <param name="addSpecies_cond|tryRC" value="TRUE" />
168 <output name="output" value="assignTaxonomyAddspecies_F3D0.tab" ftype="tabular" compare="sim_size" />
169 <output name="bootstraps" value="assignTaxonomyAddspecies_F3D0_boot.tab" ftype="tabular" compare="sim_size" />
170 </test>
171 </tests>
172 <help><![CDATA[
173 Description
174 ...........
175
176 This tool implements dada2's assignTaxonomy and addSpecies functions.
177
178 - assignTaxonomy assigns taxonomy to the sequence variants. The DADA2 package provides a native implementation of the naive Bayesian classifier method for this purpose (see Wang et al. 2007, kmer size 8 and 100 bootstrap replicates). The assignTaxonomy function takes as input a set of sequences to be classified and a training set of reference sequences with known taxonomy, and outputs taxonomic assignments with at least minBoot bootstrap confidence. Properly formatted reference files for several popular taxonomic databases are available at http://benjjneb.github.io/dada2/training.html
179 - addSpecies (a wrapper around assignSpecies) makes species level assignments based on exact matching between ASVs and sequenced reference strains. Recent analysis suggests that exact matching (or 100% identity) is the only appropriate way to assign species to 16S gene fragments. Currently, species-assignment training fastas are available for the Silva and RDP 16S databases.
180
181 Usage
182 .....
183
184 **Input**
185
186 - A list of sequences contained in the results of removeBimeraDenovo or sequenceTable (note that the results of dada and mergePairs are also accepted).
187 - Reference data bases for taxonomic and species/genus level assignment. Several cached data bases can be chosen (ask your Galaxy admin if they are missing). For using custom data bases see below.
188
189 **Output**
190
191 - A table containing the assigned taxonomies exceeding the minBoot level of bootstrapping confidence. Rows correspond to the provided sequences, columns to the taxonomic levels. NA indicates that the sequence was not consistently classified at that level at the minBoot threshold.
192 - Optionally two columns for the genus and species taxonomic levels can be added. NA indicates that the sequence was not classified at that level.
193 - If outputBootstraps is checked, a second table containing the bootstrap values is returned in addition to the table of assigned taxonomies.
194
195 @HELP_OVERVIEW@
196
197 Custom Reference data sets
198 ..........................
199
200 For **taxonomy assignment** the following is needed:
201
202 - a reference fasta data base
203 - a comma separated list of taxonomic ranks present in the reference data base
204
205 The reference fasta data base for taxonomic assignment (fasta or compressed fasta) needs to encode the taxonomy corresponding to each sequence in the fasta header lines in the following fashion (note, the second sequence is not assigned down to level 6):
206
207 ::
208
209 >Level1;Level2;Level3;Level4;Level5;Level6;
210 ACCTAGAAAGTCGTAGATCGAAGTTGAAGCATCGCCCGATGATCGTCTGAAGCTGTAGCATGAGTCGATTTTCACATTCAGGGATACCATAGGATAC
211 >Level1;Level2;Level3;Level4;Level5;
212 CGCTAGAAAGTCGTAGAAGGCTCGGAGGTTTGAAGCATCGCCCGATGGGATCTCGTTGCTGTAGCATGAGTACGGACATTCAGGGATCATAGGATAC
213
214 The list of required taxonomic ranks could be for instance: "Kingdom,Phylum,Class,Order,Family,Genus"
215
216 The reference data base for **species assignment** is a fasta file (or compressed fasta file), with the id line formatted as follows:
217
218 ::
219
220 >ID Genus species
221 ACCTAGAAAGTCGTAGATCGAAGTTGAAGCATCGCCCGATGATCGTCTGAAGCTGTAGCATGAGTCGATTTTCACATTCAGGGATACCATAGGATAC
222 >ID Genus species
223 CGCTAGAAAGTCGTAGAAGGCTCGGAGGTTTGAAGCATCGCCCGATGGGATCTCGTTGCTGTAGCATGAGTACGGACATTCAGGGATCATAGGATAC
224
225
226 ]]></help>
227 <expand macro="citations">
228 <citation type="doi">10.1128/AEM.00062-07</citation> <!-- extra citation passed into the citations macro; presumably the Wang et al. 2007 classifier paper mentioned in the help (TODO confirm) -->
229 </expand>
230 </tool>
231
232