<?xml version="1.0" encoding="UTF-8"?>
<!--
  Galaxy tool wrapper around the QIIME 2 (2019.7) action
  "qiime feature-classifier classify-hybrid-vsearch-sklearn".

  Layout:
    * command : Cheetah template that assembles the qiime call and copies the
                resulting oclassification.qza onto the Galaxy output dataset.
    * inputs  : one Galaxy parameter per QIIME option; the parameter name is
                the option name with dashes removed (p-query-cov becomes
                pquerycov), which is what the command template refers to.
    * outputs : a single qza dataset named oclassification.
    * help    : the QIIME action description, in reStructuredText.
-->
<tool id="qiime_feature-classifier_classify-hybrid-vsearch-sklearn"
      name="qiime feature-classifier classify-hybrid-vsearch-sklearn"
      version="2019.7">
    <description> - ALPHA Hybrid classifier: VSEARCH exact match + sklearn classifier</description>
    <requirements>
        <requirement type="package" version="2019.7">qiime2</requirement>
    </requirements>
    <command><![CDATA[
qiime feature-classifier classify-hybrid-vsearch-sklearn

## Required sequence artifacts. Paths are single-quoted so the shell never
## word-splits or globs them.
--i-query='$iquery'
--i-reference-reads='$ireferencereads'

## Reference taxonomy: a history dataset is used directly, a cached entry
## contributes the "path" field of its data table row.
#if str($id_to_taxonomy_fp.selector) == 'history'
    #set $tax = $id_to_taxonomy_fp.taxonomy_fp
#else
    #set $tax = $id_to_taxonomy_fp.taxonomy_fp.fields.path
#end if
--i-reference-taxonomy '$tax'

## Pre-trained classifier: resolved the same way as the taxonomy above.
#if str($id_to_classifier_fp.selector) == 'history'
    #set $classifier = $id_to_classifier_fp.classifier_fp
#else
    #set $classifier = $id_to_classifier_fp.classifier_fp.fields.path
#end if
--i-classifier '$classifier'

## Free-text options (they also accept the keywords 'all' / 'disable'), so
## the values are quoted before reaching the shell.
#if str($pmaxaccepts)
    --p-maxaccepts='$pmaxaccepts'
#end if

#if str($pconfidence)
    --p-confidence='$pconfidence'
#end if

## Optional numeric options: an emptied field renders as '' and is skipped.
#if str($ppercidentity)
    --p-perc-identity=$ppercidentity
#end if

#if str($pquerycov)
    --p-query-cov=$pquerycov
#end if

## Optional selects render as the string 'None' when nothing is chosen.
#if str($pstrand) != 'None'
    --p-strand=$pstrand
#end if

#if str($pminconsensus)
    --p-min-consensus=$pminconsensus
#end if

#if str($preadorientation) != 'None'
    --p-read-orientation=$preadorientation
#end if

## Thread count is taken from the job's GALAXY_SLOTS at run time (4 when the
## variable is unset). The shell expression is built with #set so Cheetah
## does not try to resolve it as a placeholder.
#set $pthreads = '${GALAXY_SLOTS:-4}'
--p-threads="$pthreads"

## Always state the prefilter choice explicitly: omitting the flag would let
## QIIME fall back to its own default, which made it impossible to turn the
## prefilter off from the form.
#if $pprefilter
    --p-prefilter
#else
    --p-no-prefilter
#end if

#if str($psamplesize)
    --p-sample-size=$psamplesize
#end if

#if str($prandseed)
    --p-randseed=$prandseed
#end if

--o-classification=oclassification

## Only copy the artifact when qiime succeeded.
&&
cp oclassification.qza '$oclassification'
    ]]></command>
    <inputs>
        <param name="iquery"
               type="data"
               format="qza,no_unzip.zip"
               optional="false"
               label="--i-query: ARTIFACT FeatureData[Sequence] Sequences to classify taxonomically. [required]"/>
        <param name="ireferencereads"
               type="data"
               format="qza,no_unzip.zip"
               optional="false"
               label="--i-reference-reads: ARTIFACT FeatureData[Sequence] reference sequences. [required]"/>

        <!-- Reference taxonomy: either a data table entry or a history dataset. -->
        <conditional name="id_to_taxonomy_fp">
            <param name="selector" type="select" label="Reference taxonomy to query">
                <option value="cached">Public databases</option>
                <option value="history">Databases from your history</option>
            </param>
            <when value="cached">
                <param argument="--taxonomy_fp" type="select" optional="true" label="Reference taxonomy">
                    <options from_data_table="qiime_taxonomy"/>
                </param>
            </when>
            <when value="history">
                <param argument="--taxonomy_fp" type="data" format="qza,no_unzip.zip" optional="true" label="Reference databases"/>
            </when>
        </conditional>

        <!-- Pre-trained classifier: either a data table entry or a history dataset. -->
        <conditional name="id_to_classifier_fp">
            <param name="selector" type="select" label="Reference classifier to query">
                <option value="cached">Public classifiers</option>
                <option value="history">Classifiers from your history</option>
            </param>
            <when value="cached">
                <param name="classifier_fp" type="select" optional="true" label="Reference classifier">
                    <options from_data_table="qiime_rep_set"/>
                </param>
            </when>
            <when value="history">
                <param name="classifier_fp" type="data" format="qza,no_unzip.zip" optional="true" label="Reference classifier"/>
            </when>
        </conditional>

        <!-- Text rather than integer/float because each also accepts a keyword. -->
        <param name="pmaxaccepts"
               type="text"
               value="10"
               optional="true"
               label="--p-maxaccepts: VALUE Int % Range(1, None) | Str % Choices('all') Maximum number of hits to keep for each query. Set to 'all' to keep all hits &gt; perc-identity similarity. [default: 10]"/>
        <param name="pconfidence"
               type="text"
               value="0.7"
               optional="true"
               label="--p-confidence: VALUE Float % Range(0, 1, inclusive_end=True) | Str % Choices('disable') Confidence threshold for limiting taxonomic depth. Set to 'disable' to disable confidence calculation, or 0 to calculate confidence but not apply it to limit the taxonomic depth of the assignments. [default: 0.7]"/>

        <param name="ppercidentity"
               type="float"
               value="0.5"
               min="0"
               max="1"
               optional="true"
               label="--p-perc-identity: PROPORTION Range(0.0, 1.0, inclusive_end=True) Percent sequence similarity to use for PREFILTER. Reject match if percent identity to query is lower. Set to a lower value to perform a rough pre-filter. This parameter is ignored if `prefilter` is disabled. [default: 0.5]"/>
        <param name="pquerycov"
               type="float"
               value="0.8"
               min="0"
               max="1"
               optional="true"
               label="--p-query-cov: PROPORTION Range(0.0, 1.0, inclusive_end=True) Query coverage threshold to use for PREFILTER. Reject match if query alignment coverage per high-scoring pair is lower. Set to a lower value to perform a rough pre-filter. This parameter is ignored if `prefilter` is disabled. [default: 0.8]"/>
        <param name="pstrand"
               type="select"
               optional="true"
               label="--p-strand: TEXT Choices('both', 'plus') Align against reference sequences in forward ('plus') or both directions ('both').">
            <option selected="true" value="None">Selection is Optional</option>
            <option value="both">both</option>
            <option value="plus">plus</option>
        </param>
        <!-- The form minimum is 0.5; the label states that QIIME excludes 0.5 itself. -->
        <param name="pminconsensus"
               type="float"
               value="0.51"
               min="0.5"
               max="1"
               optional="true"
               label="--p-min-consensus: NUMBER Range(0.5, 1.0, inclusive_start=False, inclusive_end=True) Minimum fraction of assignments must match top hit to be accepted as consensus assignment. [default: 0.51]"/>
        <param name="preadorientation"
               type="select"
               optional="true"
               label="--p-read-orientation: TEXT Choices('same', 'reverse-complement', 'auto') Direction of reads with respect to reference sequences in pre-trained sklearn classifier. same will cause reads to be classified unchanged; reverse-complement will cause reads to be reversed and complemented prior to classification. 'auto' will autodetect orientation based on the confidence estimates for the first 100 reads. [default: 'auto']">
            <option value="None">Selection is Optional</option>
            <option value="same">same</option>
            <option value="reverse-complement">reverse-complement</option>
            <option selected="true" value="auto">auto</option>
        </param>
        <!-- Checked by default to mirror the QIIME default; unchecking disables the prefilter. -->
        <param name="pprefilter"
               type="boolean"
               checked="true"
               label="--p-prefilter: --p-no-prefilter Toggle positive filter of query sequences on or off. [default: True]"/>
        <param name="psamplesize"
               type="integer"
               value="1000"
               min="1"
               optional="true"
               label="--p-sample-size: INTEGER Range(1, None) Randomly extract the given number of sequences from the reference database to use for prefiltering. This parameter is ignored if `prefilter` is disabled. [default: 1000]"/>
        <param name="prandseed"
               type="integer"
               value="0"
               min="0"
               optional="true"
               label="--p-randseed: INTEGER Range(0, None) Use integer as a seed for the pseudo-random generator used during prefiltering. A given seed always produces the same output, which is useful for replicability. Set to 0 to use a pseudo-random seed. This parameter is ignored if `prefilter` is disabled. [default: 0]"/>
    </inputs>
    <outputs>
        <data format="qza" label="${tool.name} on ${on_string}: classification.qza" name="oclassification"/>
    </outputs>
    <help><![CDATA[
ALPHA Hybrid classifier: VSEARCH exact match + sklearn classifier
##################################################################

NOTE: THIS PIPELINE IS AN ALPHA RELEASE. Please report bugs to
https://forum.qiime2.org! Assign taxonomy to query sequences using hybrid
classifier. First performs rough positive filter to remove artifact and
low-coverage sequences (use "prefilter" parameter to toggle this step on or
off). Second, performs VSEARCH exact match between query and
reference_reads to find exact matches, followed by least common ancestor
consensus taxonomy assignment from among maxaccepts top hits, min_consensus
of which share that taxonomic assignment. Query sequences without an exact
match are then classified with a pre-trained sklearn taxonomy classifier to
predict the most likely taxonomic lineage.

Parameters
----------
query : FeatureData[Sequence]
    Sequences to classify taxonomically.
reference_reads : FeatureData[Sequence]
    reference sequences.
reference_taxonomy : FeatureData[Taxonomy]
    reference taxonomy labels.
classifier : TaxonomicClassifier
    Pre-trained sklearn taxonomic classifier for classifying the reads.
maxaccepts : Int % Range(1, None) | Str % Choices('all'), optional
    Maximum number of hits to keep for each query. Set to "all" to keep all
    hits > perc_identity similarity.
perc_identity : Float % Range(0.0, 1.0, inclusive_end=True), optional
    Percent sequence similarity to use for PREFILTER. Reject match if
    percent identity to query is lower. Set to a lower value to perform a
    rough pre-filter. This parameter is ignored if `prefilter` is disabled.
query_cov : Float % Range(0.0, 1.0, inclusive_end=True), optional
    Query coverage threshold to use for PREFILTER. Reject match if query
    alignment coverage per high-scoring pair is lower. Set to a lower value
    to perform a rough pre-filter. This parameter is ignored if `prefilter`
    is disabled.
strand : Str % Choices('both', 'plus'), optional
    Align against reference sequences in forward ("plus") or both
    directions ("both").
min_consensus : Float % Range(0.5, 1.0, inclusive_start=False, inclusive_end=True), optional
    Minimum fraction of assignments must match top hit to be accepted as
    consensus assignment.
reads_per_batch : Int % Range(0, None), optional
    Number of reads to process in each batch for sklearn classification. If
    "auto", this parameter is autoscaled to min(number of query sequences /
    threads, 20000).
confidence : Float % Range(0, 1, inclusive_end=True) | Str % Choices('disable'), optional
    Confidence threshold for limiting taxonomic depth. Set to "disable" to
    disable confidence calculation, or 0 to calculate confidence but not
    apply it to limit the taxonomic depth of the assignments.
read_orientation : Str % Choices('same', 'reverse-complement', 'auto'), optional
    Direction of reads with respect to reference sequences in pre-trained
    sklearn classifier. same will cause reads to be classified unchanged;
    reverse-complement will cause reads to be reversed and complemented
    prior to classification. "auto" will autodetect orientation based on
    the confidence estimates for the first 100 reads.
prefilter : Bool, optional
    Toggle positive filter of query sequences on or off.
sample_size : Int % Range(1, None), optional
    Randomly extract the given number of sequences from the reference
    database to use for prefiltering. This parameter is ignored if
    `prefilter` is disabled.
randseed : Int % Range(0, None), optional
    Use integer as a seed for the pseudo-random generator used during
    prefiltering. A given seed always produces the same output, which is
    useful for replicability. Set to 0 to use a pseudo-random seed. This
    parameter is ignored if `prefilter` is disabled.

Returns
-------
classification : FeatureData[Taxonomy]
    The resulting taxonomy classifications.
    ]]></help>
    <!-- Citation block shared by the QIIME wrappers, pulled in from qiime_citation.xml. -->
    <macros>
        <import>qiime_citation.xml</import>
    </macros>
    <expand macro="qiime_citation"/>
</tool>