<?xml version="1.0" encoding="UTF-8"?>
<!--
  Galaxy wrapper for the QIIME 2 action
  "qiime feature-classifier classify-hybrid-vsearch-sklearn".

  Notes for maintainers:
    * The command template writes the result to "oclassification.qza" in the
      job working directory and copies it to the Galaxy output dataset only
      when the qiime call succeeded.
    * The thread count is not a user parameter; it is taken from the
      GALAXY_SLOTS environment variable provided by the job runner.
    * maxaccepts / maxhits / maxrejects / confidence accept either a number
      or a keyword ("all" / "disable") on the command line, so they are
      modelled as optional free-text parameters. An empty value (or the
      legacy value "None") omits the option so QIIME 2 applies its default.
    * The CLI "examples" flag only prints usage examples and exits, so it is
      intentionally not exposed as a tool input.
-->
<tool id="qiime_feature-classifier_classify-hybrid-vsearch-sklearn" name="qiime feature-classifier classify-hybrid-vsearch-sklearn"
      version="2020.8">
    <description>ALPHA Hybrid classifier: VSEARCH exact match + sklearn classifier</description>
    <macros>
        <import>qiime_citation.xml</import>
    </macros>
    <requirements>
        <requirement type="package" version="2020.8">qiime2</requirement>
    </requirements>
    <command><![CDATA[
qiime feature-classifier classify-hybrid-vsearch-sklearn

## Required input artifacts (quoted so unusual dataset paths stay one argument).
--i-query='$iquery'

--i-reference-reads='$ireferencereads'

--i-reference-taxonomy='$ireferencetaxonomy'

--i-classifier='$iclassifier'

## Free-text options: empty or legacy 'None' means "use the QIIME 2 default".
#set $maxaccepts = str($pmaxaccepts).strip()
#if $maxaccepts not in ('', 'None'):
--p-maxaccepts='$maxaccepts'
#end if

## Optional numeric options are skipped when the field has been cleared.
#if str($ppercidentity) not in ('', 'None'):
--p-perc-identity=$ppercidentity
#end if

#if str($pquerycov) not in ('', 'None'):
--p-query-cov=$pquerycov
#end if

#if str($pstrand) != 'None':
--p-strand=$pstrand
#end if

#if str($pminconsensus) not in ('', 'None'):
--p-min-consensus=$pminconsensus
#end if

#set $maxhits = str($pmaxhits).strip()
#if $maxhits not in ('', 'None'):
--p-maxhits='$maxhits'
#end if

#set $maxrejects = str($pmaxrejects).strip()
#if $maxrejects not in ('', 'None'):
--p-maxrejects='$maxrejects'
#end if

#set $confidence = str($pconfidence).strip()
#if $confidence not in ('', 'None'):
--p-confidence='$confidence'
#end if

#if str($preadorientation) != 'None':
--p-read-orientation=$preadorientation
#end if

## Use the number of slots Galaxy allocated to this job (1 if unset).
--p-threads=\${GALAXY_SLOTS:-1}

#if $pnoprefilter:
--p-no-prefilter
#end if

#if str($psamplesize) not in ('', 'None'):
--p-sample-size=$psamplesize
#end if

#if str($prandseed) not in ('', 'None'):
--p-randseed=$prandseed
#end if

--o-classification=oclassification

## Only publish the artifact when qiime exited successfully.
&&
cp oclassification.qza '$oclassification'

    ]]></command>
    <inputs>
        <param format="qza,no_unzip.zip" label="--i-query: ARTIFACT FeatureData[Sequence] Sequences to classify taxonomically. [required]" name="iquery" optional="False" type="data" />
        <param format="qza,no_unzip.zip" label="--i-reference-reads: ARTIFACT FeatureData[Sequence] reference sequences. [required]" name="ireferencereads" optional="False" type="data" />
        <param format="qza,no_unzip.zip" label="--i-reference-taxonomy: ARTIFACT FeatureData[Taxonomy] reference taxonomy labels. [required]" name="ireferencetaxonomy" optional="False" type="data" />
        <param format="qza,no_unzip.zip" label="--i-classifier: ARTIFACT TaxonomicClassifier Pre-trained sklearn taxonomic classifier for classifying the reads. [required]" name="iclassifier" optional="False" type="data" />
        <param label="--p-maxaccepts: INTEGER Range(1, None) or 'all' Maximum number of hits to keep for each query. Set to 'all' to keep all hits > perc_identity similarity. Leave empty to use the QIIME 2 default. [optional]" name="pmaxaccepts" optional="True" type="text" />
        <param label="--p-perc-identity: PROPORTION Range(0.0, 1.0, inclusive_end=True) Percent sequence similarity to use for PREFILTER. Reject match if percent identity to query is lower. Set to a lower value to perform a rough pre-filter. This parameter is ignored if `prefilter` is disabled. [default: 0.5]" max="1.0" min="0.0" name="ppercidentity" optional="True" type="float" value="0.5" />
        <param label="--p-query-cov: PROPORTION Range(0.0, 1.0, inclusive_end=True) Query coverage threshold to use for PREFILTER. Reject match if query alignment coverage per high-scoring pair is lower. Set to a lower value to perform a rough pre-filter. This parameter is ignored if `prefilter` is disabled. [default: 0.8]" max="1.0" min="0.0" name="pquerycov" optional="True" type="float" value="0.8" />
        <param label="--p-strand: Align against reference sequences in forward ('plus') or both directions ('both'). [optional]" name="pstrand" optional="True" type="select">
            <option selected="True" value="None">Selection is Optional</option>
            <option value="both">both</option>
            <option value="plus">plus</option>
        </param>
        <param label="--p-min-consensus: NUMBER Range(0.5, 1.0, inclusive_start=False, inclusive_end=True) Minimum fraction of assignments must match top hit to be accepted as consensus assignment. [default: 0.51]" max="1.0" min="0.5" name="pminconsensus" optional="True" type="float" value="0.51">
            <!-- The lower bound is exclusive: 0.5 itself is not a valid value. -->
            <validator type="in_range" min="0.5" max="1.0" exclude_min="true" message="min-consensus must be greater than 0.5 and at most 1.0." />
        </param>
        <param label="--p-maxhits: INTEGER Range(1, None) or 'all' Controls the total number of hits returned. Leave empty to use the QIIME 2 default. [optional]" name="pmaxhits" optional="True" type="text" />
        <param label="--p-maxrejects: INTEGER Range(1, None) or 'all' Works in pair with maxaccepts. Leave empty to use the QIIME 2 default. [optional]" name="pmaxrejects" optional="True" type="text" />
        <param label="--p-confidence: NUMBER Range(0, 1, inclusive_end=True) or 'disable' Confidence threshold for limiting taxonomic depth. Set to 'disable' to disable confidence calculation, or 0 to calculate confidence but not apply it to limit the taxonomic depth of the assignments. Leave empty to use the QIIME 2 default. [optional]" name="pconfidence" optional="True" type="text" />
        <param label="--p-read-orientation: Direction of reads with respect to reference sequences in pre-trained sklearn classifier. [optional]" name="preadorientation" optional="True" type="select">
            <option selected="True" value="None">Selection is Optional</option>
            <option value="same">same</option>
            <option value="reverse-complement">reverse-complement</option>
            <option value="auto">auto</option>
        </param>
        <param checked="false" label="--p-no-prefilter: Disable the positive filter of query sequences. [default: prefilter enabled]" name="pnoprefilter" type="boolean" />
        <param label="--p-sample-size: INTEGER Range(1, None) Randomly extract the given number of sequences from the reference database to use for prefiltering. This parameter is ignored if `prefilter` is disabled. [default: 1000]" min="1" name="psamplesize" optional="True" type="integer" value="1000" />
        <param label="--p-randseed: INTEGER Range(0, None) Use integer as a seed for the pseudo-random generator used during prefiltering. A given seed always produces the same output, which is useful for replicability. Set to 0 to use a pseudo-random seed. This parameter is ignored if `prefilter` is disabled. [default: 0]" min="0" name="prandseed" optional="True" type="integer" value="0" />
    </inputs>

    <outputs>
        <data format="qza" label="${tool.name} on ${on_string}: classification.qza" name="oclassification" />
    </outputs>

    <help><![CDATA[
ALPHA Hybrid classifier: VSEARCH exact match + sklearn classifier
#################################################################

NOTE: THIS PIPELINE IS AN ALPHA RELEASE. Please report bugs to
https://forum.qiime2.org! Assign taxonomy to query sequences using hybrid
classifier. First performs rough positive filter to remove artifact and
low-coverage sequences (use "prefilter" parameter to toggle this step on or
off). Second, performs VSEARCH exact match between query and
reference_reads to find exact matches, followed by least common ancestor
consensus taxonomy assignment from among maxaccepts top hits, min_consensus
of which share that taxonomic assignment. Query sequences without an exact
match are then classified with a pre-trained sklearn taxonomy classifier to
predict the most likely taxonomic lineage.

Parameters
----------
query : FeatureData[Sequence]
    Sequences to classify taxonomically.
reference_reads : FeatureData[Sequence]
    reference sequences.
reference_taxonomy : FeatureData[Taxonomy]
    reference taxonomy labels.
classifier : TaxonomicClassifier
    Pre-trained sklearn taxonomic classifier for classifying the reads.
maxaccepts : Int % Range(1, None) | Str % Choices('all'), optional
    Maximum number of hits to keep for each query. Set to "all" to keep all
    hits > perc_identity similarity. Note that if strand=both, maxaccepts
    will keep N hits for each direction (if searches in the opposite
    direction yield results that exceed the minimum perc_identity). In
    those cases use maxhits to control the total number of hits returned.
    This option works in pair with maxrejects. The search process sorts
    target sequences by decreasing number of k-mers they have in common
    with the query sequence, using that information as a proxy for sequence
    similarity. After pairwise alignments, if the first target sequence
    passes the acceptation criteria, it is accepted as best hit and the
    search process stops for that query. If maxaccepts is set to a higher
    value, more hits are accepted. If maxaccepts and maxrejects are both
    set to "all", the complete database is searched.
perc_identity : Float % Range(0.0, 1.0, inclusive_end=True), optional
    Percent sequence similarity to use for PREFILTER. Reject match if
    percent identity to query is lower. Set to a lower value to perform a
    rough pre-filter. This parameter is ignored if `prefilter` is disabled.
query_cov : Float % Range(0.0, 1.0, inclusive_end=True), optional
    Query coverage threshold to use for PREFILTER. Reject match if query
    alignment coverage per high-scoring pair is lower. Set to a lower value
    to perform a rough pre-filter. This parameter is ignored if `prefilter`
    is disabled.
strand : Str % Choices('both', 'plus'), optional
    Align against reference sequences in forward ("plus") or both
    directions ("both").
min_consensus : Float % Range(0.5, 1.0, inclusive_start=False, inclusive_end=True), optional
    Minimum fraction of assignments must match top hit to be accepted as
    consensus assignment.
maxhits : Int % Range(1, None) | Str % Choices('all'), optional
    See maxaccepts.
maxrejects : Int % Range(1, None) | Str % Choices('all'), optional
    See maxaccepts.
reads_per_batch : Int % Range(0, None), optional
    Number of reads to process in each batch for sklearn classification. If
    "auto", this parameter is autoscaled to min(number of query sequences /
    threads, 20000). This option is not exposed by this Galaxy wrapper.
confidence : Float % Range(0, 1, inclusive_end=True) | Str % Choices('disable'), optional
    Confidence threshold for limiting taxonomic depth. Set to "disable" to
    disable confidence calculation, or 0 to calculate confidence but not
    apply it to limit the taxonomic depth of the assignments.
read_orientation : Str % Choices('same', 'reverse-complement', 'auto'), optional
    Direction of reads with respect to reference sequences in pre-trained
    sklearn classifier. same will cause reads to be classified unchanged;
    reverse-complement will cause reads to be reversed and complemented
    prior to classification. "auto" will autodetect orientation based on
    the confidence estimates for the first 100 reads.
threads : Int % Range(1, None), optional
    Number of threads to use for job parallelization. In Galaxy this is set
    automatically from the number of slots allocated to the job.
prefilter : Bool, optional
    Toggle positive filter of query sequences on or off.
sample_size : Int % Range(1, None), optional
    Randomly extract the given number of sequences from the reference
    database to use for prefiltering. This parameter is ignored if
    `prefilter` is disabled.
randseed : Int % Range(0, None), optional
    Use integer as a seed for the pseudo-random generator used during
    prefiltering. A given seed always produces the same output, which is
    useful for replicability. Set to 0 to use a pseudo-random seed. This
    parameter is ignored if `prefilter` is disabled.

Returns
-------
classification : FeatureData[Taxonomy]
    The resulting taxonomy classifications.
    ]]></help>
    <expand macro="qiime_citation"/>
</tool>