comparison centrifuge.xml @ 0:a33c3356b8cf draft default tip

"planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/centrifuge commit e7a2d92dc14da78e7ba641c603de75a985d037b6-dirty"
author thanhlv
date Mon, 31 Jan 2022 11:17:00 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:a33c3356b8cf
1 <tool id="centrifuge" name="Centrifuge" version="1.0.4_beta">
2
3 <description>Read-based metagenome characterization</description>
4
5 <!-- ***************************************************************** -->
6 <!-- Software packages Galaxy must make available to the job; the pinned version matches the tool version. -->
7 <requirements>
8 <requirement version="1.0.4_beta" type="package">centrifuge</requirement>
9 </requirements>
10
11 <!-- ***************************************************************** -->
12 <!-- Version probe: Perl prints the token that follows "centrifuge<suffix> version" in the output of the version flag. -->
13 <version_command>centrifuge --version | perl -wnE'print "$1\n" for /centrifuge\S+ version (\S+)/g'</version_command>
14
15 <!-- ***************************************************************** -->
16
17 <command detect_errors="aggressive">
18 <![CDATA[
19 ## Assemble a single centrifuge call. Text parameters and dataset paths are single-quoted for the shell.
20 centrifuge
21
22 ##--Output Options (a blank column list is omitted so it cannot swallow the next flag)--
23 --out-fmt $outputs.out_fmt
24 #if str($outputs.tab_fmt_cols) != "":
25 --tab-fmt-cols '$outputs.tab_fmt_cols'
26 #end if
27 ##--General Options-----------------------------
28 ## GALAXY_SLOTS is expanded by the shell at run time (hence the backslash); falls back to 1 thread.
29 --threads \${GALAXY_SLOTS:-1}
30 ## Optional integers: str() makes the blank test independent of how the wrapper object compares to "".
31 #if str($general.skip) != "":
32 --skip $general.skip
33 #end if
34 #if str($general.upto) != "":
35 --upto $general.upto
36 #end if
37 #if str($general.trim5) != "":
38 --trim5 $general.trim5
39 #end if
40 #if str($general.trim3) != "":
41 --trim3 $general.trim3
42 #end if
43 ## Boolean parameters render as the flag itself when checked and as nothing otherwise.
44 $general.ignore_quals
45 $general.nofw
46 $general.norc
47 $general.non_deterministic
48
49 #if str($general.seed) != "":
50 --seed $general.seed
51 #end if
52
53 ##--Classification------------------------------
54
55 --min-hitlen $classification.min_hitlen
56 -k $classification.k_distinct
57
58 #if str($classification.min_totallen) != "":
59 --min-totallen $classification.min_totallen
60 #end if
61
62 #if str($classification.host_taxids) != "":
63 --host-taxids '$classification.host_taxids'
64 #end if
65
66 #if str($classification.exclude_taxids) != "":
67 --exclude-taxids '$classification.exclude_taxids'
68 #end if
69
70 ##--Inputs--------------------------------------
71 ## Index prefix is read from the path field of the selected data table entry.
72 -x '${inputs.db.fields.path}'
73
74 #for $s in $inputs.unpaired
75 -U '${s.u_reads}'
76 #end for
77
78 #for $s in $inputs.paired
79 -1 '${s.p_reads.forward}'
80 -2 '${s.p_reads.reverse}'
81 #end for
82
83 #if str($inputs.sra) != "":
84 --sra-acc '$inputs.sra'
85 #end if
86 ## Classification output is written to the dataset that matches the chosen format.
87 #if $outputs.out_fmt == "tab":
88 -S '$out_tab'
89 #elif $outputs.out_fmt == "sam":
90 -S '$out_sam'
91 #end if
92
93 --report-file '$report'
94
95 ]]>
96 </command>
97
98 <!-- ***************************************************************** -->
99
100 <inputs>
101 <!-- Section and parameter names are referenced by the command template and the tests; keep them stable. -->
102 <section name="inputs" title="Inputs" expanded="true">
103 <!-- Reads may be supplied as unpaired datasets, paired collections, SRA accessions, or any mix of these. -->
104 <repeat name="unpaired" title="Unpaired reads" min="0" default="0">
105 <param name="u_reads" type="data" format="fastq" label="Unpaired reads"/>
106 </repeat>
107
108 <repeat name="paired" title="Paired reads" min="0" default="0">
109 <param name="p_reads" type="data_collection" collection_type="paired" format="fastq" label="Paired read collection"/>
110 </repeat>
111 <!-- Sanitizer keeps letters, digits and commas only; any other character is dropped from the value. -->
112 <param argument="--sra-acc" name="sra" type="text" label="SRA accession" help="One accession, or several separated by commas (no spaces)">
113 <sanitizer invalid_char="">
114 <valid initial="string.letters,string.digits">
115 <add value="," />
116 </valid>
117 </sanitizer>
118 </param>
119 <!-- Reference indexes come from the centrifuge_indices data table maintained by the Galaxy administrator. -->
120 <param name="db" type="select" label="Select a reference database">
121 <options from_data_table="centrifuge_indices">
122 <filter type="sort_by" column="2"/>
123 <validator type="no_options" message="No Centrifuge indexes are available"/>
124 </options>
125 </param>
126
127 </section>
128
129 <section name="outputs" title="Outputs" expanded="false">
130
131 <param argument="--out-fmt" name="out_fmt" type="select" label="Output format">
132 <option value="tab" selected="true">tabular</option>
133 <option value="sam">SAM</option>
134 </param>
135 <!-- Column names are restricted to letters, digits and commas by the sanitizer. -->
136 <param argument="--tab-fmt-cols" name="tab_fmt_cols" type="text" label="Output columns" value="readID,seqID,taxID,score,2ndBestScore,hitLength,queryLength,numMatches">
137 <sanitizer invalid_char="">
138 <valid initial="string.letters,string.digits">
139 <add value="," />
140 </valid>
141 </sanitizer>
142 </param>
143
144 </section>
145
146 <section name="general" title="General options" expanded="false">
147 <!-- Optional integers are left off the command line when blank; negative values are rejected by min. -->
148 <param argument="--skip" type="integer" value="" min="0" optional="true" label="Initial reads to skip" />
149 <param argument="--upto" type="integer" value="" min="0" optional="true" label="Stop after reads" />
150 <param argument="--trim5" type="integer" value="" min="0" optional="true" label="Trim 5' bases" />
151 <param argument="--trim3" type="integer" value="" min="0" optional="true" label="Trim 3' bases" />
152
153 <param argument="--ignore-quals" name="ignore_quals" type="boolean" truevalue="--ignore-quals" falsevalue="" checked="false" label="Ignore qualities" />
154
155 <param argument="--nofw" type="boolean" truevalue="--nofw" falsevalue="" checked="false" label="Don't map forward strand" />
156 <param argument="--norc" type="boolean" truevalue="--norc" falsevalue="" checked="false" label="Don't map rev-com strand" />
157
158 <param argument="--seed" type="integer" value="" min="0" optional="true" label="Starting seed" />
159 <param argument="--non-deterministic" name="non_deterministic" type="boolean" truevalue="--non-deterministic" falsevalue="" checked="false" label="Use non-deterministic seeding" />
160
161 </section>
162
163 <section name="classification" title="Classification" expanded="true">
164 <!-- k_distinct accepts any value from 1 upwards (the default stays at 5), so a single best assignment can be requested. -->
165 <param argument="--min-hitlen" name="min_hitlen" type="integer" value="22" min="16" label="Minimum hit length" />
166 <param argument="-k" name="k_distinct" type="integer" value="5" min="1" label="Searching for at most k distinct, primary assignments for each read or pair." help="Primary assignments mean assignments whose assignment score is equal or higher than any other assignments. If there are more primary assignments than this value, the search will merge some of the assignments into a higher taxonomic rank. The assignment score for a paired-end assignment equals the sum of the assignment scores of the individual mates"/>
167 <param argument="--min-totallen" name="min_totallen" type="integer" optional="true" min="0" label="Minimum summed length" />
168 <!-- Taxonomic ID lists: digits and commas only. -->
169 <param argument="--host-taxids" name="host_taxids" type="text" label="Host taxonomic IDs">
170 <sanitizer invalid_char="">
171 <valid initial="string.digits">
172 <add value="," />
173 </valid>
174 </sanitizer>
175 </param>
176 <param argument="--exclude-taxids" name="exclude_taxids" type="text" label="Excluded taxonomic IDs">
177 <sanitizer invalid_char="">
178 <valid initial="string.digits">
179 <add value="," />
180 </valid>
181 </sanitizer>
182 </param>
183
184 </section>
185
186 </inputs>
187
188 <!-- ***************************************************************** -->
189
190 <outputs>
191 <!-- One classification dataset is created, chosen by the selected output format; the report has no filter and is always created. -->
192 <data format="tabular" name="out_tab" label="Centrifuge on ${on_string}: Output">
193 <filter>outputs['out_fmt'] == "tab"</filter>
194 </data>
195 <data format="sam" name="out_sam" label="Centrifuge on ${on_string}: Output">
196 <filter>outputs['out_fmt'] == "sam"</filter>
197 </data>
198 <data format="tabular" name="report" label="Centrifuge on ${on_string}: Report"/>
199
200 </outputs>
201
202 <!-- ***************************************************************** -->
203
204 <tests>
205 <!-- Defaults with one unpaired FASTQ. In every test the classification output is compared with sort="true", the report without it. -->
206 <test>
207 <param name="db" value="test_db" />
208 <param name="u_reads" ftype="fastq" value="input_u.fq" />
209 <output name="out_tab" file="defaults.tsv" sort="true" />
210 <output name="report" file="defaults.report" />
211 </test>
212 <!-- Defaults with one paired collection (forward + reverse). -->
213 <test>
214 <param name="db" value="test_db" />
215 <param name="p_reads">
216 <collection type="paired">
217 <element name="forward" value="input_f.fq" />
218 <element name="reverse" value="input_r.fq" />
219 </collection>
220 </param>
221 <output name="out_tab" file="paired.tsv" sort="true" />
222 <output name="report" file="paired.report" />
223 </test>
224 <!-- Defaults with unpaired and paired reads supplied together. -->
225 <test>
226 <param name="db" value="test_db" />
227 <param name="u_reads" ftype="fastq" value="input_u.fq" />
228 <param name="p_reads">
229 <collection type="paired">
230 <element name="forward" value="input_f.fq" />
231 <element name="reverse" value="input_r.fq" />
232 </collection>
233 </param>
234 <output name="out_tab" file="both.tsv" sort="true" />
235 <output name="report" file="both.report" />
236 </test>
237 <!-- exclude_taxids: exclude taxonomic ID 9913 from classification. -->
238 <test>
239 <param name="db" value="test_db" />
240 <param name="exclude_taxids" value="9913" />
241 <param name="u_reads" ftype="fastq" value="input_u.fq" />
242 <output name="out_tab" file="exclude.tsv" sort="true" />
243 <output name="report" file="exclude.report" />
244 </test>
245 <!-- host_taxids: declare taxonomic ID 9913 as host. -->
246 <test>
247 <param name="db" value="test_db" />
248 <param name="host_taxids" value="9913" />
249 <param name="u_reads" ftype="fastq" value="input_u.fq" />
250 <output name="out_tab" file="host.tsv" sort="true" />
251 <output name="report" file="host.report" />
252 </test>
253 <!-- min_hitlen: raise the minimum hit length from the default 22 to 83. -->
254 <test>
255 <param name="db" value="test_db" />
256 <param name="min_hitlen" value="83" />
257 <param name="u_reads" ftype="fastq" value="input_u.fq" />
258 <output name="out_tab" file="minlen83.tsv" sort="true" />
259 <output name="report" file="minlen83.report" />
260 </test>
261 <!-- norc: do not map the reverse-complement strand. -->
262 <test>
263 <param name="db" value="test_db" />
264 <param name="norc" value="true" />
265 <param name="u_reads" ftype="fastq" value="input_u.fq" />
266 <output name="out_tab" file="norc.tsv" sort="true" />
267 <output name="report" file="norc.report" />
268 </test>
269 <!-- nofw: do not map the forward strand. -->
270 <test>
271 <param name="db" value="test_db" />
272 <param name="nofw" value="true" />
273 <param name="u_reads" ftype="fastq" value="input_u.fq" />
274 <output name="out_tab" file="nofw.tsv" sort="true" />
275 <output name="report" file="nofw.report" />
276 </test>
277 <!-- seed: explicit starting seed of 123. -->
278 <test>
279 <param name="db" value="test_db" />
280 <param name="seed" value="123" />
281 <param name="u_reads" ftype="fastq" value="input_u.fq" />
282 <output name="out_tab" file="seed123.tsv" sort="true" />
283 <output name="report" file="seed123.report" />
284 </test>
285 <!-- trim5: trim 10 bases from the 5' end. -->
286 <test>
287 <param name="db" value="test_db" />
288 <param name="trim5" value="10" />
289 <param name="u_reads" ftype="fastq" value="input_u.fq" />
290 <output name="out_tab" file="trim5_10.tsv" sort="true" />
291 <output name="report" file="trim5_10.report" />
292 </test>
293 <!-- trim3: trim 5 bases from the 3' end. -->
294 <test>
295 <param name="db" value="test_db" />
296 <param name="trim3" value="5" />
297 <param name="u_reads" ftype="fastq" value="input_u.fq" />
298 <output name="out_tab" file="trim3_5.tsv" sort="true" />
299 <output name="report" file="trim3_5.report" />
300 </test>
301 <!-- skip: skip the first 3 reads of the input. -->
302 <test>
303 <param name="db" value="test_db" />
304 <param name="skip" value="3" />
305 <param name="u_reads" ftype="fastq" value="input_u.fq" />
306 <output name="out_tab" file="skip3.tsv" sort="true" />
307 <output name="report" file="skip3.report" />
308 </test>
309 <!-- upto: stop after the first 6 reads. -->
310 <test>
311 <param name="db" value="test_db" />
312 <param name="upto" value="6" />
313 <param name="u_reads" ftype="fastq" value="input_u.fq" />
314 <output name="out_tab" file="upto6.tsv" sort="true" />
315 <output name="report" file="upto6.report" />
316 </test>
317 <!-- Invalid value: the column name FooBar passes the sanitizer, and the job is expected to fail. -->
318 <test expect_failure="true">
319 <param name="db" value="test_db" />
320 <param name="tab_fmt_cols" value="FooBar" />
321 <param name="u_reads" ftype="fastq" value="input_u.fq" />
322 </test>
323
324 </tests>
325
326 <!-- ***************************************************************** -->
327
328 <help>
329 <![CDATA[
330
331 Overview
332 --------
333
334 **Credit**
335 This wrapper was modified from the original version at https://github.com/jvolkening/galaxy-tools
336
337 **Centrifuge** is a very rapid and memory-efficient system for the
338 classification of DNA sequences from microbial samples, with better
339 sensitivity than and comparable accuracy to other leading systems. The system
340 uses a novel indexing scheme based on the Burrows-Wheeler transform (BWT) and
341 the Ferragina-Manzini (FM) index, optimized specifically for the metagenomic
342 classification problem. Centrifuge requires a relatively small index (e.g.,
343 4.3 GB for ~4,100 bacterial genomes) yet provides very fast classification
344 speed, allowing it to process a typical DNA sequencing run within an hour.
345 Together these advances enable timely and accurate analysis of large
346 metagenomics data sets on conventional desktop computers.
347
348 Usage
349 -----
350
351 The following is the usage message of ``centrifuge``. Most options in the form
352 above show their command-line flag, so they can be matched to this text. Note
353 that not all options are available in the Galaxy wrapper.
354
355 ::
356
357 centrifuge [options]* -x <cf-idx> {-1 <m1> -2 <m2> | -U <r> | --sra-acc <SRA accession number>} [-S <filename>] [--report-file <report>]
358
359 <cf-idx> Index filename prefix (minus trailing .X.cf).
360 <m1> Files with #1 mates, paired with files in <m2>.
361 Could be gzip'ed (extension: .gz) or bzip2'ed (extension: .bz2).
362 <m2> Files with #2 mates, paired with files in <m1>.
363 Could be gzip'ed (extension: .gz) or bzip2'ed (extension: .bz2).
364 <r> Files with unpaired reads.
365 Could be gzip'ed (extension: .gz) or bzip2'ed (extension: .bz2).
366 <SRA accession number> Comma-separated list of SRA accession numbers, e.g. --sra-acc SRR353653,SRR353654.
367 <filename> File for classification output (default: stdout)
368 <report> File for tabular report output (default: centrifuge_report.tsv)
369
370 <m1>, <m2>, <r> can be comma-separated lists (no whitespace) and can be
371 specified many times. E.g. '-U file1.fq,file2.fq -U file3.fq'.
372
373 Options (defaults in parentheses):
374
375 Input:
376 -q query input files are FASTQ .fq/.fastq (default)
377 --qseq query input files are in Illumina's qseq format
378 -f query input files are (multi-)FASTA .fa/.mfa
379 -r query input files are raw one-sequence-per-line
380 -c <m1>, <m2>, <r> are sequences themselves, not files
381 -s/--skip <int> skip the first <int> reads/pairs in the input (none)
382 -u/--upto <int> stop after first <int> reads/pairs (no limit)
383 -5/--trim5 <int> trim <int> bases from 5'/left end of reads (0)
384 -3/--trim3 <int> trim <int> bases from 3'/right end of reads (0)
385 --phred33 qualities are Phred+33 (default)
386 --phred64 qualities are Phred+64
387 --int-quals qualities encoded as space-delimited integers
388 --ignore-quals treat all quality values as 30 on Phred scale (off)
389 --nofw do not align forward (original) version of read (off)
390 --norc do not align reverse-complement version of read (off)
391 --sra-acc SRA accession ID
392
393 Classification:
394 --min-hitlen <int> minimum length of partial hits (default 22, must be greater than 15)
395 --min-totallen <int> minimum summed length of partial hits per read (default 0)
396 --host-taxids <taxids> comma-separated list of taxonomic IDs that will be preferred in classification
397 --exclude-taxids <taxids> comma-separated list of taxonomic IDs that will be excluded in classification
398
399 Output:
400 --out-fmt <str> define output format, either 'tab' or 'sam' (tab)
401 --tab-fmt-cols <str> columns in tabular format, comma separated
402 default: readID,seqID,taxID,score,2ndBestScore,hitLength,queryLength,numMatches
403 -t/--time print wall-clock time taken by search phases
404 --un <path> write unpaired reads that didn't align to <path>
405 --al <path> write unpaired reads that aligned at least once to <path>
406 --un-conc <path> write pairs that didn't align concordantly to <path>
407 --al-conc <path> write pairs that aligned concordantly at least once to <path>
408 (Note: for --un, --al, --un-conc, or --al-conc, add '-gz' to the option name, e.g.
409 --un-gz <path>, to gzip compress output, or add '-bz2' to bzip2 compress output.)
410 --quiet print nothing to stderr except serious errors
411 --met-file <path> send metrics to file at <path> (off)
412 --met-stderr send metrics to stderr (off)
413 --met <int> report internal counters & metrics every <int> secs (1)
414
415 Performance:
416 -o/--offrate <int> override offrate of index; must be >= index's offrate
417 -p/--threads <int> number of alignment threads to launch (1)
418 --mm use memory-mapped I/O for index; many 'bowtie's can share
419
420 Other:
421 --qc-filter filter out reads that are bad according to QSEQ filter
422 --seed <int> seed for random number generator (0)
423 --non-deterministic seed rand. gen. arbitrarily instead of using read attributes
424 --version print version information and quit
425 -h/--help print this usage message
426
427 ]]>
428 </help>
429
430 <!-- ***************************************************************** -->
431 <!-- Publication offered to users as the citation for this tool, identified by its DOI. -->
432 <citations>
433 <citation type="doi">10.1101/gr.210641.116</citation>
434 </citations>
435
436 </tool>