comparison centrifuge.xml @ 0:be7bba8229c6 draft default tip

planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/centrifuge commit 5d9e0a6e2ad278547892c86f0acded93814a1356-dirty
author jvolkening
date Fri, 01 Nov 2019 13:53:15 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:be7bba8229c6
1 <tool id="centrifuge" name="Centrifuge" version="1.0.4_beta">
2
3 <description>Read-based metagenome characterization</description>
4
5 <!-- ***************************************************************** -->
6 <!-- Package dependency; its version string equals the tool's version attribute. -->
7 <requirements>
8 <requirement version="1.0.4_beta" type="package">centrifuge</requirement>
9 </requirements>
10
11 <!-- ***************************************************************** -->
12 <!-- Prints only the version token captured from each "centrifuge... version X" match. -->
13 <version_command>centrifuge --version | perl -wnE 'say $1 for /centrifuge\S+ version (\S+)/g'</version_command>
14
15 <!-- ***************************************************************** -->
16
17 <command detect_errors="aggressive">
18 <![CDATA[
19 ## Cheetah template that assembles the centrifuge command line from the form values.
20 centrifuge
21 ##--Output Options------------------------------
22 ## Pass the column list only when non-empty so the flag is never left without its argument.
23 --out-fmt $outputs.out_fmt
24 #if str($outputs.tab_fmt_cols) != "":
25 --tab-fmt-cols '$outputs.tab_fmt_cols'
26 #end if
27 ##--General Options-----------------------------
28 ## Parameter wrappers are compared through str(); unset optional values render as "".
29 --threads \${GALAXY_SLOTS:-1}
30
31 #if str($general.skip) != "":
32 --skip $general.skip
33 #end if
34 #if str($general.upto) != "":
35 --upto $general.upto
36 #end if
37 #if str($general.trim5) != "":
38 --trim5 $general.trim5
39 #end if
40 #if str($general.trim3) != "":
41 --trim3 $general.trim3
42 #end if
43 ## Boolean params expand to their flag when checked and to nothing otherwise.
44 $general.ignore_quals
45 $general.nofw
46 $general.norc
47 $general.non_deterministic
48
49 #if str($general.seed) != "":
50 --seed $general.seed
51 #end if
52
53 ##--Classification------------------------------
54
55 --min-hitlen $classification.min_hitlen
56
57 #if str($classification.min_totallen) != "":
58 --min-totallen $classification.min_totallen
59 #end if
60
61 #if str($classification.host_taxids) != "":
62 --host-taxids '$classification.host_taxids'
63 #end if
64
65 #if str($classification.exclude_taxids) != "":
66 --exclude-taxids '$classification.exclude_taxids'
67 #end if
68
69 ##--Inputs--------------------------------------
70
71 -x '${inputs.db.fields.path}'
72 ## One -U per unpaired repeat entry; one -1/-2 pair per paired collection.
73 #for $s in $inputs.unpaired
74 -U '${s.u_reads}'
75 #end for
76
77 #for $s in $inputs.paired
78 -1 '${s.p_reads.forward}'
79 -2 '${s.p_reads.reverse}'
80 #end for
81
82 #if str($inputs.sra) != "":
83 --sra-acc '$inputs.sra'
84 #end if
85 ## Classification output goes to whichever dataset matches the chosen format.
86 #if str($outputs.out_fmt) == "tab":
87 -S '$out_tab'
88 #elif str($outputs.out_fmt) == "sam":
89 -S '$out_sam'
90 #end if
91
92 --report-file '$report'
93
94 ]]>
95 </command>
96
97 <!-- ***************************************************************** -->
98
99 <inputs>
100 <!-- Read sources and the reference index to classify against. -->
101 <section name="inputs" title="Inputs" expanded="true">
102
103 <repeat name="unpaired" title="Unpaired reads" min="0" default="0">
104 <param name="u_reads" type="data" format="fastq" label="Unpaired reads"/>
105 </repeat>
106
107 <repeat name="paired" title="Paired reads" min="0" default="0">
108 <param name="p_reads" type="data_collection" collection_type="paired" format="fastq" label="Paired read collection"/>
109 </repeat>
110 <!-- Sanitizer keeps only letters, digits and commas (comma-separated accession list). -->
111 <param name="sra" type="text" label="SRA accession">
112 <sanitizer invalid_char="">
113 <valid initial="string.letters,string.digits">
114 <add value="," />
115 </valid>
116 </sanitizer>
117 </param>
118 <!-- Index choices come from the centrifuge_indices data table, sorted by column 2. -->
119 <param name="db" type="select" label="Select a reference database">
120 <options from_data_table="centrifuge_indices">
121 <filter type="sort_by" column="2"/>
122 <validator type="no_options" message="No Centrifuge indexes are available"/>
123 </options>
124 </param>
125
126 </section>
127
128 <section name="outputs" title="Outputs" expanded="false">
129
130 <param argument="--out-fmt" name="out_fmt" type="select" label="Output format">
131 <option value="tab" selected="true">tabular</option>
132 <option value="sam">SAM</option>
133 </param>
134 <!-- Column list is sanitized to letters, digits and commas. -->
135 <param argument="--tab-fmt-cols" name="tab_fmt_cols" type="text" label="Output columns" value="readID,seqID,taxID,score,2ndBestScore,hitLength,queryLength,numMatches">
136 <sanitizer invalid_char="">
137 <valid initial="string.letters,string.digits">
138 <add value="," />
139 </valid>
140 </sanitizer>
141 </param>
142
143 </section>
144
145 <section name="general" title="General options" expanded="false">
146 <!-- Optional integers default to empty, which omits the flag from the command line; negatives are rejected. -->
147 <param argument="--skip" type="integer" value="" min="0" optional="true" label="Initial reads to skip" />
148 <param argument="--upto" type="integer" value="" min="0" optional="true" label="Stop after reads" />
149 <param argument="--trim5" type="integer" value="" min="0" optional="true" label="Trim 5' bases" />
150 <param argument="--trim3" type="integer" value="" min="0" optional="true" label="Trim 3' bases" />
151
152 <param argument="--ignore-quals" name="ignore_quals" type="boolean" truevalue="--ignore-quals" falsevalue="" checked="false" label="Ignore qualities" />
153
154 <param argument="--nofw" type="boolean" truevalue="--nofw" falsevalue="" checked="false" label="Don't map forward strand" />
155 <param argument="--norc" type="boolean" truevalue="--norc" falsevalue="" checked="false" label="Don't map rev-com strand" />
156
157 <param argument="--seed" type="integer" value="" min="0" optional="true" label="Starting seed" />
158 <param argument="--non-deterministic" name="non_deterministic" type="boolean" truevalue="--non-deterministic" falsevalue="" checked="false" label="Use non-deterministic seeding" />
159
160 </section>
161
162 <section name="classification" title="Classification" expanded="true">
163
164 <param argument="--min-hitlen" name="min_hitlen" type="integer" value="22" min="16" label="Minimum hit length" />
165 <param argument="--min-totallen" name="min_totallen" type="integer" optional="true" min="0" label="Minimum summed length" />
166 <!-- Taxonomic ID lists are sanitized to digits and commas only. -->
167 <param argument="--host-taxids" name="host_taxids" type="text" label="Host taxonomic IDs">
168 <sanitizer invalid_char="">
169 <valid initial="string.digits">
170 <add value="," />
171 </valid>
172 </sanitizer>
173 </param>
174 <param argument="--exclude-taxids" name="exclude_taxids" type="text" label="Excluded taxonomic IDs">
175 <sanitizer invalid_char="">
176 <valid initial="string.digits">
177 <add value="," />
178 </valid>
179 </sanitizer>
180 </param>
181
182 </section>
183
184 </inputs>
185
186 <!-- ***************************************************************** -->
187
188 <outputs>
189 <!-- out_tab and out_sam are each kept only when the matching output format is selected. -->
190 <data name="out_tab" label="Centrifuge on ${on_string}: Output" format="tabular">
191 <filter>outputs['out_fmt'] == "tab"</filter>
192 </data>
193 <data name="out_sam" label="Centrifuge on ${on_string}: Output" format="sam">
194 <filter>outputs['out_fmt'] == "sam"</filter>
195 </data>
196 <data name="report" label="Centrifuge on ${on_string}: Report" format="tabular"/>
197
198 </outputs>
199
200 <!-- ***************************************************************** -->
201
202 <tests>
203 <!-- unpaired reads, all options at their defaults -->
204 <test>
205 <param name="db" value="test_db"/>
206 <param name="u_reads" value="input_u.fq" ftype="fastq"/>
207 <output name="out_tab" file="defaults.tsv" sort="true"/>
208 <output name="report" file="defaults.report"/>
209 </test>
210 <!-- one paired collection, all options at their defaults -->
211 <test>
212 <param name="db" value="test_db"/>
213 <param name="p_reads">
214 <collection type="paired">
215 <element name="forward" value="input_f.fq"/>
216 <element name="reverse" value="input_r.fq"/>
217 </collection>
218 </param>
219 <output name="out_tab" file="paired.tsv" sort="true"/>
220 <output name="report" file="paired.report"/>
221 </test>
222 <!-- unpaired and paired inputs in the same run -->
223 <test>
224 <param name="db" value="test_db"/>
225 <param name="u_reads" value="input_u.fq" ftype="fastq"/>
226 <param name="p_reads">
227 <collection type="paired">
228 <element name="forward" value="input_f.fq"/>
229 <element name="reverse" value="input_r.fq"/>
230 </collection>
231 </param>
232 <output name="out_tab" file="both.tsv" sort="true"/>
233 <output name="report" file="both.report"/>
234 </test>
235 <!-- excluded taxonomic ID -->
236 <test>
237 <param name="db" value="test_db"/>
238 <param name="exclude_taxids" value="9913"/>
239 <param name="u_reads" value="input_u.fq" ftype="fastq"/>
240 <output name="out_tab" file="exclude.tsv" sort="true"/>
241 <output name="report" file="exclude.report"/>
242 </test>
243 <!-- host taxonomic ID -->
244 <test>
245 <param name="db" value="test_db"/>
246 <param name="host_taxids" value="9913"/>
247 <param name="u_reads" value="input_u.fq" ftype="fastq"/>
248 <output name="out_tab" file="host.tsv" sort="true"/>
249 <output name="report" file="host.report"/>
250 </test>
251 <!-- raised minimum hit length -->
252 <test>
253 <param name="db" value="test_db"/>
254 <param name="min_hitlen" value="83"/>
255 <param name="u_reads" value="input_u.fq" ftype="fastq"/>
256 <output name="out_tab" file="minlen83.tsv" sort="true"/>
257 <output name="report" file="minlen83.report"/>
258 </test>
259 <!-- norc flag enabled -->
260 <test>
261 <param name="db" value="test_db"/>
262 <param name="norc" value="true"/>
263 <param name="u_reads" value="input_u.fq" ftype="fastq"/>
264 <output name="out_tab" file="norc.tsv" sort="true"/>
265 <output name="report" file="norc.report"/>
266 </test>
267 <!-- nofw flag enabled -->
268 <test>
269 <param name="db" value="test_db"/>
270 <param name="nofw" value="true"/>
271 <param name="u_reads" value="input_u.fq" ftype="fastq"/>
272 <output name="out_tab" file="nofw.tsv" sort="true"/>
273 <output name="report" file="nofw.report"/>
274 </test>
275 <!-- explicit random seed -->
276 <test>
277 <param name="db" value="test_db"/>
278 <param name="seed" value="123"/>
279 <param name="u_reads" value="input_u.fq" ftype="fastq"/>
280 <output name="out_tab" file="seed123.tsv" sort="true"/>
281 <output name="report" file="seed123.report"/>
282 </test>
283 <!-- trim 10 bases from the 5' end -->
284 <test>
285 <param name="db" value="test_db"/>
286 <param name="trim5" value="10"/>
287 <param name="u_reads" value="input_u.fq" ftype="fastq"/>
288 <output name="out_tab" file="trim5_10.tsv" sort="true"/>
289 <output name="report" file="trim5_10.report"/>
290 </test>
291 <!-- trim 5 bases from the 3' end -->
292 <test>
293 <param name="db" value="test_db"/>
294 <param name="trim3" value="5"/>
295 <param name="u_reads" value="input_u.fq" ftype="fastq"/>
296 <output name="out_tab" file="trim3_5.tsv" sort="true"/>
297 <output name="report" file="trim3_5.report"/>
298 </test>
299 <!-- skip the first 3 reads -->
300 <test>
301 <param name="db" value="test_db"/>
302 <param name="skip" value="3"/>
303 <param name="u_reads" value="input_u.fq" ftype="fastq"/>
304 <output name="out_tab" file="skip3.tsv" sort="true"/>
305 <output name="report" file="skip3.report"/>
306 </test>
307 <!-- upto option set to 6 reads -->
308 <test>
309 <param name="db" value="test_db"/>
310 <param name="upto" value="6"/>
311 <param name="u_reads" value="input_u.fq" ftype="fastq"/>
312 <output name="out_tab" file="upto6.tsv" sort="true"/>
313 <output name="report" file="upto6.report"/>
314 </test>
315 <!-- invalid output column name: the job is expected to fail -->
316 <test expect_failure="true">
317 <param name="db" value="test_db"/>
318 <param name="tab_fmt_cols" value="FooBar"/>
319 <param name="u_reads" value="input_u.fq" ftype="fastq"/>
320 </test>
321
322 </tests>
323
324 <!-- ***************************************************************** -->
325
326 <help>
327 <![CDATA[
328
329 Overview
330 --------
331
332 **Centrifuge** is a very rapid and memory-efficient system for the
333 classification of DNA sequences from microbial samples, with better
334 sensitivity than and comparable accuracy to other leading systems. The system
335 uses a novel indexing scheme based on the Burrows-Wheeler transform (BWT) and
336 the Ferragina-Manzini (FM) index, optimized specifically for the metagenomic
337 classification problem. Centrifuge requires a relatively small index (e.g.,
338 4.3 GB for ~4,100 bacterial genomes) yet provides very fast classification
339 speed, allowing it to process a typical DNA sequencing run within an hour.
340 Together these advances enable timely and accurate analysis of large
341 metagenomics data sets on conventional desktop computers.
342
343 Usage
344 -----
345
346 The following is the usage message for `centrifuge`. Options in the form above
347 can be matched to the entries below via the command-line flag shown in their
348 help text. Note that not all options are available in the Galaxy wrapper.
349
350 ::
351
352 centrifuge [options]* -x <cf-idx> {-1 <m1> -2 <m2> | -U <r> | --sra-acc <SRA accession number>} [-S <filename>] [--report-file <report>]
353
354 <cf-idx> Index filename prefix (minus trailing .X.cf).
355 <m1> Files with #1 mates, paired with files in <m2>.
356 Could be gzip'ed (extension: .gz) or bzip2'ed (extension: .bz2).
357 <m2> Files with #2 mates, paired with files in <m1>.
358 Could be gzip'ed (extension: .gz) or bzip2'ed (extension: .bz2).
359 <r> Files with unpaired reads.
360 Could be gzip'ed (extension: .gz) or bzip2'ed (extension: .bz2).
361 <SRA accession number> Comma-separated list of SRA accession numbers, e.g. --sra-acc SRR353653,SRR353654.
362 <filename> File for classification output (default: stdout)
363 <report> File for tabular report output (default: centrifuge_report.tsv)
364
365 <m1>, <m2>, <r> can be comma-separated lists (no whitespace) and can be
366 specified many times. E.g. '-U file1.fq,file2.fq -U file3.fq'.
367
368 Options (defaults in parentheses):
369
370 Input:
371 -q query input files are FASTQ .fq/.fastq (default)
372 --qseq query input files are in Illumina's qseq format
373 -f query input files are (multi-)FASTA .fa/.mfa
374 -r query input files are raw one-sequence-per-line
375 -c <m1>, <m2>, <r> are sequences themselves, not files
376 -s/--skip <int> skip the first <int> reads/pairs in the input (none)
377 -u/--upto <int> stop after first <int> reads/pairs (no limit)
378 -5/--trim5 <int> trim <int> bases from 5'/left end of reads (0)
379 -3/--trim3 <int> trim <int> bases from 3'/right end of reads (0)
380 --phred33 qualities are Phred+33 (default)
381 --phred64 qualities are Phred+64
382 --int-quals qualities encoded as space-delimited integers
383 --ignore-quals treat all quality values as 30 on Phred scale (off)
384 --nofw do not align forward (original) version of read (off)
385 --norc do not align reverse-complement version of read (off)
386 --sra-acc SRA accession ID
387
388 Classification:
389 --min-hitlen <int> minimum length of partial hits (default 22, must be greater than 15)
390 --min-totallen <int> minimum summed length of partial hits per read (default 0)
391 --host-taxids <taxids> comma-separated list of taxonomic IDs that will be preferred in classification
392 --exclude-taxids <taxids> comma-separated list of taxonomic IDs that will be excluded in classification
393
394 Output:
395 --out-fmt <str> define output format, either 'tab' or 'sam' (tab)
396 --tab-fmt-cols <str> columns in tabular format, comma separated
397 default: readID,seqID,taxID,score,2ndBestScore,hitLength,queryLength,numMatches
398 -t/--time print wall-clock time taken by search phases
399 --un <path> write unpaired reads that didn't align to <path>
400 --al <path> write unpaired reads that aligned at least once to <path>
401 --un-conc <path> write pairs that didn't align concordantly to <path>
402 --al-conc <path> write pairs that aligned concordantly at least once to <path>
403 (Note: for --un, --al, --un-conc, or --al-conc, add '-gz' to the option name, e.g.
404 --un-gz <path>, to gzip compress output, or add '-bz2' to bzip2 compress output.)
405 --quiet print nothing to stderr except serious errors
406 --met-file <path> send metrics to file at <path> (off)
407 --met-stderr send metrics to stderr (off)
408 --met <int> report internal counters & metrics every <int> secs (1)
409
410 Performance:
411 -o/--offrate <int> override offrate of index; must be >= index's offrate
412 -p/--threads <int> number of alignment threads to launch (1)
413 --mm use memory-mapped I/O for index; many 'bowtie's can share
414
415 Other:
416 --qc-filter filter out reads that are bad according to QSEQ filter
417 --seed <int> seed for random number generator (0)
418 --non-deterministic seed rand. gen. arbitrarily instead of using read attributes
419 --version print version information and quit
420 -h/--help print this usage message
421
422 ]]>
423 </help>
424
425 <!-- ***************************************************************** -->
426
427 <citations> <!-- DOI of the publication to cite for this tool. -->
428 <citation type="doi">10.1101/gr.210641.116</citation>
429 </citations>
430
431 </tool>