Mercurial > repos > jvolkening > centrifuge
comparison centrifuge.xml @ 0:be7bba8229c6 draft default tip
planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/centrifuge commit 5d9e0a6e2ad278547892c86f0acded93814a1356-dirty
author | jvolkening |
---|---|
date | Fri, 01 Nov 2019 13:53:15 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:be7bba8229c6 |
---|---|
1 <tool id="centrifuge" name="Centrifuge" version="1.0.4_beta"> | |
2 <!-- Galaxy wrapper around the centrifuge command-line program; the wrapper version string equals the required package version. --> | |
3 <description>Read-based metagenome characterization</description> | |
4 | |
5 <!-- ***************************************************************** --> | |
6 | |
7 <requirements> <!-- Package dependency: centrifuge 1.0.4_beta, the program invoked by the command template. --> | |
8 <requirement type="package" version="1.0.4_beta">centrifuge</requirement> | |
9 </requirements> | |
10 | |
11 <!-- ***************************************************************** --> | |
12 | |
13 <version_command>centrifuge --version | perl -wnE'print "$1\n" for /centrifuge\S+ version (\S+)/g'</version_command> <!-- Pipes the program's version output through perl and prints only the token that follows "centrifuge... version" for each match. --> | |
14 | |
15 <!-- ***************************************************************** --> | |
16 | |
17 <command detect_errors="aggressive"> | |
18 <![CDATA[ | |
19 ## Assemble one centrifuge invocation from the tool parameters. | |
20 centrifuge | |
21 ##--Output Options------------------------------ | |
22 ## An emptied column list is skipped so centrifuge uses its own default instead of consuming the next option as the value. | |
23 --out-fmt $outputs.out_fmt | |
24 #if $outputs.tab_fmt_cols != "": | |
25 --tab-fmt-cols '$outputs.tab_fmt_cols' | |
26 #end if | |
27 ##--General Options----------------------------- | |
28 ## The dollar sign is escaped so the shell (not Cheetah) expands GALAXY_SLOTS; falls back to 1 thread. | |
29 --threads \${GALAXY_SLOTS:-1} | |
30 ## Optional integer settings are passed only when a value was supplied. | |
31 #if $general.skip != "": | |
32 --skip $general.skip | |
33 #end if | |
34 #if $general.upto != "": | |
35 --upto $general.upto | |
36 #end if | |
37 #if $general.trim5 != "": | |
38 --trim5 $general.trim5 | |
39 #end if | |
40 #if $general.trim3 != "": | |
41 --trim3 $general.trim3 | |
42 #end if | |
43 ## Boolean parameters expand to their flag text when checked and to nothing otherwise. | |
44 $general.ignore_quals | |
45 $general.nofw | |
46 $general.norc | |
47 $general.non_deterministic | |
48 | |
49 #if $general.seed != "": | |
50 --seed $general.seed | |
51 #end if | |
52 | |
53 ##--Classification------------------------------ | |
54 | |
55 --min-hitlen $classification.min_hitlen | |
56 | |
57 #if $classification.min_totallen != "": | |
58 --min-totallen $classification.min_totallen | |
59 #end if | |
60 | |
61 #if $classification.host_taxids != "": | |
62 --host-taxids '$classification.host_taxids' | |
63 #end if | |
64 | |
65 #if $classification.exclude_taxids != "": | |
66 --exclude-taxids '$classification.exclude_taxids' | |
67 #end if | |
68 | |
69 ##--Inputs-------------------------------------- | |
70 ## Paths and free-text values are single-quoted so each stays a single shell argument. | |
71 -x '${inputs.db.fields.path}' | |
72 | |
73 #for $s in $inputs.unpaired | |
74 -U '${s.u_reads}' | |
75 #end for | |
76 | |
77 #for $s in $inputs.paired | |
78 -1 '${s.p_reads.forward}' | |
79 -2 '${s.p_reads.reverse}' | |
80 #end for | |
81 | |
82 #if $inputs.sra: | |
83 --sra-acc '$inputs.sra' | |
84 #end if | |
85 ## The classification output is written to whichever dataset matches the chosen format. | |
86 #if $outputs.out_fmt == "tab": | |
87 -S '$out_tab' | |
88 #elif $outputs.out_fmt == "sam": | |
89 -S '$out_sam' | |
90 #end if | |
91 | |
92 --report-file '$report' | |
93 | |
94 ]]> | |
95 </command> | |
96 | |
97 <!-- ***************************************************************** --> | |
98 | |
99 <inputs> | |
100 | |
101 <section name="inputs" title="Inputs" expanded="True"> | |
102 <!-- Read sources: any number of unpaired FASTQ datasets, paired FASTQ collections, and an optional SRA accession list. --> | |
103 <repeat name="unpaired" title="Unpaired reads" min="0" default="0"> | |
104 <param name="u_reads" type="data" format="fastq" label="Unpaired reads"/> | |
105 </repeat> | |
106 | |
107 <repeat name="paired" title="Paired reads" min="0" default="0"> | |
108 <param name="p_reads" type="data_collection" collection_type="paired" format="fastq" label="Paired read collection"/> | |
109 </repeat> | |
110 <!-- The sanitizer allows only letters, digits and commas; any other character is replaced by the empty string. --> | |
111 <param name="sra" type="text" label="SRA accession"> | |
112 <sanitizer invalid_char=""> | |
113 <valid initial="string.letters,string.digits"> | |
114 <add value="," /> | |
115 </valid> | |
116 </sanitizer> | |
117 </param> | |
118 <!-- Index choices come from the centrifuge_indices data table, sorted on column 2; the command template reads the selected entry's path field. --> | |
119 <param name="db" type="select" label="Select a reference database"> | |
120 <options from_data_table="centrifuge_indices"> | |
121 <filter type="sort_by" column="2"/> | |
122 <validator type="no_options" message="No indexes are available for the selected input dataset"/> | |
123 </options> | |
124 </param> | |
125 | |
126 </section> | |
127 | |
128 <section name="outputs" title="Outputs" expanded="False"> | |
129 <!-- Format of the classification output; the output dataset filters test this value to decide which dataset is created. --> | |
130 <param argument="--out-fmt" name="out_fmt" type="select" label="Output format"> | |
131 <option value="tab" selected="true">tabular</option> | |
132 <option value="sam">SAM</option> | |
133 </param> | |
134 <!-- Comma-separated column names; the sanitizer allows only letters, digits and commas. --> | |
135 <param argument="--tab-fmt-cols" name="tab_fmt_cols" type="text" label="Output columns" value="readID,seqID,taxID,score,2ndBestScore,hitLength,queryLength,numMatches"> | |
136 <sanitizer invalid_char=""> | |
137 <valid initial="string.letters,string.digits"> | |
138 <add value="," /> | |
139 </valid> | |
140 </sanitizer> | |
141 </param> | |
142 | |
143 </section> | |
144 | |
145 <section name="general" title="General options" expanded="False"> | |
146 <!-- Optional read-count and trimming limits. Left empty they are omitted from the command line; negative values are rejected (min="0"), consistent with the seed parameter below. --> | |
147 <param argument="--skip" type="integer" value="" min="0" optional="true" label="Initial reads to skip" /> | |
148 <param argument="--upto" type="integer" value="" min="0" optional="true" label="Stop after reads" /> | |
149 <param argument="--trim5" type="integer" value="" min="0" optional="true" label="Trim 5' bases" /> | |
150 <param argument="--trim3" type="integer" value="" min="0" optional="true" label="Trim 3' bases" /> | |
151 <!-- Boolean flags contribute their truevalue to the command line when checked and an empty string otherwise. --> | |
152 <param argument="--ignore-quals" name="ignore_quals" type="boolean" truevalue="--ignore-quals" falsevalue="" checked="no" label="Ignore qualities" /> | |
153 | |
154 <param argument="--nofw" type="boolean" truevalue="--nofw" falsevalue="" checked="no" label="Don't map forward strand" /> | |
155 <param argument="--norc" type="boolean" truevalue="--norc" falsevalue="" checked="no" label="Don't map rev-com strand" /> | |
156 <!-- Random number generator controls: an explicit non-negative seed, or arbitrary seeding. --> | |
157 <param argument="--seed" type="integer" value="" min="0" optional="true" label="Starting seed" /> | |
158 <param argument="--non-deterministic" name="non_deterministic" type="boolean" truevalue="--non-deterministic" falsevalue="" checked="no" label="Use non-deterministic seeding" /> | |
159 | |
160 </section> | |
161 | |
162 <section name="classification" title="Classification" expanded="True"> | |
163 <!-- Hit-length thresholds: min_hitlen is always passed (default 22, lower bound 16); min_totallen is optional. --> | |
164 <param argument="--min-hitlen" name="min_hitlen" type="integer" value="22" min="16" label="Minimum hit length" /> | |
165 <param argument="--min-totallen" name="min_totallen" type="integer" optional="true" min="0" label="Minimum summed length" /> | |
166 <!-- Taxonomic ID lists: the sanitizers allow only digits and commas; any other character is replaced by the empty string. --> | |
167 <param argument="--host-taxids" name="host_taxids" type="text" label="Host taxonomic IDs"> | |
168 <sanitizer invalid_char=""> | |
169 <valid initial="string.digits"> | |
170 <add value="," /> | |
171 </valid> | |
172 </sanitizer> | |
173 </param> | |
174 <param argument="--exclude-taxids" name="exclude_taxids" type="text" label="Excluded taxonomic IDs"> | |
175 <sanitizer invalid_char=""> | |
176 <valid initial="string.digits"> | |
177 <add value="," /> | |
178 </valid> | |
179 </sanitizer> | |
180 </param> | |
181 | |
182 </section> | |
183 | |
184 </inputs> | |
185 | |
186 <!-- ***************************************************************** --> | |
187 | |
188 <outputs> | |
189 <!-- Either out_tab or out_sam is created, depending on the out_fmt parameter; the report dataset has no filter and is always created. --> | |
190 <data name="out_tab" format="tabular" label="Centrifuge on ${on_string}: Output"> | |
191 <filter>(outputs['out_fmt'] == "tab")</filter> | |
192 </data> | |
193 <data name="out_sam" format="sam" label="Centrifuge on ${on_string}: Output"> | |
194 <filter>(outputs['out_fmt'] == "sam")</filter> | |
195 </data> | |
196 <data name="report" format="tabular" label="Centrifuge on ${on_string}: Report" /> | |
197 | |
198 </outputs> | |
199 | |
200 <!-- ***************************************************************** --> | |
201 | |
202 <tests> | |
203 <!-- default settings, one unpaired FASTQ input --> | |
204 <test> | |
205 <param name="db" value="test_db" /> | |
206 <param name="u_reads" ftype="fastq" value="input_u.fq" /> | |
207 <output name="out_tab" file="defaults.tsv" sort="true" /> | |
208 <output name="report" file="defaults.report" /> | |
209 </test> | |
210 <!-- default settings, one paired collection --> | |
211 <test> | |
212 <param name="db" value="test_db" /> | |
213 <param name="p_reads"> | |
214 <collection type="paired"> | |
215 <element name="forward" value="input_f.fq" /> | |
216 <element name="reverse" value="input_r.fq" /> | |
217 </collection> | |
218 </param> | |
219 <output name="out_tab" file="paired.tsv" sort="true" /> | |
220 <output name="report" file="paired.report" /> | |
221 </test> | |
222 <!-- default settings, unpaired and paired inputs combined in one run --> | |
223 <test> | |
224 <param name="db" value="test_db" /> | |
225 <param name="u_reads" ftype="fastq" value="input_u.fq" /> | |
226 <param name="p_reads"> | |
227 <collection type="paired"> | |
228 <element name="forward" value="input_f.fq" /> | |
229 <element name="reverse" value="input_r.fq" /> | |
230 </collection> | |
231 </param> | |
232 <output name="out_tab" file="both.tsv" sort="true" /> | |
233 <output name="report" file="both.report" /> | |
234 </test> | |
235 <!-- exclude_taxids: exclude taxonomic ID 9913 --> | |
236 <test> | |
237 <param name="db" value="test_db" /> | |
238 <param name="exclude_taxids" value="9913" /> | |
239 <param name="u_reads" ftype="fastq" value="input_u.fq" /> | |
240 <output name="out_tab" file="exclude.tsv" sort="true" /> | |
241 <output name="report" file="exclude.report" /> | |
242 </test> | |
243 <!-- host_taxids: specify taxonomic ID 9913 as host --> | |
244 <test> | |
245 <param name="db" value="test_db" /> | |
246 <param name="host_taxids" value="9913" /> | |
247 <param name="u_reads" ftype="fastq" value="input_u.fq" /> | |
248 <output name="out_tab" file="host.tsv" sort="true" /> | |
249 <output name="report" file="host.report" /> | |
250 </test> | |
251 <!-- min_hitlen: minimum hit length raised to 83 --> | |
252 <test> | |
253 <param name="db" value="test_db" /> | |
254 <param name="min_hitlen" value="83" /> | |
255 <param name="u_reads" ftype="fastq" value="input_u.fq" /> | |
256 <output name="out_tab" file="minlen83.tsv" sort="true" /> | |
257 <output name="report" file="minlen83.report" /> | |
258 </test> | |
259 <!-- norc flag enabled --> | |
260 <test> | |
261 <param name="db" value="test_db" /> | |
262 <param name="norc" value="true" /> | |
263 <param name="u_reads" ftype="fastq" value="input_u.fq" /> | |
264 <output name="out_tab" file="norc.tsv" sort="true" /> | |
265 <output name="report" file="norc.report" /> | |
266 </test> | |
267 <!-- nofw flag enabled --> | |
268 <test> | |
269 <param name="db" value="test_db" /> | |
270 <param name="nofw" value="true" /> | |
271 <param name="u_reads" ftype="fastq" value="input_u.fq" /> | |
272 <output name="out_tab" file="nofw.tsv" sort="true" /> | |
273 <output name="report" file="nofw.report" /> | |
274 </test> | |
275 <!-- seed: explicit seed of 123 --> | |
276 <test> | |
277 <param name="db" value="test_db" /> | |
278 <param name="seed" value="123" /> | |
279 <param name="u_reads" ftype="fastq" value="input_u.fq" /> | |
280 <output name="out_tab" file="seed123.tsv" sort="true" /> | |
281 <output name="report" file="seed123.report" /> | |
282 </test> | |
283 <!-- trim5: trim 10 bases from the 5' end --> | |
284 <test> | |
285 <param name="db" value="test_db" /> | |
286 <param name="trim5" value="10" /> | |
287 <param name="u_reads" ftype="fastq" value="input_u.fq" /> | |
288 <output name="out_tab" file="trim5_10.tsv" sort="true" /> | |
289 <output name="report" file="trim5_10.report" /> | |
290 </test> | |
291 <!-- trim3: trim 5 bases from the 3' end --> | |
292 <test> | |
293 <param name="db" value="test_db" /> | |
294 <param name="trim3" value="5" /> | |
295 <param name="u_reads" ftype="fastq" value="input_u.fq" /> | |
296 <output name="out_tab" file="trim3_5.tsv" sort="true" /> | |
297 <output name="report" file="trim3_5.report" /> | |
298 </test> | |
299 <!-- skip: skip the first 3 reads --> | |
300 <test> | |
301 <param name="db" value="test_db" /> | |
302 <param name="skip" value="3" /> | |
303 <param name="u_reads" ftype="fastq" value="input_u.fq" /> | |
304 <output name="out_tab" file="skip3.tsv" sort="true" /> | |
305 <output name="report" file="skip3.report" /> | |
306 </test> | |
307 <!-- upto: stop after the first 6 reads --> | |
308 <test> | |
309 <param name="db" value="test_db" /> | |
310 <param name="upto" value="6" /> | |
311 <param name="u_reads" ftype="fastq" value="input_u.fq" /> | |
312 <output name="out_tab" file="upto6.tsv" sort="true" /> | |
313 <output name="report" file="upto6.report" /> | |
314 </test> | |
315 <!-- tab_fmt_cols set to "FooBar": the job is expected to fail --> | |
316 <test expect_failure="true"> | |
317 <param name="db" value="test_db" /> | |
318 <param name="tab_fmt_cols" value="FooBar" /> | |
319 <param name="u_reads" ftype="fastq" value="input_u.fq" /> | |
320 </test> | |
321 | |
322 </tests> | |
323 | |
324 <!-- ***************************************************************** --> | |
325 | |
326 <help> | |
327 <![CDATA[ | |
328 | |
329 Overview | |
330 -------- | |
331 | |
332 **Centrifuge** is a very rapid and memory-efficient system for the | |
333 classification of DNA sequences from microbial samples, with better | |
334 sensitivity than and comparable accuracy to other leading systems. The system | |
335 uses a novel indexing scheme based on the Burrows-Wheeler transform (BWT) and | |
336 the Ferragina-Manzini (FM) index, optimized specifically for the metagenomic | |
337 classification problem. Centrifuge requires a relatively small index (e.g., | |
338 4.3 GB for ~4,100 bacterial genomes) yet provides very fast classification | |
339 speed, allowing it to process a typical DNA sequencing run within an hour. | |
340 Together these advances enable timely and accurate analysis of large | |
341 metagenomics data sets on conventional desktop computers. | |
342 | |
343 Usage | |
344 ----- | |
345 | |
346 The following is the usage message printed by `centrifuge`. The option names | |
347 shown in the parameter help text above correspond to the entries listed below. | |
348 Note that not all options are available in the Galaxy wrapper. | |
349 | |
350 :: | |
351 | |
352 centrifuge [options]* -x <cf-idx> {-1 <m1> -2 <m2> | -U <r> | --sra-acc <SRA accession number>} [-S <filename>] [--report-file <report>] | |
353 | |
354 <cf-idx> Index filename prefix (minus trailing .X.cf). | |
355 <m1> Files with #1 mates, paired with files in <m2>. | |
356 Could be gzip'ed (extension: .gz) or bzip2'ed (extension: .bz2). | |
357 <m2> Files with #2 mates, paired with files in <m1>. | |
358 Could be gzip'ed (extension: .gz) or bzip2'ed (extension: .bz2). | |
359 <r> Files with unpaired reads. | |
360 Could be gzip'ed (extension: .gz) or bzip2'ed (extension: .bz2). | |
361 <SRA accession number> Comma-separated list of SRA accession numbers, e.g. --sra-acc SRR353653,SRR353654. | |
362 <filename> File for classification output (default: stdout) | |
363 <report> File for tabular report output (default: centrifuge_report.tsv) | |
364 | |
365 <m1>, <m2>, <r> can be comma-separated lists (no whitespace) and can be | |
366 specified many times. E.g. '-U file1.fq,file2.fq -U file3.fq'. | |
367 | |
368 Options (defaults in parentheses): | |
369 | |
370 Input: | |
371 -q query input files are FASTQ .fq/.fastq (default) | |
372 --qseq query input files are in Illumina's qseq format | |
373 -f query input files are (multi-)FASTA .fa/.mfa | |
374 -r query input files are raw one-sequence-per-line | |
375 -c <m1>, <m2>, <r> are sequences themselves, not files | |
376 -s/--skip <int> skip the first <int> reads/pairs in the input (none) | |
377 -u/--upto <int> stop after first <int> reads/pairs (no limit) | |
378 -5/--trim5 <int> trim <int> bases from 5'/left end of reads (0) | |
379 -3/--trim3 <int> trim <int> bases from 3'/right end of reads (0) | |
380 --phred33 qualities are Phred+33 (default) | |
381 --phred64 qualities are Phred+64 | |
382 --int-quals qualities encoded as space-delimited integers | |
383 --ignore-quals treat all quality values as 30 on Phred scale (off) | |
384 --nofw do not align forward (original) version of read (off) | |
385 --norc do not align reverse-complement version of read (off) | |
386 --sra-acc SRA accession ID | |
387 | |
388 Classification: | |
389 --min-hitlen <int> minimum length of partial hits (default 22, must be greater than 15) | |
390 --min-totallen <int> minimum summed length of partial hits per read (default 0) | |
391 --host-taxids <taxids> comma-separated list of taxonomic IDs that will be preferred in classification | |
392 --exclude-taxids <taxids> comma-separated list of taxonomic IDs that will be excluded in classification | |
393 | |
394 Output: | |
395 --out-fmt <str> define output format, either 'tab' or 'sam' (tab) | |
396 --tab-fmt-cols <str> columns in tabular format, comma separated | |
397 default: readID,seqID,taxID,score,2ndBestScore,hitLength,queryLength,numMatches | |
398 -t/--time print wall-clock time taken by search phases | |
399 --un <path> write unpaired reads that didn't align to <path> | |
400 --al <path> write unpaired reads that aligned at least once to <path> | |
401 --un-conc <path> write pairs that didn't align concordantly to <path> | |
402 --al-conc <path> write pairs that aligned concordantly at least once to <path> | |
403 (Note: for --un, --al, --un-conc, or --al-conc, add '-gz' to the option name, e.g. | |
404 --un-gz <path>, to gzip compress output, or add '-bz2' to bzip2 compress output.) | |
405 --quiet print nothing to stderr except serious errors | |
406 --met-file <path> send metrics to file at <path> (off) | |
407 --met-stderr send metrics to stderr (off) | |
408 --met <int> report internal counters & metrics every <int> secs (1) | |
409 | |
410 Performance: | |
411 -o/--offrate <int> override offrate of index; must be >= index's offrate | |
412 -p/--threads <int> number of alignment threads to launch (1) | |
413 --mm use memory-mapped I/O for index; many 'bowtie's can share | |
414 | |
415 Other: | |
416 --qc-filter filter out reads that are bad according to QSEQ filter | |
417 --seed <int> seed for random number generator (0) | |
418 --non-deterministic seed rand. gen. arbitrarily instead of using read attributes | |
419 --version print version information and quit | |
420 -h/--help print this usage message | |
421 | |
422 ]]> | |
423 </help> | |
424 | |
425 <!-- ***************************************************************** --> | |
426 | |
427 <citations> <!-- Reference to cite for this tool, given as a DOI. --> | |
428 <citation type="doi">10.1101/gr.210641.116</citation> | |
429 </citations> | |
430 | |
431 </tool> |