comparison srst2.xml @ 0:6f870ed59b6e draft

Uploaded
author nml
date Mon, 06 Feb 2017 12:31:04 -0500
parents
children 599a4dc309aa
comparison
equal deleted inserted replaced
-1:000000000000 0:6f870ed59b6e
1 <tool id="srst2" name="SRST2" version="0.3.6">
2 <description>Short Read Sequence Typing for Bacterial Pathogens</description>
3 <requirements> <!-- Versioned packages this tool declares as dependencies -->
4 <requirement type="package" version="0.1.18">samtools</requirement>
5 <requirement type="package" version="2.1.0">bowtie2</requirement>
6 <requirement type="package" version="0.1.4.6">srst2</requirement>
7 <requirement type="package" version="08-07-2014">vfdb</requirement>
8 </requirements>
9 <stdio> <!-- Exit codes in the range "1:" (1 and above) are reported as fatal with a generic description -->
10 <exit_code range="1:" level="fatal" description="Unknown error has occurred"/>
11 </stdio>
12 <command interpreter="perl">
13 srst2.pl \$BASE/srst2.py $bam_results $scores $pileup
14 ## Positional arguments chosen by job type: a one-letter code (m, g or b; presumably a mode selector for srst2.pl), the output datasets, then any database name(s).
15 #if $mlst_or_genedb.job_type == "mlst_only"
16 m $txt_results $alleles
17 #if ($mlst_or_genedb.allele_choice.allele_report=="all")
18 all
19 #else if ($mlst_or_genedb.allele_choice.allele_report=="new")
20 new
21 #end if
22 #else if $mlst_or_genedb.job_type == "custom_only"
23 g $genes_results $fullgenes_results
24 #*
25 Several custom databases may be supplied: gather the name of every database into a list,
26 join the list into one comma-separated string, and pass that string as a single argument for the perl script to parse.
27 *#
28 #set $dbs = ','.join([$database.gene_db.name for $database in ( $mlst_or_genedb.databases )])
29 "$dbs"
30 #else if $mlst_or_genedb.job_type == "vfdb_only"
31 g $genes_results $fullgenes_results $mlst_or_genedb.vfdb_in.name
32 #else if $mlst_or_genedb.job_type == "mlst_custom"
33 b $txt_results $genes_results $fullgenes_results
34 #set $dbs = ','.join([$database.gene_db.name for $database in ( $mlst_or_genedb.databases )])
35 "$dbs"
36 #else if $mlst_or_genedb.job_type == "mlst_vfdb"
37 b $txt_results $genes_results $fullgenes_results $mlst_or_genedb.vfdb_in.name
38 #end if
39 ## A quoted name for the reads, then the read file(s); every parameter is addressed through the "single_or_paired" conditional that declares it.
40 #if $single_or_paired.type == "single"
41 "$single_or_paired.input_se.element_identifier"
42 --input_se "$single_or_paired.input_se"
43 #else if $single_or_paired.type == "paired"
44 "$single_or_paired.forward_pe.name"
45 --input_pe "$single_or_paired.forward_pe" "$single_or_paired.reverse_pe"
46 #else
47 "$single_or_paired.fastq_collection.forward.name"
48 --input_pe "$single_or_paired.fastq_collection.forward" "$single_or_paired.fastq_collection.reverse"
49 #end if
50 ## Database options per job type; dataset parameters are referenced through the "mlst_or_genedb" conditional that declares them.
51 #if ($mlst_or_genedb.job_type=="mlst_only")
52 --mlst_db $mlst_or_genedb.mlst_db
53 --mlst_definition $mlst_or_genedb.mlst_defs
54 --mlst_delimiter "'$mlst_or_genedb.mlst_delim'"
55 --mlst_max_mismatch $mlst_or_genedb.mlst_max_mismatch
56 --report_all_consensus
57 #else if ($mlst_or_genedb.job_type=="mlst_vfdb")
58 --mlst_db $mlst_or_genedb.mlst_db
59 --mlst_definition $mlst_or_genedb.mlst_defs
60 --mlst_delimiter "'$mlst_or_genedb.mlst_delim'"
61 --mlst_max_mismatch $mlst_or_genedb.mlst_max_mismatch
62 --gene_max_mismatch $mlst_or_genedb.gene_max_mismatch
63 --gene_db \$VF_PATH/${mlst_or_genedb.vfdb_in.fields.path}
64 #else if ($mlst_or_genedb.job_type=="mlst_custom")
65 --gene_db
66 #for $database in $mlst_or_genedb.databases
67 $database.gene_db
68 #end for
69 --mlst_db $mlst_or_genedb.mlst_db
70 --mlst_delimiter "'$mlst_or_genedb.mlst_delim'"
71 --mlst_max_mismatch $mlst_or_genedb.mlst_max_mismatch
72 --gene_max_mismatch $mlst_or_genedb.gene_max_mismatch
73 --mlst_definition $mlst_or_genedb.mlst_defs
74 #else if ($mlst_or_genedb.job_type=="vfdb_only")
75 --gene_db \$VF_PATH/${mlst_or_genedb.vfdb_in.fields.path}
76 --gene_max_mismatch $mlst_or_genedb.gene_max_mismatch
77 #else if ($mlst_or_genedb.job_type=="custom_only")
78 --gene_db
79 #for $database in $mlst_or_genedb.databases
80 $database.gene_db
81 #end for
82 --gene_max_mismatch $mlst_or_genedb.gene_max_mismatch
83 #end if
84
85 --read_type q
86
87 --save_scores
88 ## Integer cutoffs are compared with the empty string instead of tested for truthiness, so an explicit 0 is still passed on the command line.
89 #if $options.select == "advanced"
90 #if str($options.min_coverage) != ''
91 --min_coverage $options.min_coverage
92 #end if
93 #if str($options.max_divergence) != ''
94 --max_divergence $options.max_divergence
95 #end if
96 #if str($options.min_depth) != ''
97 --min_depth $options.min_depth
98 #end if
99 #if str($options.min_edge_depth) != ''
100 --min_edge_depth $options.min_edge_depth
101 #end if
102 #if $options.prob_err
103 --prob_err $options.prob_err
104 #end if
105 #if $options.stop_after
106 --stop_after $options.stop_after
107 #end if
108 --other "'-p \${GALAXY_SLOTS:-1}
109 #if $options.maxins
110 --maxins $options.maxins
111 --minins $options.minins
112 #end if
113 '"
114 #if str($options.mapq) != ''
115 --mapq $options.mapq
116 #end if
117 #if str($options.baseq) != ''
118 --baseq $options.baseq
119 #end if
120 #else
121 --other "'-p \${GALAXY_SLOTS:-1}'"
122 #end if
123
124 --output out
125 </command>
126 <inputs>
127 <conditional name="single_or_paired"> <!-- Read input: one single-end file, a forward/reverse pair of files, or a paired collection. NOTE(review): the collection declares format="txt" while the other inputs use fastqsanger; confirm this is intended -->
128 <param name="type" type="select" label="Read type">
129 <option value="single">Single-end</option>
130 <option value="paired">Paired-end</option>
131 <option value="collection">Collection Paired-end</option>
132 </param>
133 <when value="single">
134 <param name="input_se" type="data" format="fastqsanger" label="Single end read file(s)"/>
135 </when>
136 <when value="paired">
137 <param name="forward_pe" type="data" format="fastqsanger" label="Forward paired-end read file"/>
138 <param name="reverse_pe" type="data" format="fastqsanger" label="Reverse paired-end read file"/>
139 </when>
140 <when value="collection">
141 <param name="fastq_collection" type="data_collection" label="Paired-end reads collection" optional="false" format="txt" collection_type="paired" />
142 </when>
143 </conditional>
144
145 <conditional name="mlst_or_genedb"> <!-- Job type selector; each branch declares the MLST and/or gene database inputs that job type needs. Both VFDB selects share the same sorting and empty-table validation. -->
146 <param name="job_type" type="select" label="Job type">
147 <option value="mlst_only">MLST only</option>
148 <option value="mlst_vfdb">MLST and VFDB</option>
149 <option value="mlst_custom">MLST and custom database</option>
150 <option value="vfdb_only">VFDB only</option>
151 <option value="custom_only">Custom database only</option>
152 </param>
153 <when value="mlst_only">
154 <param name="mlst_defs" type="data" format="tabular" label="ST definitions for MLST scheme"/>
155 <param name="mlst_db" type="data" format="fasta" label="Fasta file of MLST alleles"/>
156 <param name="mlst_max_mismatch" type="integer" label="Maximum number of mismatches per read for MLST allele calling" value="" help="SRST2.1 default value is 10 however our testing indicates that the value should be set to 250 to prevent erroneous allele calls."/>
157 <conditional name="allele_choice">
158 <param name="allele_report" type="select" label="Reported Alleles" >
159 <option value="all">All</option>
160 <option value="new">Only New</option>
161 </param>
162 <when value="all"/>
163 <when value="new"/>
164 </conditional>
165 <param name="mlst_delim" type="text" label="Character(s) separating gene name from allele number in MLST database" value="" help="Typically _ or -" optional="false" >
166 <validator type="expression" message="Must enter a delimiter.">len(value) >= 1</validator>
167 </param>
168 </when>
169 <when value="mlst_vfdb">
170 <param name="mlst_defs" type="data" format="tabular" label="ST definitions for MLST scheme"/>
171 <param name="mlst_db" type="data" format="fasta" label="Fasta file of MLST alleles"/>
172 <param name="vfdb_in" type="select" label="Choose a VFDB strain">
173 <options from_data_table="vfdb_fasta_files"><filter type="sort_by" column="2" /><validator type="no_options" message="No strains are available" /></options>
174 </param>
175 <param name="mlst_max_mismatch" type="integer" label="Maximum number of mismatches per read for MLST allele calling" value="" help="SRST2.1 default value is 10 however our testing indicates that the value should be set to 250 to prevent erroneous allele calls."/>
176 <param name="gene_max_mismatch" type="integer" label="Maximum number of mismatches per read for gene allele calling" value="" help="SRST2.1 default value is 10 however our testing indicates that the value should be set to 250 to prevent erroneous allele calls."/>
177 <param name="mlst_delim" type="text" label="Character(s) separating gene name from allele number in MLST database" value="" help="Typically _ or -" optional="false" >
178 <validator type="expression" message="Must enter a delimiter.">len(value) >= 1</validator>
179 </param>
180 </when>
181 <when value="mlst_custom">
182 <param name="mlst_defs" type="data" format="tabular" label="ST definitions for MLST scheme"/>
183 <param name="mlst_db" type="data" format="fasta" label="Fasta file of MLST alleles"/>
184 <repeat name="databases" title="Databases" min="1">
185 <param name="gene_db" type="data" format="fasta" label="Fasta file for gene database" />
186 </repeat>
187 <param name="mlst_max_mismatch" type="integer" label="Maximum number of mismatches per read for MLST allele calling" value="" help="SRST2.1 default value is 10 however our testing indicates that the value should be set to 250 to prevent erroneous allele calls."/>
188 <param name="gene_max_mismatch" type="integer" label="Maximum number of mismatches per read for gene allele calling" value="" help="SRST2.1 default value is 10 however our testing indicates that the value should be set to 250 to prevent erroneous allele calls."/>
189 <param name="mlst_delim" type="text" label="Character(s) separating gene name from allele number in MLST database" value="" help="Typically _ or -" optional="false" >
190 <validator type="expression" message="Must enter a delimiter.">len(value) >= 1</validator>
191 </param>
192 </when>
193 <when value="vfdb_only">
194 <param name="vfdb_in" type="select" label="Choose a VFDB strain">
195 <options from_data_table="vfdb_fasta_files" >
196 <filter type="sort_by" column="2" />
197 <validator type="no_options" message="No strains are available" />
198 </options>
199 </param>
200 <param name="gene_max_mismatch" type="integer" label="Maximum number of mismatches per read for gene allele calling" value="" help="SRST2.1 default value is 10 however our testing indicates that the value should be set to 250 to prevent erroneous allele calls."/>
201 </when>
202 <when value="custom_only">
203 <param name="gene_max_mismatch" type="integer" label="Maximum number of mismatches per read for gene allele calling" value="" help="SRST2.1 default value is 10 however our testing indicates that the value should be set to 250 to prevent erroneous allele calls."/>
204 <repeat name="databases" title="Databases" min="1">
205 <param name="gene_db" type="data" format="fasta" label="Fasta file for gene database" />
206 </repeat>
207 </when>
208 </conditional>
209 <conditional name="options"> <!-- Basic exposes no extra parameters; Advanced exposes reporting cutoffs plus Samtools and Bowtie 2 settings. The in_range validators only enforce a minimum of 0, and their messages say exactly that. -->
210 <param name="select" type="select" label="Options Type">
211 <option value="basic">Basic</option>
212 <option value="advanced">Advanced</option>
213 </param>
214 <when value="advanced">
215 <param name="min_coverage" type="integer" label="Minimum %coverage cutoff for gene reporting" value="90"/>
216 <param name="max_divergence" type="integer" label="Maximum %divergence cutoff for gene reporting" value="10"/>
217 <param name="min_depth" type="integer" label="Minimum mean depth to flag as dubious allele call" value="5"/>
218 <param name="min_edge_depth" type="integer" label="Minimum edge depth to flag as dubious allele call" value="2"/>
219 <param name="prob_err" type="float" label="Probability of sequencing error" value="0.01"/>
220 <param name="stop_after" type="integer" label="Stop mapping after this number of reads have been mapped (otherwise map all)" optional="true"/>
221 <param name="mapq" type="integer" label="Samtools -q parameter" value="1"/>
222 <param name="baseq" type="integer" label="Samtools -Q parameter" value="20"/>
223 <param name="minins" type="integer" label="Bowtie 2 -I parameter. The minimum fragment length for valid paired-end alignments." value="0" >
224 <validator type="in_range" message="Must be a non-negative integer (and should not exceed the -X parameter)." min="0"/>
225 </param>
226 <param name="maxins" type="integer" label="Bowtie 2 -X parameter. The maximum fragment length for valid paired-end alignments." value="1000" >
227 <validator type="in_range" message="Must be a non-negative integer (and should not be below the -I parameter)." min="0"/>
228 </param>
229
230 </when>
231 <when value="basic"/>
232 </conditional>
233 </inputs>
234
235 <outputs> <!-- bam_results, scores and pileup carry no filter; the remaining outputs are filtered on the selected job type. NOTE(review): txt_results has two filter elements, presumably both must pass; confirm -->
236 <data format="bam" name="bam_results" label="Bam Results"/>
237 <data format="tabular" name="scores" label="Scores"/>
238 <data format="tabular" name="pileup" label="Pileup"/>
239 <data format="fasta" name="alleles" label="Alleles">
240 <filter>mlst_or_genedb['job_type']=="mlst_only"</filter>
241 </data>
242 <data format="tabular" name="txt_results" label="Text Results" >
243 <filter>mlst_or_genedb['job_type']!="vfdb_only"</filter>
244 <filter>mlst_or_genedb['job_type']!="custom_only"</filter>
245 </data>
246 <data format="tabular" name="genes_results" label="Genes Results" >
247 <filter>mlst_or_genedb['job_type']!="mlst_only"</filter>
248 </data>
249 <data format="tabular" name="fullgenes_results" label="Full Genes Results" >
250 <filter>mlst_or_genedb['job_type']!= "mlst_only"</filter>
251 </data>
252 </outputs>
253
254 <tests> <!-- Placeholder only: the single test sets no inputs and its output element names no dataset or expected file. TODO: add real test cases -->
255 <test>
256 <output/>
257 </test>
258 </tests>
259
260
261 <help>
262 What it does
263 ============
264
265 Short Read Sequence Typing for Bacterial Pathogens
266
267 This program is designed to take Illumina sequence data, a MLST database and/or a database of gene sequences (e.g. resistance genes, virulence genes, etc) and report the presence of STs and/or reference genes. The tool has a database of virulence factors that was extracted from http://www.mgc.ac.cn/VFs/ .
268
269 For more information about SRST2 and for instructions on how to format custom databases, visit https://github.com/katholt/srst2
270
271
272 Usage
273 =====
274
275 Basic Options
276 -------------
277
278 **Read Type**
279 - Single-end: Single end read file(s) for analysing (--input_se)
280 - Paired-end: Paired end read file(s) for analysing (--input_pe)
281
282 **Job Type**
283 - MLST only: Reports Sequence Types
284 - MLST and VFDB: Reports Sequence Types and user can choose one of the built-in Virulence Factor Database (VFDB) strains
285 - MLST and custom database: Reports Sequence Types and user can upload their own custom database
286 - VFDB only: User can choose one of the built-in Virulence Factor Database (VFDB) strains
287 - Custom database only: User can upload their own custom database
288
289 **ST definitions for MLST scheme:**
290 - Required if you want to calculate STs (--mlst_definition)
291
292 **Fasta file of MLST alleles:**
293 - Required if you want to calculate STs (--mlst_db)
294
295 **Fasta file for gene database:**
296 - Required if you want details of the sequences. The user must provide their own database (--gene_db)
297
298 **VFDB strain:**
299 - Required if you want details of the sequences. The user may choose one of the listed strains (--gene_db)
300
301 **Read file type:**
302 - fastq
303 - solexa
304 - fasta
305
306 **Character(s) separating gene name from allele number in MLST database:**
307 - Required for all MLST job types
308 - Typically either _ or -
309 - The output from getMLST will identify the delimiter.
310
311 **Maximum number of mismatches per read for MLST allele calling:**
312 - Required for all MLST job types
313 - For MLST schemas with inserts this number should be set to a high value (recommended: 250)
314
315 **Maximum number of mismatches per read for gene allele calling:**
316 - Required for all VFDB or custom database job types
317 - For genes with inserts this number should be set to a high value (recommended: 250).
318
319 **Option Type:**
320 - Basic: Includes only the options listed above
321 - Advanced: Includes the options listed below
322
323 -------------------------------
324
325 Advanced Options
326 ----------------
327
328 **Minimum %coverage cutoff for gene reporting:**
329 - Default is 90 (--min_coverage)
330
331 **Maximum %divergence cutoff for gene reporting:**
332 - Default is 10 (--max_divergence)
333
334 **Minimum mean depth to flag as dubious allele call:**
335 - Default is 5 (--min_depth)
336
337 **Minimum edge depth to flag as dubious allele call:**
338 - Default is 2 (--min_edge_depth)
339
340 **Probability of sequencing error:**
341 - Default is 0.01 (--prob_err)
342
343 **Stop mapping after this number of reads have been mapped (otherwise map all):**
344 - Default maps all (--stop_after)
345
346 **Other arguments to pass to bowtie2:**
347 --other
348
349 **Samtools -q parameter:**
350 - Default is 1 (--mapq)
351
352 **Samtools -Q parameter:**
353 - Default is 20 (--baseq)
354
355 **Bowtie2 -I/--minins:**
356 - The minimum fragment length for valid paired-end alignments. E.g. if -I 60 is specified and a paired-end alignment consists of two 20-bp alignments in the appropriate orientation with a 20-bp gap between them, that alignment is considered valid (as long as -X is also satisfied). A 19-bp gap would not be valid in that case. If trimming options -3 or -5 are also used, the -I constraint is applied with respect to the untrimmed mates.
357 - The larger the difference between -I and -X, the slower Bowtie 2 will run. This is because larger differences between -I and -X require that Bowtie 2 scan a larger window to determine if a concordant alignment exists. For typical fragment length ranges (200 to 400 nucleotides), Bowtie 2 is very efficient.
358 - Default: 0 (essentially imposing no minimum)
359
360 **Bowtie2 -X/--maxins:**
361 - The maximum fragment length for valid paired-end alignments. E.g. if -X 100 is specified and a paired-end alignment consists of two 20-bp alignments in the proper orientation with a 60-bp gap between them, that alignment is considered valid (as long as -I is also satisfied). A 61-bp gap would not be valid in that case. If trimming options -3 or -5 are also used, the -X constraint is applied with respect to the untrimmed mates, not the trimmed mates.
362 - The larger the difference between -I and -X, the slower Bowtie 2 will run. This is because larger differences between -I and -X require that Bowtie 2 scan a larger window to determine if a concordant alignment exists. For typical fragment length ranges (200 to 400 nucleotides), Bowtie 2 is very efficient.
363 - Default: 1000 in this tool (Bowtie 2's own default is 500).
364
365 **Acknowledgments**
366 Original Author: Mariam Iskander
367
368 Jen Cabral
369
370 Philip Mabon
371
372 Mark Iskander
373
374 </help>
375 <citations> <!-- One citation, given as a DOI -->
376 <citation type="doi">10.1128/AAC.01310-13</citation>
377 </citations>
378 </tool>