0
|
1 <tool id="srst2" name="SRST2" version="0.3.6">
|
|
2 <description>Short Read Sequence Typing for Bacterial Pathogens</description>
|
|
<!-- Packages Galaxy has to resolve before the job starts; each one is pinned to a version. -->
<requirements>
  <requirement version="0.1.18" type="package">samtools</requirement>
  <requirement version="2.1.0" type="package">bowtie2</requirement>
  <requirement version="0.1.4.6" type="package">srst2</requirement>
  <requirement version="08-07-2014" type="package">vfdb</requirement>
</requirements>
|
|
<!-- Exit codes of 1 or higher mark the job as failed with a fatal error. -->
<stdio>
  <exit_code level="fatal"
             range="1:"
             description="Unknown error has occurred"/>
</stdio>
|
|
<!--
  Builds the srst2.pl call in this order: wrapper script and srst2.py path, the three
  outputs every job writes, a job-type flag with the outputs and database name(s) for
  that job type, the read dataset name and read files, the database options, and the
  tuning options.
  Every input is referenced through its enclosing conditional, so the template does not
  rely on Galaxy resolving unqualified names of nested parameters.
-->
<command interpreter="perl"><![CDATA[
    srst2.pl \$BASE/srst2.py $bam_results $scores $pileup

    ## Job-type flag: m for the MLST-only job, g for the gene-database-only jobs,
    ## b for the jobs that combine MLST with a gene database.
    #if $mlst_or_genedb.job_type == "mlst_only"
        m $txt_results $alleles
        #if $mlst_or_genedb.allele_choice.allele_report == "all"
            all
        #else if $mlst_or_genedb.allele_choice.allele_report == "new"
            new
        #end if
    #else if $mlst_or_genedb.job_type == "custom_only"
        g $genes_results $fullgenes_results
        #*
            to allow multiple custom databases join all db names into comma separated variable then send that variable to the perl script to be parsed
            make the database names an array and then join
        *#
        #set $dbs = ','.join([$database.gene_db.name for $database in ( $mlst_or_genedb.databases )])
        "$dbs"
    #else if $mlst_or_genedb.job_type == "vfdb_only"
        g $genes_results $fullgenes_results $mlst_or_genedb.vfdb_in.name
    #else if $mlst_or_genedb.job_type == "mlst_custom"
        b $txt_results $genes_results $fullgenes_results
        #set $dbs = ','.join([$database.gene_db.name for $database in ( $mlst_or_genedb.databases )])
        "$dbs"
    #else if $mlst_or_genedb.job_type == "mlst_vfdb"
        b $txt_results $genes_results $fullgenes_results $mlst_or_genedb.vfdb_in.name
    #end if

    ## Name of the read dataset, then the read file(s) themselves.
    #if $single_or_paired.type == "single"
        "$single_or_paired.input_se.element_identifier"
        --input_se "$single_or_paired.input_se"
    #else if $single_or_paired.type == "paired"
        "$single_or_paired.forward_pe.name"
        --input_pe "$single_or_paired.forward_pe" "$single_or_paired.reverse_pe"
    #else
        "$single_or_paired.fastq_collection.forward.name"
        --input_pe "$single_or_paired.fastq_collection.forward" "$single_or_paired.fastq_collection.reverse"
    #end if

    ## Database options for the selected job type.
    #if $mlst_or_genedb.job_type == "mlst_only"
        --mlst_db $mlst_or_genedb.mlst_db
        --mlst_definition $mlst_or_genedb.mlst_defs
        --mlst_delimiter "'$mlst_or_genedb.mlst_delim'"
        --mlst_max_mismatch $mlst_or_genedb.mlst_max_mismatch
        --report_all_consensus
    #else if $mlst_or_genedb.job_type == "mlst_vfdb"
        --mlst_db $mlst_or_genedb.mlst_db
        --mlst_definition $mlst_or_genedb.mlst_defs
        --mlst_delimiter "'$mlst_or_genedb.mlst_delim'"
        --mlst_max_mismatch $mlst_or_genedb.mlst_max_mismatch
        --gene_max_mismatch $mlst_or_genedb.gene_max_mismatch
        --gene_db \$VF_PATH/${mlst_or_genedb.vfdb_in.fields.path}
    #else if $mlst_or_genedb.job_type == "mlst_custom"
        --gene_db
        #for $database in $mlst_or_genedb.databases
            $database.gene_db
        #end for
        --mlst_db $mlst_or_genedb.mlst_db
        --mlst_delimiter "'$mlst_or_genedb.mlst_delim'"
        --mlst_max_mismatch $mlst_or_genedb.mlst_max_mismatch
        --gene_max_mismatch $mlst_or_genedb.gene_max_mismatch
        --mlst_definition $mlst_or_genedb.mlst_defs
    #else if $mlst_or_genedb.job_type == "vfdb_only"
        --gene_db \$VF_PATH/${mlst_or_genedb.vfdb_in.fields.path}
        --gene_max_mismatch $mlst_or_genedb.gene_max_mismatch
    #else if $mlst_or_genedb.job_type == "custom_only"
        --gene_db
        #for $database in $mlst_or_genedb.databases
            $database.gene_db
        #end for
        --gene_max_mismatch $mlst_or_genedb.gene_max_mismatch
    #end if

    --read_type q

    --save_scores

    #if $options.select == "advanced"
        #if $options.min_coverage
            --min_coverage $options.min_coverage
        #end if
        #if $options.max_divergence
            --max_divergence $options.max_divergence
        #end if
        #if $options.min_depth
            --min_depth $options.min_depth
        #end if
        #if $options.min_edge_depth
            --min_edge_depth $options.min_edge_depth
        #end if
        #if $options.prob_err
            --prob_err $options.prob_err
        #end if
        #if $options.stop_after
            --stop_after $options.stop_after
        #end if
        ## The quoted --other string stays open across the next lines so the insert-size
        ## flags land inside it; minins is only forwarded when maxins is set and non-zero.
        --other "'-p \${GALAXY_SLOTS:-1}
        #if $options.maxins
            --maxins $options.maxins
            --minins $options.minins
        #end if
        '"
        #if $options.mapq
            --mapq $options.mapq
        #end if
        #if $options.baseq
            --baseq $options.baseq
        #end if
    #else
        --other "'-p \${GALAXY_SLOTS:-1}'"
    #end if

    --output out
]]></command>
|
|
126 <inputs>
|
|
<!-- Read layout selector; each branch exposes the dataset input(s) for that layout. -->
<conditional name="single_or_paired">
  <param name="type" type="select" label="Read type">
    <option value="single">Single-end</option>
    <option value="paired">Paired-end</option>
    <option value="collection">Collection Paired-end</option>
  </param>

  <when value="single">
    <param name="input_se"
           type="data"
           format="fastqsanger"
           label="Single end read file(s)"/>
  </when>

  <when value="paired">
    <param name="forward_pe"
           type="data"
           format="fastqsanger"
           label="Forward paired-end read file"/>
    <param name="reverse_pe"
           type="data"
           format="fastqsanger"
           label="Reverse paired-end read file"/>
  </when>

  <when value="collection">
    <!-- NOTE(review): this branch accepts format "txt" while the other two require "fastqsanger"; confirm that is intended. -->
    <param name="fastq_collection"
           type="data_collection"
           collection_type="paired"
           format="txt"
           optional="false"
           label="Paired-end reads collection"/>
  </when>
</conditional>
|
|
144
|
|
<!--
  Job type selector. The MLST branches take the ST definitions, the allele FASTA, the
  gene/allele delimiter and a mismatch limit. The gene branches take either an entry of
  the vfdb_fasta_files data table or one or more user-supplied FASTA databases.
  The mismatch limits are declared with an empty value, so no default is pre-filled.
-->
<conditional name="mlst_or_genedb">
  <param name="job_type" type="select" label="Job type">
    <option value="mlst_only">MLST only</option>
    <option value="mlst_vfdb">MLST and VFDB</option>
    <option value="mlst_custom">MLST and custom database</option>
    <option value="vfdb_only">VFDB only</option>
    <option value="custom_only">Custom database only</option>
  </param>

  <when value="mlst_only">
    <param name="mlst_defs" type="data" format="tabular" label="ST definitions for MLST scheme"/>
    <param name="mlst_db" type="data" format="fasta" label="Fasta file of MLST alleles"/>
    <param name="mlst_max_mismatch"
           type="integer"
           label="Maximum number of mismatches per read for MLST allele calling"
           value=""
           help="SRST2.1 default value is 10 however our testing indicates that the value should be set to 250 to prevent erroneous allele calls."/>
    <!-- Both branches are empty; the selected value is read directly by the command template. -->
    <conditional name="allele_choice">
      <param name="allele_report" type="select" label="Reported Alleles">
        <option value="all">All</option>
        <option value="new">Only New</option>
      </param>
      <when value="all"/>
      <when value="new"/>
    </conditional>
    <param name="mlst_delim"
           type="text"
           label="Character(s) separating gene name from allele number in MLST database"
           value=""
           help="Typically _ or -"
           optional="false">
      <validator type="expression" message="Must enter a delimiter.">len(value) >= 1</validator>
    </param>
  </when>

  <when value="mlst_vfdb">
    <param name="mlst_defs" type="data" format="tabular" label="ST definitions for MLST scheme"/>
    <param name="mlst_db" type="data" format="fasta" label="Fasta file of MLST alleles"/>
    <param name="vfdb_in" type="select" label="Choose a VFDB strain">
      <!-- Same sorting and empty-table check as the vfdb_only branch uses for this select. -->
      <options from_data_table="vfdb_fasta_files">
        <filter type="sort_by" column="2"/>
        <validator type="no_options" message="No strains are available"/>
      </options>
    </param>
    <param name="mlst_max_mismatch"
           type="integer"
           label="Maximum number of mismatches per read for MLST allele calling"
           value=""
           help="SRST2.1 default value is 10 however our testing indicates that the value should be set to 250 to prevent erroneous allele calls."/>
    <param name="gene_max_mismatch"
           type="integer"
           label="Maximum number of mismatches per read for gene allele calling"
           value=""
           help="SRST2.1 default value is 10 however our testing indicates that the value should be set to 250 to prevent erroneous allele calls."/>
    <param name="mlst_delim"
           type="text"
           label="Character(s) separating gene name from allele number in MLST database"
           value=""
           help="Typically _ or -"
           optional="false">
      <validator type="expression" message="Must enter a delimiter.">len(value) >= 1</validator>
    </param>
  </when>

  <when value="mlst_custom">
    <param name="mlst_defs" type="data" format="tabular" label="ST definitions for MLST scheme"/>
    <param name="mlst_db" type="data" format="fasta" label="Fasta file of MLST alleles"/>
    <!-- At least one custom database is required; the command template joins their names with commas. -->
    <repeat name="databases" title="Databases" min="1">
      <param name="gene_db" type="data" format="fasta" label="Fasta file for gene database"/>
    </repeat>
    <param name="mlst_max_mismatch"
           type="integer"
           label="Maximum number of mismatches per read for MLST allele calling"
           value=""
           help="SRST2.1 default value is 10 however our testing indicates that the value should be set to 250 to prevent erroneous allele calls."/>
    <param name="gene_max_mismatch"
           type="integer"
           label="Maximum number of mismatches per read for gene allele calling"
           value=""
           help="SRST2.1 default value is 10 however our testing indicates that the value should be set to 250 to prevent erroneous allele calls."/>
    <param name="mlst_delim"
           type="text"
           label="Character(s) separating gene name from allele number in MLST database"
           value=""
           help="Typically _ or -"
           optional="false">
      <validator type="expression" message="Must enter a delimiter.">len(value) >= 1</validator>
    </param>
  </when>

  <when value="vfdb_only">
    <param name="vfdb_in" type="select" label="Choose a VFDB strain">
      <options from_data_table="vfdb_fasta_files">
        <filter type="sort_by" column="2"/>
        <validator type="no_options" message="No strains are available"/>
      </options>
    </param>
    <param name="gene_max_mismatch"
           type="integer"
           label="Maximum number of mismatches per read for gene allele calling"
           value=""
           help="SRST2.1 default value is 10 however our testing indicates that the value should be set to 250 to prevent erroneous allele calls."/>
  </when>

  <when value="custom_only">
    <param name="gene_max_mismatch"
           type="integer"
           label="Maximum number of mismatches per read for gene allele calling"
           value=""
           help="SRST2.1 default value is 10 however our testing indicates that the value should be set to 250 to prevent erroneous allele calls."/>
    <repeat name="databases" title="Databases" min="1">
      <param name="gene_db" type="data" format="fasta" label="Fasta file for gene database"/>
    </repeat>
  </when>
</conditional>
|
|
<!--
  Basic adds no tuning flags beyond the thread count. Advanced exposes the reporting
  cut-offs, the samtools quality thresholds and the Bowtie 2 insert-size limits.
-->
<conditional name="options">
  <param name="select" type="select" label="Options Type">
    <option value="basic">Basic</option>
    <option value="advanced">Advanced</option>
  </param>

  <when value="advanced">
    <param name="min_coverage" type="integer" label="Minimum %coverage cutoff for gene reporting" value="90"/>
    <param name="max_divergence" type="integer" label="Maximum %divergence cutoff for gene reporting" value="10"/>
    <param name="min_depth" type="integer" label="Minimum mean depth to flag as dubious allele call" value="5"/>
    <param name="min_edge_depth" type="integer" label="Minimum edge depth to flag as dubious allele call" value="2"/>
    <param name="prob_err" type="float" label="Probability of sequencing error" value="0.01"/>
    <param name="stop_after"
           type="integer"
           label="Stop mapping after this number of reads have been mapped (otherwise map all)"
           optional="true"/>
    <param name="mapq" type="integer" label="Samtools -q parameter" value="1"/>
    <param name="baseq" type="integer" label="Samtools -Q parameter" value="20"/>
    <!--
      The in_range validators below only enforce a lower bound of 0, so their messages
      say exactly that; the relation between -I and -X is given as help text instead.
    -->
    <param name="minins"
           type="integer"
           label="Bowtie 2 -I parameter. The minimum fragment length for valid paired-end alignments."
           value="0"
           help="Must be less than -X parameter.">
      <validator type="in_range" message="Must be 0 or greater." min="0"/>
    </param>
    <param name="maxins"
           type="integer"
           label="Bowtie 2 -X parameter. The maximum fragment length for valid paired-end alignments."
           value="1000"
           help="Must be greater than -I parameter.">
      <validator type="in_range" message="Must be 0 or greater." min="0"/>
    </param>
  </when>

  <when value="basic"/>
</conditional>
|
|
233 </inputs>
|
|
234
|
|
<!--
  The BAM, scores and pileup datasets are always created. The remaining outputs are
  created only for the job types named in their filters.
-->
<outputs>
  <data name="bam_results" format="bam" label="Bam Results"/>
  <data name="scores" format="tabular" label="Scores"/>
  <data name="pileup" format="tabular" label="Pileup"/>

  <!-- MLST-only job. -->
  <data name="alleles" format="fasta" label="Alleles">
    <filter>mlst_or_genedb['job_type']=="mlst_only"</filter>
  </data>

  <!-- Every job type except the two gene-database-only ones. -->
  <data name="txt_results" format="tabular" label="Text Results">
    <filter>mlst_or_genedb['job_type'] not in ("vfdb_only", "custom_only")</filter>
  </data>

  <!-- Every job type except the MLST-only one. -->
  <data name="genes_results" format="tabular" label="Genes Results">
    <filter>mlst_or_genedb['job_type']!="mlst_only"</filter>
  </data>
  <data name="fullgenes_results" format="tabular" label="Full Genes Results">
    <filter>mlst_or_genedb['job_type']!="mlst_only"</filter>
  </data>
</outputs>
|
|
253
|
|
<!-- Placeholder: one test case holding a single empty output element; it sets no inputs and names no expected file. -->
<tests>
  <test>
    <output/>
  </test>
</tests>
|
|
259
|
|
260
|
|
261 <help>
|
|
262 What it does
|
|
263 ============
|
|
264
|
|
265 Short Read Sequence Typing for Bacterial Pathogens
|
|
266
|
|
267 This program is designed to take Illumina sequence data, a MLST database and/or a database of gene sequences (e.g. resistance genes, virulence genes, etc) and report the presence of STs and/or reference genes. The tool has a database of virulence factors that was extracted from http://www.mgc.ac.cn/VFs/ .
|
|
268
|
|
269 For more information about SRST2 and for instructions on how to format custom databases, visit https://github.com/katholt/srst2
|
|
270
|
|
271
|
|
272 Usage
|
|
273 =====
|
|
274
|
|
275 Basic Options
|
|
276 -------------
|
|
277
|
|
278 **Read Type**
|
|
279 - Single-end: Single end read file(s) for analysing (--input_se)
|
|
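- Paired-end: Paired end read file(s) for analysing (--input_pe)
- Collection Paired-end: A paired dataset collection; its forward and reverse reads are passed the same way (--input_pe)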
|
|
281
|
|
282 **Job Type**
|
|
283 - MLST only: Reports Sequence Types
|
|
- MLST and VFDB: Reports Sequence Types and user can choose one of the built-in Virulence Factor Database (VFDB) strains
|
|
285 - MLST and custom database: Reports Sequence Types and user can upload their own custom database
|
|
- VFDB only: User can choose one of the built-in Virulence Factor Database (VFDB) strains
|
|
- Custom database only: User can upload their own custom database
|
|
288
|
|
289 **ST definitions for MLST scheme:**
|
|
- Required if you want to calculate STs (--mlst_definition)
|
|
291
|
|
292 **Fasta file of MLST alleles:**
|
|
293 - Required if you want to calculate STs (--mlst_db)
|
|
294
|
|
295 **Fasta file for gene database:**
|
|
296 - Required if you want details of the sequences. The user must provide their own database (--gene_db)
|
|
297
|
|
298 **VFDB strain:**
|
|
- Required if you want details of the sequences. The user may choose one of the listed strains (--gene_db)
|
|
300
|
|
301 **Read file type:**
|
|
302 - fastq
|
|
303 - solexa
|
|
304 - fasta
|
|
305
|
|
306 **Character(s) separating gene name from allele number in MLST database:**
|
|
307 - Required for all MLST job types
|
|
308 - Typically either _ or -
|
|
309 - The output from getMLST will identify the delimiter.
|
|
310
|
|
311 **Maximum number of mismatches per read for MLST allele calling:**
|
|
312 - Required for all MLST job types
|
|
313 - For MLST schemas with inserts this number should be set to a high value (recommended: 250)
|
|
314
|
|
315 **Maximum number of mismatches per read for gene allele calling:**
|
|
- Required for all VFDB or custom database job types
|
|
317 - For genes with inserts this number should be set to a high value (recommended: 250).
|
|
318
|
|
319 **Option Type:**
|
|
320 - Basic: Includes only the options listed above
|
|
321 - Advanced: Includes the options listed below
|
|
322
|
|
323 -------------------------------
|
|
324
|
|
325 Advanced Options
|
|
326 ----------------
|
|
327
|
|
328 **Minimum %coverage cutoff for gene reporting:**
|
|
329 - Default is 90 (--min_coverage)
|
|
330
|
|
331 **Maximum %divergence cutoff for gene reporting:**
|
|
332 - Default is 10 (--max_divergence)
|
|
333
|
|
334 **Minimum mean depth to flag as dubious allele call:**
|
|
335 - Default is 5 (--min_depth)
|
|
336
|
|
337 **Minimum edge depth to flag as dubious allele call:**
|
|
338 - Default is 2 (--min_edge_depth)
|
|
339
|
|
340 **Probability of sequencing error:**
|
|
341 - Default is 0.01 (--prob_err)
|
|
342
|
|
343 **Stop mapping after this number of reads have been mapped (otherwise map all):**
|
|
344 - Default maps all (--stop_after)
|
|
345
|
|
346 **Other arguments to pass to bowtie2:**
|
|
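- Not set directly. The tool always passes -p with the number of Galaxy job slots, and with Advanced options it can also pass --maxins and --minins (--other)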
|
|
348
|
|
349 **Samtools -q parameter:**
|
|
350 - Default is 1 (--mapq)
|
|
351
|
|
352 **Samtools -Q parameter:**
|
|
353 - Default is 20 (--baseq)
|
|
354
|
|
355 **Bowtie2 -I/--minins:**
|
|
356 - The minimum fragment length for valid paired-end alignments. E.g. if -I 60 is specified and a paired-end alignment consists of two 20-bp alignments in the appropriate orientation with a 20-bp gap between them, that alignment is considered valid (as long as -X is also satisfied). A 19-bp gap would not be valid in that case. If trimming options -3 or -5 are also used, the -I constraint is applied with respect to the untrimmed mates.
|
|
- The larger the difference between -I and -X, the slower Bowtie 2 will run. This is because larger differences between -I and -X require that Bowtie 2 scan a larger window to determine if a concordant alignment exists. For typical fragment length ranges (200 to 400 nucleotides), Bowtie 2 is very efficient.
|
|
358 - Default: 0 (essentially imposing no minimum)
|
|
359
|
|
360 **Bowtie2 -X/--maxins:**
|
|
361 - The maximum fragment length for valid paired-end alignments. E.g. if -X 100 is specified and a paired-end alignment consists of two 20-bp alignments in the proper orientation with a 60-bp gap between them, that alignment is considered valid (as long as -I is also satisfied). A 61-bp gap would not be valid in that case. If trimming options -3 or -5 are also used, the -X constraint is applied with respect to the untrimmed mates, not the trimmed mates.
|
|
- The larger the difference between -I and -X, the slower Bowtie 2 will run. This is because larger differences between -I and -X require that Bowtie 2 scan a larger window to determine if a concordant alignment exists. For typical fragment length ranges (200 to 400 nucleotides), Bowtie 2 is very efficient.
|
|
- Default: 1000 in this tool (Bowtie 2's own default is 500).
|
|
364
|
|
365 **Acknowledgments**
|
|
366 Original Author: Mariam Iskander
|
|
367
|
|
368 Jen Cabral
|
|
369
|
|
370 Philip Mabon
|
|
371
|
|
372 Mark Iskander
|
|
373
|
|
374 </help>
|
|
<!-- References shown to users of the tool. -->
<citations>
  <!-- Citation the tool already carried. -->
  <citation type="doi">10.1128/AAC.01310-13</citation>
  <!-- The SRST2 publication (Inouye et al., Genome Medicine, 2014). -->
  <citation type="doi">10.1186/s13073-014-0090-6</citation>
</citations>
|
|
378 </tool>
|