comparison breseq.xml @ 0:f848a7f97332 draft

"planemo upload commit fadaff2d55736bf8c580541d6089c83cd4106a1f"
author iuc
date Thu, 31 Oct 2019 19:40:40 -0400
parents
children 85c57cc9b558
comparison
equal deleted inserted replaced
-1:000000000000 0:f848a7f97332
1 <tool id="breseq" name="breseq" version="@PACKAGE_VERSION@+@GALAXY_VERSION@">
2
<!-- One-line summary of the tool. -->
3 <description>find mutations in haploid microbial genomes</description>
4
<!-- Pulls in macros.xml; the @PACKAGE_VERSION@ and @GALAXY_VERSION@ tokens used in the
     tool version attribute and the annotate_format_opts macro expanded in the inputs
     are presumably defined there (macros.xml is not visible here; confirm). -->
5 <macros>
6 <import>macros.xml</import>
7 </macros>
8
<!-- Runtime dependency: the breseq package.
     NOTE(review): the version is written literally here while the tool version uses the
     @PACKAGE_VERSION@ token; confirm that the two stay in sync. -->
9 <requirements>
10 <requirement type="package" version="0.34.0">breseq</requirement>
11 </requirements>
12
<!-- Command run to report the underlying breseq version. -->
13 <version_command>breseq --version</version_command>
14
<!-- Cheetah template that builds the shell command line.
     1. Collects one quoted reference option per selected genome (history datasets or a
        built-in data table entry) into $ref_opts.
     2. In "detect" mode runs breseq into ./results and copies the requested result files
        to the Galaxy outputs.
     3. In every other mode runs "gdtools ANNOTATE" once per requested output format,
        chaining the calls with a logical AND.
     All dataset paths are single-quoted so that paths containing spaces or shell
     metacharacters cannot break or alter the command. -->
15 <command detect_errors="aggressive">
16 <![CDATA[
## Accumulate " --reference '<path>'" for every reference of every repeat instance.
17 #set $ref_opts = ""
18 #for $r in $references:
19 #if str($r.reference.source) == "history":
20 #for $ref in $r.reference.own_genome:
21 #if $ref
22 #set $ref_opts = $ref_opts + " --reference '" + str($ref) + "'"
23 #end if
24 #end for
25 #else:
26 #set $ref_opts = $ref_opts + " --reference '" + $r.reference.fixed_genome.fields.path + "'"
27 #end if
28 #end for
29
30 #if str($run.mode) == 'detect'
## Detect mode: a single breseq run writing into ./results.
31 breseq
32
33 --num-processors \${GALAXY_SLOTS:-4}
34
35 -o results
36
37 $ref_opts
38
## Read files are passed as quoted positional arguments.
39 #for $s in $run.fastqs:
40 '${s}'
41 #end for
42
43 #if $run.name
44 --name '$run.name'
45 #end if
46
## Both parameters expand to either an empty string or a literal breseq flag.
47 $run.polymorphism_prediction
48 $run.predict_junctions
49
## Copy only the result files the user asked for.
50 #if 'gd' in str($run.output_options.formats).split(','):
51 && cp results/output/output.gd '$output'
52 #end if
53
54 #if 'html' in str($run.output_options.formats).split(','):
55 && cp results/output/index.html '$report'
## -p keeps the chain from failing if the extra files directory already exists.
56 && mkdir -p '$report.extra_files_path'
57 && cp -R results/output/* '$report.extra_files_path'
58 #end if
59
60 #if 'zip' in str($run.output_options.formats).split(','):
61 && tar -zcf '$zip_output' results
62 #end if
63
64 #if 'log' in str($run.output_options.formats).split(','):
65 && cp results/output/log.txt '$log'
66 #end if
67 #else
## Annotate / compare modes: one gdtools call per requested format; $first suppresses
## the "&&" separator in front of the first call only.
68 #set $first = 1
69 #for $o in str($run.output_options.formats).split(','):
70
71 #if $first == 0
72 &&
73 #end if
74 #set $first = 0
75
76 gdtools ANNOTATE
77
78 --format '$o'
79
## Route each format to its own output dataset.
80 -o
81 #if $o == 'html':
82 '$annreport'
83 #else if $o == 'gd':
84 '$genomediff'
85 #else if $o == 'tsv':
86 '$tabdelim'
87 #else if $o == 'phylip':
88 '$phylipout'
89 #else if $o == 'json':
90 '$jsonout'
91 #end if
92
93 $ref_opts
94
## GenomeDiff inputs are passed as quoted positional arguments.
95 #for $s in $run.gds:
96 '${s}'
97 #end for
98 #end for
99 #end if
100 ]]>
101 </command>
102
<!-- Tool form definition. Two top-level groups: a repeat of reference genomes and a
     conditional that switches the form between detect, annotate and compare modes. -->
103 <inputs>
<!-- At least one reference is required (min="1"). Each instance is either an entry of
     the genbank_files data table or one or more fasta/genbank datasets from the history. -->
104 <repeat name="references" title="Reference Genome" min="1">
105 <conditional name="reference">
106 <param name="source" type="select" label="Reference source" >
107 <option value="builtin">built-in</option>
108 <option value="history" selected="true">history</option>
109 </param>
110 <when value="builtin">
111 <param name="fixed_genome" argument="--reference" type="select" optional="false" label="Galaxy Built-in Reference(s)">
<!-- Options come from the genbank_files data table, sorted by column 3; validation fails
     with the message below when the table has no entries. -->
112 <options from_data_table="genbank_files">
113 <filter type="sort_by" column="3"/>
114 <validator type="no_options" message="No built-in genbank records have been configured"/>
115 </options>
116 </param>
117 </when>
118 <when value="history">
119 <param name="own_genome" argument="--reference" type="data" format="fasta,genbank" multiple="true" optional="false" label="Fasta or Genbank Reference(s)" />
120 </when>
121 </conditional>
122 </repeat>
123
<!-- Run mode selector; the command template treats every mode other than "detect" as a
     gdtools ANNOTATE run. -->
124 <conditional name="run">
125 <param name="mode" type="select" label="Run Mode" help="Detect, annotate, or compare variants.">
126 <option value="detect" selected="true">Detect</option>
127 <option value="annotate">Annotate</option>
128 <option value="compare">Compare</option>
129 </param>
130 <when value="detect">
131
132 <param name="fastqs" type="data" format="fastq" multiple="true" label="Fastq Read Files" />
133
<!-- The option values are inserted verbatim into the command line: an empty string for
     consensus mode, the literal flag for polymorphism mode. -->
134 <param argument="--polymorphism-prediction" name="polymorphism_prediction" type="select" label="Detection Mode" help="**Polymorphism mode**: Detect variants with frequencies between 0% and 100% if a mixture model is well-supported by the read alignment evidence. Use to analyze a mixed population of genomes evolved from a common ancestor. **Consensus mode**: Detect variants present in 100% of the sample. Use when re-sequencing a clonal haploid genome. This mode is the default.">
135 <option value="" selected="true">Consensus</option>
136 <option value="--polymorphism-prediction">Polymorphism</option>
137 </param>
138
139 <param name="name" argument="--name" type="text" value="" label="Analysis Name" help="Human-readable name of the analysis run for output (DEFAULT=none)." />
140
<!-- Inverted boolean: checked emits nothing, unchecked emits the flag that disables
     junction prediction. -->
141 <param name="predict_junctions" type="boolean" truevalue="" falsevalue="--no-junction-prediction" checked="true" label="Predict Junctions" help="Predict new sequence junctions (default). --no-junction-prediction is supplied if 'No' is selected. Otherwise, there is no flag." />
142
<!-- Output formats for detect mode; these values are tested by the command template and
     by the output filters. -->
143 <section name="output_options" title="Output Options" expanded="false">
144 <param name="formats" type="select" multiple="true" optional="false" display="checkboxes" label="Output Formats">
145 <option value="gd" selected="true">Variants (GenomeDiff)</option>
146 <option value="html">Variant Report (Webpage)</option>
147 <option value="zip">All Variant Results (Gzip)</option>
148 <option value="log">Log (Text)</option>
149 </param>
150 </section>
151
152 </when>
153 <when value="annotate">
154
155 <param name="gds" type="data" format="tabular" multiple="true" optional="false" label="GenomeDiff (gd) Files" help="Files as produced by breseq" />
156
<!-- The annotate_format_opts macro is not defined in this file (presumably in
     macros.xml; confirm). The option below is passed into it and is selected by default. -->
157 <expand macro="annotate_format_opts">
158 <option value="gd" selected="true">Annotated Variants (GenomeDiff)</option>
159 </expand>
160
161 </when>
162 <when value="compare">
163
<!-- Same input as annotate mode, but at least two GenomeDiff files are required (min="2"). -->
164 <param name="gds" type="data" format="tabular" multiple="true" optional="false" label="GenomeDiff (gd) Files" help="Files as produced by breseq" min="2" />
165
166 <expand macro="annotate_format_opts">
167 <option value="phylip" selected="true">Variant Comparison (Phylip)</option>
168 <option value="gd">Annotated Variants (GenomeDiff)</option>
169 </expand>
170
171 </when>
172 </conditional>
173
174 </inputs>
175
<!-- Output datasets. Each one is created only when its filter is true, i.e. when the
     matching value was picked in the output_options formats parameter. The html and gd
     formats exist in both detect and annotate/compare modes, so those outputs are
     additionally split by run mode. -->
176 <outputs>
<!-- HTML report: breseq index.html in detect mode, gdtools ANNOTATE html otherwise. -->
177 <data format="html" name="report" label="${tool.name} on ${on_string}: Variants (Webpage)">
178 <filter>run['mode'] == 'detect' and 'html' in run['output_options']['formats']</filter>
179 </data>
180 <data format="html" name="annreport" label="${tool.name} on ${on_string}: Annotated Variants Report (Webpage)">
181 <filter>run['mode'] != 'detect' and 'html' in run['output_options']['formats']</filter>
182 </data>
183
<!-- GenomeDiff: raw breseq output in detect mode, annotated output otherwise. -->
184 <data format="tabular" name="output" label="${tool.name} on ${on_string}: Variants (GenomeDiff)">
185 <filter>run['mode'] == 'detect' and 'gd' in run['output_options']['formats']</filter>
186 </data>
187 <data format="tabular" name="genomediff" label="${tool.name} on ${on_string}: Annotated Variants (GenomeDiff)">
188 <filter>run['mode'] != 'detect' and 'gd' in run['output_options']['formats']</filter>
189 </data>
190
<!-- NOTE(review): the command writes this dataset with "tar -zcf", so the content is a
     gzip-compressed tar archive although the declared format is zip; confirm the
     intended datatype. -->
191 <data format="zip" name="zip_output" label="${tool.name} on ${on_string}: All Variant Results (Gzip)">
192 <filter>'zip' in run['output_options']['formats']</filter>
193 </data>
194 <data format="txt" name="log" label="${tool.name} on ${on_string}: Breseq Log">
195 <filter>'log' in run['output_options']['formats']</filter>
196 </data>
<!-- The remaining outputs are targets of the gdtools ANNOTATE branch of the command. -->
197 <data format="tabular" name="tabdelim" label="${tool.name} on ${on_string}: Annotated Variants (Tabular)">
198 <filter>'tsv' in run['output_options']['formats']</filter>
199 </data>
200 <data format="phylip" name="phylipout" label="${tool.name} on ${on_string}: Variant Comparison (Phylip)">
201 <filter>'phylip' in run['output_options']['formats']</filter>
202 </data>
<!-- Declared as json (a text subclass in Galaxy) so that the dataset is recognised as
     JSON by downstream tools and viewers. -->
203 <data format="json" name="jsonout" label="${tool.name} on ${on_string}: Annotated Variants (JSON)">
204 <filter>'json' in run['output_options']['formats']</filter>
205 </data>
206 </outputs>
207
<!-- Functional tests: two detect-mode runs (history and built-in reference) and one
     annotate-mode run. -->
208 <tests>
<!-- Test 1: detect mode with a history reference (lambda.gbk) and all four output
     formats requested. -->
209 <test>
210 <repeat name="references">
211 <conditional name="reference">
212 <param name="source" value="history" />
213 <param name="own_genome" value="lambda.gbk" />
214 </conditional>
215 </repeat>
216 <conditional name="run">
217 <param name="mode" value="detect" />
218 <param name="fastqs" value="lambda.short_sequence_repeats.fastq" />
219 <param name="polymorphism_prediction" value="" />
220 <param name="name" value="smallest" />
221 <param name="predict_junctions" value="" />
222 <section name="output_options">
223 <param name="formats" value="html,log,gd,zip" />
224 </section>
225 </conditional>
226
<!-- The report is compared by size only; the log and GenomeDiff outputs tolerate a few
     differing lines; the archive only needs to contain the GenomeDiff result. -->
227 <output name="report" file="report.html" compare="sim_size" delta="100" />
228 <output name="log" file="log.txt" lines_diff="4">
229 <assert_contents>
230 <has_text text="breseq --num-processors" />
231 </assert_contents>
232 </output>
233 <output name="output" file="gdout.txt" lines_diff="8" />
234 <output name="zip_output">
235 <assert_contents>
236 <has_archive_member path="results/output/output.gd" />
237 </assert_contents>
238 </output>
239 </test>
<!-- Test 2: detect mode with the built-in reference entry lambda1, GenomeDiff output only. -->
240 <test>
241 <repeat name="references">
242 <conditional name="reference">
243 <param name="source" value="builtin" />
244 <param name="fixed_genome" value="lambda1" />
245 </conditional>
246 </repeat>
247 <conditional name="run">
248 <param name="mode" value="detect" />
249 <param name="fastqs" value="lambda.short_sequence_repeats.fastq" />
250 <param name="polymorphism_prediction" value="" />
251 <param name="name" value="smallest" />
252 <param name="predict_junctions" value="" />
253 <section name="output_options">
254 <param name="formats" value="gd" />
255 </section>
256 </conditional>
257
258 <output name="output" file="gdout.txt" lines_diff="8" />
259 </test>
<!-- Test 3: annotate mode on gdout.txt with a history reference, HTML output compared
     by size only. -->
260 <test>
261 <repeat name="references">
262 <conditional name="reference">
263 <param name="source" value="history" />
264 <param name="own_genome" value="lambda.gbk" />
265 </conditional>
266 </repeat>
267 <conditional name="run">
268 <param name="mode" value="annotate" />
269 <param name="gds" value="gdout.txt" />
270 <section name="output_options">
271 <param name="formats" value="html" />
272 </section>
273 </conditional>
274
275 <output name="annreport" file="gdtoolsout.html" compare="sim_size" delta="100" />
276 </test>
277 </tests>
278
279 <help>
280 <![CDATA[
281 **Detect Variants**
282
283 breseq (pronounced: \\brēz-ˈsēk\\ or breeze-seq) is a computational pipeline for
284 the analysis of short-read re-sequencing data (e.g. Illumina, 454, IonTorrent,
285 etc.). It uses reference-based alignment approaches to predict mutations in a
286 sample relative to an already sequenced genome. breseq is intended for microbial
287 genomes (<10 Mb) and re-sequenced samples that are only slightly diverged from
288 the reference sequence (<1 mutation per 1000 bp).
289
290 breseq's primary advantages over other software programs are that it can:
291
292 - Accurately predict new sequence junctions, such as those associated with mobile element insertions.
293 - Integrate multiple sources of evidence for genetic changes into mutation predictions.
294 - Produce annotated output describing biologically relevant mutational events.
295
296 breseq was initially developed to analyze data from the Lenski long-term
297 evolution experiment with `E. coli`_. References: barrick2009a_ barrick2009b_.
298
299 .. _`E. coli`: http://myxo.css.msu.edu/ecoli/
300 .. _barrick2009a: http://barricklab.org/twiki/pub/Lab/ToolsBacterialGenomeResequencing/documentation/references.html#barrick2009a
301 .. _barrick2009b: http://barricklab.org/twiki/pub/Lab/ToolsBacterialGenomeResequencing/documentation/references.html#barrick2009b
302
303 However, breseq may be generally useful to researchers who are:
304
305 - Tracking mutations over time in microbial evolution experiments.
306 - Checking strains for unwanted second-site mutations after genetic manipulations.
307 - Identifying mutations that occur during strain improvement or after long-term culture of engineered strains.
308 - Discovering what mutations arise in pathogens during infection or cause antibiotic resistance.
309
310
311 *Inputs*
312
313 Breseq accepts files in FASTQ format. It does not take paired-end information into
314 account.
315
316 You can either run in clonal (consensus) mode or search for polymorphisms in a
317 population.
318
319 You can also select an external sequence (e.g. a transposon) to detect
320 insertions or horizontal transfer.
321
322
323 *Outputs*
324
325 Breseq outputs a number of files. These are all condensed in a single zipped
326 file.
327
328 It contains output files with the final results, accessible through
329 ``output/index.html``
330
331 It also contains data files with accessory data, including:
332
333 - ``data/reference.fasta`` (file with reference genome: can be used in e.g. IGV browser)
334 - ``data/reference.gff`` (file with genomic annotations: can be used in e.g. IGV browser)
335 - ``data/reference.bam`` (file with read alignments: can be used in e.g. IGV browser)
336 - ``data/unmatched.*`` (files with reads that failed to align: can be used to build an assembly or to e.g. blast against NCBI)
337
338
339 ----
340
341 **Annotate Variants**
342
343 Annotate a GenomeDiff file (generated by breseq) with information about
344 mutations (what genes they affect, amino acid substitutions, etc.). If multiple
345 input files are provided, then also COMPARE the frequencies for identical
346 mutations across samples.
347 ]]>
348 </help>
349
<!-- Publication to cite when using this tool, referenced by DOI. -->
350 <citations>
351 <citation type="doi">10.1007/978-1-4939-0554-6_12</citation>
352 </citations>
353
354 </tool>