comparison badread.xml @ 0:050e560dd49f draft default tip

planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/badread commit fa7861da52408457fa440bf5efe45963f333f282
author jvolkening
date Wed, 06 Mar 2024 06:40:22 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:050e560dd49f
1 <tool id="badread" name="Badread" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" license="MIT" python_template_version="3.5" profile="21.05">
2
3 <description>long-read simulator</description>
4 <macros> <!-- tokens expanded wherever their @NAME@ placeholder appears in this file -->
5 <token name="@VERSION_SUFFIX@">0</token> <!-- wrapper revision appended after "+galaxy" in the tool version -->
6 <token name="@TOOL_VERSION@">0.4.1</token> <!-- badread package version; also pins the badread requirement -->
7 </macros>
8 <edam_topics>
9 <edam_topic>topic_0080</edam_topic> <!-- EDAM topic: Sequence analysis -->
10 </edam_topics>
11 <edam_operations>
12 <edam_operation>operation_2426</edam_operation> <!-- EDAM operation: Modelling and simulation -->
13 </edam_operations>
14 <requirements> <!-- packages the command line depends on -->
15 <requirement version="@TOOL_VERSION@" type="package">badread</requirement> <!-- the simulator itself -->
16 <requirement version="2.8" type="package">pigz</requirement> <!-- used when "Compress output" is enabled -->
17 </requirements>
18 <version_command>badread --version | sed 's/Badread v//'</version_command> <!-- strips the "Badread v" prefix so only the version number remains; sed is used because perl is not among the declared requirements -->
19 <!-- Builds the `badread simulate` call. Reads are written to stdout, optionally piped through pigz, and redirected into the output dataset. Every parameter is declared inside a section and is therefore referenced by its section-qualified name; options are spelled out in full instead of relying on prefix matching; the redirect target is quoted. -->
20 <command detect_errors="aggressive"><![CDATA[
21 badread simulate
22 --reference '${io.reference}'
23 --quantity '${sim.quantity}'
24 --length '${sim.length}'
25 --identity '${sim.identity}'
26 --error_model '${sim.error_model}'
27 --qscore_model '${sim.qscore_model}'
28 #if str($sim.seed.seed_bool) == "True":
29 --seed ${sim.seed.seed}
30 #end if
31 --start_adapter '${adapt.start_adapter}'
32 --end_adapter '${adapt.end_adapter}'
33 --start_adapter_seq '${adapt.start_adapter_seq}'
34 --end_adapter_seq '${adapt.end_adapter_seq}'
35 --junk_reads ${problems.junk_reads}
36 --random_reads ${problems.random_reads}
37 --chimeras ${problems.chimera}
38 --glitches '${problems.glitches}'
39 ${problems.small_plasmid_bias}
40 ${io.compress_output}
41 > '$output'
42 ]]></command>
43
44 <inputs>
45 <section name="io" title="Input/Output" expanded="true"> <!-- reference input and output compression switch -->
46 <param argument="--reference"
47 type="data"
48 format="fasta,fasta.gz"
49 label="Reference"
50 help="Reference file in FASTA format"/>
51 <param name="compress_output"
52 type="boolean"
53 checked="true"
54 truevalue="| pigz -p ${GALAXY_SLOTS:-1} --no-name --no-time"
55 falsevalue=""
56 label="Compress output" /> <!-- the selected value is inserted verbatim into the command line, so the true value is a complete shell pipe stage -->
57 </section>
58 <section name="sim" title="Simulation parameters" expanded="true"> <!-- core simulation settings: amount, length, identity, models, seed -->
59 <param type="text"
60 optional="false"
61 argument="--quantity"
62 label="Quantity"
63 help="Absolute number of bp (e.g. '250M') or relative depth (e.g. '25x')">
64 <validator type="regex">^\d+(?i:[xkmg])?$</validator> <!-- integer with an optional x, k, m or g suffix in either case -->
65 <validator type="empty_field" />
66 </param>
67 <param type="text"
68 argument="--length"
69 value="15000,13000"
70 label="Fragment length distribution"
71 help="'mean,stdev'">
72 <validator type="regex">^\d+,\d+$</validator> <!-- two comma-separated integers -->
73 </param>
74 <param type="text"
75 argument="--identity"
76 value="95,99,2.5"
77 label="Identity distribution"
78 help="'mean,max,stdev' for identity beta distribution, 'mean,stdev' for qscore normal distribution">
79 <validator type="regex">^(\d+\.?\d*|\.\d+),(\d+\.?\d*|\.\d+)(,(\d+\.?\d*|\.\d+))?$</validator> <!-- two or three decimal numbers; each field needs at least one digit and at most one decimal point, so strings made only of dots are rejected -->
80 </param>
81 <param type="select" argument="--error_model" label="Error model">
82 <option value="nanopore2018">nanopore2018</option>
83 <option value="nanopore2020">nanopore2020</option>
84 <option value="nanopore2023" selected="true">nanopore2023</option>
85 <option value="pacbio2016">pacbio2016</option>
86 <option value="pacbio2021">pacbio2021</option>
87 <option value="random">random</option>
88 </param>
89 <param type="select" argument="--qscore_model" label="Q-score model">
90 <option value="nanopore2018">nanopore2018</option>
91 <option value="nanopore2020">nanopore2020</option>
92 <option value="nanopore2023" selected="true">nanopore2023</option>
93 <option value="pacbio2016">pacbio2016</option>
94 <option value="pacbio2021">pacbio2021</option>
95 <option value="random">random</option>
96 <option value="ideal">ideal</option>
97 </param>
98 <conditional name="seed"> <!-- the seed field is only offered when a fixed seed is requested -->
99 <param name="seed_bool" type="select" label="Fixed seed?">
100 <option value="False" selected="true">No</option>
101 <option value="True">Yes</option>
102 </param>
103 <when value="True">
104 <param argument="--seed" type="integer" value="1234" label="Enter seed" />
105 </when>
106 <when value="False" />
107 </conditional>
108 </section>
109 <section name="adapt" title="Adapters"> <!-- adapter settings for read starts and read ends -->
110 <param argument="--start_adapter"
111 type="text"
112 value="90,60"
113 label="Start adapter"
114 help="'rate,amount'">
115 <validator type="regex">^\d+,\d+$</validator> <!-- two comma-separated integers -->
116 </param>
117 <param argument="--end_adapter"
118 type="text"
119 value="50,20"
120 label="End adapter"
121 help="'rate,amount'">
122 <validator type="regex">^\d+,\d+$</validator> <!-- two comma-separated integers -->
123 </param>
124 <param argument="--start_adapter_seq"
125 type="text"
126 value="AATGTACTTCGTTCAGTTACGTATTGCT"
127 label="Start adapter sequence">
128 <validator type="regex">^(?i:[ATGC]+)$</validator> <!-- one or more A, T, G or C characters in either case -->
129 </param>
130 <param argument="--end_adapter_seq"
131 type="text"
132 value="GCAATACGTAACTGAACGAAGT"
133 label="End adapter sequence">
134 <validator type="regex">^(?i:[ATGC]+)$</validator> <!-- one or more A, T, G or C characters in either case -->
135 </param>
136 </section>
137 <section name="problems" title="Problems"> <!-- percentages of problematic reads, glitch settings and plasmid bias -->
138 <param type="float"
139 argument="--junk_reads"
140 value="1"
141 min="0"
142 max="100"
143 label="Percentage junk reads"
144 help="0-100" />
145 <param type="float"
146 argument="--random_reads"
147 value="1"
148 min="0"
149 max="100"
150 label="Percentage random reads"
151 help="0-100" />
152 <param type="float"
153 name="chimera" argument="--chimeras"
154 value="1"
155 min="0"
156 max="50"
157 label="Percentage chimeric formation"
158 help="0-50" /> <!-- the name stays "chimera" so existing references keep working; the argument now shows the option as spelled in the tool help -->
159 <param type="text"
160 argument="--glitches"
161 value="10000,25,25"
162 label="Read glitches"
163 help="'rate,size,skip'">
164 <validator type="regex">^\d+,\d+,\d+$</validator> <!-- three comma-separated integers -->
165 </param>
166 <param type="boolean"
167 argument="--small_plasmid_bias"
168 checked="false"
169 truevalue="--small_plasmid_bias"
170 falsevalue=""
171 label="Small plasmid bias"
172 help="Drop circular sequences smaller than fragment length" />
173 </section>
174 </inputs>
175
176 <outputs> <!-- one read set; its datatype follows the compression switch -->
177 <data format="fastq.gz" name="output">
178 <change_format>
179 <when format="fastq" input="io.compress_output" value="" /> <!-- an empty value is the switch's falsevalue, i.e. compression turned off -->
180 </change_format>
181 </data>
182 </outputs>
183
184 <tests>
185 <!-- Defaults with a fixed seed: output must match the stored reads and the command must carry the seed. -->
186 <test>
187 <param name="reference" value="ref.fa.gz" ftype="fasta.gz" />
188 <param name="seed_bool" value="True" />
189 <param name="seed" value="22" />
190 <param name="quantity" value="2x" />
191 <output name="output" file="2x.fq.gz" compare="diff" ftype="fastq.gz" decompress="true" />
192 <assert_command>
193 <has_text text="--seed 22" />
194 </assert_command>
195 </test>
196 <!-- Defaults without a seed: only checks that no seed option is emitted. -->
197 <test>
198 <param name="reference" value="ref.fa.gz" ftype="fasta.gz" />
199 <param name="quantity" value="2x" />
200 <assert_command>
201 <not_has_text text="--seed " />
202 </assert_command>
203 </test>
204 <!-- Absolute quantity without compression: output datatype is plain fastq. -->
205 <test>
206 <param name="reference" value="ref.fa.gz" ftype="fasta.gz" />
207 <param name="seed_bool" value="True" />
208 <param name="seed" value="22" />
209 <param name="quantity" value="50k" />
210 <param name="compress_output" value="false" />
211 <output name="output" file="50k.fq.gz" compare="diff" ftype="fastq" decompress="true" />
212 </test>
213 <!-- Non-default fragment length distribution. -->
214 <test>
215 <param name="reference" value="ref.fa.gz" ftype="fasta.gz" />
216 <param name="seed_bool" value="True" />
217 <param name="seed" value="22" />
218 <param name="quantity" value="2x" />
219 <param name="length" value="2000,1000" />
220 <output name="output" file="2x.l2000_1000.fq.gz" compare="diff" decompress="true" />
221 </test>
222 <!-- Identity given as three values (beta distribution); confirmed via stderr. -->
223 <test>
224 <param name="reference" value="ref.fa.gz" ftype="fasta.gz" />
225 <param name="seed_bool" value="True" />
226 <param name="seed" value="22" />
227 <param name="quantity" value="2x" />
228 <param name="identity" value="80,90,5" />
229 <output name="output" file="2x.i80_90_5.fq.gz" compare="diff" decompress="true" />
230 <assert_stderr>
231 <has_text text="identities from a beta distribution" />
232 </assert_stderr>
233 </test>
234 <!-- Identity given as two values (normal qscore distribution); confirmed via stderr. -->
235 <test>
236 <param name="reference" value="ref.fa.gz" ftype="fasta.gz" />
237 <param name="seed_bool" value="True" />
238 <param name="seed" value="22" />
239 <param name="quantity" value="2x" />
240 <param name="identity" value="80,5" />
241 <output name="output" file="2x.i80_5.fq.gz" compare="diff" decompress="true" />
242 <assert_stderr>
243 <has_text text="qscores from a normal distribution" />
244 </assert_stderr>
245 </test>
246 <!-- Non-default error and qscore models. -->
247 <test>
248 <param name="reference" value="ref.fa.gz" ftype="fasta.gz" />
249 <param name="seed_bool" value="True" />
250 <param name="seed" value="22" />
251 <param name="quantity" value="2x" />
252 <param name="error_model" value="pacbio2021" />
253 <param name="qscore_model" value="pacbio2021" />
254 <output name="output" file="2x.em_pacbio2021.qm_pacbio2021.fq.gz" compare="diff" decompress="true" />
255 </test>
256 <!-- Non-default adapter rates and sequences. -->
257 <test>
258 <param name="reference" value="ref.fa.gz" ftype="fasta.gz" />
259 <param name="seed_bool" value="True" />
260 <param name="seed" value="22" />
261 <param name="quantity" value="2x" />
262 <param name="start_adapter" value="50,30" />
263 <param name="start_adapter_seq" value="ATGC" />
264 <param name="end_adapter" value="80,40" />
265 <param name="end_adapter_seq" value="GCAT" />
266 <output name="output" file="2x.sa50_30_ATGC.ea80_40_GCAT.fq.gz" compare="diff" decompress="true" />
267 </test>
268 <!-- Non-default junk, random, chimera and glitch settings. -->
269 <test>
270 <param name="reference" value="ref.fa.gz" ftype="fasta.gz" />
271 <param name="seed_bool" value="True" />
272 <param name="seed" value="22" />
273 <param name="quantity" value="2x" />
274 <param name="junk_reads" value="80" />
275 <param name="random_reads" value="9.5" />
276 <param name="chimera" value="5" />
277 <param name="glitches" value="2000,50,40" />
278 <output name="output" file="2x.j80.r9.5.c5.g2000_50_40.fq.gz" compare="diff" decompress="true" />
279 </test>
280 <!-- Small plasmid bias off: GU320569 is expected among the reads. -->
281 <test>
282 <param name="reference" value="ref.fa.gz" ftype="fasta.gz" />
283 <param name="seed_bool" value="True" />
284 <param name="seed" value="22" />
285 <param name="quantity" value="1M" />
286 <output name="output" decompress="true">
287 <assert_contents>
288 <has_text text="GU320569" />
289 </assert_contents>
290 </output>
291 </test>
292 <test> <!-- Small plasmid bias on: GU320569 must be absent from the reads. -->
293 <param name="reference" value="ref.fa.gz" ftype="fasta.gz" />
294 <param name="seed_bool" value="True" />
295 <param name="seed" value="22" />
296 <param name="quantity" value="1M" />
297 <param name="small_plasmid_bias" value="true" />
298 <output name="output" decompress="true">
299 <assert_contents>
300 <not_has_text text="GU320569" />
301 </assert_contents>
302 </output>
303 </test>
304 </tests>
305
306 <help><![CDATA[
307 **Required arguments:**
308
309 --reference REFERENCE Reference FASTA file (can be gzipped)
310 --quantity QUANTITY Either an absolute value (e.g. 250M) or a
311 relative depth (e.g. 25x)
312
313 **Simulation parameters:**
314
315 --length LENGTH Fragment length distribution (mean and stdev,
316 default: 15000,13000)
317 --identity IDENTITY Sequencing identity distribution
318 (mean,max,stdev for beta distribution or
319 mean,stdev for normal qscore distribution,
320 default: 95,99,2.5)
321 --error_model ERROR_MODEL Can be "nanopore2018", "nanopore2020",
322 "nanopore2023", "pacbio2016", "pacbio2021",
323 "random" or a model filename (default:
324 nanopore2023)
325 --qscore_model QSCORE_MODEL Can be "nanopore2018", "nanopore2020",
326 "nanopore2023", "pacbio2016", "pacbio2021",
327 "random", "ideal" or a model filename
328 (default: nanopore2023)
329 --seed SEED Random number generator seed for deterministic
330 output (default: different output each time)
331
332 **Adapters:**
333
334 --start_adapter START_ADAPTER
335 Adapter parameters for read starts (rate and
336 amount, default: 90,60)
337 --end_adapter END_ADAPTER Adapter parameters for read ends (rate and
338 amount, default: 50,20)
339 --start_adapter_seq START_ADAPTER_SEQ
340 Adapter sequence for read starts (default:
341 AATGTACTTCGTTCAGTTACGTATTGCT)
342 --end_adapter_seq END_ADAPTER_SEQ
343 Adapter sequence for read ends (default:
344 GCAATACGTAACTGAACGAAGT)
345
346 **Problems:**
347
348 --junk_reads JUNK_READS This percentage of reads will be
349 low-complexity junk (default: 1)
350 --random_reads RANDOM_READS This percentage of reads will be random
351 sequence (default: 1)
352 --chimeras CHIMERAS Percentage at which separate fragments join
353 together (default: 1)
354 --glitches GLITCHES Read glitch parameters (rate, size and skip,
355 default: 10000,25,25)
356 --small_plasmid_bias If set, then small circular plasmids are lost
357 when the fragment length is too high (default:
358 small plasmids are included regardless of
359 fragment length)
360
361 ]]></help>
362
363 <citations>
364 <citation type="doi">10.21105/joss.01316</citation> <!-- DOI of the publication to cite for this tool -->
365 </citations>
366
367 </tool>