Mercurial > repos > jvolkening > badread
comparison badread.xml @ 0:050e560dd49f draft default tip
planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/badread commit fa7861da52408457fa440bf5efe45963f333f282
| author | jvolkening |
|---|---|
| date | Wed, 06 Mar 2024 06:40:22 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:050e560dd49f |
|---|---|
| 1 <tool id="badread" name="Badread" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" license="MIT" python_template_version="3.5" profile="21.05"> | |
| 2 | |
| 3 <description>long-read simulator</description> | |
| 4 <macros> | |
| 5 <token name="@TOOL_VERSION@">0.4.1</token> | |
| 6 <token name="@VERSION_SUFFIX@">0</token> | |
| 7 </macros> | |
| 8 <edam_topics> | |
| 9 <edam_topic>topic_0080</edam_topic> <!-- Sequence analysis --> | |
| 10 </edam_topics> | |
| 11 <edam_operations> | |
| 12 <edam_operation>operation_2426</edam_operation> <!-- Modelling and simulation --> | |
| 13 </edam_operations> | |
| 14 <requirements> | |
| 15 <requirement type="package" version="@TOOL_VERSION@">badread</requirement> | |
| 16 <requirement type="package" version="2.8">pigz</requirement> | |
| 17 </requirements> | |
| 18 <version_command>badread --version | sed 's/Badread v//'</version_command> <!-- Reports the installed Badread version with the leading "Badread v" text stripped; sed is used rather than perl because perl is not among the declared requirements --> | |
| 19 <!-- Command template: runs "badread simulate" with options read from the io, sim, adapt and problems sections. The seed flag is emitted only when seed_bool is "True". Standard output is redirected into the output dataset, optionally through the pigz pipe fragment rendered by compress_output. The chimera percentage is passed with the flag spelled exactly as in the help section, and small_plasmid_bias is referenced through its section like every other parameter. --> | |
| 20 <command detect_errors="aggressive"><![CDATA[ | |
| 21 badread simulate | |
| 22 --reference '${io.reference}' | |
| 23 --quantity '${sim.quantity}' | |
| 24 --length '${sim.length}' | |
| 25 --identity '${sim.identity}' | |
| 26 --error_model '${sim.error_model}' | |
| 27 --qscore_model '${sim.qscore_model}' | |
| 28 #if str($sim.seed.seed_bool) == "True": | |
| 29 --seed ${sim.seed.seed} | |
| 30 #end if | |
| 31 --start_adapter '${adapt.start_adapter}' | |
| 32 --end_adapter '${adapt.end_adapter}' | |
| 33 --start_adapter_seq '${adapt.start_adapter_seq}' | |
| 34 --end_adapter_seq '${adapt.end_adapter_seq}' | |
| 35 --junk_reads ${problems.junk_reads} | |
| 36 --random_reads ${problems.random_reads} | |
| 37 --chimeras ${problems.chimera} | |
| 38 --glitches '${problems.glitches}' | |
| 39 ${problems.small_plasmid_bias} | |
| 40 ${io.compress_output} | |
| 41 > '$output' | |
| 42 ]]></command> | |
| 43 | |
| 44 <inputs> | |
| 45 <section name="io" title="Input/Output" expanded="true"> <!-- Reference input and the output compression toggle --> | |
| 46 <param type="data" | |
| 47 argument="--reference" | |
| 48 format="fasta,fasta.gz" | |
| 49 label="Reference" | |
| 50 help="Reference file in FASTA format"/> <!-- Accepts datasets of type fasta or fasta.gz --> | |
| 51 <param type="boolean" | |
| 52 name="compress_output" | |
| 53 checked="true" | |
| 54 truevalue="| pigz -p ${GALAXY_SLOTS:-1} --no-name --no-time" | |
| 55 falsevalue="" | |
| 56 label="Compress output" /> <!-- Checked by default; the true value is a shell pipe fragment that the command template places just before the output redirect, the false value inserts nothing --> | |
| 57 </section> | |
| 58 <section name="sim" title="Simulation parameters" expanded="true"> <!-- Core simulation settings, each forwarded to "badread simulate" by the command template --> | |
| 59 <param type="text" | |
| 60 optional="false" | |
| 61 argument="--quantity" | |
| 62 label="Quantity" | |
| 63 help="Absolute number of bp (e.g. '250M') or relative depth (e.g. '25x')"> | |
| 64 <validator type="regex">^\d+(?i:[xkmg])?$</validator> | |
| 65 <validator type="empty_field" /> | |
| 66 </param> <!-- Required, no default value; the regex allows digits followed by at most one x, k, m or g suffix in either case --> | |
| 67 <param type="text" | |
| 68 argument="--length" | |
| 69 value="15000,13000" | |
| 70 label="Fragment length distribution" | |
| 71 help="'mean,stdev'"> | |
| 72 <validator type="regex">^\d+,\d+$</validator> | |
| 73 </param> <!-- Exactly two comma-separated integers --> | |
| 74 <param type="text" | |
| 75 argument="--identity" | |
| 76 value="95,99,2.5" | |
| 77 label="Identity distribution" | |
| 78 help="'mean,max,stdev' for identity beta distribution, 'mean,stdev' for qscore normal distribution"> | |
| 79 <validator type="regex">^[\d\.]+,[\d\.]+(,[\d\.]+)?$</validator> | |
| 80 </param> <!-- Two or three comma-separated fields made of digits and dots; the regex does not check that each field is a well-formed number --> | |
| 81 <param type="select" argument="--error_model" label="Error model"> <!-- Built-in error models only; nanopore2023 is preselected --> | |
| 82 <option value="nanopore2018">nanopore2018</option> | |
| 83 <option value="nanopore2020">nanopore2020</option> | |
| 84 <option value="nanopore2023" selected="true">nanopore2023</option> | |
| 85 <option value="pacbio2016">pacbio2016</option> | |
| 86 <option value="pacbio2021">pacbio2021</option> | |
| 87 <option value="random">random</option> | |
| 88 </param> | |
| 89 <param type="select" argument="--qscore_model" label="Q-score model"> <!-- Same choices as the error model plus "ideal"; nanopore2023 is preselected --> | |
| 90 <option value="nanopore2018">nanopore2018</option> | |
| 91 <option value="nanopore2020">nanopore2020</option> | |
| 92 <option value="nanopore2023" selected="true">nanopore2023</option> | |
| 93 <option value="pacbio2016">pacbio2016</option> | |
| 94 <option value="pacbio2021">pacbio2021</option> | |
| 95 <option value="random">random</option> | |
| 96 <option value="ideal">ideal</option> | |
| 97 </param> | |
| 98 <conditional name="seed"> <!-- The command template adds the seed flag only when seed_bool is "True" --> | |
| 99 <param name="seed_bool" type="select" label="Fixed seed?"> | |
| 100 <option value="False" selected="true">No</option> | |
| 101 <option value="True">Yes</option> | |
| 102 </param> | |
| 103 <when value="True"> | |
| 104 <param argument="--seed" type="integer" value="1234" label="Enter seed" /> | |
| 105 </when> | |
| 106 <when value="False" /> <!-- No extra inputs when no fixed seed is requested --> | |
| 107 </conditional> | |
| 108 </section> | |
| 109 <section name="adapt" title="Adapters"> <!-- Adapter rate/amount pairs and adapter sequences for read starts and ends --> | |
| 110 <param type="text" | |
| 111 argument="--start_adapter" | |
| 112 value="90,60" | |
| 113 label="Start adapter" | |
| 114 help="'rate,amount'"> | |
| 115 <validator type="regex">^\d+,\d+$</validator> | |
| 116 </param> <!-- Exactly two comma-separated integers --> | |
| 117 <param type="text" | |
| 118 argument="--end_adapter" | |
| 119 value="50,20" | |
| 120 label="End adapter" | |
| 121 help="'rate,amount'"> | |
| 122 <validator type="regex">^\d+,\d+$</validator> | |
| 123 </param> <!-- Exactly two comma-separated integers --> | |
| 124 <param type="text" | |
| 125 argument="--start_adapter_seq" | |
| 126 value="AATGTACTTCGTTCAGTTACGTATTGCT" | |
| 127 label="Start adapter sequence"> | |
| 128 <validator type="regex">^(?i:[ATGC]+)$</validator> | |
| 129 </param> <!-- One or more A, T, G or C characters, matched case-insensitively; an empty value is rejected --> | |
| 130 <param type="text" | |
| 131 argument="--end_adapter_seq" | |
| 132 value="GCAATACGTAACTGAACGAAGT" | |
| 133 label="End adapter sequence"> | |
| 134 <validator type="regex">^(?i:[ATGC]+)$</validator> | |
| 135 </param> <!-- One or more A, T, G or C characters, matched case-insensitively; an empty value is rejected --> | |
| 136 </section> | |
| 137 <section name="problems" title="Problems"> <!-- Rates of junk, random and chimeric reads, glitch settings and the small plasmid bias switch --> | |
| 138 <param type="float" | |
| 139 argument="--junk_reads" | |
| 140 value="1" | |
| 141 min="0" | |
| 142 max="100" | |
| 143 label="Percentage junk reads" | |
| 144 help="0-100" /> <!-- Percentage bounded to 0 through 100, default 1 --> | |
| 145 <param type="float" | |
| 146 argument="--random_reads" | |
| 147 value="1" | |
| 148 min="0" | |
| 149 max="100" | |
| 150 label="Percentage random reads" | |
| 151 help="0-100" /> <!-- Percentage bounded to 0 through 100, default 1 --> | |
| 152 <param type="float" | |
| 153 argument="--chimera" | |
| 154 value="1" | |
| 155 min="0" | |
| 156 max="50" | |
| 157 label="Percentage chimeric formation" | |
| 158 help="0-50" /> <!-- Percentage bounded to 0 through 50, default 1; the help section of this tool spells the underlying option as "chimeras" --> | |
| 159 <param type="text" | |
| 160 argument="--glitches" | |
| 161 value="10000,25,25" | |
| 162 label="Read glitches" | |
| 163 help="'rate,size,skip'"> | |
| 164 <validator type="regex">^\d+,\d+,\d+$</validator> | |
| 165 </param> <!-- Exactly three comma-separated integers --> | |
| 166 <param type="boolean" | |
| 167 argument="--small_plasmid_bias" | |
| 168 checked="false" | |
| 169 truevalue="--small_plasmid_bias" | |
| 170 falsevalue="" | |
| 171 label="Small plasmid bias" | |
| 172 help="Drop circular sequences smaller than fragment length" /> <!-- Unchecked by default; renders the bare flag when checked and an empty string otherwise --> | |
| 173 </section> | |
| 174 </inputs> | |
| 175 | |
| 176 <outputs> <!-- Single dataset receiving the simulated reads written by the command redirect --> | |
| 177 <data name="output" format="fastq.gz"> <!-- Declared as fastq.gz unless the rule below applies --> | |
| 178 <change_format> | |
| 179 <when input="io.compress_output" value="" format="fastq" /> <!-- Switch to fastq when compress_output renders its empty false value, i.e. compression is turned off --> | |
| 180 </change_format> | |
| 181 </data> | |
| 182 </outputs> | |
| 183 | |
| 184 <tests> | |
| 185 <!-- defaults with seed --> | |
| 186 <test> | |
| 187 <param name="reference" value="ref.fa.gz" ftype="fasta.gz" /> | |
| 188 <param name="seed_bool" value="True" /> | |
| 189 <param name="seed" value="22" /> | |
| 190 <param name="quantity" value="2x" /> | |
| 191 <output name="output" file="2x.fq.gz" compare="diff" ftype="fastq.gz" decompress="true" /> | |
| 192 <assert_command> | |
| 193 <has_text text="--seed 22" /> | |
| 194 </assert_command> | |
| 195 </test> | |
| 196 <!-- defaults without seed --> | |
| 197 <test> | |
| 198 <param name="reference" value="ref.fa.gz" ftype="fasta.gz" /> | |
| 199 <param name="quantity" value="2x" /> | |
| 200 <assert_command> | |
| 201 <not_has_text text="--seed " /> | |
| 202 </assert_command> | |
| 203 </test> | |
| 204 <!-- use absolute quantity w/o compression --> | |
| 205 <test> | |
| 206 <param name="reference" value="ref.fa.gz" ftype="fasta.gz" /> | |
| 207 <param name="seed_bool" value="True" /> | |
| 208 <param name="seed" value="22" /> | |
| 209 <param name="quantity" value="50k" /> | |
| 210 <param name="compress_output" value="false" /> | |
| 211 <output name="output" file="50k.fq.gz" compare="diff" ftype="fastq" decompress="true" /> | |
| 212 </test> | |
| 213 <!-- set length dist--> | |
| 214 <test> | |
| 215 <param name="reference" value="ref.fa.gz" ftype="fasta.gz" /> | |
| 216 <param name="seed_bool" value="True" /> | |
| 217 <param name="seed" value="22" /> | |
| 218 <param name="quantity" value="2x" /> | |
| 219 <param name="length" value="2000,1000" /> | |
| 220 <output name="output" file="2x.l2000_1000.fq.gz" compare="diff" decompress="true" /> | |
| 221 </test> | |
| 222 <!-- set identity as beta dist--> | |
| 223 <test> | |
| 224 <param name="reference" value="ref.fa.gz" ftype="fasta.gz" /> | |
| 225 <param name="seed_bool" value="True" /> | |
| 226 <param name="seed" value="22" /> | |
| 227 <param name="quantity" value="2x" /> | |
| 228 <param name="identity" value="80,90,5" /> | |
| 229 <output name="output" file="2x.i80_90_5.fq.gz" compare="diff" decompress="true" /> | |
| 230 <assert_stderr> | |
| 231 <has_text text="identities from a beta distribution" /> | |
| 232 </assert_stderr> | |
| 233 </test> | |
| 234 <!-- set identity as normal dist--> | |
| 235 <test> | |
| 236 <param name="reference" value="ref.fa.gz" ftype="fasta.gz" /> | |
| 237 <param name="seed_bool" value="True" /> | |
| 238 <param name="seed" value="22" /> | |
| 239 <param name="quantity" value="2x" /> | |
| 240 <param name="identity" value="80,5" /> | |
| 241 <output name="output" file="2x.i80_5.fq.gz" compare="diff" decompress="true" /> | |
| 242 <assert_stderr> | |
| 243 <has_text text="qscores from a normal distribution" /> | |
| 244 </assert_stderr> | |
| 245 </test> | |
| 246 <!-- other models --> | |
| 247 <test> | |
| 248 <param name="reference" value="ref.fa.gz" ftype="fasta.gz" /> | |
| 249 <param name="seed_bool" value="True" /> | |
| 250 <param name="seed" value="22" /> | |
| 251 <param name="quantity" value="2x" /> | |
| 252 <param name="error_model" value="pacbio2021" /> | |
| 253 <param name="qscore_model" value="pacbio2021" /> | |
| 254 <output name="output" file="2x.em_pacbio2021.qm_pacbio2021.fq.gz" compare="diff" decompress="true" /> | |
| 255 </test> | |
| 256 <!-- set non-default adapters--> | |
| 257 <test> | |
| 258 <param name="reference" value="ref.fa.gz" ftype="fasta.gz" /> | |
| 259 <param name="seed_bool" value="True" /> | |
| 260 <param name="seed" value="22" /> | |
| 261 <param name="quantity" value="2x" /> | |
| 262 <param name="start_adapter" value="50,30" /> | |
| 263 <param name="start_adapter_seq" value="ATGC" /> | |
| 264 <param name="end_adapter" value="80,40" /> | |
| 265 <param name="end_adapter_seq" value="GCAT" /> | |
| 266 <output name="output" file="2x.sa50_30_ATGC.ea80_40_GCAT.fq.gz" compare="diff" decompress="true" /> | |
| 267 </test> | |
| 268 <!-- set problem parameters--> | |
| 269 <test> | |
| 270 <param name="reference" value="ref.fa.gz" ftype="fasta.gz" /> | |
| 271 <param name="seed_bool" value="True" /> | |
| 272 <param name="seed" value="22" /> | |
| 273 <param name="quantity" value="2x" /> | |
| 274 <param name="junk_reads" value="80" /> | |
| 275 <param name="random_reads" value="9.5" /> | |
| 276 <param name="chimera" value="5" /> | |
| 277 <param name="glitches" value="2000,50,40" /> | |
| 278 <output name="output" file="2x.j80.r9.5.c5.g2000_50_40.fq.gz" compare="diff" decompress="true" /> | |
| 279 </test> | |
| 280 <!-- without and with small plasmid bias--> | |
| 281 <test> | |
| 282 <param name="reference" value="ref.fa.gz" ftype="fasta.gz" /> | |
| 283 <param name="seed_bool" value="True" /> | |
| 284 <param name="seed" value="22" /> | |
| 285 <param name="quantity" value="1M" /> | |
| 286 <output name="output" decompress="true"> | |
| 287 <assert_contents> | |
| 288 <has_text text="GU320569" /> | |
| 289 </assert_contents> | |
| 290 </output> | |
| 291 </test> | |
| 292 <test> | |
| 293 <param name="reference" value="ref.fa.gz" ftype="fasta.gz" /> | |
| 294 <param name="seed_bool" value="True" /> | |
| 295 <param name="seed" value="22" /> | |
| 296 <param name="quantity" value="1M" /> | |
| 297 <param name="small_plasmid_bias" value="true" /> | |
| 298 <output name="output" decompress="true"> | |
| 299 <assert_contents> | |
| 300 <not_has_text text="GU320569" /> | |
| 301 </assert_contents> | |
| 302 </output> | |
| 303 </test> | |
| 304 </tests> | |
| 305 | |
| 306 <help><![CDATA[ | |
| 307 **Required arguments:** | |
| 308 | |
| 309 --reference REFERENCE Reference FASTA file (can be gzipped) | |
| 310 --quantity QUANTITY Either an absolute value (e.g. 250M) or a | |
| 311 relative depth (e.g. 25x) | |
| 312 | |
| 313 **Simulation parameters:** | |
| 314 | |
| 315 --length LENGTH Fragment length distribution (mean and stdev, | |
| 316 default: 15000,13000) | |
| 317 --identity IDENTITY Sequencing identity distribution | |
| 318 (mean,max,stdev for beta distribution or | |
| 319 mean,stdev for normal qscore distribution, | |
| 320 default: 95,99,2.5) | |
| 321 --error_model ERROR_MODEL Can be "nanopore2018", "nanopore2020", | |
| 322 "nanopore2023", "pacbio2016", "pacbio2021", | |
| 323 "random" or a model filename (default: | |
| 324 nanopore2023) | |
| 325 --qscore_model QSCORE_MODEL Can be "nanopore2018", "nanopore2020", | |
| 326 "nanopore2023", "pacbio2016", "pacbio2021", | |
| 327 "random", "ideal" or a model filename | |
| 328 (default: nanopore2023) | |
| 329 --seed SEED Random number generator seed for deterministic | |
| 330 output (default: different output each time) | |
| 331 | |
| 332 **Adapters:** | |
| 333 | |
| 334 --start_adapter START_ADAPTER | |
| 335 Adapter parameters for read starts (rate and | |
| 336 amount, default: 90,60) | |
| 337 --end_adapter END_ADAPTER Adapter parameters for read ends (rate and | |
| 338 amount, default: 50,20) | |
| 339 --start_adapter_seq START_ADAPTER_SEQ | |
| 340 Adapter sequence for read starts (default: | |
| 341 AATGTACTTCGTTCAGTTACGTATTGCT) | |
| 342 --end_adapter_seq END_ADAPTER_SEQ | |
| 343 Adapter sequence for read ends (default: | |
| 344 GCAATACGTAACTGAACGAAGT) | |
| 345 | |
| 346 **Problems:** | |
| 347 | |
| 348 --junk_reads JUNK_READS This percentage of reads will be | |
| 349 low-complexity junk (default: 1) | |
| 350 --random_reads RANDOM_READS This percentage of reads will be random | |
| 351 sequence (default: 1) | |
| 352 --chimeras CHIMERAS Percentage at which separate fragments join | |
| 353 together (default: 1) | |
| 354 --glitches GLITCHES Read glitch parameters (rate, size and skip, | |
| 355 default: 10000,25,25) | |
| 356 --small_plasmid_bias If set, then small circular plasmids are lost | |
| 357 when the fragment length is too high (default: | |
| 358 small plasmids are included regardless of | |
| 359 fragment length) | |
| 360 | |
| 361 ]]></help> | |
| 362 | |
| 363 <citations> | |
| 364 <citation type="doi">10.21105/joss.01316</citation> | |
| 365 </citations> | |
| 366 | |
| 367 </tool> |
