comparison tophat2_wrapper.xml @ 2:da1f39fe14bc draft

Uploaded
author devteam
date Thu, 18 Dec 2014 13:56:31 -0500
parents ae06af1118dc
children 81f97e12e573
comparison
equal deleted inserted replaced
1:ae06af1118dc 2:da1f39fe14bc
1 <tool id="tophat2" name="Tophat2" version="0.6"> 1 <tool id="tophat2" name="Tophat2" version="0.7">
2 <!-- Wrapper compatible with Tophat version 2.0.0+ --> 2 <!-- Wrapper compatible with Tophat version 2.0.0+ -->
3 <description>Gapped-read mapper for RNA-seq data</description> 3 <description>Gapped-read mapper for RNA-seq data</description>
4 <version_command>tophat2 --version</version_command> 4 <version_command>tophat2 --version</version_command>
5 <requirements> 5 <requirements>
6 <requirement type="package" version="0.1.18">samtools</requirement> 6 <requirement type="package" version="0.1.18">samtools</requirement>
109 --rg-platform "$readGroup.rgpl" 109 --rg-platform "$readGroup.rgpl"
110 --rg-sample "$readGroup.rgsm" 110 --rg-sample "$readGroup.rgsm"
111 #end if 111 #end if
112 112
113 ## Set index path, inputs and parameters specific to paired data. 113 ## Set index path, inputs and parameters specific to paired data.
114 #if $singlePaired.sPaired == "paired" 114 #if $singlePaired.sPaired != "single"
115 -r $singlePaired.mate_inner_distance 115 -r $singlePaired.mate_inner_distance
116 --mate-std-dev=$singlePaired.mate_std_dev 116 --mate-std-dev=$singlePaired.mate_std_dev
117 117
118 #if str($singlePaired.report_discordant_pairs) == "No": 118 #if str($singlePaired.report_discordant_pairs) == "No":
119 --no-discordant 119 --no-discordant
120 #end if 120 #end if
121 121
122 ${index_path} $singlePaired.input1 $singlePaired.input2 122 #if $singlePaired.sPaired == "paired"
123 ${index_path} "$singlePaired.input1" "$singlePaired.input2"
124 #else
125 ${index_path} "$singlePaired.input.forward" "$singlePaired.input.reverse"
126 #end if
123 #else 127 #else
124 ${index_path} $singlePaired.input1 128 ${index_path} "$singlePaired.input1"
125 #end if 129 #end if
126 </command> 130 </command>
127 131
128 <inputs> 132 <inputs>
129 <conditional name="singlePaired"> 133 <conditional name="singlePaired">
130 <param name="sPaired" type="select" label="Is this library mate-paired?"> 134 <param name="sPaired" type="select" label="Is this library mate-paired?">
131 <option value="single">Single-end</option> 135 <option value="single">Single-end</option>
132 <option value="paired">Paired-end</option> 136 <option value="paired">Paired-end (as individual datasets)</option>
137 <option value="paired_collection">Paired-end (as collection)</option>
133 </param> 138 </param>
134 <when value="single"> 139 <when value="single">
135 <param format="fastqsanger" name="input1" type="data" label="RNA-Seq FASTQ file" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33"/> 140 <param format="fastqsanger" name="input1" type="data" label="RNA-Seq FASTQ file" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33"/>
136 </when> 141 </when>
137 <when value="paired"> 142 <when value="paired">
138 <param format="fastqsanger" name="input1" type="data" label="RNA-Seq FASTQ file, forward reads" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33" /> 143 <param format="fastqsanger" name="input1" type="data" label="RNA-Seq FASTQ file, forward reads" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33" />
139 <param format="fastqsanger" name="input2" type="data" label="RNA-Seq FASTQ file, reverse reads" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33" /> 144 <param format="fastqsanger" name="input2" type="data" label="RNA-Seq FASTQ file, reverse reads" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33" />
140 <param name="mate_inner_distance" type="integer" value="300" label="Mean Inner Distance between Mate Pairs" /> 145 <expand macro="paired_parameters" />
141 <param name="mate_std_dev" type="integer" value="20" label="Std. Dev for Distance between Mate Pairs" help="The standard deviation for the distribution on inner distances between mate pairs."/> 146 </when>
142 <!-- Discordant pairs. --> 147 <when value="paired_collection">
143 <param name="report_discordant_pairs" type="select" label="Report discordant pair alignments?"> 148 <param format="fastqsanger" name="input" type="data_collection" collection_type="paired" label="RNA-Seq FASTQ paired reads" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33" />
144 <option value="No">No</option> 149 <expand macro="paired_parameters" />
145 <option selected="True" value="Yes">Yes</option>
146 </param>
147 </when> 150 </when>
148 </conditional> 151 </conditional>
149 <expand macro="refGenomeSourceConditional"> 152 <expand macro="refGenomeSourceConditional">
150 <options from_data_table="tophat2_indexes"> 153 <options from_data_table="tophat2_indexes">
151 <filter type="sort_by" column="2"/> 154 <filter type="sort_by" column="2"/>
291 </data> 294 </data>
292 </outputs> 295 </outputs>
293 296
294 <macros> 297 <macros>
295 <import>tophat_macros.xml</import> 298 <import>tophat_macros.xml</import>
299 <xml name="paired_parameters">
300 <param name="mate_inner_distance" type="integer" value="300" label="Mean Inner Distance between Mate Pairs" />
301 <param name="mate_std_dev" type="integer" value="20" label="Std. Dev for Distance between Mate Pairs" help="The standard deviation for the distribution on inner distances between mate pairs."/>
302 <!-- Discordant pairs. -->
303 <param name="report_discordant_pairs" type="select" label="Report discordant pair alignments?">
304 <option value="No">No</option>
305 <option selected="True" value="Yes">Yes</option>
306 </param>
307 </xml>
296 <macro name="dbKeyActions"> 308 <macro name="dbKeyActions">
297 <actions> 309 <actions>
298 <conditional name="refGenomeSource.genomeSource"> 310 <conditional name="refGenomeSource.genomeSource">
299 <when value="indexed"> 311 <when value="indexed">
300 <action type="metadata" name="dbkey"> 312 <action type="metadata" name="dbkey">
346 <param name="settingsType" value="preSet" /> 358 <param name="settingsType" value="preSet" />
347 <param name="specReadGroup" value="No" /> 359 <param name="specReadGroup" value="No" />
348 <output name="junctions" file="tophat2_out2j.bed" /> 360 <output name="junctions" file="tophat2_out2j.bed" />
349 <output name="accepted_hits" file="tophat_out2h.bam" compare="sim_size" /> 361 <output name="accepted_hits" file="tophat_out2h.bam" compare="sim_size" />
350 </test> 362 </test>
363 <test>
364 <!-- Same test as above but with a collection. -->
365 <param name="sPaired" value="paired_collection" />
366 <param name="input">
367 <collection type="paired">
368 <element name="forward" value="tophat_in2.fastqsanger" ftype="fastqsanger" />
369 <element name="reverse" value="tophat_in3.fastqsanger" ftype="fastqsanger" />
370 </collection>
371 </param>
372 <param name="genomeSource" value="history" />
373 <param name="ownFile" ftype="fasta" value="tophat_in1.fasta" />
374 <param name="mate_inner_distance" value="20" />
375 <param name="settingsType" value="preSet" />
376 <param name="specReadGroup" value="No" />
377 <output name="junctions" file="tophat2_out2j.bed" />
378 <output name="accepted_hits" file="tophat_out2h.bam" compare="sim_size" />
379 </test>
351 <!-- Test base-space single-end reads with user-supplied reference fasta and full parameters --> 380 <!-- Test base-space single-end reads with user-supplied reference fasta and full parameters -->
352 <test> 381 <test>
353 <!-- Tophat commands: 382 <!-- Tophat commands:
354 bowtie2-build -f test-data/tophat_in1.fasta tophat_in1 383 bowtie2-build -f test-data/tophat_in1.fasta tophat_in1
355 tophat2 -o tmp_dir -p 1 -a 8 -m 0 -i 70 -I 500000 -g 40 +coverage-search +min-coverage-intron 50 +max-coverage-intron 20000 +segment-mismatches 2 +segment-length 25 +microexon-search tophat_in1 test-data/tophat_in2.fastqsanger 384 tophat2 -o tmp_dir -p 1 -a 8 -m 0 -i 70 -I 500000 -g 40 +coverage-search +min-coverage-intron 50 +max-coverage-intron 20000 +segment-mismatches 2 +segment-length 25 +microexon-search tophat_in1 test-data/tophat_in2.fastqsanger
356 Replace the + with double-dash 385 Replace the + with double-dash
357 Rename the files in tmp_dir appropriately 386 Rename the files in tmp_dir appropriately
358 --> 387 -->
359 <param name="sPaired" value="single"/> 388 <conditional name="singlePaired">
360 <param name="input1" ftype="fastqsanger" value="tophat_in2.fastqsanger"/> 389 <param name="sPaired" value="single"/>
390 <param name="input1" ftype="fastqsanger" value="tophat_in2.fastqsanger"/>
391 </conditional>
361 <param name="genomeSource" value="history"/> 392 <param name="genomeSource" value="history"/>
362 <param name="ownFile" value="tophat_in1.fasta"/> 393 <param name="ownFile" value="tophat_in1.fasta"/>
363 <param name="settingsType" value="full"/> 394 <conditional name="params">
364 <param name="library_type" value="FR Unstranded"/> 395 <param name="settingsType" value="full"/>
365 <param name="read_mismatches" value="2"/> 396 <param name="library_type" value="FR Unstranded"/>
366 <param name="bowtie_n" value="No"/> 397 <param name="read_mismatches" value="2"/>
367 <param name="anchor_length" value="8"/> 398 <param name="bowtie_n" value="No"/>
368 <param name="splice_mismatches" value="0"/> 399 <param name="anchor_length" value="8"/>
369 <param name="min_intron_length" value="70"/> 400 <param name="splice_mismatches" value="0"/>
370 <param name="max_intron_length" value="500000"/> 401 <param name="min_intron_length" value="70"/>
371 <param name="max_multihits" value="40"/> 402 <param name="max_intron_length" value="500000"/>
372 <param name="min_segment_intron" value="50" /> 403 <param name="max_multihits" value="40"/>
373 <param name="max_segment_intron" value="500000" /> 404 <param name="min_segment_intron" value="50" />
374 <param name="seg_mismatches" value="2"/> 405 <param name="max_segment_intron" value="500000" />
375 <param name="seg_length" value="25"/> 406 <param name="seg_mismatches" value="2"/>
376 <param name="allow_indel_search" value="Yes"/> 407 <param name="seg_length" value="25"/>
377 <param name="max_insertion_length" value="3"/> 408 <conditional name="indel_search">
378 <param name="max_deletion_length" value="3"/> 409 <param name="allow_indel_search" value="Yes"/>
379 <param name="use_junctions" value="Yes" /> 410 <param name="max_insertion_length" value="3"/>
380 <param name="use_annotations" value="No" /> 411 <param name="max_deletion_length" value="3"/>
381 <param name="use_juncs" value="No" /> 412 </conditional>
382 <param name="no_novel_juncs" value="No" /> 413 <conditional name="own_junctions">
383 <param name="use_search" value="Yes" /> 414 <param name="use_junctions" value="Yes" />
384 <param name="min_coverage_intron" value="50" /> 415 <conditional name="gene_model_ann">
385 <param name="max_coverage_intron" value="20000" /> 416 <param name="use_annotations" value="No" />
386 <param name="microexon_search" value="Yes" /> 417 </conditional>
387 <param name="b2_settings" value="No" /> 418 <conditional name="raw_juncs">
388 <!-- Fusion search params --> 419 <param name="use_juncs" value="No" />
389 <param name="do_search" value="Yes" /> 420 </conditional>
390 <param name="anchor_len" value="21" /> 421 <conditional name="no_novel_juncs">
391 <param name="min_dist" value="10000021" /> 422 <param name="no_novel_juncs" value="No" />
392 <param name="read_mismatches" value="3" /> 423 </conditional>
393 <param name="multireads" value="4" /> 424 </conditional>
394 <param name="multipairs" value="5" /> 425 <conditional name="coverage_search">
395 <param name="ignore_chromosomes" value="chrM"/> 426 <param name="use_search" value="Yes" />
396 <param name="specReadGroup" value="No" /> 427 <param name="min_coverage_intron" value="50" />
428 <param name="max_coverage_intron" value="20000" />
429 </conditional>
430 <param name="microexon_search" value="Yes" />
431 <conditional name="bowtie2_settings">
432 <param name="b2_settings" value="No" />
433 </conditional>
434 <!-- Fusion search params -->
435 <conditional name="fusion_search">
436 <param name="do_search" value="Yes" />
437 <param name="anchor_len" value="21" />
438 <param name="min_dist" value="10000021" />
439 <param name="read_mismatches" value="3" />
440 <param name="multireads" value="4" />
441 <param name="multipairs" value="5" />
442 <param name="ignore_chromosomes" value="chrM"/>
443 </conditional>
444 </conditional>
445 <conditional name="readGroup">
446 <param name="specReadGroup" value="No" />
447 </conditional>
397 <output name="insertions" file="tophat_out3i.bed" /> 448 <output name="insertions" file="tophat_out3i.bed" />
398 <output name="deletions" file="tophat_out3d.bed" /> 449 <output name="deletions" file="tophat_out3d.bed" />
399 <output name="junctions" file="tophat2_out3j.bed" /> 450 <output name="junctions" file="tophat2_out3j.bed" />
400 <output name="accepted_hits" file="tophat_out3h.bam" compare="sim_size" /> 451 <output name="accepted_hits" file="tophat_out3h.bam" compare="sim_size" />
401 </test> 452 </test>
404 <!-- TopHat commands: 455 <!-- TopHat commands:
405 tophat2 -o tmp_dir -r 20 -p 1 -a 8 -m 0 -i 70 -I 500000 -g 40 +coverage-search +min-coverage-intron 50 +max-coverage-intron 20000 +segment-mismatches 2 +segment-length 25 +microexon-search +report_discordant_pairs tophat_in1 test-data/tophat_in2.fastqsanger test-data/tophat_in3.fastqsanger 456 tophat2 -o tmp_dir -r 20 -p 1 -a 8 -m 0 -i 70 -I 500000 -g 40 +coverage-search +min-coverage-intron 50 +max-coverage-intron 20000 +segment-mismatches 2 +segment-length 25 +microexon-search +report_discordant_pairs tophat_in1 test-data/tophat_in2.fastqsanger test-data/tophat_in3.fastqsanger
406 Replace the + with double-dash 457 Replace the + with double-dash
407 Rename the files in tmp_dir appropriately 458 Rename the files in tmp_dir appropriately
408 --> 459 -->
409 <param name="sPaired" value="paired"/> 460 <conditional name="singlePaired">
410 <param name="input1" ftype="fastqsanger" value="tophat_in2.fastqsanger"/> 461 <param name="sPaired" value="paired"/>
411 <param name="input2" ftype="fastqsanger" value="tophat_in3.fastqsanger"/> 462 <param name="input1" ftype="fastqsanger" value="tophat_in2.fastqsanger"/>
463 <param name="input2" ftype="fastqsanger" value="tophat_in3.fastqsanger"/>
464 <param name="mate_inner_distance" value="20"/>
465 <param name="report_discordant_pairs" value="Yes" />
466 </conditional>
412 <param name="genomeSource" value="indexed"/> 467 <param name="genomeSource" value="indexed"/>
413 <param name="index" value="tophat_test"/> 468 <param name="index" value="tophat_test"/>
414 <param name="mate_inner_distance" value="20"/> 469 <conditional name="params">
415 <param name="settingsType" value="full"/> 470 <param name="settingsType" value="full"/>
416 <param name="library_type" value="FR Unstranded"/> 471 <param name="library_type" value="FR Unstranded"/>
417 <param name="read_mismatches" value="5"/> 472 <param name="read_mismatches" value="5"/>
418 <param name="bowtie_n" value="Yes"/> 473 <!-- Error: the read mismatches (5) and the read gap length (2) should be less than or equal to the read edit dist (2) -->
419 <param name="mate_std_dev" value="20"/> 474 <param name="read_edit_dist" value="5" />
420 <param name="anchor_length" value="8"/> 475 <param name="bowtie_n" value="Yes"/>
421 <param name="splice_mismatches" value="0"/> 476 <param name="mate_std_dev" value="20"/>
422 <param name="min_intron_length" value="70"/> 477 <param name="anchor_length" value="8"/>
423 <param name="max_intron_length" value="500000"/> 478 <param name="splice_mismatches" value="0"/>
424 <param name="max_multihits" value="40"/> 479 <param name="min_intron_length" value="70"/>
425 <param name="min_segment_intron" value="50" /> 480 <param name="max_intron_length" value="500000"/>
426 <param name="max_segment_intron" value="500000" /> 481 <param name="max_multihits" value="40"/>
427 <param name="seg_mismatches" value="2"/> 482 <param name="min_segment_intron" value="50" />
428 <param name="seg_length" value="25"/> 483 <param name="max_segment_intron" value="500000" />
429 <param name="allow_indel_search" value="No"/> 484 <param name="seg_mismatches" value="2"/>
430 <param name="use_junctions" value="Yes" /> 485 <param name="seg_length" value="25"/>
431 <param name="use_annotations" value="No" /> 486 <conditional name="indel_search">
432 <param name="use_juncs" value="No" /> 487 <param name="allow_indel_search" value="No"/>
433 <param name="no_novel_juncs" value="No" /> 488 </conditional>
434 <param name="report_discordant_pairs" value="Yes" /> 489 <conditional name="own_junctions">
435 <param name="use_search" value="No" /> 490 <param name="use_junctions" value="Yes" />
436 <param name="microexon_search" value="Yes" /> 491 <conditional name="gene_model_ann">
437 <param name="b2_settings" value="No" /> 492 <param name="use_annotations" value="No" />
438 <!-- Fusion search params --> 493 </conditional>
439 <param name="do_search" value="Yes" /> 494 <conditional name="raw_juncs">
440 <param name="anchor_len" value="21" /> 495 <param name="use_juncs" value="No" />
441 <param name="min_dist" value="10000021" /> 496 </conditional>
442 <param name="read_mismatches" value="3" /> 497 <conditional name="no_novel_juncs">
443 <param name="multireads" value="4" /> 498 <param name="no_novel_juncs" value="No" />
444 <param name="multipairs" value="5" /> 499 </conditional>
445 <param name="ignore_chromosomes" value="chrM"/> 500 </conditional>
446 <param name="specReadGroup" value="No" /> 501 <conditional name="coverage_search">
502 <param name="use_search" value="No" />
503 </conditional>
504 <param name="microexon_search" value="Yes" />
505 <conditional name="bowtie2_settings">
506 <param name="b2_settings" value="No" />
507 </conditional>
508 <!-- Fusion search params -->
509 <conditional name="fusion_search">
510 <param name="do_search" value="Yes" />
511 <param name="anchor_len" value="21" />
512 <param name="min_dist" value="10000021" />
513 <param name="read_mismatches" value="3" />
514 <param name="multireads" value="4" />
515 <param name="multipairs" value="5" />
516 <param name="ignore_chromosomes" value="chrM"/>
517 </conditional>
518 </conditional>
519 <conditional name="readGroup">
520 <param name="specReadGroup" value="No" />
521 </conditional>
447 <output name="junctions" file="tophat2_out4j.bed" /> 522 <output name="junctions" file="tophat2_out4j.bed" />
448 <output name="accepted_hits" file="tophat_out4h.bam" compare="sim_size" /> 523 <output name="accepted_hits" file="tophat_out4h.bam" compare="sim_size" />
449 </test> 524 </test>
450 </tests> 525 </tests>
451
452 <help> 526 <help>
453 **Tophat Overview** 527 **Tophat Overview**
454 528
455 TopHat_ is a fast splice junction mapper for RNA-Seq reads. It aligns RNA-Seq reads to mammalian-sized genomes using the ultra high-throughput short read aligner Bowtie(2), and then analyzes the mapping results to identify splice junctions between exons. Please cite: Kim D, Pertea G, Trapnell C, Pimentel H, Kelley R, and Salzberg SL. TopHat2: accurate alignment 529 TopHat_ is a fast splice junction mapper for RNA-Seq reads. It aligns RNA-Seq reads to mammalian-sized genomes using the ultra high-throughput short read aligner Bowtie(2), and then analyzes the mapping results to identify splice junctions between exons. Please cite: Kim D, Pertea G, Trapnell C, Pimentel H, Kelley R, and Salzberg SL. TopHat2: accurate alignment
456 of transcriptomes in the presence of insertions, deletions and gene fusions. Genome Biol 14:R36, 2013. 530 of transcriptomes in the presence of insertions, deletions and gene fusions. Genome Biol 14:R36, 2013.
522 --min-coverage-intron The minimum intron length that may be found during coverage search. The default is 50. 596 --min-coverage-intron The minimum intron length that may be found during coverage search. The default is 50.
523 --max-coverage-intron The maximum intron length that may be found during coverage search. The default is 20000. 597 --max-coverage-intron The maximum intron length that may be found during coverage search. The default is 20000.
524 --min-segment-intron The minimum intron length that may be found during split-segment search. The default is 50. 598 --min-segment-intron The minimum intron length that may be found during split-segment search. The default is 50.
525 --max-segment-intron The maximum intron length that may be found during split-segment search. The default is 500000. 599 --max-segment-intron The maximum intron length that may be found during split-segment search. The default is 500000.
526 </help> 600 </help>
601 <citations>
602 <citation type="doi">10.1186/gb-2013-14-4-r36</citation>
603 </citations>
527 </tool> 604 </tool>