Mercurial > repos > devteam > tophat2
changeset 2:da1f39fe14bc draft
Uploaded
author | devteam |
---|---|
date | Thu, 18 Dec 2014 13:56:31 -0500 |
parents | ae06af1118dc |
children | 81f97e12e573 |
files | test-data/bowtie2/tophat_test.1.bt2 test-data/bowtie2/tophat_test.2.bt2 test-data/bowtie2/tophat_test.3.bt2 test-data/bowtie2/tophat_test.4.bt2 test-data/bowtie2/tophat_test.fa test-data/bowtie2/tophat_test.rev.1.bt2 test-data/bowtie2/tophat_test.rev.2.bt2 test-data/bowtie2_indices.loc test-data/tophat2_out3j.bed test-data/tophat2_out4j.bed tool_data_table_conf.xml.test tool_dependencies.xml tophat2_wrapper.xml |
diffstat | 13 files changed, 190 insertions(+), 92 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/bowtie2/tophat_test.fa Thu Dec 18 13:56:31 2014 -0500 @@ -0,0 +1,14 @@ +>test_chromosome +AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +ACTACTATCTGACTAGACTGGAGGCGCTTGCGACTGAGCTAGGACGTGCC +ACTACGGGGATGACGACTAGGACTACGGACGGACTTAGAGCGTCAGATGC +AGCGACTGGACTATTTAGGACGATCGGACTGAGGAGGGCAGTAGGACGCT +ACGTATTTGGCGCGCGGCGCTACGGCTGAGCGTCGAGCTTGCGATACGCC +GTAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAG +ACTATTACTTTATTATCTTACTCGGACGTAGACGGATCGGCAACGGGACT +GTAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAG +TTTTCTACTTGAGACTGGGATCGAGGCGGACTTTTTAGGACGGGACTTGC +AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/bowtie2_indices.loc Thu Dec 18 13:56:31 2014 -0500 @@ -0,0 +1,1 @@ +tophat_test tophat_test tophat_test ${__HERE__}/bowtie2/tophat_test
--- a/test-data/tophat2_out3j.bed Mon Nov 17 11:23:41 2014 -0500 +++ b/test-data/tophat2_out3j.bed Thu Dec 18 13:56:31 2014 -0500 @@ -1,3 +1,3 @@ track name=junctions description="TopHat junctions" -test_chromosome 177 400 JUNC00000001 27 + 177 400 255,0,0 2 73,50 0,173 -test_chromosome 350 550 JUNC00000002 26 + 350 550 255,0,0 2 50,50 0,150 +test_chromosome 180 400 JUNC00000001 19 + 180 400 255,0,0 2 70,50 0,170 +test_chromosome 350 550 JUNC00000002 23 + 350 550 255,0,0 2 50,50 0,150
--- a/test-data/tophat2_out4j.bed Mon Nov 17 11:23:41 2014 -0500 +++ b/test-data/tophat2_out4j.bed Thu Dec 18 13:56:31 2014 -0500 @@ -1,3 +1,3 @@ track name=junctions description="TopHat junctions" -test_chromosome 177 400 JUNC00000001 51 + 177 400 255,0,0 2 73,50 0,173 -test_chromosome 350 550 JUNC00000002 43 + 350 550 255,0,0 2 50,50 0,150 +test_chromosome 177 400 JUNC00000001 44 + 177 400 255,0,0 2 73,50 0,173 +test_chromosome 350 550 JUNC00000002 42 + 350 550 255,0,0 2 50,50 0,150
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.test Thu Dec 18 13:56:31 2014 -0500 @@ -0,0 +1,6 @@ +<tables> + <table name="tophat2_indexes" comment_char="#"> + <columns>value, dbkey, name, path</columns> + <file path="${__HERE__}/test-data/bowtie2_indices.loc" /> + </table> +</tables>
--- a/tool_dependencies.xml Mon Nov 17 11:23:41 2014 -0500 +++ b/tool_dependencies.xml Thu Dec 18 13:56:31 2014 -0500 @@ -1,12 +1,12 @@ <?xml version="1.0"?> <tool_dependency> <package name="bowtie2" version="2.1.0"> - <repository changeset_revision="017a00c265f1" name="package_bowtie2_2_1_0" owner="devteam" prior_installation_required="False" toolshed="http://toolshed.g2.bx.psu.edu" /> + <repository changeset_revision="017a00c265f1" name="package_bowtie2_2_1_0" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" /> </package> <package name="samtools" version="0.1.18"> - <repository changeset_revision="171cd8bc208d" name="package_samtools_0_1_18" owner="devteam" prior_installation_required="False" toolshed="http://toolshed.g2.bx.psu.edu" /> + <repository changeset_revision="171cd8bc208d" name="package_samtools_0_1_18" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" /> </package> <package name="tophat2" version="2.0.9"> - <repository changeset_revision="8549fd545473" name="package_tophat2_2_0_9" owner="devteam" prior_installation_required="False" toolshed="http://toolshed.g2.bx.psu.edu" /> + <repository changeset_revision="8549fd545473" name="package_tophat2_2_0_9" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" /> </package> </tool_dependency>
--- a/tophat2_wrapper.xml Mon Nov 17 11:23:41 2014 -0500 +++ b/tophat2_wrapper.xml Thu Dec 18 13:56:31 2014 -0500 @@ -1,4 +1,4 @@ -<tool id="tophat2" name="Tophat2" version="0.6"> +<tool id="tophat2" name="Tophat2" version="0.7"> <!-- Wrapper compatible with Tophat version 2.0.0+ --> <description>Gapped-read mapper for RNA-seq data</description> <version_command>tophat2 --version</version_command> @@ -111,7 +111,7 @@ #end if ## Set index path, inputs and parameters specific to paired data. - #if $singlePaired.sPaired == "paired" + #if $singlePaired.sPaired != "single" -r $singlePaired.mate_inner_distance --mate-std-dev=$singlePaired.mate_std_dev @@ -119,9 +119,13 @@ --no-discordant #end if - ${index_path} $singlePaired.input1 $singlePaired.input2 + #if $singlePaired.sPaired == "paired" + ${index_path} "$singlePaired.input1" "$singlePaired.input2" + #else + ${index_path} "$singlePaired.input.forward" "$singlePaired.input.reverse" + #end if #else - ${index_path} $singlePaired.input1 + ${index_path} "$singlePaired.input1" #end if </command> @@ -129,7 +133,8 @@ <conditional name="singlePaired"> <param name="sPaired" type="select" label="Is this library mate-paired?"> <option value="single">Single-end</option> - <option value="paired">Paired-end</option> + <option value="paired">Paired-end (as individual datasets)</option> + <option value="paired_collection">Paired-end (as collection)</option> </param> <when value="single"> <param format="fastqsanger" name="input1" type="data" label="RNA-Seq FASTQ file" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33"/> @@ -137,13 +142,11 @@ <when value="paired"> <param format="fastqsanger" name="input1" type="data" label="RNA-Seq FASTQ file, forward reads" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33" /> <param format="fastqsanger" name="input2" type="data" label="RNA-Seq FASTQ file, reverse reads" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33" /> - <param name="mate_inner_distance" type="integer" value="300" label="Mean Inner Distance between Mate Pairs" /> - <param name="mate_std_dev" type="integer" value="20" label="Std. Dev for Distance between Mate Pairs" help="The standard deviation for the distribution on inner distances between mate pairs."/> - <!-- Discordant pairs. --> - <param name="report_discordant_pairs" type="select" label="Report discordant pair alignments?"> - <option value="No">No</option> - <option selected="True" value="Yes">Yes</option> - </param> + <expand macro="paired_parameters" /> + </when> + <when value="paired_collection"> + <param format="fastqsanger" name="input" type="data_collection" collection_type="paired" label="RNA-Seq FASTQ paired reads" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33" /> + <expand macro="paired_parameters" /> </when> </conditional> <expand macro="refGenomeSourceConditional"> @@ -293,6 +296,15 @@ <macros> <import>tophat_macros.xml</import> + <xml name="paired_parameters"> + <param name="mate_inner_distance" type="integer" value="300" label="Mean Inner Distance between Mate Pairs" /> + <param name="mate_std_dev" type="integer" value="20" label="Std. Dev for Distance between Mate Pairs" help="The standard deviation for the distribution on inner distances between mate pairs."/> + <!-- Discordant pairs. --> + <param name="report_discordant_pairs" type="select" label="Report discordant pair alignments?"> + <option value="No">No</option> + <option selected="True" value="Yes">Yes</option> + </param> + </xml> <macro name="dbKeyActions"> <actions> <conditional name="refGenomeSource.genomeSource"> @@ -348,6 +360,23 @@ <output name="junctions" file="tophat2_out2j.bed" /> <output name="accepted_hits" file="tophat_out2h.bam" compare="sim_size" /> </test> + <test> + <!-- Same test as above but with a collection. --> + <param name="sPaired" value="paired_collection" /> + <param name="input"> + <collection type="paired"> + <element name="forward" value="tophat_in2.fastqsanger" ftype="fastqsanger" /> + <element name="reverse" value="tophat_in3.fastqsanger" ftype="fastqsanger" /> + </collection> + </param> + <param name="genomeSource" value="history" /> + <param name="ownFile" ftype="fasta" value="tophat_in1.fasta" /> + <param name="mate_inner_distance" value="20" /> + <param name="settingsType" value="preSet" /> + <param name="specReadGroup" value="No" /> + <output name="junctions" file="tophat2_out2j.bed" /> + <output name="accepted_hits" file="tophat_out2h.bam" compare="sim_size" /> + </test> <!-- Test base-space single-end reads with user-supplied reference fasta and full parameters --> <test> <!-- Tophat commands: @@ -356,44 +385,66 @@ Replace the + with double-dash Rename the files in tmp_dir appropriately --> - <param name="sPaired" value="single"/> - <param name="input1" ftype="fastqsanger" value="tophat_in2.fastqsanger"/> + <conditional name="singlePaired"> + <param name="sPaired" value="single"/> + <param name="input1" ftype="fastqsanger" value="tophat_in2.fastqsanger"/> + </conditional> <param name="genomeSource" value="history"/> <param name="ownFile" value="tophat_in1.fasta"/> - <param name="settingsType" value="full"/> - <param name="library_type" value="FR Unstranded"/> - <param name="read_mismatches" value="2"/> - <param name="bowtie_n" value="No"/> - <param name="anchor_length" value="8"/> - <param name="splice_mismatches" value="0"/> - <param name="min_intron_length" value="70"/> - <param name="max_intron_length" value="500000"/> - <param name="max_multihits" value="40"/> - <param name="min_segment_intron" value="50" /> - <param name="max_segment_intron" value="500000" /> - <param name="seg_mismatches" value="2"/> - <param name="seg_length" value="25"/> - <param name="allow_indel_search" value="Yes"/> - <param name="max_insertion_length" value="3"/> - <param name="max_deletion_length" value="3"/> - <param name="use_junctions" value="Yes" /> - <param name="use_annotations" value="No" /> - <param name="use_juncs" value="No" /> - <param name="no_novel_juncs" value="No" /> - <param name="use_search" value="Yes" /> - <param name="min_coverage_intron" value="50" /> - <param name="max_coverage_intron" value="20000" /> - <param name="microexon_search" value="Yes" /> - <param name="b2_settings" value="No" /> - <!-- Fusion search params --> - <param name="do_search" value="Yes" /> - <param name="anchor_len" value="21" /> - <param name="min_dist" value="10000021" /> - <param name="read_mismatches" value="3" /> - <param name="multireads" value="4" /> - <param name="multipairs" value="5" /> - <param name="ignore_chromosomes" value="chrM"/> - <param name="specReadGroup" value="No" /> + <conditional name="params"> + <param name="settingsType" value="full"/> + <param name="library_type" value="FR Unstranded"/> + <param name="read_mismatches" value="2"/> + <param name="bowtie_n" value="No"/> + <param name="anchor_length" value="8"/> + <param name="splice_mismatches" value="0"/> + <param name="min_intron_length" value="70"/> + <param name="max_intron_length" value="500000"/> + <param name="max_multihits" value="40"/> + <param name="min_segment_intron" value="50" /> + <param name="max_segment_intron" value="500000" /> + <param name="seg_mismatches" value="2"/> + <param name="seg_length" value="25"/> + <conditional name="indel_search"> + <param name="allow_indel_search" value="Yes"/> + <param name="max_insertion_length" value="3"/> + <param name="max_deletion_length" value="3"/> + </conditional> + <conditional name="own_junctions"> + <param name="use_junctions" value="Yes" /> + <conditional name="gene_model_ann"> + <param name="use_annotations" value="No" /> + </conditional> + <conditional name="raw_juncs"> + <param name="use_juncs" value="No" /> + </conditional> + <conditional name="no_novel_juncs"> + <param name="no_novel_juncs" value="No" /> + </conditional> + </conditional> + <conditional name="coverage_search"> + <param name="use_search" value="Yes" /> + <param name="min_coverage_intron" value="50" /> + <param name="max_coverage_intron" value="20000" /> + </conditional> + <param name="microexon_search" value="Yes" /> + <conditional name="bowtie2_settings"> + <param name="b2_settings" value="No" /> + </conditional> + <!-- Fusion search params --> + <conditional name="fusion_search"> + <param name="do_search" value="Yes" /> + <param name="anchor_len" value="21" /> + <param name="min_dist" value="10000021" /> + <param name="read_mismatches" value="3" /> + <param name="multireads" value="4" /> + <param name="multipairs" value="5" /> + <param name="ignore_chromosomes" value="chrM"/> + </conditional> + </conditional> + <conditional name="readGroup"> + <param name="specReadGroup" value="No" /> + </conditional> <output name="insertions" file="tophat_out3i.bed" /> <output name="deletions" file="tophat_out3d.bed" /> <output name="junctions" file="tophat2_out3j.bed" /> @@ -406,49 +457,72 @@ Replace the + with double-dash Rename the files in tmp_dir appropriately --> - <param name="sPaired" value="paired"/> - <param name="input1" ftype="fastqsanger" value="tophat_in2.fastqsanger"/> - <param name="input2" ftype="fastqsanger" value="tophat_in3.fastqsanger"/> + <conditional name="singlePaired"> + <param name="sPaired" value="paired"/> + <param name="input1" ftype="fastqsanger" value="tophat_in2.fastqsanger"/> + <param name="input2" ftype="fastqsanger" value="tophat_in3.fastqsanger"/> + <param name="mate_inner_distance" value="20"/> + <param name="report_discordant_pairs" value="Yes" /> + </conditional> <param name="genomeSource" value="indexed"/> <param name="index" value="tophat_test"/> - <param name="mate_inner_distance" value="20"/> - <param name="settingsType" value="full"/> - <param name="library_type" value="FR Unstranded"/> - <param name="read_mismatches" value="5"/> - <param name="bowtie_n" value="Yes"/> - <param name="mate_std_dev" value="20"/> - <param name="anchor_length" value="8"/> - <param name="splice_mismatches" value="0"/> - <param name="min_intron_length" value="70"/> - <param name="max_intron_length" value="500000"/> - <param name="max_multihits" value="40"/> - <param name="min_segment_intron" value="50" /> - <param name="max_segment_intron" value="500000" /> - <param name="seg_mismatches" value="2"/> - <param name="seg_length" value="25"/> - <param name="allow_indel_search" value="No"/> - <param name="use_junctions" value="Yes" /> - <param name="use_annotations" value="No" /> - <param name="use_juncs" value="No" /> - <param name="no_novel_juncs" value="No" /> - <param name="report_discordant_pairs" value="Yes" /> - <param name="use_search" value="No" /> - <param name="microexon_search" value="Yes" /> - <param name="b2_settings" value="No" /> - <!-- Fusion search params --> - <param name="do_search" value="Yes" /> - <param name="anchor_len" value="21" /> - <param name="min_dist" value="10000021" /> - <param name="read_mismatches" value="3" /> - <param name="multireads" value="4" /> - <param name="multipairs" value="5" /> - <param name="ignore_chromosomes" value="chrM"/> - <param name="specReadGroup" value="No" /> + <conditional name="params"> + <param name="settingsType" value="full"/> + <param name="library_type" value="FR Unstranded"/> + <param name="read_mismatches" value="5"/> + <!-- Error: the read mismatches (5) and the read gap length (2) should be less than or equal to the read edit dist (2) --> + <param name="read_edit_dist" value="5" /> + <param name="bowtie_n" value="Yes"/> + <param name="mate_std_dev" value="20"/> + <param name="anchor_length" value="8"/> + <param name="splice_mismatches" value="0"/> + <param name="min_intron_length" value="70"/> + <param name="max_intron_length" value="500000"/> + <param name="max_multihits" value="40"/> + <param name="min_segment_intron" value="50" /> + <param name="max_segment_intron" value="500000" /> + <param name="seg_mismatches" value="2"/> + <param name="seg_length" value="25"/> + <conditional name="indel_search"> + <param name="allow_indel_search" value="No"/> + </conditional> + <conditional name="own_junctions"> + <param name="use_junctions" value="Yes" /> + <conditional name="gene_model_ann"> + <param name="use_annotations" value="No" /> + </conditional> + <conditional name="raw_juncs"> + <param name="use_juncs" value="No" /> + </conditional> + <conditional name="no_novel_juncs"> + <param name="no_novel_juncs" value="No" /> + </conditional> + </conditional> + <conditional name="coverage_search"> + <param name="use_search" value="No" /> + </conditional> + <param name="microexon_search" value="Yes" /> + <conditional name="bowtie2_settings"> + <param name="b2_settings" value="No" /> + </conditional> + <!-- Fusion search params --> + <conditional name="fusion_search"> + <param name="do_search" value="Yes" /> + <param name="anchor_len" value="21" /> + <param name="min_dist" value="10000021" /> + <param name="read_mismatches" value="3" /> + <param name="multireads" value="4" /> + <param name="multipairs" value="5" /> + <param name="ignore_chromosomes" value="chrM"/> + </conditional> + </conditional> + <conditional name="readGroup"> + <param name="specReadGroup" value="No" /> + </conditional> <output name="junctions" file="tophat2_out4j.bed" /> <output name="accepted_hits" file="tophat_out4h.bam" compare="sim_size" /> </test> </tests> - <help> **Tophat Overview** @@ -524,4 +598,7 @@ --min-segment-intron The minimum intron length that may be found during split-segment search. The default is 50. --max-segment-intron The maximum intron length that may be found during split-segment search. The default is 500000. </help> + <citations> + <citation type="doi">10.1186/gb-2013-14-4-r36</citation> + </citations> </tool>