annotate sm_tophat2_toolshed.xml @ 1:038c61725cfb draft

Uploaded
author sarahinraauzeville
date Thu, 11 Feb 2016 08:45:28 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
1 <!--# Copyright (C) 2013 INRA
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
2 # This program is free software: you can redistribute it and/or modify
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
3 # it under the terms of the GNU General Public License as published by
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
4 # the Free Software Foundation, either version 3 of the License, or
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
5 # (at your option) any later version.
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
6 #
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
7 # This program is distributed in the hope that it will be useful,
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
8 # but WITHOUT ANY WARRANTY; without even the implied warranty of
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
9 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
10 # GNU General Public License for more details.
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
11 #
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
12 # You should have received a copy of the GNU General Public License
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
13 # along with this program. If not, see http://www.gnu.org/licenses/.
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
14 #-->
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
15 <tool id="sm_tophat2" name="Tophat 2 for Illumina">
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
16 <description>Find splice junctions using RNA-seq data</description>
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
17 <!-- Arguments are positional for sm_tophat2.pl; each substituted value is single-quoted so it reaches the script as exactly one shell word. --> <command interpreter="perl">sm_tophat2.pl '$lib' '$input_read1' '$input_read2' '$reference_source.reference_source_selector'
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
18 #if $reference_source.reference_source_selector =="cached":
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
19 '$reference_source.ref_file_cached.fields.path'
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
20 #end if
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
21 #if $reference_source.reference_source_selector =="history":
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
22 '$reference_source.ref_file'
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
23 #end if
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
24 '$p' '$r' '$max_intron' '$output_bam' '$output_bed' '$output_unmapped_bam' '$zip' '$gtf_cond.gtf'
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
25 #if $gtf_cond.gtf =="T":
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
26 '$gtf_cond.input_gtf'
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
27 #end if
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
28 </command>
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
29 <version_command>echo tophat2 version : ; tophat2 --version</version_command>
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
30 <inputs>
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
31 <param format="fastq, fastqsanger, fastqillumina" name="input_read1" type="data" label="Your RNA-Seq FASTQ file (read 1)"/>
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
32 <param format="fastq, fastqsanger, fastqillumina" name="input_read2" type="data" label="Your RNA-Seq FASTQ file (read 2)"/>
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
33
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
34 <conditional name="reference_source">
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
35 <param name="reference_source_selector" type="select" label="Load reference genome from">
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
36 <option value="cached">Local cache</option>
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
37 <option value="history">History</option>
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
38 </param>
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
39 <when value="cached">
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
40 <param name="ref_file_cached" type="select" label="Using reference genome" help="Select genome from the list">
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
41 <options from_data_table="tophat_ind">
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
42 <filter type="sort_by" column="2" />
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
43 <validator type="no_options" message="No indexes are available" />
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
44 </options>
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
45 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
46 </param>
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
47
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
48 </when>
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
49 <when value="history">
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
50 <param name="ref_file" type="data" format="fasta" label="Use the following dataset as the reference sequence" help="You can upload a FASTA sequence to the history and use it as reference" />
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
51 </when>
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
52 </conditional>
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
53
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
54 <!-- Numeric options are typed as integer so Galaxy rejects non-numeric input before the command line is built. --> <param name="p" size="20" type="integer" value="16" label="Number of threads used to align reads"/>
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
55 <param name="max_intron" size="20" type="integer" value="5000" label="Maximum intron length"/>
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
56 <param name="r" size="20" type="integer" value="200" label="Expected (mean) inner distance between mate pairs"/>
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
57 <param name="zip" type="select" display="checkboxes" multiple="True" label="Your RNA-seq FASTQ file are zipped" help="Please check this option if your files are zipped.">
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
58 <option value="YES">Yes</option>
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
59 </param>
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
60
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
61 <conditional name="gtf_cond">
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
62 <param name="gtf" type="select" help="Do you have a gtf file available ?" label="GTF file available">
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
63 <option value="T">Yes</option>
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
64 <option value="F" selected="true">No</option>
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
65 </param>
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
66 <when value="F" />
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
67 <when value="T">
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
68 <param format="gtf, gff" name="input_gtf" type="data" label="Your GTF file"/>
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
69 </when>
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
70 </conditional>
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
71
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
72 <param name="lib" type="select" label="Library type">
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
73 <option value="fr-unstranded">fr-unstranded</option>
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
74 <option value="fr-firststrand">fr-firststrand</option>
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
75 <option value="fr-secondstrand">fr-secondstrand</option>
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
76 </param>
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
77
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
78 </inputs>
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
79 <outputs> <!-- Labels use the Cheetah ${...} placeholder form so the read-1 dataset name is substituted without literal braces around it. -->
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
80 <data format="bam" name="output_bam" label="${input_read1.name}-Tophat_mapped.bam"/>
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
81 <data format="bed" name="output_bed" label="${input_read1.name}-Tophat.bed"/>
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
82 <data format="bam" name="output_unmapped_bam" label="${input_read1.name}-Tophat_unmapped.bam"/>
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
83 </outputs>
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
84 <help>
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
85 .. class:: infomark
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
86
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
87 What it does : TopHat 2 is a program that aligns RNA-Seq reads to a genome in order to identify exon-exon splice junctions. It is built on the ultrafast short read mapping program Bowtie 2. TopHat runs on Linux and OS X.
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
88
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
89
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
90 *What types of reads can I use TopHat 2 with?*
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
91
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
92 TopHat was designed to work with reads produced by the Illumina Genome Analyzer, although users have been successful in using TopHat with reads from other technologies. In TopHat 1.1.0, we began supporting Applied Biosystems' Colorspace format. The software is optimized for reads 75bp or longer.
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
93
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
94 Mixing paired- and single- end reads together is not supported.
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
95
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
96
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
97
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
98 *How does TopHat 2 find junctions?*
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
99
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
100 TopHat can find splice junctions without a reference annotation. By first mapping RNA-Seq reads to the genome, TopHat identifies potential exons, since many RNA-Seq reads will contiguously align to the genome. Using this initial mapping information, TopHat builds a database of possible splice junctions and then maps the reads against these junctions to confirm them.
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
101
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
102 Short read sequencing machines can currently produce reads 100bp or longer but many exons are shorter than this so they would be missed in the initial mapping. TopHat solves this problem mainly by splitting all input reads into smaller segments which are then mapped independently. The segment alignments are put back together in a final step of the program to produce the end-to-end read alignments.
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
103
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
104 TopHat generates its database of possible splice junctions from two sources of evidence. The first and strongest source of evidence for a splice junction is when two segments from the same read (for reads of at least 45bp) are mapped at a certain distance on the same genomic sequence or when an internal segment fails to map - again suggesting that such reads are spanning multiple exons. With this approach, "GT-AG", "GC-AG" and "AT-AC" introns will be found ab initio. The second source is pairings of "coverage islands", which are distinct regions of piled up reads in the initial mapping. Neighboring islands are often spliced together in the transcriptome, so TopHat looks for ways to join these with an intron. We only suggest users use this second option (--coverage-search) for short reads (shorter than 45bp) and with a small number of reads (10 million or fewer). This latter option will only report alignments across "GT-AG" introns.
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
105
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
106
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
107 Command line : Please see "information" then "stdout".
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
108
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
109
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
110 Parameters :
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
111
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
112 -o/--output-dir string
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
113
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
114 Sets the name of the directory in which TopHat will write all of its output. The default is "./tophat_out".
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
115
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
116
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
117 -r/--mate-inner-dist int
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
118
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
119 This is the expected (mean) inner distance between mate pairs. For example, for paired-end runs with fragments selected at 300bp, where each end is 50bp, you should set -r to be 200. The default is 50bp.
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
120
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
121
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
122 -I/--max-intron-length int
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
123
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
124 The maximum intron length. When searching for junctions ab initio, TopHat will ignore donor/acceptor pairs farther than this many bases apart, except when such a pair is supported by a split segment alignment of a long read. The default is 500000.
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
125
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
126
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
127 -p/--num-threads int
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
128
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
129 Use this many threads to align reads. The default is 1.
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
130
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
131
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
132 --library-type
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
133 fr-unstranded, fr-firststrand, fr-secondstrand
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
134
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
135
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
136
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
137 ----
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
138
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
139 Version Galaxy Tool : V2.0
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
140
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
141 Versions of bioinformatics tools used : Tophat 2
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
142
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
143 ----
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
144
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
145 Contacts (names and emails) : sigenae-support@listes.inra.fr
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
146
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
147 E-learning available : Yes.
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
148
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
149 Please cite :
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
150
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
151 Depending on the help provided you can cite us in acknowledgements, references or both.
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
152
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
153 Examples :
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
154 Acknowledgements
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
155 We wish to thank the SIGENAE group for ....
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
156
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
157 References
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
158 X. SIGENAE [http://www.sigenae.org/]
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
159 </help>
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
160
038c61725cfb Uploaded
sarahinraauzeville
parents:
diff changeset
161 </tool>