# HG changeset patch # User sarahinraauzeville # Date 1455198328 18000 # Node ID 038c61725cfb809134a2980335886674f292adfe # Parent 15aa80493a82a7ca5edc42bf4ddc6845e6630e3f Uploaded diff -r 15aa80493a82 -r 038c61725cfb sm_tophat2_toolshed.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sm_tophat2_toolshed.xml Thu Feb 11 08:45:28 2016 -0500 @@ -0,0 +1,161 @@ + + + Find splice junctions using RNA-seq data + sm_tophat2.pl $lib $input_read1 $input_read2 $reference_source.reference_source_selector + #if $reference_source.reference_source_selector =="cached": + $reference_source.ref_file_cached.fields.path + #end if + #if $reference_source.reference_source_selector =="history": + $reference_source.ref_file + #end if + $p $r $max_intron $output_bam $output_bed $output_unmapped_bam $zip $gtf_cond.gtf + #if $gtf_cond.gtf =="T": + $gtf_cond.input_gtf + #end if + + echo tophat2 version : ; tophat2 --version + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +.. class:: infomark + +What it does : TopHat 2 is a program that aligns RNA-Seq reads to a genome in order to identify exon-exon splice junctions. It is built on the ultrafast short read mapping program Bowtie 2. TopHat runs on Linux and OS X. + + +*What types of reads can I use TopHat 2 with?* + +TopHat was designed to work with reads produced by the Illumina Genome Analyzer, although users have been successful in using TopHat with reads from other technologies. In TopHat 1.1.0, we began supporting Applied Biosystems' Colorspace format. The software is optimized for reads 75bp or longer. + +Mixing paired- and single- end reads together is not supported. + + + +*How does TopHat 2 find junctions?* + +TopHat can find splice junctions without a reference annotation. By first mapping RNA-Seq reads to the genome, TopHat identifies potential exons, since many RNA-Seq reads will contiguously align to the genome. 
Using this initial mapping information, TopHat builds a database of possible splice junctions and then maps the reads against these junctions to confirm them. + +Short read sequencing machines can currently produce reads 100bp or longer but many exons are shorter than this so they would be missed in the initial mapping. TopHat solves this problem mainly by splitting all input reads into smaller segments which are then mapped independently. The segment alignments are put back together in a final step of the program to produce the end-to-end read alignments. + +TopHat generates its database of possible splice junctions from two sources of evidence. The first and strongest source of evidence for a splice junction is when two segments from the same read (for reads of at least 45bp) are mapped at a certain distance on the same genomic sequence or when an internal segment fails to map - again suggesting that such reads are spanning multiple exons. With this approach, "GT-AG", "GC-AG" and "AT-AC" introns will be found ab initio. The second source is pairings of "coverage islands", which are distinct regions of piled up reads in the initial mapping. Neighboring islands are often spliced together in the transcriptome, so TopHat looks for ways to join these with an intron. We only suggest users use this second option (--coverage-search) for short reads (shorter than 45bp) and with a small number of reads (10 million or fewer). This latter option will only report alignments across "GT-AG" introns. + + +Command line : Please see "information" then "stdout". + + +Parameters : + +-o/--output-dir string + +Sets the name of the directory in which TopHat will write all of its output. The default is "./tophat_out". + + +-r/--mate-inner-dist int + +This is the expected (mean) inner distance between mate pairs. For example, for paired-end runs with fragments selected at 300bp, where each end is 50bp, you should set -r to be 200. The default is 50bp. 
+ + +-I/--max-intron-length int + +The maximum intron length. When searching for junctions ab initio, TopHat will ignore donor/acceptor pairs farther than this many bases apart, except when such a pair is supported by a split segment alignment of a long read. The default is 500000. + + +-p/--num-threads int + +Use this many threads to align reads. The default is 1. + + +--library-type +fr-unstranded, fr-firststrand, fr-secondstrand + + + +---- + +Version Galaxy Tool : V2.0 + +Versions of bioinformatics tools used : Tophat 2 + +---- + +Contacts (names and emails) : sigenae-support@listes.inra.fr + +E-learning available : Yes. + +Please cite : + + Depending on the help provided you can cite us in acknowledgements, references or both. + + Examples : + Acknowledgements + We wish to thank the SIGENAE group for .... + + References + X. SIGENAE [http://www.sigenae.org/] + + +