sharplabtool: tools/ngs_rna/tophat

author	xuebing
date	Fri, 09 Mar 2012 19:37:19 -0500
parents
children

rev	line source
0 9071e359b9a3 Uploaded xuebing parents: diff changeset	1 #!/usr/bin/env python
9071e359b9a3 Uploaded xuebing parents: diff changeset	2
9071e359b9a3 Uploaded xuebing parents: diff changeset	3 import optparse, os, shutil, subprocess, sys, tempfile, fileinput
9071e359b9a3 Uploaded xuebing parents: diff changeset	4
def stop_err( msg ):
    """Write *msg* to stderr and terminate the script with a failure status.

    msg -- text to report; a trailing newline is appended.

    Raises SystemExit with exit code 1, so callers that inspect the exit
    status (and not only stderr) also see the run as failed.
    """
    sys.stderr.write( "%s\n" % msg )
    sys.exit( 1 )
9071e359b9a3 Uploaded xuebing parents: diff changeset	8
def _to_text( data ):
    """Return *data* as a native string, decoding bytes when necessary."""
    if isinstance( data, str ):
        return data
    return data.decode( 'utf-8', 'replace' )


def _read_all( handle, buffsize=1048576 ):
    """Read the rest of *handle* in *buffsize* chunks and return it as text.

    The loop stops on the first empty chunk, so output whose length is an
    exact multiple of *buffsize* terminates normally.
    """
    chunks = []
    try:
        while True:
            chunk = handle.read( buffsize )
            if not chunk:
                break
            chunks.append( chunk )
    except OverflowError:
        # Keep whatever was read so far; a partial message beats none.
        pass
    return _to_text( b''.join( chunks ) )


def _run_command( args, cwd=None, discard_stdout=False ):
    """Run the argument list *args* without a shell.

    args           -- program name followed by its arguments.
    cwd            -- working directory for the child, or None for the current one.
    discard_stdout -- when true the child's stdout is sent to os.devnull,
                      otherwise it is inherited from this process.

    Returns a ( returncode, stderr_text ) tuple. stderr is spooled to an
    anonymous temporary file so that very large output cannot block the child,
    and the file is removed automatically when it is closed.
    """
    stderr_file = tempfile.TemporaryFile()
    stdout_file = None
    try:
        if discard_stdout:
            stdout_file = open( os.devnull, 'wb' )
        proc = subprocess.Popen( args=args, cwd=cwd, stdout=stdout_file, stderr=stderr_file )
        returncode = proc.wait()
        stderr_file.seek( 0 )
        return returncode, _read_all( stderr_file )
    finally:
        stderr_file.close()
        if stdout_file is not None:
            stdout_file.close()


def _tophat_version():
    """Return the stripped first line printed by 'tophat -v' on stdout, or ''."""
    proc = subprocess.Popen( args=[ 'tophat', '-v' ], stdout=subprocess.PIPE )
    lines = _to_text( proc.communicate()[0] ).splitlines()
    if not lines:
        return ''
    return lines[0].strip()


def _build_option_parser():
    """Create the optparse parser describing every option the wrapper accepts."""
    parser = optparse.OptionParser()
    parser.add_option( '-p', '--num-threads', dest='num_threads', help='Use this many threads to align reads. The default is 1.' )
    parser.add_option( '-C', '--color-space', dest='color_space', action='store_true', help='This indicates color-space data' )
    parser.add_option( '-J', '--junctions-output', dest='junctions_output_file', help='Junctions output file; formate is BED.' )
    parser.add_option( '-H', '--hits-output', dest='accepted_hits_output_file', help='Accepted hits output file; formate is BAM.' )
    parser.add_option( '--own-file', dest='own_file', help='' )
    parser.add_option( '-D', '--indexes-path', dest='index_path', help='Indexes directory; location of .ebwt and .fa files.' )
    parser.add_option( '-r', '--mate-inner-dist', dest='mate_inner_dist',
                       help='This is the expected (mean) inner distance between mate pairs. '
                            'For, example, for paired end runs with fragments selected at 300bp, '
                            'where each end is 50bp, you should set -r to be 200. There is no default, '
                            'and this parameter is required for paired end runs.' )
    parser.add_option( '--mate-std-dev', dest='mate_std_dev', help='Standard deviation of distribution on inner distances between male pairs.' )
    parser.add_option( '-a', '--min-anchor-length', dest='min_anchor_length',
                       help='The "anchor length". TopHat will report junctions spanned by reads with at least this many bases on each side of the junction.' )
    parser.add_option( '-m', '--splice-mismatches', dest='splice_mismatches', help='The maximum number of mismatches that can appear in the anchor region of a spliced alignment.' )
    parser.add_option( '-i', '--min-intron-length', dest='min_intron_length',
                       help='The minimum intron length. TopHat will ignore donor/acceptor pairs closer than this many bases apart.' )
    parser.add_option( '-I', '--max-intron-length', dest='max_intron_length',
                       help='The maximum intron length. When searching for junctions ab initio, TopHat will ignore donor/acceptor pairs farther than this many bases apart, except when such a pair is supported by a split segment alignment of a long read.' )
    parser.add_option( '-F', '--junction_filter', dest='junction_filter', help='Filter out junctions supported by too few alignments (number of reads divided by average depth of coverage)' )
    parser.add_option( '-g', '--max_multihits', dest='max_multihits', help='Maximum number of alignments to be allowed' )
    parser.add_option( '--seg-mismatches', dest='seg_mismatches', help='Number of mismatches allowed in each segment alignment for reads mapped independently' )
    parser.add_option( '--seg-length', dest='seg_length', help='Minimum length of read segments' )
    parser.add_option( '--library-type', dest='library_type', help='TopHat will treat the reads as strand specific. Every read alignment will have an XS attribute tag. Consider supplying library type options below to select the correct RNA-seq protocol.' )
    # No explicit dest: optparse derives 'allow_indels' from the long option name.
    parser.add_option( '--allow-indels', action="store_true", help='Allow indel search. Indel search is disabled by default.' )
    parser.add_option( '--max-insertion-length', dest='max_insertion_length', help='The maximum insertion length. The default is 3.' )
    parser.add_option( '--max-deletion-length', dest='max_deletion_length', help='The maximum deletion length. The default is 3.' )

    # Options for supplying own junctions
    parser.add_option( '-G', '--GTF', dest='gene_model_annotations',
                       help='Supply TopHat with a list of gene model annotations. '
                            'TopHat will use the exon records in this file to build '
                            'a set of known splice junctions for each gene, and will '
                            'attempt to align reads to these junctions even if they '
                            'would not normally be covered by the initial mapping.' )
    parser.add_option( '-j', '--raw-juncs', dest='raw_juncs',
                       help='Supply TopHat with a list of raw junctions. Junctions are '
                            'specified one per line, in a tab-delimited format. Records '
                            'look like: <chrom> <left> <right> <+/-> left and right are '
                            'zero-based coordinates, and specify the last character of the '
                            'left sequenced to be spliced to the first character of the right '
                            'sequence, inclusive.' )
    parser.add_option( '--no-novel-juncs', action="store_true", dest='no_novel_juncs',
                       help="Only look for junctions indicated in the "
                            "supplied GFF file. (ignored without -G)" )

    # Types of search.
    parser.add_option( '--microexon-search', action="store_true", dest='microexon_search', help='With this option, the pipeline will attempt to find alignments incident to microexons. Works only for reads 50bp or longer.' )
    parser.add_option( '--closure-search', action="store_true", dest='closure_search', help='Enables the mate pair closure-based search for junctions. Closure-based search should only be used when the expected inner distance between mates is small (<= 50bp)' )
    parser.add_option( '--no-closure-search', action="store_false", dest='closure_search' )
    parser.add_option( '--coverage-search', action="store_true", dest='coverage_search', help='Enables the coverage based search for junctions. Use when coverage search is disabled by default (such as for reads 75bp or longer), for maximum sensitivity.' )
    parser.add_option( '--no-coverage-search', action="store_false", dest='coverage_search' )
    parser.add_option( '--min-segment-intron', dest='min_segment_intron', help='Minimum intron length that may be found during split-segment search' )
    parser.add_option( '--max-segment-intron', dest='max_segment_intron', help='Maximum intron length that may be found during split-segment search' )
    parser.add_option( '--min-closure-exon', dest='min_closure_exon', help='Minimum length for exonic hops in potential splice graph' )
    parser.add_option( '--min-closure-intron', dest='min_closure_intron', help='Minimum intron length that may be found during closure search' )
    parser.add_option( '--max-closure-intron', dest='max_closure_intron', help='Maximum intron length that may be found during closure search' )
    parser.add_option( '--min-coverage-intron', dest='min_coverage_intron', help='Minimum intron length that may be found during coverage search' )
    parser.add_option( '--max-coverage-intron', dest='max_coverage_intron', help='Maximum intron length that may be found during coverage search' )

    # Wrapper options.
    parser.add_option( '-1', '--input1', dest='input1', help='The (forward or single-end) reads file in Sanger FASTQ format' )
    parser.add_option( '-2', '--input2', dest='input2', help='The reverse reads file in Sanger FASTQ format' )
    parser.add_option( '--single-paired', dest='single_paired', help='' )
    parser.add_option( '--settings', dest='settings', help='' )
    return parser


def _build_tophat_options( options, space_args ):
    """Translate parsed wrapper options into a list of TopHat arguments.

    options    -- the optparse values object produced by the wrapper's parser.
    space_args -- [ '-C' ] for color-space data, otherwise an empty list.

    Raises an exception (ValueError/TypeError from the numeric conversions, or
    Exception with an explanatory message) when a parameter is missing or
    invalid.
    """
    opts = []
    if options.num_threads is not None:
        opts += [ '-p', str( options.num_threads ) ]
    opts += space_args
    paired = options.single_paired == 'paired'
    if paired:
        # The option's own help text documents -r as mandatory for paired runs.
        if options.mate_inner_dist is None:
            raise Exception( 'Mate inner distance (-r) is required for paired end runs' )
        opts += [ '-r', str( options.mate_inner_dist ) ]
    if options.settings == 'preSet':
        # Pre-set mode relies on TopHat's defaults for everything else.
        return opts

    if int( options.min_anchor_length ) < 3:
        raise Exception( 'Minimum anchor length must be 3 or greater' )
    opts += [ '-a', str( options.min_anchor_length ) ]
    opts += [ '-m', str( options.splice_mismatches ) ]
    opts += [ '-i', str( options.min_intron_length ) ]
    opts += [ '-I', str( options.max_intron_length ) ]
    if float( options.junction_filter ) != 0.0:
        opts += [ '-F', str( options.junction_filter ) ]
    opts += [ '-g', str( options.max_multihits ) ]

    # Custom junctions options.
    if options.gene_model_annotations:
        opts += [ '-G', options.gene_model_annotations ]
    if options.raw_juncs:
        opts += [ '-j', options.raw_juncs ]
    if options.no_novel_juncs:
        opts.append( '--no-novel-juncs' )
    if options.library_type:
        opts += [ '--library-type', options.library_type ]
    if options.allow_indels:
        # Max insertion/deletion length options do not work for Tophat v1.2.0,
        # despite documentation to the contrary, so they are not passed on;
        # the user is told about it instead.
        opts.append( '--allow-indels' )
        sys.stdout.write( "Max insertion length and max deletion length options don't work in Tophat v1.2.0\n" )

    # Search type options.
    if options.coverage_search:
        opts += [ '--coverage-search',
                  '--min-coverage-intron', str( options.min_coverage_intron ),
                  '--max-coverage-intron', str( options.max_coverage_intron ) ]
    else:
        opts.append( '--no-coverage-search' )
    if options.closure_search:
        opts += [ '--closure-search',
                  '--min-closure-exon', str( options.min_closure_exon ),
                  '--min-closure-intron', str( options.min_closure_intron ),
                  '--max-closure-intron', str( options.max_closure_intron ) ]
    else:
        opts.append( '--no-closure-search' )
    if options.microexon_search:
        opts.append( '--microexon-search' )
    if paired:
        opts += [ '--mate-std-dev', str( options.mate_std_dev ) ]
    if options.seg_mismatches:
        opts += [ '--segment-mismatches', '%d' % int( options.seg_mismatches ) ]
    if options.seg_length:
        opts += [ '--segment-length', '%d' % int( options.seg_length ) ]
    if options.min_segment_intron:
        opts += [ '--min-segment-intron', '%d' % int( options.min_segment_intron ) ]
    if options.max_segment_intron:
        opts += [ '--max-segment-intron', '%d' % int( options.max_segment_intron ) ]
    return opts


def __main__():
    """Parse the command line, optionally build a bowtie index, and run TopHat.

    Steps:
      1. Report the TopHat version on stdout (or a notice if it is unknown).
      2. If --own-file is given, index it with bowtie-build in a temporary
         directory; otherwise use the index given by --indexes-path.
      3. Run tophat in the current directory and copy
         tophat_out/junctions.bed and tophat_out/accepted_hits.bam to the
         files named by --junctions-output and --hits-output.

    Any failure is reported through stop_err. The temporary index directory
    is removed on every exit path, including failures.
    """
    parser = _build_option_parser()
    ( options, args ) = parser.parse_args()

    # output version # of tool
    try:
        version = _tophat_version()
    except Exception:
        # e.g. tophat is not on PATH; the version line is informational only.
        version = ''
    if version:
        sys.stdout.write( '%s\n' % version )
    else:
        sys.stdout.write( 'Could not determine Tophat version\n' )

    # Color or base space
    space_args = []
    if options.color_space:
        space_args = [ '-C' ]

    tmp_index_dir = tempfile.mkdtemp()
    try:
        # Create bowtie index if necessary.
        if options.own_file:
            base_name = os.path.splitext( os.path.basename( options.own_file ) )[0]
            index_path = os.path.join( tmp_index_dir, base_name )
            try:
                os.link( options.own_file, index_path + '.fa' )
            except ( OSError, AttributeError ):
                # Tophat prefers (but doesn't require) fasta file to be in same
                # directory, with .fa extension, so a failed link is not fatal.
                pass
            cmd_index = [ 'bowtie-build' ] + space_args + [ '-f', options.own_file, index_path ]
            try:
                returncode, stderr = _run_command( cmd_index, cwd=tmp_index_dir )
                if returncode != 0:
                    raise Exception( stderr )
            except Exception as e:
                stop_err( 'Error indexing reference sequence\n' + str( e ) )
        else:
            index_path = options.index_path

        # Build tophat command.
        try:
            opts = _build_tophat_options( options, space_args )
        except Exception as e:
            stop_err( 'Something is wrong with the alignment parameters and the alignment could not be run\n' + str( e ) )
        reads = [ options.input1 ]
        if options.input2:
            reads.append( options.input2 )
        cmd = [ 'tophat' ] + opts + [ index_path ] + reads

        # Run
        try:
            returncode, stderr = _run_command( cmd, discard_stdout=True )
            if returncode != 0:
                raise Exception( stderr )

            # Copy output files from tophat's output directory to specified files.
            shutil.copyfile( os.path.join( "tophat_out", "junctions.bed" ), options.junctions_output_file )
            shutil.copyfile( os.path.join( "tophat_out", "accepted_hits.bam" ), options.accepted_hits_output_file )

            # TODO: look for errors in program output.
        except Exception as e:
            stop_err( 'Error in tophat:\n' + str( e ) )
    finally:
        # Clean up temp dirs; runs even when stop_err raises SystemExit.
        if os.path.exists( tmp_index_dir ):
            shutil.rmtree( tmp_index_dir )
# Run the wrapper only when executed as a script, not when imported.
if __name__ == "__main__":
    __main__()

0

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

1 #!/usr/bin/env python

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

2

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

3 import optparse, os, shutil, subprocess, sys, tempfile, fileinput

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

4

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

5 def stop_err( msg ):

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

6 sys.stderr.write( "%s\n" % msg )

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

7 sys.exit()

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

8

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

9 def __main__():

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

10 #Parse Command Line

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

11 parser = optparse.OptionParser()

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

12 parser.add_option( '-p', '--num-threads', dest='num_threads', help='Use this many threads to align reads. The default is 1.' )

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

13 parser.add_option( '-C', '--color-space', dest='color_space', action='store_true', help='This indicates color-space data' )

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

14 parser.add_option( '-J', '--junctions-output', dest='junctions_output_file', help='Junctions output file; formate is BED.' )

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

15 parser.add_option( '-H', '--hits-output', dest='accepted_hits_output_file', help='Accepted hits output file; formate is BAM.' )

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

16 parser.add_option( '', '--own-file', dest='own_file', help='' )

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

17 parser.add_option( '-D', '--indexes-path', dest='index_path', help='Indexes directory; location of .ebwt and .fa files.' )

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

18 parser.add_option( '-r', '--mate-inner-dist', dest='mate_inner_dist', help='This is the expected (mean) inner distance between mate pairs. \

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

19 For, example, for paired end runs with fragments selected at 300bp, \

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

20 where each end is 50bp, you should set -r to be 200. There is no default, \

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

21 and this parameter is required for paired end runs.')

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

22 parser.add_option( '', '--mate-std-dev', dest='mate_std_dev', help='Standard deviation of distribution on inner distances between male pairs.' )

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

23 parser.add_option( '-a', '--min-anchor-length', dest='min_anchor_length',

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

24 help='The "anchor length". TopHat will report junctions spanned by reads with at least this many bases on each side of the junction.' )

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

25 parser.add_option( '-m', '--splice-mismatches', dest='splice_mismatches', help='The maximum number of mismatches that can appear in the anchor region of a spliced alignment.' )

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

26 parser.add_option( '-i', '--min-intron-length', dest='min_intron_length',

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

27 help='The minimum intron length. TopHat will ignore donor/acceptor pairs closer than this many bases apart.' )

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

28 parser.add_option( '-I', '--max-intron-length', dest='max_intron_length',

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

29 help='The maximum intron length. When searching for junctions ab initio, TopHat will ignore donor/acceptor pairs farther than this many bases apart, except when such a pair is supported by a split segment alignment of a long read.' )

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

30 parser.add_option( '-F', '--junction_filter', dest='junction_filter', help='Filter out junctions supported by too few alignments (number of reads divided by average depth of coverage)' )

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

31 parser.add_option( '-g', '--max_multihits', dest='max_multihits', help='Maximum number of alignments to be allowed' )

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

32 parser.add_option( '', '--seg-mismatches', dest='seg_mismatches', help='Number of mismatches allowed in each segment alignment for reads mapped independently' )

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

33 parser.add_option( '', '--seg-length', dest='seg_length', help='Minimum length of read segments' )

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

34 parser.add_option( '', '--library-type', dest='library_type', help='TopHat will treat the reads as strand specific. Every read alignment will have an XS attribute tag. Consider supplying library type options below to select the correct RNA-seq protocol.' )

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

35 parser.add_option( '', '--allow-indels', action="store_true", help='Allow indel search. Indel search is disabled by default.' )

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

36 parser.add_option( '', '--max-insertion-length', dest='max_insertion_length', help='The maximum insertion length. The default is 3.' )

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

37 parser.add_option( '', '--max-deletion-length', dest='max_deletion_length', help='The maximum deletion length. The default is 3.' )

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

38

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

39 # Options for supplying own junctions

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

40 parser.add_option( '-G', '--GTF', dest='gene_model_annotations', help='Supply TopHat with a list of gene model annotations. \

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

41 TopHat will use the exon records in this file to build \

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

42 a set of known splice junctions for each gene, and will \

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

43 attempt to align reads to these junctions even if they \

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

44 would not normally be covered by the initial mapping.')

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

45 parser.add_option( '-j', '--raw-juncs', dest='raw_juncs', help='Supply TopHat with a list of raw junctions. Junctions are \

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

46 specified one per line, in a tab-delimited format. Records \

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

47 look like: <chrom> <left> <right> <+/-> left and right are \

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

48 zero-based coordinates, and specify the last character of the \

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

49 left sequenced to be spliced to the first character of the right \

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

50 sequence, inclusive.')

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

51 parser.add_option( '', '--no-novel-juncs', action="store_true", dest='no_novel_juncs', help="Only look for junctions indicated in the \

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

52 supplied GFF file. (ignored without -G)")

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

53 # Types of search.

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

54 parser.add_option( '', '--microexon-search', action="store_true", dest='microexon_search', help='With this option, the pipeline will attempt to find alignments incident to microexons. Works only for reads 50bp or longer.')

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

55 parser.add_option( '', '--closure-search', action="store_true", dest='closure_search', help='Enables the mate pair closure-based search for junctions. Closure-based search should only be used when the expected inner distance between mates is small (<= 50bp)')

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

56 parser.add_option( '', '--no-closure-search', action="store_false", dest='closure_search' )

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

57 parser.add_option( '', '--coverage-search', action="store_true", dest='coverage_search', help='Enables the coverage based search for junctions. Use when coverage search is disabled by default (such as for reads 75bp or longer), for maximum sensitivity.')

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

58 parser.add_option( '', '--no-coverage-search', action="store_false", dest='coverage_search' )

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

59 parser.add_option( '', '--min-segment-intron', dest='min_segment_intron', help='Minimum intron length that may be found during split-segment search' )

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

60 parser.add_option( '', '--max-segment-intron', dest='max_segment_intron', help='Maximum intron length that may be found during split-segment search' )

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

61 parser.add_option( '', '--min-closure-exon', dest='min_closure_exon', help='Minimum length for exonic hops in potential splice graph' )

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

62 parser.add_option( '', '--min-closure-intron', dest='min_closure_intron', help='Minimum intron length that may be found during closure search' )

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

63 parser.add_option( '', '--max-closure-intron', dest='max_closure_intron', help='Maximum intron length that may be found during closure search' )

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

64 parser.add_option( '', '--min-coverage-intron', dest='min_coverage_intron', help='Minimum intron length that may be found during coverage search' )

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

65 parser.add_option( '', '--max-coverage-intron', dest='max_coverage_intron', help='Maximum intron length that may be found during coverage search' )

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

66

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

67 # Wrapper options.

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

68 parser.add_option( '-1', '--input1', dest='input1', help='The (forward or single-end) reads file in Sanger FASTQ format' )

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

69 parser.add_option( '-2', '--input2', dest='input2', help='The reverse reads file in Sanger FASTQ format' )

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

70 parser.add_option( '', '--single-paired', dest='single_paired', help='' )

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

71 parser.add_option( '', '--settings', dest='settings', help='' )

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

72

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

73 (options, args) = parser.parse_args()

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

74

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

75 # Output the version number of the tool

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

76 try:

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

77 tmp = tempfile.NamedTemporaryFile().name

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

78 tmp_stdout = open( tmp, 'wb' )

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

79 proc = subprocess.Popen( args='tophat -v', shell=True, stdout=tmp_stdout )

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

80 tmp_stdout.close()

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

81 returncode = proc.wait()

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

82 stdout = open( tmp_stdout.name, 'rb' ).readline().strip()

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

83 if stdout:

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

84 sys.stdout.write( '%s\n' % stdout )

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

85 else:

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

86 raise Exception

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

87 except:

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

88 sys.stdout.write( 'Could not determine Tophat version\n' )

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

89

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

90 # Color or base space

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

91 space = ''

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

92 if options.color_space:

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

93 space = '-C'

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

94

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

95 # Create bowtie index if necessary.

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

96 tmp_index_dir = tempfile.mkdtemp()

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

97 if options.own_file:

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

98 index_path = os.path.join( tmp_index_dir, '.'.join( os.path.split( options.own_file )[1].split( '.' )[:-1] ) )

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

99 try:

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

100 os.link( options.own_file, index_path + '.fa' )

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

101 except:

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

102 # Tophat prefers (but doesn't require) fasta file to be in same directory, with .fa extension

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

103 pass

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

104 cmd_index = 'bowtie-build %s -f %s %s' % ( space, options.own_file, index_path )

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

105 try:

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

106 tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

107 tmp_stderr = open( tmp, 'wb' )

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

108 proc = subprocess.Popen( args=cmd_index, shell=True, cwd=tmp_index_dir, stderr=tmp_stderr.fileno() )

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

109 returncode = proc.wait()

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

110 tmp_stderr.close()

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

111 # get stderr, allowing for case where it's very large

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

112 tmp_stderr = open( tmp, 'rb' )

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

113 stderr = ''

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

114 buffsize = 1048576

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

115 try:

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

116 while True:

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

117 stderr += tmp_stderr.read( buffsize )

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

118 if not stderr or len( stderr ) % buffsize != 0:

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

119 break

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

120 except OverflowError:

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

121 pass

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

122 tmp_stderr.close()

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

123 if returncode != 0:

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

124 raise Exception, stderr

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

125 except Exception, e:

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

126 if os.path.exists( tmp_index_dir ):

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

127 shutil.rmtree( tmp_index_dir )

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

128 stop_err( 'Error indexing reference sequence\n' + str( e ) )

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

129 else:

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

130 index_path = options.index_path

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

131

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

132 # Build tophat command.

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

133 cmd = 'tophat %s %s %s'

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

134 reads = options.input1

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

135 if options.input2:

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

136 reads += ' ' + options.input2

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

137 opts = '-p %s %s' % ( options.num_threads, space )

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

138 if options.single_paired == 'paired':

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

139 opts += ' -r %s' % options.mate_inner_dist

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

140 if options.settings == 'preSet':

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

141 cmd = cmd % ( opts, index_path, reads )

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

142 else:

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

143 try:

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

144 if int( options.min_anchor_length ) >= 3:

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

145 opts += ' -a %s' % options.min_anchor_length

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

146 else:

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

147 raise Exception, 'Minimum anchor length must be 3 or greater'

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

148 opts += ' -m %s' % options.splice_mismatches

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

149 opts += ' -i %s' % options.min_intron_length

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

150 opts += ' -I %s' % options.max_intron_length

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

151 if float( options.junction_filter ) != 0.0:

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

152 opts += ' -F %s' % options.junction_filter

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

153 opts += ' -g %s' % options.max_multihits

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

154 # Custom junctions options.

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

155 if options.gene_model_annotations:

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

156 opts += ' -G %s' % options.gene_model_annotations

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

157 if options.raw_juncs:

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

158 opts += ' -j %s' % options.raw_juncs

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

159 if options.no_novel_juncs:

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

160 opts += ' --no-novel-juncs'

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

161 if options.library_type:

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

162 opts += ' --library-type %s' % options.library_type

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

163 if options.allow_indels:

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

164 # Max options do not work for Tophat v1.2.0, despite documentation to the contrary.

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

165 opts += ' --allow-indels'

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

166 #opts += ' --max-insertion-length %i --max-deletion-length %i' % ( int( options.max_insertion_length ), int( options.max_deletion_length ) )

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

167 # need to warn user of this fact

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

168 sys.stdout.write( "Max insertion length and max deletion length options don't work in Tophat v1.2.0\n" )

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

169

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

170 # Search type options.

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

171 if options.coverage_search:

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

172 opts += ' --coverage-search --min-coverage-intron %s --max-coverage-intron %s' % ( options.min_coverage_intron, options.max_coverage_intron )

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

173 else:

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

174 opts += ' --no-coverage-search'

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

175 if options.closure_search:

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

176 opts += ' --closure-search --min-closure-exon %s --min-closure-intron %s --max-closure-intron %s' % ( options.min_closure_exon, options.min_closure_intron, options.max_closure_intron )

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

177 else:

9071e359b9a3 Uploaded

xuebing

parents:

diff changeset

178 opts += ' --no-closure-search'

9071e359b9a3 Uploaded

xuebing