annotate deseq/differential_expression_analysis_pipeline_for_rnaseq_data-a03838a6eb54/DiffExpAnal/tophat_parallel.py @ 10:6e573fd3c41b draft

Uploaded
author yufei-luo
date Mon, 13 May 2013 10:06:30 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
10
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
1 #!/usr/bin/env python
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
2 """
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
3 Yufei LUO
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
4 """
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
5
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
6 import optparse, os, shutil, subprocess, sys, tempfile, fileinput, tarfile,random
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
7
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
8 def stop_err( msg ):
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
9 sys.stderr.write( "%s\n" % msg )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
10 sys.exit()
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
11
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
12 def toTar(tarFileName, accepted_hits_outputNames):
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
13 fileName = os.path.splitext(tarFileName)[0]
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
14 fileNameBaseName = os.path.basename(fileName)
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
15 dir = os.path.dirname(tarFileName)
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
16 tfile = tarfile.open(tarFileName + ".tmp.tar", "w")
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
17 currentPath = os.getcwd()
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
18 os.chdir(dir)
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
19 for file in accepted_hits_outputNames:
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
20 relativeFileName = os.path.basename(file)
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
21 tfile.add(relativeFileName)
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
22 os.system("mv %s %s" % (tarFileName + ".tmp.tar", tarFileName))
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
23 tfile.close()
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
24 os.chdir(currentPath)
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
25
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
26
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
27 def __main__():
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
28 #Parse Command Line
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
29 parser = optparse.OptionParser()
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
30 parser.add_option('-o', '--outputTxtFile', dest='outputTxtFile', help='for Differential expression analysis pipeline, new output option gives a txt output containing the list of mapping results.')
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
31 parser.add_option('-t', '--tar', dest='outputTar', default=None, help='output all accepted hits results in a tar file.' )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
32 parser.add_option( '-p', '--num-threads', dest='num_threads', help='Use this many threads to align reads. The default is 1.' )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
33 parser.add_option( '-C', '--color-space', dest='color_space', action='store_true', help='This indicates color-space data' )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
34 parser.add_option( '-J', '--junctions-output', dest='junctions_output_file', default='junctions_output.bed', help='Junctions output file; formate is BED.' )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
35 parser.add_option( '-H', '--hits-output', dest='accepted_hits_output_file', default='hits_output_%s.bam' % random.randrange(0, 10000), help='Accepted hits output file; formate is BAM.' )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
36 parser.add_option( '', '--own-file', dest='own_file', help='' )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
37 parser.add_option( '-D', '--indexes-path', dest='index_path', help='Indexes directory; location of .ebwt and .fa files.' )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
38 parser.add_option( '-r', '--mate-inner-dist', dest='mate_inner_dist', help='This is the expected (mean) inner distance between mate pairs. \
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
39 For, example, for paired end runs with fragments selected at 300bp, \
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
40 where each end is 50bp, you should set -r to be 200. There is no default, \
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
41 and this parameter is required for paired end runs.')
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
42 parser.add_option( '', '--mate-std-dev', dest='mate_std_dev', help='Standard deviation of distribution on inner distances between male pairs.' )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
43 parser.add_option( '-a', '--min-anchor-length', dest='min_anchor_length',
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
44 help='The "anchor length". TopHat will report junctions spanned by reads with at least this many bases on each side of the junction.' )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
45 parser.add_option( '-m', '--splice-mismatches', dest='splice_mismatches', help='The maximum number of mismatches that can appear in the anchor region of a spliced alignment.' )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
46 parser.add_option( '-i', '--min-intron-length', dest='min_intron_length',
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
47 help='The minimum intron length. TopHat will ignore donor/acceptor pairs closer than this many bases apart.' )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
48 parser.add_option( '-I', '--max-intron-length', dest='max_intron_length',
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
49 help='The maximum intron length. When searching for junctions ab initio, TopHat will ignore donor/acceptor pairs farther than this many bases apart, except when such a pair is supported by a split segment alignment of a long read.' )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
50 parser.add_option( '-F', '--junction_filter', dest='junction_filter', help='Filter out junctions supported by too few alignments (number of reads divided by average depth of coverage)' )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
51 parser.add_option( '-g', '--max_multihits', dest='max_multihits', help='Maximum number of alignments to be allowed' )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
52 parser.add_option( '', '--initial-read-mismatches', dest='initial_read_mismatches', help='Number of mismatches allowed in the initial read mapping' )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
53 parser.add_option( '', '--seg-mismatches', dest='seg_mismatches', help='Number of mismatches allowed in each segment alignment for reads mapped independently' )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
54 parser.add_option( '', '--seg-length', dest='seg_length', help='Minimum length of read segments' )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
55 parser.add_option( '', '--library-type', dest='library_type', help='TopHat will treat the reads as strand specific. Every read alignment will have an XS attribute tag. Consider supplying library type options below to select the correct RNA-seq protocol.' )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
56 parser.add_option( '', '--allow-indels', action="store_true", help='Allow indel search. Indel search is disabled by default.(Not used since version 1.3.0)' )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
57 parser.add_option( '', '--max-insertion-length', dest='max_insertion_length', help='The maximum insertion length. The default is 3.' )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
58 parser.add_option( '', '--max-deletion-length', dest='max_deletion_length', help='The maximum deletion length. The default is 3.' )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
59
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
60 # Options for supplying own junctions
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
61 parser.add_option( '-G', '--GTF', dest='gene_model_annotations', help='Supply TopHat with a list of gene model annotations. \
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
62 TopHat will use the exon records in this file to build \
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
63 a set of known splice junctions for each gene, and will \
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
64 attempt to align reads to these junctions even if they \
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
65 would not normally be covered by the initial mapping.')
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
66 parser.add_option( '-j', '--raw-juncs', dest='raw_juncs', help='Supply TopHat with a list of raw junctions. Junctions are \
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
67 specified one per line, in a tab-delimited format. Records \
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
68 look like: <chrom> <left> <right> <+/-> left and right are \
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
69 zero-based coordinates, and specify the last character of the \
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
70 left sequenced to be spliced to the first character of the right \
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
71 sequence, inclusive.')
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
72 parser.add_option( '', '--no-novel-juncs', action="store_true", dest='no_novel_juncs', help="Only look for junctions indicated in the \
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
73 supplied GFF file. (ignored without -G)")
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
74 parser.add_option( '', '--no-novel-indels', action="store_true", dest='no_novel_indels', help="Skip indel search. Indel search is enabled by default.")
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
75 # Types of search.
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
76 parser.add_option( '', '--microexon-search', action="store_true", dest='microexon_search', help='With this option, the pipeline will attempt to find alignments incident to microexons. Works only for reads 50bp or longer.')
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
77 parser.add_option( '', '--closure-search', action="store_true", dest='closure_search', help='Enables the mate pair closure-based search for junctions. Closure-based search should only be used when the expected inner distance between mates is small (<= 50bp)')
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
78 parser.add_option( '', '--no-closure-search', action="store_false", dest='closure_search' )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
79 parser.add_option( '', '--coverage-search', action="store_true", dest='coverage_search', help='Enables the coverage based search for junctions. Use when coverage search is disabled by default (such as for reads 75bp or longer), for maximum sensitivity.')
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
80 parser.add_option( '', '--no-coverage-search', action="store_false", dest='coverage_search' )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
81 parser.add_option( '', '--min-segment-intron', dest='min_segment_intron', help='Minimum intron length that may be found during split-segment search' )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
82 parser.add_option( '', '--max-segment-intron', dest='max_segment_intron', help='Maximum intron length that may be found during split-segment search' )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
83 parser.add_option( '', '--min-closure-exon', dest='min_closure_exon', help='Minimum length for exonic hops in potential splice graph' )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
84 parser.add_option( '', '--min-closure-intron', dest='min_closure_intron', help='Minimum intron length that may be found during closure search' )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
85 parser.add_option( '', '--max-closure-intron', dest='max_closure_intron', help='Maximum intron length that may be found during closure search' )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
86 parser.add_option( '', '--min-coverage-intron', dest='min_coverage_intron', help='Minimum intron length that may be found during coverage search' )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
87 parser.add_option( '', '--max-coverage-intron', dest='max_coverage_intron', help='Maximum intron length that may be found during coverage search' )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
88
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
89 # Wrapper options.
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
90 parser.add_option( '-1', '--input1', dest='input1', help='A list of the (forward or single-end) reads files of Sanger FASTQ format, txt format' )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
91 #parser.add_option( '-1', '--input1', dest='input1', help='The (forward or single-end) reads file in Sanger FASTQ format' )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
92 #parser.add_option( '-2', '--input2', dest='input2', help='The reverse reads file in Sanger FASTQ format' )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
93 parser.add_option( '-2', '--input2', dest='input2', help='The list of reverse reads file in Sanger FASTQ format' )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
94 parser.add_option( '', '--single-paired', dest='single_paired', help='' )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
95 parser.add_option( '', '--settings', dest='settings', help='' )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
96
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
97 (options, args) = parser.parse_args()
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
98
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
99 # output version # of tool
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
100 try:
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
101 tmp_files = []
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
102 tmp = tempfile.NamedTemporaryFile().name
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
103 tmp_files.append(tmp)
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
104 tmp_stdout = open( tmp, 'wb' )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
105 proc = subprocess.Popen( args='tophat -v', shell=True, stdout=tmp_stdout )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
106 tmp_stdout.close()
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
107 returncode = proc.wait()
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
108 stdout = open( tmp_stdout.name, 'rb' ).readline().strip()
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
109 if stdout:
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
110 sys.stdout.write( '%s\n' % stdout )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
111 else:
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
112 raise Exception
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
113 except:
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
114 sys.stdout.write( 'Could not determine Tophat version\n' )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
115
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
116 # Color or base space
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
117 space = ''
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
118 if options.color_space:
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
119 space = '-C'
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
120
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
121
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
122 #reads = options.input1
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
123 file = open(options.input1,"r")
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
124 lines = file.readlines()
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
125 inputFileNames = []
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
126 accepted_hits_outputNames = []
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
127 outputName = options.outputTxtFile
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
128 resDirName = os.path.dirname(outputName) + '/'
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
129 out = open(outputName, "w")
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
130 for line in lines:
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
131 tab = line.split()
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
132 inputFileNames.append(tab[1])
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
133 aHitOutName = resDirName + tab[0] + '_' + options.accepted_hits_output_file
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
134 accepted_hits_outputNames.append(aHitOutName)
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
135 out.write(tab[0] + '\t' + aHitOutName + '\n')
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
136 file.close()
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
137 out.close()
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
138
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
139 if options.input2:
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
140 revFile = open(options.input2,"r")
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
141 lines = revFile.readlines()
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
142 inputRevFileNames = []
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
143 for line in lines:
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
144 revTab = line.split()
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
145 inputRevFileNames.append(revTab[1])
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
146 revFile.close()
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
147
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
148
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
149 # Creat bowtie index if necessary.
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
150 tmp_index_dirs = []
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
151 index_paths = []
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
152 tmp_index_dir = tempfile.mkdtemp()
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
153 tmp_index_dirs.append(tmp_index_dir)
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
154 if options.own_file:
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
155 index_path = os.path.join( tmp_index_dir, '.'.join( os.path.split( options.own_file )[1].split( '.' )[:-1] ) )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
156 index_paths.append(index_path)
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
157 try:
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
158 os.link( options.own_file, index_path + '.fa' )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
159 except:
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
160 # Tophat prefers (but doesn't require) fasta file to be in same directory, with .fa extension
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
161 pass
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
162 cmd_index = 'bowtie-build %s -f %s %s' % ( space, options.own_file, index_path )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
163 try:
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
164 tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
165 tmp_stderr = open( tmp, 'wb' )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
166 proc = subprocess.Popen( args=cmd_index, shell=True, cwd=tmp_index_dir, stderr=tmp_stderr.fileno() )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
167 returncode = proc.wait()
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
168 tmp_stderr.close()
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
169 # get stderr, allowing for case where it's very large
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
170 tmp_stderr = open( tmp, 'rb' )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
171 stderr = ''
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
172 buffsize = 1048576
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
173 try:
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
174 while True:
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
175 stderr += tmp_stderr.read( buffsize )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
176 if not stderr or len( stderr ) % buffsize != 0:
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
177 break
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
178 except OverflowError:
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
179 pass
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
180 tmp_stderr.close()
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
181 if returncode != 0:
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
182 raise Exception, stderr
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
183 except Exception, e:
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
184 if os.path.exists( tmp_index_dir ):
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
185 shutil.rmtree( tmp_index_dir )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
186 stop_err( 'Error indexing reference sequence\n' + str( e ) )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
187 else:
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
188 for file in inputFileNames:
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
189 tmp_index_dir = tempfile.mkdtemp()
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
190 index_path = tmp_index_dir + '/' + os.path.basename(file).split('.')[0]
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
191 index_paths.append(index_path)
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
192 tmp_index_dirs.append(tmp_index_dir)
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
193
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
194
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
195
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
196 # Build tophat command.
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
197 cmds = []
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
198 # for inputFileName in inputFileNames:
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
199 for i in range(len(inputFileNames)):
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
200 cmd = 'tophat %s %s %s '
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
201 input_files = inputFileNames[i]
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
202 if options.input2:
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
203 input_files += ' ' + inputRevFileNames[i]
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
204 opts = '-p %s %s' % ( options.num_threads, space )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
205 if options.single_paired == 'paired':
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
206 opts += '-r %s ' % options.mate_inner_dist
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
207 if options.settings == 'preSet':
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
208 if options.own_file:
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
209 cmd = cmd % ( opts, index_paths[0], input_files ) #here add paired end file
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
210 else:
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
211 cmd = cmd % ( opts, index_paths[i], input_files ) #here add paired end file
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
212 else:
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
213 try:
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
214 if int( options.min_anchor_length ) >= 3:
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
215 opts += '-a %s ' % options.min_anchor_length
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
216 else:
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
217 raise Exception, 'Minimum anchor length must be 3 or greater'
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
218 opts += '-m %s ' % options.splice_mismatches
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
219 opts += '-i %s ' % options.min_intron_length
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
220 opts += '-I %s ' % options.max_intron_length
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
221 if float( options.junction_filter ) != 0.0:
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
222 opts += '-F %s ' % options.junction_filter
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
223 opts += '-g %s ' % options.max_multihits
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
224 # Custom junctions options.
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
225 if options.gene_model_annotations:
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
226 opts += '-G %s ' % options.gene_model_annotations
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
227 if options.raw_juncs:
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
228 opts += '-j %s ' % options.raw_juncs
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
229 if options.no_novel_juncs:
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
230 opts += '--no-novel-juncs '
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
231 if options.library_type:
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
232 opts += '--library-type %s ' % options.library_type
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
233 if options.no_novel_indels:
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
234 opts += '--no-novel-indels '
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
235 else:
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
236 if options.max_insertion_length:
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
237 opts += '--max-insertion-length %i ' % int( options.max_insertion_length )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
238 if options.max_deletion_length:
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
239 opts += '--max-deletion-length %i ' % int( options.max_deletion_length )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
240 # Max options do not work for Tophat v1.2.0, despite documentation to the contrary. (Fixed in version 1.3.1)
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
241 # need to warn user of this fact
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
242 #sys.stdout.write( "Max insertion length and max deletion length options don't work in Tophat v1.2.0\n" )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
243
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
244 # Search type options.
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
245 if options.coverage_search:
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
246 opts += '--coverage-search --min-coverage-intron %s --max-coverage-intron %s ' % ( options.min_coverage_intron, options.max_coverage_intron )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
247 else:
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
248 opts += '--no-coverage-search '
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
249 if options.closure_search:
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
250 opts += '--closure-search --min-closure-exon %s --min-closure-intron %s --max-closure-intron %s ' % ( options.min_closure_exon, options.min_closure_intron, options.max_closure_intron )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
251 else:
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
252 opts += '--no-closure-search '
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
253 if options.microexon_search:
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
254 opts += '--microexon-search '
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
255 if options.single_paired == 'paired':
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
256 opts += '--mate-std-dev %s ' % options.mate_std_dev
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
257 if options.initial_read_mismatches:
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
258 opts += '--initial-read-mismatches %d ' % int( options.initial_read_mismatches )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
259 if options.seg_mismatches:
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
260 opts += '--segment-mismatches %d ' % int( options.seg_mismatches )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
261 if options.seg_length:
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
262 opts += '--segment-length %d ' % int( options.seg_length )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
263 if options.min_segment_intron:
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
264 opts += '--min-segment-intron %d ' % int( options.min_segment_intron )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
265 if options.max_segment_intron:
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
266 opts += '--max-segment-intron %d ' % int( options.max_segment_intron )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
267 if options.own_file:
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
268 cmd = cmd % ( opts, index_paths[0], input_files ) #here to add paired end file
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
269 else:
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
270 cmd = cmd % ( opts, index_paths[i], input_files ) #here to add paired end file
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
271 except Exception, e:
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
272 # Clean up temp dirs
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
273 if os.path.exists( tmp_index_dir ):
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
274 shutil.rmtree( tmp_index_dir )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
275 stop_err( 'Something is wrong with the alignment parameters and the alignment could not be run\n' + str( e ) )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
276
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
277 cmds.append(cmd)
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
278
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
279 # Run the command line for each file.
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
280 for i in range(len(cmds)):
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
281 try:
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
282 tmp_out = tempfile.NamedTemporaryFile().name
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
283 tmp_files.append(tmp_out)
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
284 tmp_stdout = open( tmp_out, 'wb' )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
285 tmp_err = tempfile.NamedTemporaryFile().name
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
286 tmp_files.append(tmp_err)
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
287 tmp_stderr = open( tmp_err, 'wb' )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
288 proc = subprocess.Popen( args=cmds[i], shell=True, cwd=".", stdout=tmp_stdout, stderr=tmp_stderr )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
289 returncode = proc.wait()
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
290 tmp_stderr.close()
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
291 # get stderr, allowing for case where it's very large
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
292 tmp_stderr = open( tmp_err, 'rb' )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
293 stderr = ''
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
294 buffsize = 1048576
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
295 try:
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
296 while True:
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
297 stderr += tmp_stderr.read( buffsize )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
298 if not stderr or len( stderr ) % buffsize != 0:
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
299 break
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
300 except OverflowError:
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
301 pass
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
302 tmp_stdout.close()
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
303 tmp_stderr.close()
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
304 if returncode != 0:
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
305 raise Exception, stderr
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
306
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
307 # Copy output files from tmp directory to specified files.
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
308 #shutil.copyfile( os.path.join( "tophat_out", "junctions.bed" ), junctions_outputNames[i] )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
309 shutil.copyfile( os.path.join( "tophat_out", "accepted_hits.bam" ), accepted_hits_outputNames[i] )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
310 # TODO: look for errors in program output.
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
311 except Exception, e:
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
312 stop_err( 'Error in tophat:\n' + str( e ) )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
313
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
314 if options.outputTar != None:
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
315 toTar(options.outputTar, accepted_hits_outputNames)
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
316
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
317
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
318 # Clean up temp dirs
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
319 for tmp_index_dir in tmp_index_dirs:
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
320 if os.path.exists( tmp_index_dir ):
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
321 shutil.rmtree( tmp_index_dir )
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
322
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
323 for tmp in tmp_files:
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
324 os.remove(tmp)
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
325
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
326
6e573fd3c41b Uploaded
yufei-luo
parents:
diff changeset
327 if __name__=="__main__": __main__()