comparison deseq/differential_expression_analysis_pipeline_for_rnaseq_data-a03838a6eb54/DiffExpAnal/bam_to_sam_parallel.py @ 10:6e573fd3c41b draft

Uploaded
author yufei-luo
date Mon, 13 May 2013 10:06:30 -0400
parents
children
comparison
equal deleted inserted replaced
9:a03838a6eb54 10:6e573fd3c41b
1 #!/usr/bin/env python
2 """
3 Yufei LUO
4 """
5 import optparse, os, sys, subprocess, tempfile, shutil, tarfile, random
6 #from galaxy import eggs
7 #import pkg_resources; pkg_resources.require( "bx-python" )
8 #from bx.cookbook import doc_optparse
9 #from galaxy import util
10
11 def stop_err( msg ):
12 sys.stderr.write( '%s\n' % msg )
13 sys.exit()
14
15 def toTar(tarFileName, samOutputNames):
16 dir = os.path.dirname(tarFileName)
17 tfile = tarfile.open(tarFileName + ".tmp.tar", "w")
18 currentPath = os.getcwd()
19 os.chdir(dir)
20 for file in samOutputNames:
21 relativeFileName = os.path.basename(file)
22 tfile.add(relativeFileName)
23 os.system("mv %s %s" % (tarFileName + ".tmp.tar", tarFileName))
24 tfile.close()
25 os.chdir(currentPath)
26
27
28 def __main__():
29 #Parse Command Line
30 parser = optparse.OptionParser()
31 parser.add_option('-t', '--tar', dest='outputTar', default=None, help='output all SAM results in a tar file.' )
32 parser.add_option( '', '--input1', dest='input1', help='The input list of BAM datasets on txt format.' )
33 #parser.add_option( '', '--input1', dest='input1', help='The input BAM dataset' )
34 parser.add_option( '', '--output1', dest='output1', help='The output list of SAM datasets on txt format.' )
35 #parser.add_option( '', '--output1', dest='output1', help='The output SAM dataset' )
36 parser.add_option( '', '--header', dest='header', action='store_true', default=False, help='Write SAM Header' )
37 ( options, args ) = parser.parse_args()
38
39
40 #Parse the input txt file and read a list of BAM files.
41 file = open(options.input1, "r")
42 lines = file.readlines()
43 inputFileNames = []
44 samOutputNames = []
45 outputName = options.output1
46 resDirName = os.path.dirname(outputName) + '/'
47 #Write output txt file and define all output sam file names.
48 out = open(outputName, "w")
49 for line in lines:
50 tab = line.split()
51 inputFileNames.append(tab[1])
52 samOutName = resDirName + tab[0] + '_samOutput_%s.sam' % random.randrange(0, 10000)
53 samOutputNames.append(samOutName)
54 out.write(tab[0] + '\t' + samOutName + '\n')
55 file.close()
56 out.close()
57
58 # output version # of tool
59 try:
60 tmp_files = []
61 tmp = tempfile.NamedTemporaryFile().name
62 tmp_files.append(tmp)
63 tmp_stdout = open( tmp, 'wb' )
64 proc = subprocess.Popen( args='samtools 2>&1', shell=True, stdout=tmp_stdout )
65 tmp_stdout.close()
66 returncode = proc.wait()
67 stdout = None
68 for line in open( tmp_stdout.name, 'rb' ):
69 if line.lower().find( 'version' ) >= 0:
70 stdout = line.strip()
71 break
72 if stdout:
73 sys.stdout.write( 'Samtools %s\n' % stdout )
74 else:
75 raise Exception
76 except:
77 sys.stdout.write( 'Could not determine Samtools version\n' )
78
79
80
81 tmp_dirs = []
82 for i in range(len(inputFileNames)):
83 try:
84 # exit if input file empty
85 if os.path.getsize( inputFileNames[i] ) == 0:
86 raise Exception, 'Initial input txt file is empty.'
87 # Sort alignments by leftmost coordinates. File <out.prefix>.bam will be created. This command
88 # may also create temporary files <out.prefix>.%d.bam when the whole alignment cannot be fitted
89 # into memory ( controlled by option -m ).
90 tmp_dir = tempfile.mkdtemp()
91 tmp_dirs.append(tmp_dir)
92 tmp_sorted_aligns_file = tempfile.NamedTemporaryFile( dir=tmp_dir )
93 tmp_sorted_aligns_file_base = tmp_sorted_aligns_file.name
94 tmp_sorted_aligns_file_name = '%s.bam' % tmp_sorted_aligns_file.name
95 tmp_files.append(tmp_sorted_aligns_file_name)
96 tmp_sorted_aligns_file.close()
97
98 command = 'samtools sort %s %s' % ( inputFileNames[i], tmp_sorted_aligns_file_base )
99 tmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name
100 tmp_stderr = open( tmp, 'wb' )
101 proc = subprocess.Popen( args=command, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() )
102 returncode = proc.wait()
103 tmp_stderr.close()
104 # get stderr, allowing for case where it's very large
105 tmp_stderr = open( tmp, 'rb' )
106 stderr = ''
107 buffsize = 1048576
108 try:
109 while True:
110 stderr += tmp_stderr.read( buffsize )
111 if not stderr or len( stderr ) % buffsize != 0:
112 break
113 except OverflowError:
114 pass
115 tmp_stderr.close()
116 if returncode != 0:
117 raise Exception, stderr
118 # exit if sorted BAM file empty
119 if os.path.getsize( tmp_sorted_aligns_file_name) == 0:
120 raise Exception, 'Intermediate sorted BAM file empty'
121 except Exception, e:
122 stop_err( 'Error sorting alignments from (%s), %s' % ( inputFileNames[i], str( e ) ) )
123
124 try:
125 # Extract all alignments from the input BAM file to SAM format ( since no region is specified, all the alignments will be extracted ).
126 if options.header:
127 view_options = "-h"
128 else:
129 view_options = ""
130 command = 'samtools view %s -o %s %s' % ( view_options, samOutputNames[i], tmp_sorted_aligns_file_name )
131 tmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name
132 tmp_stderr = open( tmp, 'wb' )
133 proc = subprocess.Popen( args=command, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() )
134 returncode = proc.wait()
135 tmp_stderr.close()
136 # get stderr, allowing for case where it's very large
137 tmp_stderr = open( tmp, 'rb' )
138 stderr = ''
139 buffsize = 1048576
140 try:
141 while True:
142 stderr += tmp_stderr.read( buffsize )
143 if not stderr or len( stderr ) % buffsize != 0:
144 break
145 except OverflowError:
146 pass
147 tmp_stderr.close()
148 if returncode != 0:
149 raise Exception, stderr
150 except Exception, e:
151 stop_err( 'Error extracting alignments from (%s), %s' % ( inputFileNames[i], str( e ) ) )
152 if os.path.getsize( samOutputNames[i] ) > 0:
153 sys.stdout.write( 'BAM file converted to SAM' )
154 else:
155 stop_err( 'The output file is empty, there may be an error with your input file.' )
156
157 if options.outputTar != None:
158 toTar(options.outputTar, samOutputNames)
159 #clean up temp files
160 for tmp_dir in tmp_dirs:
161 if os.path.exists( tmp_dir ):
162 shutil.rmtree( tmp_dir )
163 #print tmp_files
164 #for tmp in tmp_files:
165 # os.remove(tmp)
166
167
168 if __name__=="__main__": __main__()