annotate samtools_slice_bam.py @ 0:68ba55e96489 draft

Uploaded tool tarball.
author devteam
date Mon, 26 Aug 2013 14:25:12 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
1 #!/usr/bin/env python
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
2 #Dan Blankenberg
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
3
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
4 """
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
5 A wrapper script for slicing a BAM file by provided BED file using SAMTools.
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
6 %prog input_filename.sam output_filename.bam
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
7 """
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
8 #TODO: Confirm that the sort is necessary e.g. if input regions are out of order
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
9
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
10
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
11 import sys, optparse, os, tempfile, subprocess, shutil
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
12
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
13 CHUNK_SIZE = 2**20 #1mb
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
14
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
15 def cleanup_before_exit( tmp_dir ):
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
16 if tmp_dir and os.path.exists( tmp_dir ):
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
17 shutil.rmtree( tmp_dir )
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
18
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
19 def __main__():
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
20 #Parse Command Line
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
21 parser = optparse.OptionParser()
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
22 (options, args) = parser.parse_args()
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
23
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
24 assert len( args ) == 4, "Invalid command line: samtools_slice_bam.py input.bam input.bam.bai input.interval output.bam"
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
25 input_bam_filename, input_index_filename, input_interval_filename, output_bam_filename = args
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
26
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
27 tmp_dir = tempfile.mkdtemp( prefix='tmp-samtools_slice_bam-' )
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
28
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
29 tmp_input_bam_filename = os.path.join( tmp_dir, 'input_bam.bam' )
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
30 os.symlink( input_bam_filename, tmp_input_bam_filename )
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
31 os.symlink( input_index_filename, "%s.bai" % tmp_input_bam_filename )
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
32
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
33 #Slice BAM
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
34 unsorted_bam_filename = os.path.join( tmp_dir, 'unsorted.bam' )
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
35 unsorted_stderr_filename = os.path.join( tmp_dir, 'unsorted.stderr' )
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
36 cmd = 'samtools view -b -L "%s" "%s" > "%s"' % ( input_interval_filename, tmp_input_bam_filename, unsorted_bam_filename )
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
37 proc = subprocess.Popen( args=cmd, stderr=open( unsorted_stderr_filename, 'wb' ), shell=True, cwd=tmp_dir )
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
38 return_code = proc.wait()
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
39 if return_code:
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
40 stderr_target = sys.stderr
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
41 else:
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
42 stderr_target = sys.stdout
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
43 stderr = open( unsorted_stderr_filename )
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
44 while True:
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
45 chunk = stderr.read( CHUNK_SIZE )
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
46 if chunk:
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
47 stderr_target.write( chunk )
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
48 else:
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
49 break
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
50 stderr.close()
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
51
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
52 #sort sam, so indexing will not fail
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
53 #TODO: confirm if sorting is necessary (is original BAM order maintained, or does the output follow the order of input intervals?)
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
54 sorted_stderr_filename = os.path.join( tmp_dir, 'sorted.stderr' )
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
55 sorting_prefix = os.path.join( tmp_dir, 'sorted_bam' )
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
56 cmd = 'samtools sort -o "%s" "%s" > "%s"' % ( unsorted_bam_filename, sorting_prefix, output_bam_filename )
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
57 proc = subprocess.Popen( args=cmd, stderr=open( sorted_stderr_filename, 'wb' ), shell=True, cwd=tmp_dir )
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
58 return_code = proc.wait()
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
59
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
60 if return_code:
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
61 stderr_target = sys.stderr
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
62 else:
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
63 stderr_target = sys.stdout
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
64 stderr = open( sorted_stderr_filename )
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
65 while True:
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
66 chunk = stderr.read( CHUNK_SIZE )
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
67 if chunk:
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
68 stderr_target.write( chunk )
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
69 else:
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
70 break
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
71 stderr.close()
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
72
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
73 cleanup_before_exit( tmp_dir )
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
74
68ba55e96489 Uploaded tool tarball.
devteam
parents:
diff changeset
75 if __name__=="__main__": __main__()