Mercurial > repos > devteam > samtools_slice_bam
comparison samtools_slice_bam.py @ 0:68ba55e96489 draft
Uploaded tool tarball.
author | devteam |
---|---|
date | Mon, 26 Aug 2013 14:25:12 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:68ba55e96489 |
---|---|
1 #!/usr/bin/env python | |
2 #Dan Blankenberg | |
3 | |
4 """ | |
5 A wrapper script for slicing a BAM file by provided BED file using SAMTools. | |
6 %prog input_filename.sam output_filename.bam | |
7 """ | |
8 #TODO: Confirm that the sort is necessary e.g. if input regions are out of order | |
9 | |
10 | |
11 import sys, optparse, os, tempfile, subprocess, shutil | |
12 | |
13 CHUNK_SIZE = 2**20 #1mb | |
14 | |
15 def cleanup_before_exit( tmp_dir ): | |
16 if tmp_dir and os.path.exists( tmp_dir ): | |
17 shutil.rmtree( tmp_dir ) | |
18 | |
19 def __main__(): | |
20 #Parse Command Line | |
21 parser = optparse.OptionParser() | |
22 (options, args) = parser.parse_args() | |
23 | |
24 assert len( args ) == 4, "Invalid command line: samtools_slice_bam.py input.bam input.bam.bai input.interval output.bam" | |
25 input_bam_filename, input_index_filename, input_interval_filename, output_bam_filename = args | |
26 | |
27 tmp_dir = tempfile.mkdtemp( prefix='tmp-samtools_slice_bam-' ) | |
28 | |
29 tmp_input_bam_filename = os.path.join( tmp_dir, 'input_bam.bam' ) | |
30 os.symlink( input_bam_filename, tmp_input_bam_filename ) | |
31 os.symlink( input_index_filename, "%s.bai" % tmp_input_bam_filename ) | |
32 | |
33 #Slice BAM | |
34 unsorted_bam_filename = os.path.join( tmp_dir, 'unsorted.bam' ) | |
35 unsorted_stderr_filename = os.path.join( tmp_dir, 'unsorted.stderr' ) | |
36 cmd = 'samtools view -b -L "%s" "%s" > "%s"' % ( input_interval_filename, tmp_input_bam_filename, unsorted_bam_filename ) | |
37 proc = subprocess.Popen( args=cmd, stderr=open( unsorted_stderr_filename, 'wb' ), shell=True, cwd=tmp_dir ) | |
38 return_code = proc.wait() | |
39 if return_code: | |
40 stderr_target = sys.stderr | |
41 else: | |
42 stderr_target = sys.stdout | |
43 stderr = open( unsorted_stderr_filename ) | |
44 while True: | |
45 chunk = stderr.read( CHUNK_SIZE ) | |
46 if chunk: | |
47 stderr_target.write( chunk ) | |
48 else: | |
49 break | |
50 stderr.close() | |
51 | |
52 #sort sam, so indexing will not fail | |
53 #TODO: confirm if sorting is necessary (is original BAM order maintained, or does the output follow the order of input intervals?) | |
54 sorted_stderr_filename = os.path.join( tmp_dir, 'sorted.stderr' ) | |
55 sorting_prefix = os.path.join( tmp_dir, 'sorted_bam' ) | |
56 cmd = 'samtools sort -o "%s" "%s" > "%s"' % ( unsorted_bam_filename, sorting_prefix, output_bam_filename ) | |
57 proc = subprocess.Popen( args=cmd, stderr=open( sorted_stderr_filename, 'wb' ), shell=True, cwd=tmp_dir ) | |
58 return_code = proc.wait() | |
59 | |
60 if return_code: | |
61 stderr_target = sys.stderr | |
62 else: | |
63 stderr_target = sys.stdout | |
64 stderr = open( sorted_stderr_filename ) | |
65 while True: | |
66 chunk = stderr.read( CHUNK_SIZE ) | |
67 if chunk: | |
68 stderr_target.write( chunk ) | |
69 else: | |
70 break | |
71 stderr.close() | |
72 | |
73 cleanup_before_exit( tmp_dir ) | |
74 | |
75 if __name__=="__main__": __main__() |