0
|
1 #!/usr/bin/env python
|
|
2
|
|
3 """
|
|
4 Creates a pileup file from a bam file and a reference.
|
|
5
|
|
6 usage: %prog [options]
|
|
7 -p, --input1=p: bam file
|
|
8 -o, --output1=o: Output pileup
|
|
9 -R, --ref=R: Reference file type
|
|
10 -n, --ownFile=n: User-supplied fasta reference file
|
|
11 -d, --dbkey=d: dbkey of user-supplied file
|
|
12 -x, --indexDir=x: Index directory
|
|
13 -b, --bamIndex=b: BAM index file
|
|
14 -s, --lastCol=s: Print the mapping quality as the last column
|
|
15 -i, --indels=i: Only output lines containing indels
|
|
16 -M, --mapCap=M: Cap mapping quality
|
|
17 -c, --consensus=c: Call the consensus sequence using MAQ consensus model
|
|
18 -T, --theta=T: Theta parameter (error dependency coefficient)
|
|
19 -N, --hapNum=N: Number of haplotypes in sample
|
|
20 -r, --fraction=r: Expected fraction of differences between a pair of haplotypes
|
|
21 -I, --phredProb=I: Phred probability of an indel in sequencing/prep
|
|
22
|
|
23 """
|
|
24
|
|
25 import os, shutil, subprocess, sys, tempfile
|
|
26 from galaxy import eggs
|
|
27 import pkg_resources; pkg_resources.require( "bx-python" )
|
|
28 from bx.cookbook import doc_optparse
|
|
29
|
|
def stop_err( msg ):
    """
    Write `msg` (followed by a newline) to stderr and terminate the process.

    The process exits with status 1 so that callers inspecting the exit code
    see the failure; a bare sys.exit() would report success (status 0) even
    though an error message was just printed.

    Raises SystemExit (always).
    """
    sys.stderr.write( '%s\n' % msg )
    sys.exit( 1 )
|
|
33
|
|
def check_seq_file( dbkey, GALAXY_DATA_INDEX_DIR ):
    """
    Look up the reference sequence path registered for `dbkey`.

    Reads '<GALAXY_DATA_INDEX_DIR>/sam_fa_indices.loc' and considers only
    tab-separated lines that start with 'index' and have at least three
    fields. The third field (stripped) of the first line whose second field
    equals `dbkey` is returned.

    Returns an empty string when no line matches.
    Raises IOError/OSError if the .loc file cannot be opened.
    """
    seqFile = '%s/sam_fa_indices.loc' % GALAXY_DATA_INDEX_DIR
    seqPath = ''
    # try/finally (rather than 'with') keeps the handle closed on every path
    # while staying compatible with the old interpreters this script targets.
    handle = open( seqFile )
    try:
        for line in handle:
            line = line.rstrip( '\r\n' )
            # Lines starting with 'index' can never be blank or '#' comments,
            # so this single test covers all three original conditions.
            if not line.startswith( 'index' ):
                continue
            fields = line.split( '\t' )
            if len( fields ) < 3:
                continue
            if fields[1] == dbkey:
                seqPath = fields[2].strip()
                break
    finally:
        handle.close()
    return seqPath
|
|
47
|
|
def _read_text_file( path, buffsize=1048576 ):
    """
    Return the whole contents of `path`, read in chunks of `buffsize`.

    Reading stops at the first empty chunk. (The previous inline loop tested
    the accumulated length instead, which never terminated when the file size
    was an exact non-zero multiple of `buffsize`.)
    """
    chunks = []
    handle = open( path, 'r' )
    try:
        try:
            while True:
                chunk = handle.read( buffsize )
                if not chunk:
                    break
                chunks.append( chunk )
        except OverflowError:
            # best effort: keep whatever was read before the overflow
            pass
    finally:
        handle.close()
    return ''.join( chunks )

def _run_command( args, work_dir, stdout_path=None ):
    """
    Run the argument list `args` (no shell) with `work_dir` as its cwd.

    stderr is captured through a temporary file inside `work_dir` so that a
    very large error stream cannot block the child. When `stdout_path` is
    given, stdout is written to that file; otherwise stdout is inherited.

    Returns a ( returncode, stderr_text ) tuple.
    """
    # mkstemp hands back an already-open, uniquely named file, avoiding the
    # create/delete/re-create dance of NamedTemporaryFile().name.
    err_fd, err_path = tempfile.mkstemp( dir=work_dir )
    err_handle = os.fdopen( err_fd, 'wb' )
    out_handle = None
    try:
        if stdout_path is not None:
            out_handle = open( stdout_path, 'wb' )
        proc = subprocess.Popen( args=args, cwd=work_dir, stdout=out_handle, stderr=err_handle )
        returncode = proc.wait()
    finally:
        err_handle.close()
        if out_handle is not None:
            out_handle.close()
    return returncode, _read_text_file( err_path )

def _samtools_version():
    """
    Return the first line of samtools' no-argument output that mentions
    'version' (stripped), or None when no such line is printed.

    Output is collected through a pipe, so no temporary file is left behind.
    """
    proc = subprocess.Popen( args=[ 'samtools' ], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True )
    output = proc.communicate()[0]
    for line in output.splitlines():
        if line.lower().find( 'version' ) >= 0:
            return line.strip()
    return None

def __main__():
    """
    Entry point: create a pileup file from a BAM file and a reference.

    Parses the options described in the module docstring, reports the
    samtools version on stdout, links the BAM file and its index into a
    private temporary directory, indexes a user-supplied reference when
    --ref is 'history', and runs 'samtools pileup' with its stdout written to
    the --output1 path. Any failure is reported through stop_err(); the
    temporary directory is always removed.
    """
    #Parse Command Line
    options, args = doc_optparse.parse( __doc__ )
    seqPath = check_seq_file( options.dbkey, options.indexDir )
    # output version # of tool (best effort: a failure here must not abort the run)
    try:
        version = _samtools_version()
    except Exception:
        version = None
    if version:
        sys.stdout.write( 'Samtools %s\n' % version )
    else:
        sys.stdout.write( 'Could not determine Samtools version\n' )
    #get parameters for pileup command; an argument list (instead of an
    #interpolated shell string) keeps option values and paths from being
    #interpreted by a shell
    pileup_args = [ 'samtools', 'pileup' ]
    if options.lastCol == 'yes':
        pileup_args.append( '-s' )
    if options.indels == 'yes':
        pileup_args.append( '-i' )
    pileup_args.extend( [ '-M', str( options.mapCap ) ] )
    if options.consensus == 'yes':
        pileup_args.extend( [ '-c',
                              '-T', str( options.theta ),
                              '-N', str( options.hapNum ),
                              '-r', str( options.fraction ),
                              '-I', str( options.phredProb ) ] )
    tmpDir = tempfile.mkdtemp()
    try:
        # have to nest try-except in try-finally to handle 2.4
        try:
            #link bam and bam index to working directory (can't move because need to leave original)
            #links are made inside the try block so tmpDir is removed even if linking fails
            bam_link = os.path.join( tmpDir, 'input.bam' )
            os.symlink( os.path.abspath( options.input1 ), bam_link )
            os.symlink( os.path.abspath( options.bamIndex ), '%s.bai' % bam_link )
            #index reference if necessary
            if options.ref == 'indexed':
                if not os.path.exists( "%s.fai" % seqPath ):
                    raise Exception( "No sequences are available for '%s', request them by reporting this error." % options.dbkey )
                ref_path = seqPath
            elif options.ref == 'history':
                ref_path = os.path.join( tmpDir, 'reference.fa' )
                os.symlink( os.path.abspath( options.ownFile ), ref_path )
                returncode, stderr = _run_command( [ 'samtools', 'faidx', ref_path ], tmpDir )
                #did index succeed?
                if returncode != 0:
                    raise Exception( 'Error creating index file\n' + stderr )
            else:
                # previously fell through and executed the unformatted command template
                raise Exception( "Unknown reference type '%s' (expected 'indexed' or 'history')." % options.ref )
            #perform pileup command
            pileup_args.extend( [ '-f', ref_path, bam_link ] )
            returncode, stderr = _run_command( pileup_args, tmpDir, stdout_path=options.output1 )
            #did it succeed?
            if returncode != 0:
                raise Exception( stderr )
        except Exception:
            # sys.exc_info() instead of 'except Exception, e' / 'as e' so the
            # same source parses on both old and new interpreters
            e = sys.exc_info()[1]
            stop_err( 'Error running Samtools pileup tool\n' + str( e ) )
    finally:
        #clean up temp files
        if os.path.exists( tmpDir ):
            shutil.rmtree( tmpDir )
    # check that there are results in the output file
    if os.path.getsize( options.output1 ) > 0:
        sys.stdout.write( 'Converted BAM to pileup' )
    else:
        stop_err( 'The output file is empty. Your input file may have had no matches, or there may be an error with your input file or settings.' )
|
|
162
|
|
# Run the tool only when this file is executed as a script, not on import.
if __name__ == "__main__":
    __main__()
|