Mercurial > repos > devteam > sam_pileup
annotate sam_pileup.py @ 3:890d97772e2a draft
Uploaded
author | devteam |
---|---|
date | Thu, 09 Jan 2014 14:28:39 -0500 |
parents | e7d863c5c5d6 |
children |
rev | line source |
---|---|
0 | 1 #!/usr/bin/env python |
2 | |
3 """ | |
4 Creates a pileup file from a bam file and a reference. | |
5 | |
6 usage: %prog [options] | |
7 -p, --input1=p: bam file | |
8 -o, --output1=o: Output pileup | |
9 -R, --ref=R: Reference file type | |
10 -n, --ownFile=n: User-supplied fasta reference file | |
11 -b, --bamIndex=b: BAM index file | |
1
e7d863c5c5d6
Update sam_pileup to use the fasta_indexes data table.
Dave Bouvier <dave@bx.psu.edu>
parents:
0
diff
changeset
|
12 -g, --index=g: Path of the indexed reference genome |
0 | 13 -s, --lastCol=s: Print the mapping quality as the last column |
14 -i, --indels=i: Only output lines containing indels | |
15 -M, --mapCap=M: Cap mapping quality | |
16 -c, --consensus=c: Call the consensus sequence using MAQ consensus model | |
17 -T, --theta=T: Theta parameter (error dependency coefficient) | |
18 -N, --hapNum=N: Number of haplotypes in sample | |
19 -r, --fraction=r: Expected fraction of differences between a pair of haplotypes | |
20 -I, --phredProb=I: Phred probability of an indel in sequencing/prep | |
21 | |
22 """ | |
23 | |
24 import os, shutil, subprocess, sys, tempfile | |
25 from galaxy import eggs | |
26 import pkg_resources; pkg_resources.require( "bx-python" ) | |
27 from bx.cookbook import doc_optparse | |
28 | |
def stop_err(msg):
    """Write *msg* to stderr and terminate the process with a failure status.

    Args:
        msg: Text to report; a trailing newline is appended.

    Raises:
        SystemExit: Always, with exit code 1 so that callers inspecting the
            process status (and not only stderr) see the failure.
    """
    sys.stderr.write('%s\n' % msg)
    # A bare sys.exit() would exit with status 0, i.e. report success.
    sys.exit(1)
32 | |
def _samtools_version():
    """Return the stripped 'version' line printed by a bare ``samtools`` call.

    Returns None when samtools cannot be started, its output cannot be
    decoded, or no line containing "version" (case-insensitive) is found.
    """
    try:
        proc = subprocess.Popen(
            ['samtools'],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            universal_newlines=True
        )
        # communicate() drains the pipe, so no temporary file is needed.
        output = proc.communicate()[0]
    except (OSError, ValueError):
        return None
    for line in output.splitlines():
        if 'version' in line.lower():
            return line.strip()
    return None


def _run_samtools(cmd, cwd, stdout=None):
    """Run the argument list *cmd* in *cwd* and return (returncode, stderr).

    stderr is captured through a pipe and read to completion with
    communicate(), which copes with arbitrarily large output without
    deadlocking. *stdout* is passed straight to Popen (None inherits).
    """
    proc = subprocess.Popen(
        cmd,
        cwd=cwd,
        stdout=stdout,
        stderr=subprocess.PIPE,
        universal_newlines=True
    )
    stderr = proc.communicate()[1]
    return proc.returncode, stderr


def __main__():
    """Create a pileup file from a BAM file and a reference using samtools.

    Options are parsed from the module docstring via doc_optparse. The BAM
    file and its index are symlinked into a private temporary directory,
    the reference is either used as-is (``--ref=indexed``) or indexed with
    ``samtools faidx`` (``--ref=history``), and ``samtools pileup`` writes
    its result to ``options.output1``.

    Any failure while preparing or running samtools is reported through
    stop_err(); the temporary directory is always removed.
    """
    # Parse Command Line
    options, _ = doc_optparse.parse(__doc__)

    # output version # of tool
    version = _samtools_version()
    if version:
        sys.stdout.write('Samtools %s\n' % version)
    else:
        sys.stdout.write('Could not determine Samtools version\n')

    tmp_dir = tempfile.mkdtemp()
    try:
        # link bam and bam index to working directory
        # (can't move because need to leave original)
        bam_link = os.path.join(tmp_dir, 'input.bam')
        os.symlink(options.input1, bam_link)
        os.symlink(options.bamIndex, bam_link + '.bai')

        # get parameters for pileup command; an argument list is used
        # instead of a shell string so option values are never interpreted
        # by a shell.
        cmd = ['samtools', 'pileup']
        if options.lastCol == 'yes':
            cmd.append('-s')
        if options.indels == 'yes':
            cmd.append('-i')
        cmd.extend(['-M', str(options.mapCap)])
        if options.consensus == 'yes':
            cmd.extend([
                '-c',
                '-T', str(options.theta),
                '-N', str(options.hapNum),
                '-r', str(options.fraction),
                '-I', str(options.phredProb),
            ])

        # index reference if necessary
        if options.ref == 'indexed':
            if not os.path.exists("%s.fai" % options.index):
                raise Exception("Indexed genome %s not present, request it by reporting this error." % options.index)
            reference = options.index
        elif options.ref == 'history':
            reference = os.path.join(tmp_dir, 'reference.fa')
            os.symlink(options.ownFile, reference)
            returncode, stderr = _run_samtools(['samtools', 'faidx', reference], tmp_dir)
            # did index succeed?
            if returncode != 0:
                raise Exception('Error creating index file\n' + stderr)
        else:
            # Previously the unformatted command template was executed.
            raise Exception('Unknown reference file type: %s' % options.ref)
        cmd.extend(['-f', reference, bam_link])

        # perform pileup command, sending its stdout to the output file
        output = open(options.output1, 'wb')
        try:
            returncode, stderr = _run_samtools(cmd, tmp_dir, stdout=output)
        finally:
            output.close()
        # did it succeed?
        if returncode != 0:
            raise Exception(stderr)
    except Exception as e:
        stop_err('Error running Samtools pileup tool\n' + str(e))
    finally:
        # clean up temp files
        if os.path.exists(tmp_dir):
            shutil.rmtree(tmp_dir)

    # check that there are results in the output file
    if os.path.getsize(options.output1) > 0:
        sys.stdout.write('Converted BAM to pileup')
    else:
        stop_err('The output file is empty. Your input file may have had no matches, or there may be an error with your input file or settings.')
145 | |
# Run the tool only when executed as a script, not when imported.
if __name__ == "__main__":
    __main__()