comparison shear_wrapper.py @ 0:65255beda972

Uploaded
author jjohnson
date Fri, 08 Nov 2013 16:23:10 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:65255beda972
1 #!/usr/bin/env python
2
3 import optparse, os, shutil, subprocess, sys, tempfile, os.path
4
5 def stop_err( msg, code ):
6 sys.stderr.write( '%s\n' % msg )
7 sys.exit(code)
8
9 def __main__():
10 #Parse Command Line
11 parser = optparse.OptionParser()
12 parser.add_option( '-j', '--jar_path', dest='jar_path', help='Path to SHEAR.jar' )
13 parser.add_option( '-C', '--command', dest='command', help='SHEAR command to run' )
14 parser.add_option( '-p', '--prefix', dest='prefix', help='Prefix for all generated files. Required.' )
15 parser.add_option( '-b', '--bam', dest='bam', help='BAM alignment file containing the input sequences to the assembly.' )
16 parser.add_option( '-B', '--bai', dest='bai', help='BAM index file (.bai)' )
17 parser.add_option( '-f', '--fasta_reference', dest='fasta', help='The reference sequence fasta file' )
18 parser.add_option( '-F', '--fasta_index', dest='fai', help='The .fai index file for the reference sequence fasta' )
19 parser.add_option( '-t', '--twobit', dest='twobit', help='The .2bit encoding of the reference sequence fasta generated by faToTwoBit' )
20 parser.add_option( '-i', '--bwa_index', dest='bwa_index', help='The bwa index of the reference sequence' )
21 parser.add_option( '-R', '--report', dest='report', help='The SHEAR output report' )
22 parser.add_option( '-s', '--sdi', dest='sdi', help='The SHEAR sdi input from the SHEAR sv command' )
23 parser.add_option( '-o', '--output', dest='output', help='The SHEAR output assembly fasta file' )
24 parser.add_option( '-D', '--svidx_dir', dest='svidx_dir', help='The SHEAR output assembly fasta file' )
25 parser.add_option( '-S', '--sv-only', dest='sv_only', action="store_true", help='SV Only prediction mode.' )
26 parser.add_option( '-r', '--region', dest='region', help='Region of the input alignment to analyze' )
27 (options, args) = parser.parse_args()
28
29 def make_ref(src, dest, copy=False):
30 if copy:
31 shutil.copy( src, dest )
32 else:
33 os.symlink( src, dest )
34
35 # output version # of tool
36 #try:
37 #except:
38 # sys.stdout.write( 'Could not determine BWA version\n' )
39 if not options.jar_path:
40 stop_err('path to SHEAR.jar is required',1)
41 if options.command:
42 command = options.command
43 else:
44 stop_err('SHEAR command is required',1)
45 args = [ 'java','-jar' ]
46 args.append( options.jar_path )
47 args.append( command )
48 # Check required params for command
49 buffsize = 1048576
50 copy_ref = True
51 prefix = options.prefix if options.prefix and len(options.prefix) > 0 else 'ref'
52 if command in ['sv']:
53 args.append( '-p' )
54 args.append( prefix )
55 if options.sv_only:
56 args.append('--sv-only')
57 if options.region:
58 args.append('-r')
59 args.append(options.region)
60 copy_ref = True # GATK has issue with symlinks for .fa and .fai
61 if options.svidx_dir and command in ['sv']:
62 if not os.path.isdir(options.svidx_dir):
63 os.makedirs(options.svidx_dir)
64 ref_prefix = os.path.join(options.svidx_dir,prefix)
65 copy_ref = True
66 else:
67 ref_prefix = os.path.join(os.getcwd(),prefix)
68
69 if command in ['sv','assemble']:
70 ref_fasta = ref_prefix + '.fa'
71 ref_index = ref_fasta + '.fai'
72 if options.fasta:
73 make_ref( options.fasta, ref_fasta, copy=copy_ref )
74 else:
75 stop_err('Reference fasta file is required',3)
76 # if reference fasta index is not supllied, generate it
77 if options.fai:
78 make_ref( options.fai, ref_index, copy=copy_ref )
79 elif os.path.exists(options.fasta + '.fai'):
80 make_ref( options.fasta + '.fai', ref_index, copy=copy_ref )
81 else:
82 # generate fasta fai index
83 cmd1 = 'samtools faidx %s' % (ref_fasta )
84 try:
85 tmp_name = tempfile.NamedTemporaryFile(prefix='fai_', suffix='.err').name
86 tmp_stderr = open( tmp_name, 'wb' )
87 proc = subprocess.Popen( args=cmd1, shell=True, stderr=tmp_stderr.fileno() )
88 returncode = proc.wait()
89 tmp_stderr.close()
90 if returncode != 0:
91 stderr = ''
92 # get stderr, allowing for case where it's very large
93 tmp_stderr = open( tmp, 'rb' )
94 try:
95 while True:
96 stderr += tmp_stderr.read( buffsize )
97 if not stderr or len( stderr ) % buffsize != 0:
98 break
99 except OverflowError:
100 pass
101 raise Exception, stderr
102 except Exception, e:
103 stop_err( 'Error indexing reference sequence fasta file. ' + str( e ),5 )
104 args.append( '-f' )
105 args.append( ref_fasta )
106
107 if command in ['sv']:
108 if not options.bam:
109 stop_err('BAM alignment file is required',2)
110
111 # if reference 2bit is not supplied, generate it
112 twobit = ref_prefix + '.2bit'
113 if options.twobit:
114 make_ref( options.twobit, twobit, copy=copy_ref )
115 else:
116 # generate 2bit index
117 cmd1 = 'faToTwoBit %s %s' % (ref_fasta, twobit )
118 try:
119 tmp_name = tempfile.NamedTemporaryFile(prefix='twobit_', suffix='.err').name
120 tmp_stderr = open( tmp_name, 'wb' )
121 proc = subprocess.Popen( args=cmd1, shell=True, stderr=tmp_stderr.fileno() )
122 returncode = proc.wait()
123 tmp_stderr.close()
124 if returncode != 0:
125 stderr = ''
126 # get stderr, allowing for case where it's very large
127 tmp_stderr = open( tmp, 'rb' )
128 try:
129 while True:
130 stderr += tmp_stderr.read( buffsize )
131 if not stderr or len( stderr ) % buffsize != 0:
132 break
133 except OverflowError:
134 pass
135 raise Exception, stderr
136 except Exception, e:
137 stop_err( 'Error indexing reference sequence fasta file. ' + str( e ),6 )
138 args.append( '-t' )
139 args.append( twobit )
140
141 # if bwa index is not supplied, generate it
142 bwa_index = ref_fasta
143 if options.bwa_index:
144 if copy_ref:
145 shutil.copytree(os.path.dirname(bwa_index),options.svidx_dir)
146 bwa_index = options.bwa_index
147 else:
148 ONE_GB = 2**30
149 if os.stat( ref_fasta ).st_size <= ONE_GB: #use 1 GB as cut off for memory vs. max of 2gb database size; this is somewhat arbitrary
150 index_algorithm = 'is'
151 else:
152 index_algorithm = 'bwtsw'
153 cmd1 = 'bwa index -a %s %s' % ( index_algorithm, ref_fasta )
154 try:
155 tmp_name = tempfile.NamedTemporaryFile(prefix='bwa_index_', suffix='.err').name
156 tmp_stderr = open( tmp_name, 'wb' )
157 proc = subprocess.Popen( args=cmd1, shell=True, stderr=tmp_stderr.fileno() )
158 returncode = proc.wait()
159 tmp_stderr.close()
160 if returncode != 0:
161 stderr = ''
162 # get stderr, allowing for case where it's very large
163 tmp_stderr = open( tmp, 'rb' )
164 try:
165 while True:
166 stderr += tmp_stderr.read( buffsize )
167 if not stderr or len( stderr ) % buffsize != 0:
168 break
169 except OverflowError:
170 pass
171 raise Exception, stderr
172 bwa_index = ref_fasta
173 except Exception, e:
174 # clean up temp dirs
175 stop_err( 'Error creating bwa index. ' + str( e ),7 )
176 args.append( '-i' )
177 args.append( bwa_index )
178
179 bam_file = os.path.join(os.getcwd(),prefix + '.bam')
180 bai_file = bam_file + '.bai'
181 if options.bam:
182 os.symlink( options.bam, bam_file )
183 else:
184 stop_err('BAM alignment file is required',2)
185 # if bam index is not supplied, generate it
186 if options.bai:
187 os.symlink( options.bai, bai_file )
188 elif os.path.exists(options.bam + '.bai'):
189 os.symlink( options.bam + '.bai', bai_file )
190 else:
191 # generate bam index
192 cmd1 = 'samtools index %s %s' % (bam_file, bai_file )
193 try:
194 tmp_name = tempfile.NamedTemporaryFile(prefix='bai_', suffix='.err').name
195 tmp_stderr = open( tmp_name, 'wb' )
196 proc = subprocess.Popen( args=cmd1, shell=True, stderr=tmp_stderr.fileno() )
197 returncode = proc.wait()
198 tmp_stderr.close()
199 if returncode != 0:
200 stderr = ''
201 # get stderr, allowing for case where it's very large
202 tmp_stderr = open( tmp, 'rb' )
203 try:
204 while True:
205 stderr += tmp_stderr.read( buffsize )
206 if not stderr or len( stderr ) % buffsize != 0:
207 break
208 except OverflowError:
209 pass
210 raise Exception, stderr
211 except Exception, e:
212 stop_err( 'Error indexing BAM alignment file. ' + str( e ),4)
213 args.append( '-b' )
214 args.append( bam_file )
215
216 if command in ['assemble']:
217 if not options.sdi or not options.output:
218 stop_err('SHEAR .sdi file and output file are required for assemble',2)
219 args.append( '-s' )
220 args.append( options.sdi )
221 args.append( '-o' )
222 args.append( options.output )
223
224 # Run SHEAR command
225 print >> sys.stdout, "%s" % ' '.join(args)
226 try:
227 proc = subprocess.Popen( args=args, shell=False )
228 returncode = proc.wait()
229 if returncode != 0:
230 stop_err( 'Error running SHEAR ' + command, returncode )
231 except Exception, e:
232 # clean up temp dirs
233 stop_err( 'Error running SHEAR %s %s' % (command,str(e)),9 )
234
235 if __name__=="__main__": __main__()
236