Mercurial > repos > jjohnson > shear
comparison shear_wrapper.py @ 0:65255beda972
Uploaded
author | jjohnson |
---|---|
date | Fri, 08 Nov 2013 16:23:10 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:65255beda972 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 import optparse, os, shutil, subprocess, sys, tempfile, os.path | |
4 | |
5 def stop_err( msg, code ): | |
6 sys.stderr.write( '%s\n' % msg ) | |
7 sys.exit(code) | |
8 | |
9 def __main__(): | |
10 #Parse Command Line | |
11 parser = optparse.OptionParser() | |
12 parser.add_option( '-j', '--jar_path', dest='jar_path', help='Path to SHEAR.jar' ) | |
13 parser.add_option( '-C', '--command', dest='command', help='SHEAR command to run' ) | |
14 parser.add_option( '-p', '--prefix', dest='prefix', help='Prefix for all generated files. Required.' ) | |
15 parser.add_option( '-b', '--bam', dest='bam', help='BAM alignment file containing the input sequences to the assembly.' ) | |
16 parser.add_option( '-B', '--bai', dest='bai', help='BAM index file (.bai)' ) | |
17 parser.add_option( '-f', '--fasta_reference', dest='fasta', help='The reference sequence fasta file' ) | |
18 parser.add_option( '-F', '--fasta_index', dest='fai', help='The .fai index file for the reference sequence fasta' ) | |
19 parser.add_option( '-t', '--twobit', dest='twobit', help='The .2bit encoding of the reference sequence fasta generated by faToTwoBit' ) | |
20 parser.add_option( '-i', '--bwa_index', dest='bwa_index', help='The bwa index of the reference sequence' ) | |
21 parser.add_option( '-R', '--report', dest='report', help='The SHEAR output report' ) | |
22 parser.add_option( '-s', '--sdi', dest='sdi', help='The SHEAR sdi input from the SHEAR sv command' ) | |
23 parser.add_option( '-o', '--output', dest='output', help='The SHEAR output assembly fasta file' ) | |
24 parser.add_option( '-D', '--svidx_dir', dest='svidx_dir', help='The SHEAR output assembly fasta file' ) | |
25 parser.add_option( '-S', '--sv-only', dest='sv_only', action="store_true", help='SV Only prediction mode.' ) | |
26 parser.add_option( '-r', '--region', dest='region', help='Region of the input alignment to analyze' ) | |
27 (options, args) = parser.parse_args() | |
28 | |
29 def make_ref(src, dest, copy=False): | |
30 if copy: | |
31 shutil.copy( src, dest ) | |
32 else: | |
33 os.symlink( src, dest ) | |
34 | |
35 # output version # of tool | |
36 #try: | |
37 #except: | |
38 # sys.stdout.write( 'Could not determine BWA version\n' ) | |
39 if not options.jar_path: | |
40 stop_err('path to SHEAR.jar is required',1) | |
41 if options.command: | |
42 command = options.command | |
43 else: | |
44 stop_err('SHEAR command is required',1) | |
45 args = [ 'java','-jar' ] | |
46 args.append( options.jar_path ) | |
47 args.append( command ) | |
48 # Check required params for command | |
49 buffsize = 1048576 | |
50 copy_ref = True | |
51 prefix = options.prefix if options.prefix and len(options.prefix) > 0 else 'ref' | |
52 if command in ['sv']: | |
53 args.append( '-p' ) | |
54 args.append( prefix ) | |
55 if options.sv_only: | |
56 args.append('--sv-only') | |
57 if options.region: | |
58 args.append('-r') | |
59 args.append(options.region) | |
60 copy_ref = True # GATK has issue with symlinks for .fa and .fai | |
61 if options.svidx_dir and command in ['sv']: | |
62 if not os.path.isdir(options.svidx_dir): | |
63 os.makedirs(options.svidx_dir) | |
64 ref_prefix = os.path.join(options.svidx_dir,prefix) | |
65 copy_ref = True | |
66 else: | |
67 ref_prefix = os.path.join(os.getcwd(),prefix) | |
68 | |
69 if command in ['sv','assemble']: | |
70 ref_fasta = ref_prefix + '.fa' | |
71 ref_index = ref_fasta + '.fai' | |
72 if options.fasta: | |
73 make_ref( options.fasta, ref_fasta, copy=copy_ref ) | |
74 else: | |
75 stop_err('Reference fasta file is required',3) | |
76 # if reference fasta index is not supllied, generate it | |
77 if options.fai: | |
78 make_ref( options.fai, ref_index, copy=copy_ref ) | |
79 elif os.path.exists(options.fasta + '.fai'): | |
80 make_ref( options.fasta + '.fai', ref_index, copy=copy_ref ) | |
81 else: | |
82 # generate fasta fai index | |
83 cmd1 = 'samtools faidx %s' % (ref_fasta ) | |
84 try: | |
85 tmp_name = tempfile.NamedTemporaryFile(prefix='fai_', suffix='.err').name | |
86 tmp_stderr = open( tmp_name, 'wb' ) | |
87 proc = subprocess.Popen( args=cmd1, shell=True, stderr=tmp_stderr.fileno() ) | |
88 returncode = proc.wait() | |
89 tmp_stderr.close() | |
90 if returncode != 0: | |
91 stderr = '' | |
92 # get stderr, allowing for case where it's very large | |
93 tmp_stderr = open( tmp, 'rb' ) | |
94 try: | |
95 while True: | |
96 stderr += tmp_stderr.read( buffsize ) | |
97 if not stderr or len( stderr ) % buffsize != 0: | |
98 break | |
99 except OverflowError: | |
100 pass | |
101 raise Exception, stderr | |
102 except Exception, e: | |
103 stop_err( 'Error indexing reference sequence fasta file. ' + str( e ),5 ) | |
104 args.append( '-f' ) | |
105 args.append( ref_fasta ) | |
106 | |
107 if command in ['sv']: | |
108 if not options.bam: | |
109 stop_err('BAM alignment file is required',2) | |
110 | |
111 # if reference 2bit is not supplied, generate it | |
112 twobit = ref_prefix + '.2bit' | |
113 if options.twobit: | |
114 make_ref( options.twobit, twobit, copy=copy_ref ) | |
115 else: | |
116 # generate 2bit index | |
117 cmd1 = 'faToTwoBit %s %s' % (ref_fasta, twobit ) | |
118 try: | |
119 tmp_name = tempfile.NamedTemporaryFile(prefix='twobit_', suffix='.err').name | |
120 tmp_stderr = open( tmp_name, 'wb' ) | |
121 proc = subprocess.Popen( args=cmd1, shell=True, stderr=tmp_stderr.fileno() ) | |
122 returncode = proc.wait() | |
123 tmp_stderr.close() | |
124 if returncode != 0: | |
125 stderr = '' | |
126 # get stderr, allowing for case where it's very large | |
127 tmp_stderr = open( tmp, 'rb' ) | |
128 try: | |
129 while True: | |
130 stderr += tmp_stderr.read( buffsize ) | |
131 if not stderr or len( stderr ) % buffsize != 0: | |
132 break | |
133 except OverflowError: | |
134 pass | |
135 raise Exception, stderr | |
136 except Exception, e: | |
137 stop_err( 'Error indexing reference sequence fasta file. ' + str( e ),6 ) | |
138 args.append( '-t' ) | |
139 args.append( twobit ) | |
140 | |
141 # if bwa index is not supplied, generate it | |
142 bwa_index = ref_fasta | |
143 if options.bwa_index: | |
144 if copy_ref: | |
145 shutil.copytree(os.path.dirname(bwa_index),options.svidx_dir) | |
146 bwa_index = options.bwa_index | |
147 else: | |
148 ONE_GB = 2**30 | |
149 if os.stat( ref_fasta ).st_size <= ONE_GB: #use 1 GB as cut off for memory vs. max of 2gb database size; this is somewhat arbitrary | |
150 index_algorithm = 'is' | |
151 else: | |
152 index_algorithm = 'bwtsw' | |
153 cmd1 = 'bwa index -a %s %s' % ( index_algorithm, ref_fasta ) | |
154 try: | |
155 tmp_name = tempfile.NamedTemporaryFile(prefix='bwa_index_', suffix='.err').name | |
156 tmp_stderr = open( tmp_name, 'wb' ) | |
157 proc = subprocess.Popen( args=cmd1, shell=True, stderr=tmp_stderr.fileno() ) | |
158 returncode = proc.wait() | |
159 tmp_stderr.close() | |
160 if returncode != 0: | |
161 stderr = '' | |
162 # get stderr, allowing for case where it's very large | |
163 tmp_stderr = open( tmp, 'rb' ) | |
164 try: | |
165 while True: | |
166 stderr += tmp_stderr.read( buffsize ) | |
167 if not stderr or len( stderr ) % buffsize != 0: | |
168 break | |
169 except OverflowError: | |
170 pass | |
171 raise Exception, stderr | |
172 bwa_index = ref_fasta | |
173 except Exception, e: | |
174 # clean up temp dirs | |
175 stop_err( 'Error creating bwa index. ' + str( e ),7 ) | |
176 args.append( '-i' ) | |
177 args.append( bwa_index ) | |
178 | |
179 bam_file = os.path.join(os.getcwd(),prefix + '.bam') | |
180 bai_file = bam_file + '.bai' | |
181 if options.bam: | |
182 os.symlink( options.bam, bam_file ) | |
183 else: | |
184 stop_err('BAM alignment file is required',2) | |
185 # if bam index is not supplied, generate it | |
186 if options.bai: | |
187 os.symlink( options.bai, bai_file ) | |
188 elif os.path.exists(options.bam + '.bai'): | |
189 os.symlink( options.bam + '.bai', bai_file ) | |
190 else: | |
191 # generate bam index | |
192 cmd1 = 'samtools index %s %s' % (bam_file, bai_file ) | |
193 try: | |
194 tmp_name = tempfile.NamedTemporaryFile(prefix='bai_', suffix='.err').name | |
195 tmp_stderr = open( tmp_name, 'wb' ) | |
196 proc = subprocess.Popen( args=cmd1, shell=True, stderr=tmp_stderr.fileno() ) | |
197 returncode = proc.wait() | |
198 tmp_stderr.close() | |
199 if returncode != 0: | |
200 stderr = '' | |
201 # get stderr, allowing for case where it's very large | |
202 tmp_stderr = open( tmp, 'rb' ) | |
203 try: | |
204 while True: | |
205 stderr += tmp_stderr.read( buffsize ) | |
206 if not stderr or len( stderr ) % buffsize != 0: | |
207 break | |
208 except OverflowError: | |
209 pass | |
210 raise Exception, stderr | |
211 except Exception, e: | |
212 stop_err( 'Error indexing BAM alignment file. ' + str( e ),4) | |
213 args.append( '-b' ) | |
214 args.append( bam_file ) | |
215 | |
216 if command in ['assemble']: | |
217 if not options.sdi or not options.output: | |
218 stop_err('SHEAR .sdi file and output file are required for assemble',2) | |
219 args.append( '-s' ) | |
220 args.append( options.sdi ) | |
221 args.append( '-o' ) | |
222 args.append( options.output ) | |
223 | |
224 # Run SHEAR command | |
225 print >> sys.stdout, "%s" % ' '.join(args) | |
226 try: | |
227 proc = subprocess.Popen( args=args, shell=False ) | |
228 returncode = proc.wait() | |
229 if returncode != 0: | |
230 stop_err( 'Error running SHEAR ' + command, returncode ) | |
231 except Exception, e: | |
232 # clean up temp dirs | |
233 stop_err( 'Error running SHEAR %s %s' % (command,str(e)),9 ) | |
234 | |
235 if __name__=="__main__": __main__() | |
236 |