annotate dwgsim_wrapper.py @ 0:1f7032731f66

Uploaded
author nilshomer
date Sun, 14 Aug 2011 20:07:45 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
1 #!/usr/bin/env python
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
2
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
3 """
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
4 Runs DWGSIM
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
5
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
6 usage: dwgsim_wrapper.py [options]
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
7 -e,--errorOne=e: base/color error rate of the first read [0.020]
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
8 -E,--errorTwo=E: base/color error rate of the second read [0.020]
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
9 -d,--innertDist=d: inner distance between the two ends [500]
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
10 -s,--stdev=s: standard deviation [50]
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
11 -N,--numReads=N: number of read pairs [1000000]
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
12 -1,--lengthOne=1: length of the first read [70]
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
13 -2,--lengthTwo=2: length of the second read [70]
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
14 -r,--mutRate=r: rate of mutations [0.0010]
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
15 -R,--fracIndels=R: fraction of indels [0.10]
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
16 -X,--indelExt=X: probability an indel is extended [0.30]
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
17 -y,--randProb=y: probability of a random DNA read [0.10]
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
18 -n,--maxN=n: maximum number of Ns allowed in a given read [0]
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
19 -c,--colorSpace=c: generate reads in color space (SOLiD reads)
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
20 -S,--strand=S: strand 0: default, 1: same strand, 2: opposite strand
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
21 -H,--haploid=H: haploid mode
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
22 -f,--fasta=f: the reference genome FASTA
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
23 -3,--outputBFAST=3: the BFAST output FASTQ
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
24 -4,--outputBWA1=4: the first BWA output FASTQ
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
25 -5,--outputBWA2=5: the second BWA output FASTQ
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
26 -6,--outputMutations=6: the output mutations TXT
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
27 """
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
28
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
29 import optparse, os, shutil, subprocess, sys, tempfile
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
30
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
31 def stop_err( msg ):
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
32 sys.stderr.write( '%s\n' % msg )
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
33 sys.exit()
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
34
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
35 def run_process ( cmd, name, tmp_dir, buffsize ):
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
36 try:
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
37 tmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
38 tmp_stderr = open( tmp, 'wb' )
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
39 proc = subprocess.Popen( args=cmd, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() )
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
40 returncode = proc.wait()
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
41 tmp_stderr.close()
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
42 # get stderr, allowing for case where it's very large
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
43 tmp_stderr = open( tmp, 'rb' )
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
44 stderr = ''
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
45 try:
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
46 while True:
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
47 stderr += tmp_stderr.read( buffsize )
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
48 if not stderr or len( stderr ) % buffsize != 0:
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
49 break
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
50 except OverflowError:
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
51 pass
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
52 tmp_stderr.close()
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
53 if returncode != 0:
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
54 raise Exception, stderr
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
55 except Exception, e:
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
56 raise Exception, 'Error in \'' + name + '\'. \n' + str( e )
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
57
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
58 def check_output ( output, canBeEmpty ):
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
59 if 0 < os.path.getsize( output ):
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
60 return True
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
61 elif False == canBeEmpty:
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
62 raise Exception, 'The output file is empty:' + output
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
63
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
64 def __main__():
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
65 #Parse Command Line
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
66 parser = optparse.OptionParser()
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
67 parser.add_option( '-e', '--errorOne', dest='errorOne', type='float', help='base/color error rate of the first read' )
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
68 parser.add_option( '-E', '--errorTwo', dest='errorTwo', type='float', help='base/color error rate of the second read' )
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
69 parser.add_option( '-d', '--innertDist', dest='innertDist', type='int', help='inner distance between the two ends' )
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
70 parser.add_option( '-s', '--stdev', dest='stdev', type='float', help='standard deviation' )
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
71 parser.add_option( '-N', '--numReads', dest='numReads', type='int', help='number of read pairs' )
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
72 parser.add_option( '-1', '--lengthOne', dest='lengthOne', type='int', help='length of the first read' )
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
73 parser.add_option( '-2', '--lengthTwo', dest='lengthTwo', type='int', help='length of the second read' )
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
74 parser.add_option( '-r', '--mutRate', dest='mutRate', type='float', help='rate of mutations' )
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
75 parser.add_option( '-R', '--fracIndels', dest='fracIndels', type='float', help='fraction of indels' )
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
76 parser.add_option( '-X', '--indelExt', dest='indelExt', type='float', help='probability an indel is extended' )
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
77 parser.add_option( '-y', '--randProb', dest='randProb', type='float', help='probability of a random DNA read' )
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
78 parser.add_option( '-n', '--maxN', dest='maxN', type='int', help='maximum number of Ns allowed in a given read' )
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
79 parser.add_option( '-c', '--colorSpace', action='store_true', dest='colorSpace', default=False, help='generate reads in color space (SOLiD reads)' )
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
80 parser.add_option( '-S', '--strand', dest='strand', type='choice', default='0', choices=('0', '1', '2'), help='strand 0: default, 1: same strand, 2: opposite strand' )
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
81 parser.add_option( '-H', '--haploid', action='store_true', dest='haploid', default=False, help='haploid mode' )
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
82 parser.add_option( '-f', '--fasta', dest='fasta', help='The reference genome fasta' )
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
83 parser.add_option( '-3', '--outputBFAST', dest='outputBFAST', help='the BFAST output FASTQ' )
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
84 parser.add_option( '-4', '--outputBWA1', dest='outputBWA1', help='the first BWA output FASTQ' )
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
85 parser.add_option( '-5', '--outputBWA2', dest='outputBWA2', help='the second BWA output FASTQ' )
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
86 parser.add_option( '-6', '--outputMutations', dest='outputMutations', help='the output mutations TXT' )
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
87
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
88 (options, args) = parser.parse_args()
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
89
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
90 # output version # of tool
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
91 try:
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
92 tmp = tempfile.NamedTemporaryFile().name
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
93 tmp_stdout = open( tmp, 'wb' )
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
94 proc = subprocess.Popen( args='dwgsim 2>&1', shell=True, stdout=tmp_stdout )
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
95 tmp_stdout.close()
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
96 returncode = proc.wait()
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
97 stdout = None
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
98 for line in open( tmp_stdout.name, 'rb' ):
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
99 if line.lower().find( 'version' ) >= 0:
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
100 stdout = line.strip()
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
101 break
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
102 if stdout:
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
103 sys.stdout.write( '%s\n' % stdout )
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
104 else:
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
105 raise Exception
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
106 except:
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
107 sys.stdout.write( 'Could not determine DWGSIM version\n' )
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
108
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
109 buffsize = 1048576
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
110
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
111 # make temp directory for dwgsim, requires trailing slash
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
112 tmp_dir = '%s/' % tempfile.mkdtemp()
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
113
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
114 #'generic' options used in all dwgsim commands here
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
115
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
116 try:
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
117 reference_filepath = tempfile.NamedTemporaryFile( dir=tmp_dir, suffix='.fa' ).name
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
118 os.symlink( options.fasta, reference_filepath )
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
119 assert reference_filepath and os.path.exists( reference_filepath ), 'A valid genome reference was not provided.'
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
120 tmp_dir = '%s/' % tempfile.mkdtemp()
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
121 dwgsim_output_prefix = "dwgsim_output_prefix"
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
122 dwgsim_cmd = 'dwgsim -e %s -E %s -d %s -s %s -N %s -1 %s -2 %s -r %s -R %s -X %s -y %s -n %s' % \
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
123 (options.errorOne, \
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
124 options.errorTwo, \
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
125 options.innertDist, \
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
126 options.stdev, \
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
127 options.numReads, \
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
128 options.lengthOne, \
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
129 options.lengthTwo, \
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
130 options.mutRate, \
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
131 options.fracIndels, \
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
132 options.indelExt, \
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
133 options.randProb, \
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
134 options.maxN)
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
135 if options.colorSpace:
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
136 dwgsim_cmd = dwgsim_cmd + ' -c'
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
137 if options.haploid:
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
138 dwgsim_cmd = dwgsim_cmd + ' -H'
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
139 dwgsim_cmd = dwgsim_cmd + ' ' + options.fasta
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
140 dwgsim_cmd = dwgsim_cmd + ' ' + tmp_dir + '/' + dwgsim_output_prefix
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
141
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
142 # need to nest try-except in try-finally to handle 2.4
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
143 try:
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
144 # dwgsim
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
145 run_process ( dwgsim_cmd, 'dwgsim', tmp_dir, buffsize )
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
146 # Move files
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
147 cmd = 'mv ' + tmp_dir + '/' + dwgsim_output_prefix + '.mutations.txt' + ' ' + options.outputMutations
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
148 run_process ( cmd, 'mv #1', tmp_dir, buffsize )
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
149 cmd = 'mv ' + tmp_dir + '/' + dwgsim_output_prefix + '.bfast.fastq' + ' ' + options.outputBFAST
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
150 run_process ( cmd, 'mv #2', tmp_dir, buffsize )
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
151 cmd = 'mv ' + tmp_dir + '/' + dwgsim_output_prefix + '.bwa.read1.fastq' + ' ' + options.outputBWA1
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
152 run_process ( cmd, 'mv #3', tmp_dir, buffsize )
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
153 cmd = 'mv ' + tmp_dir + '/' + dwgsim_output_prefix + '.bwa.read2.fastq' + ' ' + options.outputBWA2
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
154 run_process ( cmd, 'mv #4', tmp_dir, buffsize )
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
155 # check that there are results in the output file
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
156 check_output ( options.outputMutations, True )
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
157 check_output ( options.outputBFAST, False )
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
158 check_output ( options.outputBWA1, False )
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
159 check_output ( options.outputBWA2, False )
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
160 sys.stdout.write( 'DWGSIM successful' )
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
161 except Exception, e:
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
162 stop_err( 'DWGSIM failed.\n' + str( e ) )
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
163 finally:
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
164 # clean up temp dir
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
165 if os.path.exists( tmp_dir ):
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
166 shutil.rmtree( tmp_dir )
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
167
1f7032731f66 Uploaded
nilshomer
parents:
diff changeset
168 if __name__=="__main__": __main__()