0
|
1 #!/usr/bin/env python
|
|
2
|
|
3 """
|
|
4 Runs DWGSIM_EVAL
|
|
5
|
|
6 usage: dwgsim_eval_wrapper.py [options]
|
|
7 -a,--alignmentScore=a: split alignments by alignment score instead of mapping quality
|
|
8 -b,--bwa=b: alignments are from BWA
|
|
9 -c,--colorSpace=c: color space alignments
|
|
10 -d,--scoreFactor=d: divide quality/alignment score by this factor
|
|
11 -g,--wiggle=g: gap "wiggle"
|
|
12 -n,--numReads=n: number of raw input paired-end reads (otherwise, inferred from all SAM records present).
|
|
13 -q,--minMapq=q: consider only alignments with this mapping quality or greater.
|
|
14 -z,--singleEnd=z: input contains only single end reads
|
|
15 -S,--sam=s: input SAM file
|
|
16 -B,--bam=B: input BAM file
|
|
17 -p,--printIncorrect=p: print incorrect alignments
|
|
18 -s,--numSnps=s: consider only alignments with the number of specified SNPs
|
|
19 -e,--numErrors=e: consider only alignments with the number of specified errors
|
|
20 -i,--indels=i: consider only alignments with indels
|
|
21 -o,--output=u: The file to save the output
|
|
22 """
|
|
23
|
|
24 import optparse, os, shutil, subprocess, sys, tempfile
|
|
25
|
|
26 def stop_err( msg ):
|
|
27 sys.stderr.write( '%s\n' % msg )
|
|
28 sys.exit()
|
|
29
|
|
30 def run_process ( cmd, name, tmp_dir, buffsize ):
|
|
31 try:
|
|
32 tmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name
|
|
33 tmp_stderr = open( tmp, 'wb' )
|
|
34 proc = subprocess.Popen( args=cmd, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() )
|
|
35 returncode = proc.wait()
|
|
36 tmp_stderr.close()
|
|
37 # get stderr, allowing for case where it's very large
|
|
38 tmp_stderr = open( tmp, 'rb' )
|
|
39 stderr = ''
|
|
40 try:
|
|
41 while True:
|
|
42 stderr += tmp_stderr.read( buffsize )
|
|
43 if not stderr or len( stderr ) % buffsize != 0:
|
|
44 break
|
|
45 except OverflowError:
|
|
46 pass
|
|
47 tmp_stderr.close()
|
|
48 if returncode != 0:
|
|
49 raise Exception, stderr
|
|
50 except Exception, e:
|
|
51 raise Exception, 'Error in \'' + name + '\'. \n' + str( e )
|
|
52
|
|
53 def check_output ( output, canBeEmpty ):
|
|
54 if 0 < os.path.getsize( output ):
|
|
55 return True
|
|
56 elif False == canBeEmpty:
|
|
57 raise Exception, 'The output file is empty:' + output
|
|
58
|
|
59 def __main__():
|
|
60 #Parse Command Line
|
|
61 parser = optparse.OptionParser()
|
|
62
|
|
63 parser.add_option( '-a', '--alignmentScore', action='store_true', dest='alignmentScore', default=False, help='split alignments by alignment score instead of mapping quality' )
|
|
64 parser.add_option( '-b', '--bwa', action='store_true', dest='bwa', default=False, help='alignments are from BWA' )
|
|
65 parser.add_option( '-c', '--colorSpace', action='store_true', dest='colorSpace', default=False, help='generate reads in color space (SOLiD reads)' )
|
|
66 parser.add_option( '-d', '--scoreFactor', dest='scoreFactor', type='int', help='divide quality/alignment score by this factor' )
|
|
67 parser.add_option( '-g', '--wiggle', dest='wiggle', type='int', help='gap "wiggle"' )
|
|
68 parser.add_option( '-n', '--numReads', dest='numReads', type='int', help='number of raw input paired-end reads (otherwise, inferred from all SAM records present).' )
|
|
69 parser.add_option( '-q', '--minMapq', dest='minMapq', type='int', help='consider only alignments with this mapping quality or greater.' )
|
|
70 parser.add_option( '-z', '--singleEnd', action='store_true', dest='singleEnd', default=False, help='input contains only single end reads' )
|
|
71 parser.add_option( '-S', '--sam', dest='sam', default=None, help='input SAM' )
|
|
72 parser.add_option( '-B', '--bam', dest='bam', default=None, help='input BAM' )
|
|
73 parser.add_option( '-p', '--printIncorrect', action='store_true', dest='printIncorrect', default=False, help='print incorrect alignments' )
|
|
74 parser.add_option( '-s', '--numSnps', dest='numSnps', type="int", help='consider only alignments with the number of specified SNPs' )
|
|
75 parser.add_option( '-e', '--numErrors', dest='numErrors', type="int", default=False, help='consider only alignments with the number of specified errors' )
|
|
76 parser.add_option( '-i', '--indels', action='store_true', dest='indels', default=False, help='consider only alignments with indels' )
|
|
77 parser.add_option( '-o', '--output', dest='output', help='The file to save the output' )
|
|
78
|
|
79 (options, args) = parser.parse_args()
|
|
80
|
|
81 # output version # of tool
|
|
82 try:
|
|
83 tmp = tempfile.NamedTemporaryFile().name
|
|
84 tmp_stdout = open( tmp, 'wb' )
|
|
85 proc = subprocess.Popen( args='dwgsim_eval 2>&1', shell=True, stdout=tmp_stdout )
|
|
86 tmp_stdout.close()
|
|
87 returncode = proc.wait()
|
|
88 stdout = None
|
|
89 for line in open( tmp_stdout.name, 'rb' ):
|
|
90 if line.lower().find( 'version' ) >= 0:
|
|
91 stdout = line.strip()
|
|
92 break
|
|
93 if stdout:
|
|
94 sys.stdout.write( '%s\n' % stdout )
|
|
95 else:
|
|
96 raise Exception
|
|
97 except:
|
|
98 sys.stdout.write( 'Could not determine DWGSIM_EVAL version\n' )
|
|
99
|
|
100 buffsize = 1048576
|
|
101
|
|
102 # make temp directory for dwgsim, requires trailing slash
|
|
103 tmp_dir = '%s/' % tempfile.mkdtemp()
|
|
104
|
|
105 #'generic' options used in all dwgsim commands here
|
|
106
|
|
107 try:
|
|
108 tmp_dir = '%s/' % tempfile.mkdtemp()
|
|
109 dwgsim_eval_cmd = 'dwgsim_eval'
|
|
110 if True == options.alignmentScore:
|
|
111 dwgsim_eval_cmd = dwgsim_eval_cmd + ' -a'
|
|
112 if True == options.bwa:
|
|
113 dwgsim_eval_cmd = dwgsim_eval_cmd + ' -b'
|
|
114 if True == options.colorSpace:
|
|
115 dwgsim_eval_cmd = dwgsim_eval_cmd + ' -c'
|
|
116 use_p = False
|
|
117 if 0 <= options.numSnps:
|
|
118 use_p = True
|
|
119 dwgsim_eval_cmd = dwgsim_eval_cmd + (' -s %s' % options.numSnps)
|
|
120 if 0 <= options.numErrors:
|
|
121 use_p = True
|
|
122 dwgsim_eval_cmd = dwgsim_eval_cmd + (' -e %s' % options.numErrors)
|
|
123 if True == options.indels:
|
|
124 use_p = True
|
|
125 dwgsim_eval_cmd = dwgsim_eval_cmd + ' -i'
|
|
126 if True == use_p or True == options.printIncorrect:
|
|
127 dwgsim_eval_cmd = dwgsim_eval_cmd + ' -p'
|
|
128 if True == options.singleEnd:
|
|
129 dwgsim_eval_cmd = dwgsim_eval_cmd + ' -z'
|
|
130 dwgsim_eval_cmd = '%s -d %s -g %s -n %s -q %s' % (dwgsim_eval_cmd, \
|
|
131 options.scoreFactor, \
|
|
132 options.wiggle, \
|
|
133 options.numReads, \
|
|
134 options.minMapq)
|
|
135 if None != options.sam:
|
|
136 dwgsim_eval_cmd = dwgsim_eval_cmd + ' -S ' + options.sam
|
|
137 elif None != options.bam:
|
|
138 dwgsim_eval_cmd = dwgsim_eval_cmd + ' ' + options.bam
|
|
139 else:
|
|
140 raise Exception, 'Input file was neither a SAM nor BAM'
|
|
141 dwgsim_eval_cmd = dwgsim_eval_cmd + ' > ' + options.output
|
|
142
|
|
143 # need to nest try-except in try-finally to handle 2.4
|
|
144 try:
|
|
145 # dwgsim
|
|
146 run_process ( dwgsim_eval_cmd, 'dwgsim', tmp_dir, buffsize )
|
|
147 # check that there are results in the output file
|
|
148 check_output ( options.output, False )
|
|
149 sys.stdout.write( 'DWGSIM_EVAL successful' )
|
|
150 except Exception, e:
|
|
151 stop_err( 'DWGSIM_EVAL failed.\n' + str( e ) )
|
|
152 finally:
|
|
153 # clean up temp dir
|
|
154 if os.path.exists( tmp_dir ):
|
|
155 shutil.rmtree( tmp_dir )
|
|
156
|
|
157 if __name__=="__main__": __main__()
|