comparison dwgsim_eval_wrapper.py @ 0:ed05f8167910

Uploaded
author nilshomer
date Sun, 14 Aug 2011 20:08:43 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:ed05f8167910
1 #!/usr/bin/env python
2
3 """
4 Runs DWGSIM_EVAL
5
6 usage: dwgsim_eval_wrapper.py [options]
7 -a,--alignmentScore=a: split alignments by alignment score instead of mapping quality
8 -b,--bwa=b: alignments are from BWA
9 -c,--colorSpace=c: color space alignments
10 -d,--scoreFactor=d: divide quality/alignment score by this factor
11 -g,--wiggle=g: gap "wiggle"
12 -n,--numReads=n: number of raw input paired-end reads (otherwise, inferred from all SAM records present).
13 -q,--minMapq=q: consider only alignments with this mapping quality or greater.
14 -z,--singleEnd=z: input contains only single end reads
15 -S,--sam=s: input SAM file
16 -B,--bam=B: input BAM file
17 -p,--printIncorrect=p: print incorrect alignments
18 -s,--numSnps=s: consider only alignments with the number of specified SNPs
19 -e,--numErrors=e: consider only alignments with the number of specified errors
20 -i,--indels=i: consider only alignments with indels
21 -o,--output=u: The file to save the output
22 """
23
24 import optparse, os, shutil, subprocess, sys, tempfile
25
26 def stop_err( msg ):
27 sys.stderr.write( '%s\n' % msg )
28 sys.exit()
29
30 def run_process ( cmd, name, tmp_dir, buffsize ):
31 try:
32 tmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name
33 tmp_stderr = open( tmp, 'wb' )
34 proc = subprocess.Popen( args=cmd, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() )
35 returncode = proc.wait()
36 tmp_stderr.close()
37 # get stderr, allowing for case where it's very large
38 tmp_stderr = open( tmp, 'rb' )
39 stderr = ''
40 try:
41 while True:
42 stderr += tmp_stderr.read( buffsize )
43 if not stderr or len( stderr ) % buffsize != 0:
44 break
45 except OverflowError:
46 pass
47 tmp_stderr.close()
48 if returncode != 0:
49 raise Exception, stderr
50 except Exception, e:
51 raise Exception, 'Error in \'' + name + '\'. \n' + str( e )
52
53 def check_output ( output, canBeEmpty ):
54 if 0 < os.path.getsize( output ):
55 return True
56 elif False == canBeEmpty:
57 raise Exception, 'The output file is empty:' + output
58
59 def __main__():
60 #Parse Command Line
61 parser = optparse.OptionParser()
62
63 parser.add_option( '-a', '--alignmentScore', action='store_true', dest='alignmentScore', default=False, help='split alignments by alignment score instead of mapping quality' )
64 parser.add_option( '-b', '--bwa', action='store_true', dest='bwa', default=False, help='alignments are from BWA' )
65 parser.add_option( '-c', '--colorSpace', action='store_true', dest='colorSpace', default=False, help='generate reads in color space (SOLiD reads)' )
66 parser.add_option( '-d', '--scoreFactor', dest='scoreFactor', type='int', help='divide quality/alignment score by this factor' )
67 parser.add_option( '-g', '--wiggle', dest='wiggle', type='int', help='gap "wiggle"' )
68 parser.add_option( '-n', '--numReads', dest='numReads', type='int', help='number of raw input paired-end reads (otherwise, inferred from all SAM records present).' )
69 parser.add_option( '-q', '--minMapq', dest='minMapq', type='int', help='consider only alignments with this mapping quality or greater.' )
70 parser.add_option( '-z', '--singleEnd', action='store_true', dest='singleEnd', default=False, help='input contains only single end reads' )
71 parser.add_option( '-S', '--sam', dest='sam', default=None, help='input SAM' )
72 parser.add_option( '-B', '--bam', dest='bam', default=None, help='input BAM' )
73 parser.add_option( '-p', '--printIncorrect', action='store_true', dest='printIncorrect', default=False, help='print incorrect alignments' )
74 parser.add_option( '-s', '--numSnps', dest='numSnps', type="int", help='consider only alignments with the number of specified SNPs' )
75 parser.add_option( '-e', '--numErrors', dest='numErrors', type="int", default=False, help='consider only alignments with the number of specified errors' )
76 parser.add_option( '-i', '--indels', action='store_true', dest='indels', default=False, help='consider only alignments with indels' )
77 parser.add_option( '-o', '--output', dest='output', help='The file to save the output' )
78
79 (options, args) = parser.parse_args()
80
81 # output version # of tool
82 try:
83 tmp = tempfile.NamedTemporaryFile().name
84 tmp_stdout = open( tmp, 'wb' )
85 proc = subprocess.Popen( args='dwgsim_eval 2>&1', shell=True, stdout=tmp_stdout )
86 tmp_stdout.close()
87 returncode = proc.wait()
88 stdout = None
89 for line in open( tmp_stdout.name, 'rb' ):
90 if line.lower().find( 'version' ) >= 0:
91 stdout = line.strip()
92 break
93 if stdout:
94 sys.stdout.write( '%s\n' % stdout )
95 else:
96 raise Exception
97 except:
98 sys.stdout.write( 'Could not determine DWGSIM_EVAL version\n' )
99
100 buffsize = 1048576
101
102 # make temp directory for dwgsim, requires trailing slash
103 tmp_dir = '%s/' % tempfile.mkdtemp()
104
105 #'generic' options used in all dwgsim commands here
106
107 try:
108 tmp_dir = '%s/' % tempfile.mkdtemp()
109 dwgsim_eval_cmd = 'dwgsim_eval'
110 if True == options.alignmentScore:
111 dwgsim_eval_cmd = dwgsim_eval_cmd + ' -a'
112 if True == options.bwa:
113 dwgsim_eval_cmd = dwgsim_eval_cmd + ' -b'
114 if True == options.colorSpace:
115 dwgsim_eval_cmd = dwgsim_eval_cmd + ' -c'
116 use_p = False
117 if 0 <= options.numSnps:
118 use_p = True
119 dwgsim_eval_cmd = dwgsim_eval_cmd + (' -s %s' % options.numSnps)
120 if 0 <= options.numErrors:
121 use_p = True
122 dwgsim_eval_cmd = dwgsim_eval_cmd + (' -e %s' % options.numErrors)
123 if True == options.indels:
124 use_p = True
125 dwgsim_eval_cmd = dwgsim_eval_cmd + ' -i'
126 if True == use_p or True == options.printIncorrect:
127 dwgsim_eval_cmd = dwgsim_eval_cmd + ' -p'
128 if True == options.singleEnd:
129 dwgsim_eval_cmd = dwgsim_eval_cmd + ' -z'
130 dwgsim_eval_cmd = '%s -d %s -g %s -n %s -q %s' % (dwgsim_eval_cmd, \
131 options.scoreFactor, \
132 options.wiggle, \
133 options.numReads, \
134 options.minMapq)
135 if None != options.sam:
136 dwgsim_eval_cmd = dwgsim_eval_cmd + ' -S ' + options.sam
137 elif None != options.bam:
138 dwgsim_eval_cmd = dwgsim_eval_cmd + ' ' + options.bam
139 else:
140 raise Exception, 'Input file was neither a SAM nor BAM'
141 dwgsim_eval_cmd = dwgsim_eval_cmd + ' > ' + options.output
142
143 # need to nest try-except in try-finally to handle 2.4
144 try:
145 # dwgsim
146 run_process ( dwgsim_eval_cmd, 'dwgsim', tmp_dir, buffsize )
147 # check that there are results in the output file
148 check_output ( options.output, False )
149 sys.stdout.write( 'DWGSIM_EVAL successful' )
150 except Exception, e:
151 stop_err( 'DWGSIM_EVAL failed.\n' + str( e ) )
152 finally:
153 # clean up temp dir
154 if os.path.exists( tmp_dir ):
155 shutil.rmtree( tmp_dir )
156
157 if __name__=="__main__": __main__()