annotate dwgsim_eval_wrapper.py @ 1:eb58ceeedfba default tip

Uploaded
author nilshomer
date Sun, 14 Aug 2011 20:09:02 -0400
parents ed05f8167910
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
1 #!/usr/bin/env python
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
2
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
3 """
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
4 Runs DWGSIM_EVAL
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
5
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
6 usage: dwgsim_eval_wrapper.py [options]
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
7 -a,--alignmentScore=a: split alignments by alignment score instead of mapping quality
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
8 -b,--bwa=b: alignments are from BWA
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
9 -c,--colorSpace=c: color space alignments
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
10 -d,--scoreFactor=d: divide quality/alignment score by this factor
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
11 -g,--wiggle=g: gap "wiggle"
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
12 -n,--numReads=n: number of raw input paired-end reads (otherwise, inferred from all SAM records present).
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
13 -q,--minMapq=q: consider only alignments with this mapping quality or greater.
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
14 -z,--singleEnd=z: input contains only single end reads
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
15 -S,--sam=s: input SAM file
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
16 -B,--bam=B: input BAM file
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
17 -p,--printIncorrect=p: print incorrect alignments
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
18 -s,--numSnps=s: consider only alignments with the number of specified SNPs
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
19 -e,--numErrors=e: consider only alignments with the number of specified errors
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
20 -i,--indels=i: consider only alignments with indels
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
21 -o,--output=u: The file to save the output
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
22 """
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
23
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
24 import optparse, os, shutil, subprocess, sys, tempfile
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
25
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
26 def stop_err( msg ):
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
27 sys.stderr.write( '%s\n' % msg )
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
28 sys.exit()
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
29
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
30 def run_process ( cmd, name, tmp_dir, buffsize ):
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
31 try:
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
32 tmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
33 tmp_stderr = open( tmp, 'wb' )
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
34 proc = subprocess.Popen( args=cmd, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() )
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
35 returncode = proc.wait()
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
36 tmp_stderr.close()
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
37 # get stderr, allowing for case where it's very large
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
38 tmp_stderr = open( tmp, 'rb' )
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
39 stderr = ''
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
40 try:
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
41 while True:
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
42 stderr += tmp_stderr.read( buffsize )
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
43 if not stderr or len( stderr ) % buffsize != 0:
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
44 break
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
45 except OverflowError:
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
46 pass
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
47 tmp_stderr.close()
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
48 if returncode != 0:
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
49 raise Exception, stderr
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
50 except Exception, e:
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
51 raise Exception, 'Error in \'' + name + '\'. \n' + str( e )
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
52
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
53 def check_output ( output, canBeEmpty ):
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
54 if 0 < os.path.getsize( output ):
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
55 return True
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
56 elif False == canBeEmpty:
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
57 raise Exception, 'The output file is empty:' + output
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
58
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
59 def __main__():
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
60 #Parse Command Line
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
61 parser = optparse.OptionParser()
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
62
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
63 parser.add_option( '-a', '--alignmentScore', action='store_true', dest='alignmentScore', default=False, help='split alignments by alignment score instead of mapping quality' )
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
64 parser.add_option( '-b', '--bwa', action='store_true', dest='bwa', default=False, help='alignments are from BWA' )
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
65 parser.add_option( '-c', '--colorSpace', action='store_true', dest='colorSpace', default=False, help='generate reads in color space (SOLiD reads)' )
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
66 parser.add_option( '-d', '--scoreFactor', dest='scoreFactor', type='int', help='divide quality/alignment score by this factor' )
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
67 parser.add_option( '-g', '--wiggle', dest='wiggle', type='int', help='gap "wiggle"' )
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
68 parser.add_option( '-n', '--numReads', dest='numReads', type='int', help='number of raw input paired-end reads (otherwise, inferred from all SAM records present).' )
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
69 parser.add_option( '-q', '--minMapq', dest='minMapq', type='int', help='consider only alignments with this mapping quality or greater.' )
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
70 parser.add_option( '-z', '--singleEnd', action='store_true', dest='singleEnd', default=False, help='input contains only single end reads' )
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
71 parser.add_option( '-S', '--sam', dest='sam', default=None, help='input SAM' )
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
72 parser.add_option( '-B', '--bam', dest='bam', default=None, help='input BAM' )
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
73 parser.add_option( '-p', '--printIncorrect', action='store_true', dest='printIncorrect', default=False, help='print incorrect alignments' )
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
74 parser.add_option( '-s', '--numSnps', dest='numSnps', type="int", help='consider only alignments with the number of specified SNPs' )
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
75 parser.add_option( '-e', '--numErrors', dest='numErrors', type="int", default=False, help='consider only alignments with the number of specified errors' )
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
76 parser.add_option( '-i', '--indels', action='store_true', dest='indels', default=False, help='consider only alignments with indels' )
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
77 parser.add_option( '-o', '--output', dest='output', help='The file to save the output' )
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
78
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
79 (options, args) = parser.parse_args()
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
80
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
81 # output version # of tool
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
82 try:
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
83 tmp = tempfile.NamedTemporaryFile().name
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
84 tmp_stdout = open( tmp, 'wb' )
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
85 proc = subprocess.Popen( args='dwgsim_eval 2>&1', shell=True, stdout=tmp_stdout )
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
86 tmp_stdout.close()
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
87 returncode = proc.wait()
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
88 stdout = None
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
89 for line in open( tmp_stdout.name, 'rb' ):
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
90 if line.lower().find( 'version' ) >= 0:
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
91 stdout = line.strip()
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
92 break
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
93 if stdout:
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
94 sys.stdout.write( '%s\n' % stdout )
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
95 else:
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
96 raise Exception
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
97 except:
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
98 sys.stdout.write( 'Could not determine DWGSIM_EVAL version\n' )
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
99
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
100 buffsize = 1048576
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
101
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
102 # make temp directory for dwgsim, requires trailing slash
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
103 tmp_dir = '%s/' % tempfile.mkdtemp()
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
104
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
105 #'generic' options used in all dwgsim commands here
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
106
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
107 try:
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
108 tmp_dir = '%s/' % tempfile.mkdtemp()
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
109 dwgsim_eval_cmd = 'dwgsim_eval'
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
110 if True == options.alignmentScore:
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
111 dwgsim_eval_cmd = dwgsim_eval_cmd + ' -a'
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
112 if True == options.bwa:
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
113 dwgsim_eval_cmd = dwgsim_eval_cmd + ' -b'
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
114 if True == options.colorSpace:
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
115 dwgsim_eval_cmd = dwgsim_eval_cmd + ' -c'
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
116 use_p = False
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
117 if 0 <= options.numSnps:
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
118 use_p = True
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
119 dwgsim_eval_cmd = dwgsim_eval_cmd + (' -s %s' % options.numSnps)
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
120 if 0 <= options.numErrors:
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
121 use_p = True
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
122 dwgsim_eval_cmd = dwgsim_eval_cmd + (' -e %s' % options.numErrors)
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
123 if True == options.indels:
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
124 use_p = True
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
125 dwgsim_eval_cmd = dwgsim_eval_cmd + ' -i'
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
126 if True == use_p or True == options.printIncorrect:
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
127 dwgsim_eval_cmd = dwgsim_eval_cmd + ' -p'
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
128 if True == options.singleEnd:
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
129 dwgsim_eval_cmd = dwgsim_eval_cmd + ' -z'
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
130 dwgsim_eval_cmd = '%s -d %s -g %s -n %s -q %s' % (dwgsim_eval_cmd, \
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
131 options.scoreFactor, \
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
132 options.wiggle, \
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
133 options.numReads, \
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
134 options.minMapq)
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
135 if None != options.sam:
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
136 dwgsim_eval_cmd = dwgsim_eval_cmd + ' -S ' + options.sam
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
137 elif None != options.bam:
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
138 dwgsim_eval_cmd = dwgsim_eval_cmd + ' ' + options.bam
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
139 else:
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
140 raise Exception, 'Input file was neither a SAM nor BAM'
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
141 dwgsim_eval_cmd = dwgsim_eval_cmd + ' > ' + options.output
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
142
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
143 # need to nest try-except in try-finally to handle 2.4
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
144 try:
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
145 # dwgsim
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
146 run_process ( dwgsim_eval_cmd, 'dwgsim', tmp_dir, buffsize )
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
147 # check that there are results in the output file
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
148 check_output ( options.output, False )
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
149 sys.stdout.write( 'DWGSIM_EVAL successful' )
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
150 except Exception, e:
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
151 stop_err( 'DWGSIM_EVAL failed.\n' + str( e ) )
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
152 finally:
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
153 # clean up temp dir
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
154 if os.path.exists( tmp_dir ):
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
155 shutil.rmtree( tmp_dir )
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
156
ed05f8167910 Uploaded
nilshomer
parents:
diff changeset
157 if __name__=="__main__": __main__()