Mercurial > repos > nilshomer > dwgsim_eval
comparison dwgsim_eval_wrapper.py @ 0:ed05f8167910
Uploaded
author | nilshomer |
---|---|
date | Sun, 14 Aug 2011 20:08:43 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:ed05f8167910 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 """ | |
4 Runs DWGSIM_EVAL | |
5 | |
6 usage: dwgsim_eval_wrapper.py [options] | |
7 -a,--alignmentScore=a: split alignments by alignment score instead of mapping quality | |
8 -b,--bwa=b: alignments are from BWA | |
9 -c,--colorSpace=c: color space alignments | |
10 -d,--scoreFactor=d: divide quality/alignment score by this factor | |
11 -g,--wiggle=g: gap "wiggle" | |
12 -n,--numReads=n: number of raw input paired-end reads (otherwise, inferred from all SAM records present). | |
13 -q,--minMapq=q: consider only alignments with this mapping quality or greater. | |
14 -z,--singleEnd=z: input contains only single end reads | |
15 -S,--sam=s: input SAM file | |
16 -B,--bam=B: input BAM file | |
17 -p,--printIncorrect=p: print incorrect alignments | |
18 -s,--numSnps=s: consider only alignments with the number of specified SNPs | |
19 -e,--numErrors=e: consider only alignments with the number of specified errors | |
20 -i,--indels=i: consider only alignments with indels | |
21 -o,--output=u: The file to save the output | |
22 """ | |
23 | |
24 import optparse, os, shutil, subprocess, sys, tempfile | |
25 | |
26 def stop_err( msg ): | |
27 sys.stderr.write( '%s\n' % msg ) | |
28 sys.exit() | |
29 | |
30 def run_process ( cmd, name, tmp_dir, buffsize ): | |
31 try: | |
32 tmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name | |
33 tmp_stderr = open( tmp, 'wb' ) | |
34 proc = subprocess.Popen( args=cmd, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() ) | |
35 returncode = proc.wait() | |
36 tmp_stderr.close() | |
37 # get stderr, allowing for case where it's very large | |
38 tmp_stderr = open( tmp, 'rb' ) | |
39 stderr = '' | |
40 try: | |
41 while True: | |
42 stderr += tmp_stderr.read( buffsize ) | |
43 if not stderr or len( stderr ) % buffsize != 0: | |
44 break | |
45 except OverflowError: | |
46 pass | |
47 tmp_stderr.close() | |
48 if returncode != 0: | |
49 raise Exception, stderr | |
50 except Exception, e: | |
51 raise Exception, 'Error in \'' + name + '\'. \n' + str( e ) | |
52 | |
53 def check_output ( output, canBeEmpty ): | |
54 if 0 < os.path.getsize( output ): | |
55 return True | |
56 elif False == canBeEmpty: | |
57 raise Exception, 'The output file is empty:' + output | |
58 | |
59 def __main__(): | |
60 #Parse Command Line | |
61 parser = optparse.OptionParser() | |
62 | |
63 parser.add_option( '-a', '--alignmentScore', action='store_true', dest='alignmentScore', default=False, help='split alignments by alignment score instead of mapping quality' ) | |
64 parser.add_option( '-b', '--bwa', action='store_true', dest='bwa', default=False, help='alignments are from BWA' ) | |
65 parser.add_option( '-c', '--colorSpace', action='store_true', dest='colorSpace', default=False, help='generate reads in color space (SOLiD reads)' ) | |
66 parser.add_option( '-d', '--scoreFactor', dest='scoreFactor', type='int', help='divide quality/alignment score by this factor' ) | |
67 parser.add_option( '-g', '--wiggle', dest='wiggle', type='int', help='gap "wiggle"' ) | |
68 parser.add_option( '-n', '--numReads', dest='numReads', type='int', help='number of raw input paired-end reads (otherwise, inferred from all SAM records present).' ) | |
69 parser.add_option( '-q', '--minMapq', dest='minMapq', type='int', help='consider only alignments with this mapping quality or greater.' ) | |
70 parser.add_option( '-z', '--singleEnd', action='store_true', dest='singleEnd', default=False, help='input contains only single end reads' ) | |
71 parser.add_option( '-S', '--sam', dest='sam', default=None, help='input SAM' ) | |
72 parser.add_option( '-B', '--bam', dest='bam', default=None, help='input BAM' ) | |
73 parser.add_option( '-p', '--printIncorrect', action='store_true', dest='printIncorrect', default=False, help='print incorrect alignments' ) | |
74 parser.add_option( '-s', '--numSnps', dest='numSnps', type="int", help='consider only alignments with the number of specified SNPs' ) | |
75 parser.add_option( '-e', '--numErrors', dest='numErrors', type="int", default=False, help='consider only alignments with the number of specified errors' ) | |
76 parser.add_option( '-i', '--indels', action='store_true', dest='indels', default=False, help='consider only alignments with indels' ) | |
77 parser.add_option( '-o', '--output', dest='output', help='The file to save the output' ) | |
78 | |
79 (options, args) = parser.parse_args() | |
80 | |
81 # output version # of tool | |
82 try: | |
83 tmp = tempfile.NamedTemporaryFile().name | |
84 tmp_stdout = open( tmp, 'wb' ) | |
85 proc = subprocess.Popen( args='dwgsim_eval 2>&1', shell=True, stdout=tmp_stdout ) | |
86 tmp_stdout.close() | |
87 returncode = proc.wait() | |
88 stdout = None | |
89 for line in open( tmp_stdout.name, 'rb' ): | |
90 if line.lower().find( 'version' ) >= 0: | |
91 stdout = line.strip() | |
92 break | |
93 if stdout: | |
94 sys.stdout.write( '%s\n' % stdout ) | |
95 else: | |
96 raise Exception | |
97 except: | |
98 sys.stdout.write( 'Could not determine DWGSIM_EVAL version\n' ) | |
99 | |
100 buffsize = 1048576 | |
101 | |
102 # make temp directory for dwgsim, requires trailing slash | |
103 tmp_dir = '%s/' % tempfile.mkdtemp() | |
104 | |
105 #'generic' options used in all dwgsim commands here | |
106 | |
107 try: | |
108 tmp_dir = '%s/' % tempfile.mkdtemp() | |
109 dwgsim_eval_cmd = 'dwgsim_eval' | |
110 if True == options.alignmentScore: | |
111 dwgsim_eval_cmd = dwgsim_eval_cmd + ' -a' | |
112 if True == options.bwa: | |
113 dwgsim_eval_cmd = dwgsim_eval_cmd + ' -b' | |
114 if True == options.colorSpace: | |
115 dwgsim_eval_cmd = dwgsim_eval_cmd + ' -c' | |
116 use_p = False | |
117 if 0 <= options.numSnps: | |
118 use_p = True | |
119 dwgsim_eval_cmd = dwgsim_eval_cmd + (' -s %s' % options.numSnps) | |
120 if 0 <= options.numErrors: | |
121 use_p = True | |
122 dwgsim_eval_cmd = dwgsim_eval_cmd + (' -e %s' % options.numErrors) | |
123 if True == options.indels: | |
124 use_p = True | |
125 dwgsim_eval_cmd = dwgsim_eval_cmd + ' -i' | |
126 if True == use_p or True == options.printIncorrect: | |
127 dwgsim_eval_cmd = dwgsim_eval_cmd + ' -p' | |
128 if True == options.singleEnd: | |
129 dwgsim_eval_cmd = dwgsim_eval_cmd + ' -z' | |
130 dwgsim_eval_cmd = '%s -d %s -g %s -n %s -q %s' % (dwgsim_eval_cmd, \ | |
131 options.scoreFactor, \ | |
132 options.wiggle, \ | |
133 options.numReads, \ | |
134 options.minMapq) | |
135 if None != options.sam: | |
136 dwgsim_eval_cmd = dwgsim_eval_cmd + ' -S ' + options.sam | |
137 elif None != options.bam: | |
138 dwgsim_eval_cmd = dwgsim_eval_cmd + ' ' + options.bam | |
139 else: | |
140 raise Exception, 'Input file was neither a SAM nor BAM' | |
141 dwgsim_eval_cmd = dwgsim_eval_cmd + ' > ' + options.output | |
142 | |
143 # need to nest try-except in try-finally to handle 2.4 | |
144 try: | |
145 # dwgsim | |
146 run_process ( dwgsim_eval_cmd, 'dwgsim', tmp_dir, buffsize ) | |
147 # check that there are results in the output file | |
148 check_output ( options.output, False ) | |
149 sys.stdout.write( 'DWGSIM_EVAL successful' ) | |
150 except Exception, e: | |
151 stop_err( 'DWGSIM_EVAL failed.\n' + str( e ) ) | |
152 finally: | |
153 # clean up temp dir | |
154 if os.path.exists( tmp_dir ): | |
155 shutil.rmtree( tmp_dir ) | |
156 | |
157 if __name__=="__main__": __main__() |