Mercurial > repos > nilshomer > dwgsim_eval
comparison dwgsim_eval_wrapper.py @ 0:ed05f8167910
Uploaded
| author | nilshomer |
|---|---|
| date | Sun, 14 Aug 2011 20:08:43 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:ed05f8167910 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 | |
| 3 """ | |
| 4 Runs DWGSIM_EVAL | |
| 5 | |
| 6 usage: dwgsim_eval_wrapper.py [options] | |
| 7 -a,--alignmentScore=a: split alignments by alignment score instead of mapping quality | |
| 8 -b,--bwa=b: alignments are from BWA | |
| 9 -c,--colorSpace=c: color space alignments | |
| 10 -d,--scoreFactor=d: divide quality/alignment score by this factor | |
| 11 -g,--wiggle=g: gap "wiggle" | |
| 12 -n,--numReads=n: number of raw input paired-end reads (otherwise, inferred from all SAM records present). | |
| 13 -q,--minMapq=q: consider only alignments with this mapping quality or greater. | |
| 14 -z,--singleEnd=z: input contains only single end reads | |
| 15 -S,--sam=s: input SAM file | |
| 16 -B,--bam=B: input BAM file | |
| 17 -p,--printIncorrect=p: print incorrect alignments | |
| 18 -s,--numSnps=s: consider only alignments with the number of specified SNPs | |
| 19 -e,--numErrors=e: consider only alignments with the number of specified errors | |
| 20 -i,--indels=i: consider only alignments with indels | |
| 21 -o,--output=u: The file to save the output | |
| 22 """ | |
| 23 | |
| 24 import optparse, os, shutil, subprocess, sys, tempfile | |
| 25 | |
| 26 def stop_err( msg ): | |
| 27 sys.stderr.write( '%s\n' % msg ) | |
| 28 sys.exit() | |
| 29 | |
| 30 def run_process ( cmd, name, tmp_dir, buffsize ): | |
| 31 try: | |
| 32 tmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name | |
| 33 tmp_stderr = open( tmp, 'wb' ) | |
| 34 proc = subprocess.Popen( args=cmd, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() ) | |
| 35 returncode = proc.wait() | |
| 36 tmp_stderr.close() | |
| 37 # get stderr, allowing for case where it's very large | |
| 38 tmp_stderr = open( tmp, 'rb' ) | |
| 39 stderr = '' | |
| 40 try: | |
| 41 while True: | |
| 42 stderr += tmp_stderr.read( buffsize ) | |
| 43 if not stderr or len( stderr ) % buffsize != 0: | |
| 44 break | |
| 45 except OverflowError: | |
| 46 pass | |
| 47 tmp_stderr.close() | |
| 48 if returncode != 0: | |
| 49 raise Exception, stderr | |
| 50 except Exception, e: | |
| 51 raise Exception, 'Error in \'' + name + '\'. \n' + str( e ) | |
| 52 | |
| 53 def check_output ( output, canBeEmpty ): | |
| 54 if 0 < os.path.getsize( output ): | |
| 55 return True | |
| 56 elif False == canBeEmpty: | |
| 57 raise Exception, 'The output file is empty:' + output | |
| 58 | |
| 59 def __main__(): | |
| 60 #Parse Command Line | |
| 61 parser = optparse.OptionParser() | |
| 62 | |
| 63 parser.add_option( '-a', '--alignmentScore', action='store_true', dest='alignmentScore', default=False, help='split alignments by alignment score instead of mapping quality' ) | |
| 64 parser.add_option( '-b', '--bwa', action='store_true', dest='bwa', default=False, help='alignments are from BWA' ) | |
| 65 parser.add_option( '-c', '--colorSpace', action='store_true', dest='colorSpace', default=False, help='generate reads in color space (SOLiD reads)' ) | |
| 66 parser.add_option( '-d', '--scoreFactor', dest='scoreFactor', type='int', help='divide quality/alignment score by this factor' ) | |
| 67 parser.add_option( '-g', '--wiggle', dest='wiggle', type='int', help='gap "wiggle"' ) | |
| 68 parser.add_option( '-n', '--numReads', dest='numReads', type='int', help='number of raw input paired-end reads (otherwise, inferred from all SAM records present).' ) | |
| 69 parser.add_option( '-q', '--minMapq', dest='minMapq', type='int', help='consider only alignments with this mapping quality or greater.' ) | |
| 70 parser.add_option( '-z', '--singleEnd', action='store_true', dest='singleEnd', default=False, help='input contains only single end reads' ) | |
| 71 parser.add_option( '-S', '--sam', dest='sam', default=None, help='input SAM' ) | |
| 72 parser.add_option( '-B', '--bam', dest='bam', default=None, help='input BAM' ) | |
| 73 parser.add_option( '-p', '--printIncorrect', action='store_true', dest='printIncorrect', default=False, help='print incorrect alignments' ) | |
| 74 parser.add_option( '-s', '--numSnps', dest='numSnps', type="int", help='consider only alignments with the number of specified SNPs' ) | |
| 75 parser.add_option( '-e', '--numErrors', dest='numErrors', type="int", default=False, help='consider only alignments with the number of specified errors' ) | |
| 76 parser.add_option( '-i', '--indels', action='store_true', dest='indels', default=False, help='consider only alignments with indels' ) | |
| 77 parser.add_option( '-o', '--output', dest='output', help='The file to save the output' ) | |
| 78 | |
| 79 (options, args) = parser.parse_args() | |
| 80 | |
| 81 # output version # of tool | |
| 82 try: | |
| 83 tmp = tempfile.NamedTemporaryFile().name | |
| 84 tmp_stdout = open( tmp, 'wb' ) | |
| 85 proc = subprocess.Popen( args='dwgsim_eval 2>&1', shell=True, stdout=tmp_stdout ) | |
| 86 tmp_stdout.close() | |
| 87 returncode = proc.wait() | |
| 88 stdout = None | |
| 89 for line in open( tmp_stdout.name, 'rb' ): | |
| 90 if line.lower().find( 'version' ) >= 0: | |
| 91 stdout = line.strip() | |
| 92 break | |
| 93 if stdout: | |
| 94 sys.stdout.write( '%s\n' % stdout ) | |
| 95 else: | |
| 96 raise Exception | |
| 97 except: | |
| 98 sys.stdout.write( 'Could not determine DWGSIM_EVAL version\n' ) | |
| 99 | |
| 100 buffsize = 1048576 | |
| 101 | |
| 102 # make temp directory for dwgsim, requires trailing slash | |
| 103 tmp_dir = '%s/' % tempfile.mkdtemp() | |
| 104 | |
| 105 #'generic' options used in all dwgsim commands here | |
| 106 | |
| 107 try: | |
| 108 tmp_dir = '%s/' % tempfile.mkdtemp() | |
| 109 dwgsim_eval_cmd = 'dwgsim_eval' | |
| 110 if True == options.alignmentScore: | |
| 111 dwgsim_eval_cmd = dwgsim_eval_cmd + ' -a' | |
| 112 if True == options.bwa: | |
| 113 dwgsim_eval_cmd = dwgsim_eval_cmd + ' -b' | |
| 114 if True == options.colorSpace: | |
| 115 dwgsim_eval_cmd = dwgsim_eval_cmd + ' -c' | |
| 116 use_p = False | |
| 117 if 0 <= options.numSnps: | |
| 118 use_p = True | |
| 119 dwgsim_eval_cmd = dwgsim_eval_cmd + (' -s %s' % options.numSnps) | |
| 120 if 0 <= options.numErrors: | |
| 121 use_p = True | |
| 122 dwgsim_eval_cmd = dwgsim_eval_cmd + (' -e %s' % options.numErrors) | |
| 123 if True == options.indels: | |
| 124 use_p = True | |
| 125 dwgsim_eval_cmd = dwgsim_eval_cmd + ' -i' | |
| 126 if True == use_p or True == options.printIncorrect: | |
| 127 dwgsim_eval_cmd = dwgsim_eval_cmd + ' -p' | |
| 128 if True == options.singleEnd: | |
| 129 dwgsim_eval_cmd = dwgsim_eval_cmd + ' -z' | |
| 130 dwgsim_eval_cmd = '%s -d %s -g %s -n %s -q %s' % (dwgsim_eval_cmd, \ | |
| 131 options.scoreFactor, \ | |
| 132 options.wiggle, \ | |
| 133 options.numReads, \ | |
| 134 options.minMapq) | |
| 135 if None != options.sam: | |
| 136 dwgsim_eval_cmd = dwgsim_eval_cmd + ' -S ' + options.sam | |
| 137 elif None != options.bam: | |
| 138 dwgsim_eval_cmd = dwgsim_eval_cmd + ' ' + options.bam | |
| 139 else: | |
| 140 raise Exception, 'Input file was neither a SAM nor BAM' | |
| 141 dwgsim_eval_cmd = dwgsim_eval_cmd + ' > ' + options.output | |
| 142 | |
| 143 # need to nest try-except in try-finally to handle 2.4 | |
| 144 try: | |
| 145 # dwgsim | |
| 146 run_process ( dwgsim_eval_cmd, 'dwgsim', tmp_dir, buffsize ) | |
| 147 # check that there are results in the output file | |
| 148 check_output ( options.output, False ) | |
| 149 sys.stdout.write( 'DWGSIM_EVAL successful' ) | |
| 150 except Exception, e: | |
| 151 stop_err( 'DWGSIM_EVAL failed.\n' + str( e ) ) | |
| 152 finally: | |
| 153 # clean up temp dir | |
| 154 if os.path.exists( tmp_dir ): | |
| 155 shutil.rmtree( tmp_dir ) | |
| 156 | |
| 157 if __name__=="__main__": __main__() |
