sharplabtool: tools/sr_mapping/srma

annotate tools/sr_mapping/srma_wrapper.py @ 0:9071e359b9a3

Uploaded

author	xuebing
date	Fri, 09 Mar 2012 19:37:19 -0500
parents
children

rev	line source
0 9071e359b9a3 Uploaded xuebing parents: diff changeset	1 #!/usr/bin/env python
9071e359b9a3 Uploaded xuebing parents: diff changeset	2
9071e359b9a3 Uploaded xuebing parents: diff changeset	3 """
9071e359b9a3 Uploaded xuebing parents: diff changeset	4 Runs SRMA on a SAM/BAM file;
9071e359b9a3 Uploaded xuebing parents: diff changeset	5 TODO: more documentation
9071e359b9a3 Uploaded xuebing parents: diff changeset	6
9071e359b9a3 Uploaded xuebing parents: diff changeset	7 usage: srma_wrapper.py [options]
9071e359b9a3 Uploaded xuebing parents: diff changeset	8
9071e359b9a3 Uploaded xuebing parents: diff changeset	9 See below for options
9071e359b9a3 Uploaded xuebing parents: diff changeset	10 """
9071e359b9a3 Uploaded xuebing parents: diff changeset	11
9071e359b9a3 Uploaded xuebing parents: diff changeset	12 import optparse, os, shutil, subprocess, sys, tempfile
9071e359b9a3 Uploaded xuebing parents: diff changeset	13
9071e359b9a3 Uploaded xuebing parents: diff changeset	14 def stop_err( msg ):
9071e359b9a3 Uploaded xuebing parents: diff changeset	15 sys.stderr.write( '%s\n' % msg )
9071e359b9a3 Uploaded xuebing parents: diff changeset	16 sys.exit()
9071e359b9a3 Uploaded xuebing parents: diff changeset	17
9071e359b9a3 Uploaded xuebing parents: diff changeset	18 def parseRefLoc( refLoc, refUID ):
9071e359b9a3 Uploaded xuebing parents: diff changeset	19 for line in open( refLoc ):
9071e359b9a3 Uploaded xuebing parents: diff changeset	20 if not line.startswith( '#' ):
9071e359b9a3 Uploaded xuebing parents: diff changeset	21 fields = line.strip().split( '\t' )
9071e359b9a3 Uploaded xuebing parents: diff changeset	22 if len( fields ) >= 3:
9071e359b9a3 Uploaded xuebing parents: diff changeset	23 if fields[0] == refUID:
9071e359b9a3 Uploaded xuebing parents: diff changeset	24 return fields[1]
9071e359b9a3 Uploaded xuebing parents: diff changeset	25 return None
9071e359b9a3 Uploaded xuebing parents: diff changeset	26
9071e359b9a3 Uploaded xuebing parents: diff changeset	27 def __main__():
9071e359b9a3 Uploaded xuebing parents: diff changeset	28 #Parse Command Line
9071e359b9a3 Uploaded xuebing parents: diff changeset	29 parser = optparse.OptionParser()
9071e359b9a3 Uploaded xuebing parents: diff changeset	30 parser.add_option( '-r', '--ref', dest='ref', help='The reference genome to index and use' )
9071e359b9a3 Uploaded xuebing parents: diff changeset	31 parser.add_option( '-u', '--refUID', dest='refUID', help='The pre-index reference genome unique Identifier' )
9071e359b9a3 Uploaded xuebing parents: diff changeset	32 parser.add_option( '-L', '--refLocations', dest='refLocations', help='The filepath to the srma indices location file' )
9071e359b9a3 Uploaded xuebing parents: diff changeset	33 parser.add_option( '-i', '--input', dest='input', help='The SAM/BAM input file' )
9071e359b9a3 Uploaded xuebing parents: diff changeset	34 parser.add_option( '-I', '--inputIndex', dest='inputIndex', help='The SAM/BAM input index file' )
9071e359b9a3 Uploaded xuebing parents: diff changeset	35 parser.add_option( '-o', '--output', dest='output', help='The SAM/BAM output file' )
9071e359b9a3 Uploaded xuebing parents: diff changeset	36 parser.add_option( '-O', '--offset', dest='offset', help='The alignment offset' )
9071e359b9a3 Uploaded xuebing parents: diff changeset	37 parser.add_option( '-Q', '--minMappingQuality', dest='minMappingQuality', help='The minimum mapping quality' )
9071e359b9a3 Uploaded xuebing parents: diff changeset	38 parser.add_option( '-P', '--minAlleleProbability', dest='minAlleleProbability', help='The minimum allele probability conditioned on coverage (for the binomial quantile).' )
9071e359b9a3 Uploaded xuebing parents: diff changeset	39 parser.add_option( '-C', '--minAlleleCoverage', dest='minAlleleCoverage', help='The minimum haploid coverage for the consensus' )
9071e359b9a3 Uploaded xuebing parents: diff changeset	40 parser.add_option( '-R', '--range', dest='range', help='A range to examine' )
9071e359b9a3 Uploaded xuebing parents: diff changeset	41 parser.add_option( '-c', '--correctBases', dest='correctBases', help='Correct bases ' )
9071e359b9a3 Uploaded xuebing parents: diff changeset	42 parser.add_option( '-q', '--useSequenceQualities', dest='useSequenceQualities', help='Use sequence qualities ' )
9071e359b9a3 Uploaded xuebing parents: diff changeset	43 parser.add_option( '-M', '--maxHeapSize', dest='maxHeapSize', help='The maximum number of nodes on the heap before re-alignment is ignored' )
9071e359b9a3 Uploaded xuebing parents: diff changeset	44 parser.add_option( '-s', '--fileSource', dest='fileSource', help='Whether to use a previously indexed reference sequence or one from history (indexed or history)' )
9071e359b9a3 Uploaded xuebing parents: diff changeset	45 parser.add_option( '-p', '--params', dest='params', help='Parameter setting to use (pre_set or full)' )
9071e359b9a3 Uploaded xuebing parents: diff changeset	46 parser.add_option( '-j', '--jarBin', dest='jarBin', default='', help='The path to where jars are stored' )
9071e359b9a3 Uploaded xuebing parents: diff changeset	47 parser.add_option( '-f', '--jarFile', dest='jarFile', help='The file name of the jar file to use')
9071e359b9a3 Uploaded xuebing parents: diff changeset	48 (options, args) = parser.parse_args()
9071e359b9a3 Uploaded xuebing parents: diff changeset	49
9071e359b9a3 Uploaded xuebing parents: diff changeset	50 # make temp directory for srma
9071e359b9a3 Uploaded xuebing parents: diff changeset	51 tmp_dir = tempfile.mkdtemp()
9071e359b9a3 Uploaded xuebing parents: diff changeset	52 buffsize = 1048576
9071e359b9a3 Uploaded xuebing parents: diff changeset	53
9071e359b9a3 Uploaded xuebing parents: diff changeset	54 # set up reference filenames
9071e359b9a3 Uploaded xuebing parents: diff changeset	55 reference_filepath_name = None
9071e359b9a3 Uploaded xuebing parents: diff changeset	56 # need to create SRMA dict and Samtools fai files for custom genome
9071e359b9a3 Uploaded xuebing parents: diff changeset	57 if options.fileSource == 'history':
9071e359b9a3 Uploaded xuebing parents: diff changeset	58 try:
9071e359b9a3 Uploaded xuebing parents: diff changeset	59 reference_filepath = tempfile.NamedTemporaryFile( dir=tmp_dir, suffix='.fa' )
9071e359b9a3 Uploaded xuebing parents: diff changeset	60 reference_filepath_name = reference_filepath.name
9071e359b9a3 Uploaded xuebing parents: diff changeset	61 reference_filepath.close()
9071e359b9a3 Uploaded xuebing parents: diff changeset	62 fai_filepath_name = '%s.fai' % reference_filepath_name
9071e359b9a3 Uploaded xuebing parents: diff changeset	63 dict_filepath_name = reference_filepath_name.replace( '.fa', '.dict' )
9071e359b9a3 Uploaded xuebing parents: diff changeset	64 os.symlink( options.ref, reference_filepath_name )
9071e359b9a3 Uploaded xuebing parents: diff changeset	65 # create fai file using Samtools
9071e359b9a3 Uploaded xuebing parents: diff changeset	66 index_fai_cmd = 'samtools faidx %s' % reference_filepath_name
9071e359b9a3 Uploaded xuebing parents: diff changeset	67 try:
9071e359b9a3 Uploaded xuebing parents: diff changeset	68 tmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name
9071e359b9a3 Uploaded xuebing parents: diff changeset	69 tmp_stderr = open( tmp, 'wb' )
9071e359b9a3 Uploaded xuebing parents: diff changeset	70 proc = subprocess.Popen( args=index_fai_cmd, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() )
9071e359b9a3 Uploaded xuebing parents: diff changeset	71 returncode = proc.wait()
9071e359b9a3 Uploaded xuebing parents: diff changeset	72 tmp_stderr.close()
9071e359b9a3 Uploaded xuebing parents: diff changeset	73 # get stderr, allowing for case where it's very large
9071e359b9a3 Uploaded xuebing parents: diff changeset	74 tmp_stderr = open( tmp, 'rb' )
9071e359b9a3 Uploaded xuebing parents: diff changeset	75 stderr = ''
9071e359b9a3 Uploaded xuebing parents: diff changeset	76 try:
9071e359b9a3 Uploaded xuebing parents: diff changeset	77 while True:
9071e359b9a3 Uploaded xuebing parents: diff changeset	78 stderr += tmp_stderr.read( buffsize )
9071e359b9a3 Uploaded xuebing parents: diff changeset	79 if not stderr or len( stderr ) % buffsize != 0:
9071e359b9a3 Uploaded xuebing parents: diff changeset	80 break
9071e359b9a3 Uploaded xuebing parents: diff changeset	81 except OverflowError:
9071e359b9a3 Uploaded xuebing parents: diff changeset	82 pass
9071e359b9a3 Uploaded xuebing parents: diff changeset	83 tmp_stderr.close()
9071e359b9a3 Uploaded xuebing parents: diff changeset	84 if returncode != 0:
9071e359b9a3 Uploaded xuebing parents: diff changeset	85 raise Exception, stderr
9071e359b9a3 Uploaded xuebing parents: diff changeset	86 except Exception, e:
9071e359b9a3 Uploaded xuebing parents: diff changeset	87 # clean up temp dir
9071e359b9a3 Uploaded xuebing parents: diff changeset	88 if os.path.exists( tmp_dir ):
9071e359b9a3 Uploaded xuebing parents: diff changeset	89 shutil.rmtree( tmp_dir )
9071e359b9a3 Uploaded xuebing parents: diff changeset	90 stop_err( 'Error creating Samtools index for custom genome file: %s\n' % str( e ) )
9071e359b9a3 Uploaded xuebing parents: diff changeset	91 # create dict file using SRMA
9071e359b9a3 Uploaded xuebing parents: diff changeset	92 dict_cmd = 'java -cp "%s" net.sf.picard.sam.CreateSequenceDictionary R=%s O=%s' % ( os.path.join( options.jarBin, options.jarFile ), reference_filepath_name, dict_filepath_name )
9071e359b9a3 Uploaded xuebing parents: diff changeset	93 try:
9071e359b9a3 Uploaded xuebing parents: diff changeset	94 tmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name
9071e359b9a3 Uploaded xuebing parents: diff changeset	95 tmp_stderr = open( tmp, 'wb' )
9071e359b9a3 Uploaded xuebing parents: diff changeset	96 proc = subprocess.Popen( args=dict_cmd, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() )
9071e359b9a3 Uploaded xuebing parents: diff changeset	97 returncode = proc.wait()
9071e359b9a3 Uploaded xuebing parents: diff changeset	98 tmp_stderr.close()
9071e359b9a3 Uploaded xuebing parents: diff changeset	99 # get stderr, allowing for case where it's very large
9071e359b9a3 Uploaded xuebing parents: diff changeset	100 tmp_stderr = open( tmp, 'rb' )
9071e359b9a3 Uploaded xuebing parents: diff changeset	101 stderr = ''
9071e359b9a3 Uploaded xuebing parents: diff changeset	102 try:
9071e359b9a3 Uploaded xuebing parents: diff changeset	103 while True:
9071e359b9a3 Uploaded xuebing parents: diff changeset	104 stderr += tmp_stderr.read( buffsize )
9071e359b9a3 Uploaded xuebing parents: diff changeset	105 if not stderr or len( stderr ) % buffsize != 0:
9071e359b9a3 Uploaded xuebing parents: diff changeset	106 break
9071e359b9a3 Uploaded xuebing parents: diff changeset	107 except OverflowError:
9071e359b9a3 Uploaded xuebing parents: diff changeset	108 pass
9071e359b9a3 Uploaded xuebing parents: diff changeset	109 tmp_stderr.close()
9071e359b9a3 Uploaded xuebing parents: diff changeset	110 if returncode != 0:
9071e359b9a3 Uploaded xuebing parents: diff changeset	111 raise Exception, stderr
9071e359b9a3 Uploaded xuebing parents: diff changeset	112 except Exception, e:
9071e359b9a3 Uploaded xuebing parents: diff changeset	113 # clean up temp dir
9071e359b9a3 Uploaded xuebing parents: diff changeset	114 if os.path.exists( tmp_dir ):
9071e359b9a3 Uploaded xuebing parents: diff changeset	115 shutil.rmtree( tmp_dir )
9071e359b9a3 Uploaded xuebing parents: diff changeset	116 stop_err( 'Error creating index for custom genome file: %s\n' % str( e ) )
9071e359b9a3 Uploaded xuebing parents: diff changeset	117 except Exception, e:
9071e359b9a3 Uploaded xuebing parents: diff changeset	118 # clean up temp dir
9071e359b9a3 Uploaded xuebing parents: diff changeset	119 if os.path.exists( tmp_dir ):
9071e359b9a3 Uploaded xuebing parents: diff changeset	120 shutil.rmtree( tmp_dir )
9071e359b9a3 Uploaded xuebing parents: diff changeset	121 stop_err( 'Problem handling SRMA index (dict file) for custom genome file: %s\n' % str( e ) )
9071e359b9a3 Uploaded xuebing parents: diff changeset	122 # using built-in dict/index files
9071e359b9a3 Uploaded xuebing parents: diff changeset	123 else:
9071e359b9a3 Uploaded xuebing parents: diff changeset	124 if options.ref:
9071e359b9a3 Uploaded xuebing parents: diff changeset	125 reference_filepath_name = options.ref
9071e359b9a3 Uploaded xuebing parents: diff changeset	126 else:
9071e359b9a3 Uploaded xuebing parents: diff changeset	127 reference_filepath_name = parseRefLoc( options.refLocation, options.refUID )
9071e359b9a3 Uploaded xuebing parents: diff changeset	128 if reference_filepath_name is None:
9071e359b9a3 Uploaded xuebing parents: diff changeset	129 raise ValueError( 'A valid genome reference was not provided.' )
9071e359b9a3 Uploaded xuebing parents: diff changeset	130
9071e359b9a3 Uploaded xuebing parents: diff changeset	131 # set up aligning and generate aligning command options
9071e359b9a3 Uploaded xuebing parents: diff changeset	132 if options.params == 'pre_set':
9071e359b9a3 Uploaded xuebing parents: diff changeset	133 srma_cmds = ''
9071e359b9a3 Uploaded xuebing parents: diff changeset	134 else:
9071e359b9a3 Uploaded xuebing parents: diff changeset	135 if options.useSequenceQualities == 'true':
9071e359b9a3 Uploaded xuebing parents: diff changeset	136 useSequenceQualities = 'true'
9071e359b9a3 Uploaded xuebing parents: diff changeset	137 else:
9071e359b9a3 Uploaded xuebing parents: diff changeset	138 useSequenceQualities = 'false'
9071e359b9a3 Uploaded xuebing parents: diff changeset	139 ranges = 'null'
9071e359b9a3 Uploaded xuebing parents: diff changeset	140 if options.range == 'None':
9071e359b9a3 Uploaded xuebing parents: diff changeset	141 range = 'null'
9071e359b9a3 Uploaded xuebing parents: diff changeset	142 else:
9071e359b9a3 Uploaded xuebing parents: diff changeset	143 range = options.range
9071e359b9a3 Uploaded xuebing parents: diff changeset	144 srma_cmds = "OFFSET=%s MIN_MAPQ=%s MINIMUM_ALLELE_PROBABILITY=%s MINIMUM_ALLELE_COVERAGE=%s RANGES=%s RANGE=%s CORRECT_BASES=%s USE_SEQUENCE_QUALITIES=%s MAX_HEAP_SIZE=%s" % ( options.offset, options.minMappingQuality, options.minAlleleProbability, options.minAlleleCoverage, ranges, range, options.correctBases, options.useSequenceQualities, options.maxHeapSize )
9071e359b9a3 Uploaded xuebing parents: diff changeset	145
9071e359b9a3 Uploaded xuebing parents: diff changeset	146 # perform alignments
9071e359b9a3 Uploaded xuebing parents: diff changeset	147 buffsize = 1048576
9071e359b9a3 Uploaded xuebing parents: diff changeset	148 try:
9071e359b9a3 Uploaded xuebing parents: diff changeset	149 #symlink input bam and index files due to the naming conventions required by srma here
9071e359b9a3 Uploaded xuebing parents: diff changeset	150 input_bam_filename = os.path.join( tmp_dir, '%s.bam' % os.path.split( options.input )[-1] )
9071e359b9a3 Uploaded xuebing parents: diff changeset	151 os.symlink( options.input, input_bam_filename )
9071e359b9a3 Uploaded xuebing parents: diff changeset	152 input_bai_filename = "%s.bai" % os.path.splitext( input_bam_filename )[0]
9071e359b9a3 Uploaded xuebing parents: diff changeset	153 os.symlink( options.inputIndex, input_bai_filename )
9071e359b9a3 Uploaded xuebing parents: diff changeset	154
9071e359b9a3 Uploaded xuebing parents: diff changeset	155 #create a temp output name, ending in .bam due to required naming conventions? unkown if required
9071e359b9a3 Uploaded xuebing parents: diff changeset	156 output_bam_filename = os.path.join( tmp_dir, "%s.bam" % os.path.split( options.output )[-1] )
9071e359b9a3 Uploaded xuebing parents: diff changeset	157 # generate commandline
9071e359b9a3 Uploaded xuebing parents: diff changeset	158 cmd = 'java -jar %s I=%s O=%s R=%s %s' % ( os.path.join( options.jarBin, options.jarFile ), input_bam_filename, output_bam_filename, reference_filepath_name, srma_cmds )
9071e359b9a3 Uploaded xuebing parents: diff changeset	159
9071e359b9a3 Uploaded xuebing parents: diff changeset	160 # need to nest try-except in try-finally to handle 2.4
9071e359b9a3 Uploaded xuebing parents: diff changeset	161 try:
9071e359b9a3 Uploaded xuebing parents: diff changeset	162 try:
9071e359b9a3 Uploaded xuebing parents: diff changeset	163 tmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name
9071e359b9a3 Uploaded xuebing parents: diff changeset	164 tmp_stderr = open( tmp, 'wb' )
9071e359b9a3 Uploaded xuebing parents: diff changeset	165 proc = subprocess.Popen( args=cmd, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() )
9071e359b9a3 Uploaded xuebing parents: diff changeset	166 returncode = proc.wait()
9071e359b9a3 Uploaded xuebing parents: diff changeset	167 tmp_stderr.close()
9071e359b9a3 Uploaded xuebing parents: diff changeset	168 # get stderr, allowing for case where it's very large
9071e359b9a3 Uploaded xuebing parents: diff changeset	169 tmp_stderr = open( tmp, 'rb' )
9071e359b9a3 Uploaded xuebing parents: diff changeset	170 stderr = ''
9071e359b9a3 Uploaded xuebing parents: diff changeset	171 try:
9071e359b9a3 Uploaded xuebing parents: diff changeset	172 while True:
9071e359b9a3 Uploaded xuebing parents: diff changeset	173 stderr += tmp_stderr.read( buffsize )
9071e359b9a3 Uploaded xuebing parents: diff changeset	174 if not stderr or len( stderr ) % buffsize != 0:
9071e359b9a3 Uploaded xuebing parents: diff changeset	175 break
9071e359b9a3 Uploaded xuebing parents: diff changeset	176 except OverflowError:
9071e359b9a3 Uploaded xuebing parents: diff changeset	177 pass
9071e359b9a3 Uploaded xuebing parents: diff changeset	178 tmp_stderr.close()
9071e359b9a3 Uploaded xuebing parents: diff changeset	179 if returncode != 0:
9071e359b9a3 Uploaded xuebing parents: diff changeset	180 raise Exception, stderr
9071e359b9a3 Uploaded xuebing parents: diff changeset	181 except Exception, e:
9071e359b9a3 Uploaded xuebing parents: diff changeset	182 raise Exception, 'Error executing SRMA. ' + str( e )
9071e359b9a3 Uploaded xuebing parents: diff changeset	183 # move file from temp location (with .bam name) to provided path
9071e359b9a3 Uploaded xuebing parents: diff changeset	184 shutil.move( output_bam_filename, options.output )
9071e359b9a3 Uploaded xuebing parents: diff changeset	185 # check that there are results in the output file
9071e359b9a3 Uploaded xuebing parents: diff changeset	186 if os.path.getsize( options.output ) <= 0:
9071e359b9a3 Uploaded xuebing parents: diff changeset	187 raise Exception, 'The output file is empty. You may simply have no matches, or there may be an error with your input file or settings.'
9071e359b9a3 Uploaded xuebing parents: diff changeset	188 except Exception, e:
9071e359b9a3 Uploaded xuebing parents: diff changeset	189 stop_err( 'The re-alignment failed.\n' + str( e ) )
9071e359b9a3 Uploaded xuebing parents: diff changeset	190 finally:
9071e359b9a3 Uploaded xuebing parents: diff changeset	191 # clean up temp dir
9071e359b9a3 Uploaded xuebing parents: diff changeset	192 if os.path.exists( tmp_dir ):
9071e359b9a3 Uploaded xuebing parents: diff changeset	193 shutil.rmtree( tmp_dir )
9071e359b9a3 Uploaded xuebing parents: diff changeset	194
9071e359b9a3 Uploaded xuebing parents: diff changeset	195 if __name__=="__main__": __main__()

Mercurial > repos > xuebing > sharplabtool

annotate tools/sr_mapping/srma_wrapper.py @ 0:9071e359b9a3