# HG changeset patch # User subazini # Date 1418829395 18000 # Node ID 55b3da30e4e51f5ae1cfc5f2664105da7c05b44b # Parent abe73a62b59a60b9e733f0a8b4dcff8a936a866f Uploaded diff -r abe73a62b59a -r 55b3da30e4e5 stampy_wrapper.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/stampy_wrapper.py Wed Dec 17 10:16:35 2014 -0500 @@ -0,0 +1,115 @@ +#!/usr/bin/env python + +import optparse, os, shutil, subprocess, sys, tempfile, fileinput, commands +import time +import re +import psutil +def stop_err( msg ): + sys.stderr.write( "%s\n" % msg ) + sys.exit() + +def __main__(): + #Parse Command Line + start = time.time() + #print "start time", start + parser = optparse.OptionParser() + parser.add_option( '-s','--species',dest='species') + parser.add_option( '-a', '--assembly', dest='assembly' ) + parser.add_option( '-G', '--genome_index', dest='genome_index' ) + parser.add_option( '-e', '--genome', dest='genome') + parser.add_option( '-D', '--indexes-path', dest='index_path', help='Indexes directory; location of .stidx and .sthash files.' ) + + # Wrapper options. + parser.add_option( '-O', '--output', dest='output' ) + parser.add_option( '-1', '--input1', dest='input1', help='The (forward or single-end) reads file in Sanger FASTQ format' ) + parser.add_option( '-2', '--input2', dest='input2', help='The reverse reads file in Sanger FASTQ format' ) + parser.add_option( '-g', '--use_genome', dest='use_genome' ) + parser.add_option( '-H', '--hashbuild', dest='build_hash' ) + parser.add_option( '-M', '--map', dest='map', action="store_true" ) + + # Read group options. + parser.add_option( '-d', '--sd', dest='sd' ) + parser.add_option( '-i', '--insert', dest='insert' ) + parser.add_option( '-r', '--subrate', dest='subrate' ) + + parser.add_option( '-t', '--settings', dest='settings' ) + + + (options, args) = parser.parse_args() + + # Creat genome index if necessary. + tmp_index_dir = tempfile.mkdtemp() + + # Genome index from history + if options.genome_index: + index_path = os.path.join( tmp_index_dir, '.'.join( os.path.split( options.genome_index )[1].split( '.' )[:-1] ) ) + try: + os.link( options.genome_index, index_path + '.fa' ) + #print options.genome_index + except: + pass + # Indexing genome + cmd_index = 'stampy.py --species=%s --assembly=%s -G %s %s' %(options.species,options.assembly,options.genome_index,options.genome) + #print cmd_index + #print options.genome + + try: + tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name + tmp_stderr = open( tmp, 'wb' ) + proc=subprocess.Popen(cmd_index,shell=True) + returncode = proc.wait() + tmp_stderr.close() + tmp_stderr = open( tmp, 'rb' ) + stderr = '' + buffsize = 1048576 + try: + while True: + stderr += tmp_stderr.read( buffsize ) + if not stderr or len( stderr ) % buffsize != 0: + break + except OverflowError: + pass + tmp_stderr.close() + + except Exception, e: + if os.path.exists( tmp_index_dir ): + shutil.rmtree( tmp_index_dir ) + stop_err( 'Error indexing reference sequence\n' + str( e ) ) + else: + index_path = options.index_path + try: + tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir).name + tmp_stderr = open( tmp, 'wb' ) + # Hashing genome + cmd_index2 = 'stampy.py -g %s -H %s > %s' %(options.genome_index,options.genome_index,options.output) + #print cmd_index2 + proc=subprocess.Popen(cmd_index2, shell=True) + returncode = proc.wait() + #print proc.returncode + + except Exception, e: + if os.path.exists( tmp_index_dir ): + shutil.rmtree( tmp_index_dir ) + cmd_index3 = 'stampy.py -g %s -h %s' %(options.genome_index,options.genome_index) + if options.settings == "full": + opts = ' --insertsize=%s --insertsd=%s --substitutionrate=%s -M %s %s > %s' %(options.insert,options.sd,options.subrate,options.input1,options.input2,options.output) + else: + opts = '-M %s %s > %s' %(options.input1,options.input2,options.output) + cmd_index3 += opts + try: + tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name + tmp_stderr = open( tmp, 'wb' ) + #print cmd_index3 + proc=subprocess.call(cmd_index3, shell=True) + except Exception, e: + if os.path.exists( tmp_index_dir ): + shutil.rmtree( tmp_index_dir ) + + end = time.time() + #print "end time", end + elapsed = end - start + #print ("Time elapsed: {0:4f}sec",format(elapsed)) + +if __name__=="__main__": __main__() + +