Mercurial > repos > subazini > ngsaligners
view novocraft_wrapper.py @ 6:a6848bc30dfb draft default tip
Uploaded
author | subazini |
---|---|
date | Wed, 17 Dec 2014 10:27:41 -0500 |
parents | c43f9a578e05 |
children |
line wrap: on
line source
#!/usr/bin/env python import optparse, os, shutil, subprocess, sys, tempfile, fileinput,re import time def stop_err( msg ): sys.stderr.write( "%s\n" % msg ) sys.exit() def __main__(): #Parse Command Line start = time.time() print "start time", start parser = optparse.OptionParser() # Wrapper options. parser.add_option( '-0', '--output', dest='output' ) parser.add_option( '-1', '--input1', dest='input1', help='The (forward or single-end) reads file in Sanger FASTQ format' ) parser.add_option( '-2', '--input2', dest='input2', help='The reverse reads file in Sanger FASTQ format' ) parser.add_option( '-Q', '--Quality', dest='quality', help='' ) parser.add_option( '-t', '--align', dest='align' ) parser.add_option( '-g', '--open', dest='open' ) parser.add_option( '-x', '--extend', dest='extend' ) parser.add_option( '-n', '--trunc', dest='trunc' ) parser.add_option( '-r', '--refer', dest='genome_reference' ) parser.add_option( '-f', '--ref1', dest='genome_index' ) parser.add_option( '-k', '--kmer', dest='kmer' ) parser.add_option( '-s', '--step', dest='step' ) parser.add_option( '-l', '--qual', dest='quality' ) parser.add_option( '-m', '--repeat', dest='repeat' ) parser.add_option( '-H', '--hclip', dest='hclip' ) parser.add_option( '-p', '--pam', dest='pam' ) parser.add_option( '-d', '--sd', dest='sd' ) parser.add_option( '-i', '--insert', dest='insert' ) parser.add_option( '-e', '--settings', dest='settings' ) (options, args) = parser.parse_args() print options.genome_reference print options.output print options.input1 print options.input2 tmp_index_dir = tempfile.mkdtemp() #indexing genome if options.genome_reference: index_path = os.path.join( tmp_index_dir, '.'.join( os.path.split( options.genome_reference )[1].split( '.' )[:-1] ) ) try: os.link( options.genome_reference, index_path + '.fa' ) Mtest= os.link( options.genome_reference, index_path + '.fa' ) print Mtest except: pass cmd_index = 'novoindex %s %s' % ( index_path, options.genome_reference) if options.settings == "full": opts = ' -k %s -s %s ' %(options.kmer,options.step) cmd_index += opts print cmd_index try: tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name tmp_stderr = open( tmp, 'wb' ) proc = subprocess.Popen( args=cmd_index, shell=True, cwd=tmp_index_dir, stderr=tmp_stderr.fileno() ) pid = proc.pid with file('/proc/%s/smaps' % pid) as fp: sx1= sum([int(x) for x in re.findall('^Pss:\s+(\d+)',fp.read(),re.M)]) returncode = proc.wait() tmp_stderr.close() # get stderr, allowing for case where it's very large tmp_stderr = open( tmp, 'rb' ) stderr = '' buffsize = 1048576 try: while True: stderr += tmp_stderr.read( buffsize ) if not stderr or len( stderr ) % buffsize != 0: break except OverflowError: pass tmp_stderr.close() if returncode != 0: raise Exception, stderr except Exception, e: if os.path.exists( tmp_index_dir ): shutil.rmtree( tmp_index_dir ) stop_err( 'Error indexing reference sequence\n' + str( e ) ) opts = '' #using genome reference from history if options.genome_index: index_path = options.genome_index sx1 = 0 #Aligning with reference if options.input2: cmd_index2 = 'novoalign -f %s %s -d %s ' %( options.input1,options.input2,index_path) else: cmd_index2 = 'novoalign -f %s -d %s ' %( options.input1,index_path) # Selected settimgs if options.settings == "full": opts = ' -t %s -g %s -x %s -n %s -H %s -h %s -i %s %s,%s > %s' %(options.align,options.open,options.extend,options.trunc,options.hclip,options.repeat,options.pam,options.insert,options.sd,options.output) else: opts = ' > %s' %(options.output) cmd_index2 += opts print cmd_index2 try: tmp_out = tempfile.NamedTemporaryFile().name tmp_stdout = open( tmp_out, 'wb' ) tmp_err = tempfile.NamedTemporaryFile().name tmp_stderr = open( tmp_err, 'wb' ) proc = subprocess.Popen( args=cmd_index2, shell=True, cwd=".", stdout=tmp_stdout, stderr=tmp_stderr ) pid = proc.pid with file('/proc/%s/smaps' % pid) as fp: sx2= sum([int(x) for x in re.findall('^Pss:\s+(\d+)',fp.read(),re.M)]) sx = sx1 + sx2 print cmd_index2 print "memory usage (KiBs)", sx returncode = proc.wait() tmp_stderr.close() # get stderr, allowing for case where it's very large tmp_stderr = open( tmp_err, 'rb' ) stderr = '' buffsize = 1048576 try: while True: stderr += tmp_stderr.read( buffsize ) if not stderr or len( stderr ) % buffsize != 0: break except OverflowError: pass tmp_stdout.close() tmp_stderr.close() if returncode != 0: raise Exception, stderr end = time.time() print "end time", end elapsed = end - start print ("Time elapsed: {0:4f}sec",format(elapsed)) except Exception, e: stop_err( 'Error in Novoalign:\n' + str( e ) ) if __name__=="__main__": __main__()