# HG changeset patch # User subazini # Date 1418829439 18000 # Node ID c43f9a578e0505764a875ed8d7b046ed1df99985 # Parent 55b3da30e4e51f5ae1cfc5f2664105da7c05b44b Uploaded diff -r 55b3da30e4e5 -r c43f9a578e05 novocraft_wrapper.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/novocraft_wrapper.py Wed Dec 17 10:17:19 2014 -0500 @@ -0,0 +1,134 @@ +#!/usr/bin/env python + +import optparse, os, shutil, subprocess, sys, tempfile, fileinput,re +import time +def stop_err( msg ): + sys.stderr.write( "%s\n" % msg ) + sys.exit() + +def __main__(): + #Parse Command Line + start = time.time() + print "start time", start + parser = optparse.OptionParser() + # Wrapper options. + parser.add_option( '-0', '--output', dest='output' ) + parser.add_option( '-1', '--input1', dest='input1', help='The (forward or single-end) reads file in Sanger FASTQ format' ) + parser.add_option( '-2', '--input2', dest='input2', help='The reverse reads file in Sanger FASTQ format' ) + parser.add_option( '-Q', '--Quality', dest='quality', help='' ) + parser.add_option( '-t', '--align', dest='align' ) + parser.add_option( '-g', '--open', dest='open' ) + parser.add_option( '-x', '--extend', dest='extend' ) + parser.add_option( '-n', '--trunc', dest='trunc' ) + parser.add_option( '-r', '--refer', dest='genome_reference' ) + parser.add_option( '-f', '--ref1', dest='genome_index' ) + parser.add_option( '-k', '--kmer', dest='kmer' ) + parser.add_option( '-s', '--step', dest='step' ) + parser.add_option( '-l', '--qual', dest='quality' ) + parser.add_option( '-m', '--repeat', dest='repeat' ) + parser.add_option( '-H', '--hclip', dest='hclip' ) + parser.add_option( '-p', '--pam', dest='pam' ) + parser.add_option( '-d', '--sd', dest='sd' ) + parser.add_option( '-i', '--insert', dest='insert' ) + parser.add_option( '-e', '--settings', dest='settings' ) + (options, args) = parser.parse_args() + print options.genome_reference + print options.output + print options.input1 + print options.input2 + + tmp_index_dir = tempfile.mkdtemp() + #indexing genome + if options.genome_reference: + index_path = os.path.join( tmp_index_dir, '.'.join( os.path.split( options.genome_reference )[1].split( '.' )[:-1] ) ) + try: + os.link( options.genome_reference, index_path + '.fa' ) + Mtest= os.link( options.genome_reference, index_path + '.fa' ) + print Mtest + except: + pass + cmd_index = 'novoindex %s %s' % ( index_path, options.genome_reference) + if options.settings == "full": + opts = ' -k %s -s %s ' %(options.kmer,options.step) + cmd_index += opts + print cmd_index + try: + tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name + tmp_stderr = open( tmp, 'wb' ) + proc = subprocess.Popen( args=cmd_index, shell=True, cwd=tmp_index_dir, stderr=tmp_stderr.fileno() ) + pid = proc.pid + with file('/proc/%s/smaps' % pid) as fp: + sx1= sum([int(x) for x in re.findall('^Pss:\s+(\d+)',fp.read(),re.M)]) + returncode = proc.wait() + tmp_stderr.close() + # get stderr, allowing for case where it's very large + tmp_stderr = open( tmp, 'rb' ) + stderr = '' + buffsize = 1048576 + try: + while True: + stderr += tmp_stderr.read( buffsize ) + if not stderr or len( stderr ) % buffsize != 0: + break + except OverflowError: + pass + tmp_stderr.close() + if returncode != 0: + raise Exception, stderr + except Exception, e: + if os.path.exists( tmp_index_dir ): + shutil.rmtree( tmp_index_dir ) + stop_err( 'Error indexing reference sequence\n' + str( e ) ) + opts = '' + #using genome reference from history + if options.genome_index: + index_path = options.genome_index + sx1 = 0 + #Aligning with reference + if options.input2: + cmd_index2 = 'novoalign -f %s %s -d %s ' %( options.input1,options.input2,index_path) + else: + cmd_index2 = 'novoalign -f %s -d %s ' %( options.input1,index_path) + # Selected settimgs + if options.settings == "full": + opts = ' -t %s -g %s -x %s -n %s -H %s -h %s -i %s %s,%s > %s' %(options.align,options.open,options.extend,options.trunc,options.hclip,options.repeat,options.pam,options.insert,options.sd,options.output) + else: + opts = ' > %s' %(options.output) + cmd_index2 += opts + print cmd_index2 + try: + tmp_out = tempfile.NamedTemporaryFile().name + tmp_stdout = open( tmp_out, 'wb' ) + tmp_err = tempfile.NamedTemporaryFile().name + tmp_stderr = open( tmp_err, 'wb' ) + proc = subprocess.Popen( args=cmd_index2, shell=True, cwd=".", stdout=tmp_stdout, stderr=tmp_stderr ) + pid = proc.pid + with file('/proc/%s/smaps' % pid) as fp: + sx2= sum([int(x) for x in re.findall('^Pss:\s+(\d+)',fp.read(),re.M)]) + sx = sx1 + sx2 + print cmd_index2 + print "memory usage (KiBs)", sx + returncode = proc.wait() + tmp_stderr.close() + # get stderr, allowing for case where it's very large + tmp_stderr = open( tmp_err, 'rb' ) + stderr = '' + buffsize = 1048576 + try: + while True: + stderr += tmp_stderr.read( buffsize ) + if not stderr or len( stderr ) % buffsize != 0: + break + except OverflowError: + pass + tmp_stdout.close() + tmp_stderr.close() + if returncode != 0: + raise Exception, stderr + end = time.time() + print "end time", end + elapsed = end - start + print ("Time elapsed: {0:4f}sec",format(elapsed)) + except Exception, e: + stop_err( 'Error in Novoalign:\n' + str( e ) ) +if __name__=="__main__": __main__()