changeset 3:c43f9a578e05 draft

Uploaded
author subazini
date Wed, 17 Dec 2014 10:17:19 -0500
parents 55b3da30e4e5
children c46b941c2281
files novocraft_wrapper.py
diffstat 1 files changed, 134 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/novocraft_wrapper.py	Wed Dec 17 10:17:19 2014 -0500
@@ -0,0 +1,134 @@
+#!/usr/bin/env python
+
+import optparse, os, shutil, subprocess, sys, tempfile, fileinput,re
+import time
+def stop_err( msg ):
+    sys.stderr.write( "%s\n" % msg )
+    sys.exit()
+
+def __main__():
+    #Parse Command Line
+    start = time.time()
+    print "start time", start
+    parser = optparse.OptionParser()
+    # Wrapper options.
+    parser.add_option( '-0', '--output', dest='output' )
+    parser.add_option( '-1', '--input1', dest='input1', help='The (forward or single-end) reads file in Sanger FASTQ format' )
+    parser.add_option( '-2', '--input2', dest='input2', help='The reverse reads file in Sanger FASTQ format' )
+    parser.add_option( '-Q', '--Quality', dest='quality', help='' )
+    parser.add_option( '-t', '--align', dest='align' )
+    parser.add_option( '-g', '--open', dest='open' )
+    parser.add_option( '-x', '--extend', dest='extend' )
+    parser.add_option( '-n', '--trunc', dest='trunc' )
+    parser.add_option( '-r', '--refer', dest='genome_reference' )
+    parser.add_option( '-f', '--ref1', dest='genome_index' )
+    parser.add_option( '-k', '--kmer', dest='kmer' )
+    parser.add_option( '-s', '--step', dest='step' )
+    parser.add_option( '-l', '--qual', dest='quality' )
+    parser.add_option( '-m', '--repeat', dest='repeat' )
+    parser.add_option( '-H', '--hclip', dest='hclip' )
+    parser.add_option( '-p', '--pam', dest='pam' )
+    parser.add_option( '-d', '--sd', dest='sd' )
+    parser.add_option( '-i', '--insert', dest='insert' )
+    parser.add_option( '-e', '--settings', dest='settings' )
+    (options, args) = parser.parse_args()
+    print options.genome_reference    
+    print options.output
+    print options.input1
+    print options.input2
+
+    tmp_index_dir = tempfile.mkdtemp()
+    #indexing genome
+    if options.genome_reference:
+        index_path = os.path.join( tmp_index_dir, '.'.join( os.path.split( options.genome_reference )[1].split( '.' )[:-1] ) )
+        try:
+            os.link( options.genome_reference, index_path + '.fa' )
+            Mtest= os.link( options.genome_reference, index_path + '.fa' )
+            print Mtest
+        except:
+            pass
+        cmd_index = 'novoindex %s %s' % ( index_path, options.genome_reference)
+       	if options.settings == "full":
+            opts = ' -k %s -s %s ' %(options.kmer,options.step)
+            cmd_index +=  opts   
+        print cmd_index
+        try:
+            tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name
+            tmp_stderr = open( tmp, 'wb' )
+            proc = subprocess.Popen( args=cmd_index, shell=True, cwd=tmp_index_dir, stderr=tmp_stderr.fileno() )
+            pid = proc.pid
+            with file('/proc/%s/smaps' % pid) as fp:
+       		sx1= sum([int(x) for x in re.findall('^Pss:\s+(\d+)',fp.read(),re.M)])
+            returncode = proc.wait()
+            tmp_stderr.close()
+            # get stderr, allowing for case where it's very large
+            tmp_stderr = open( tmp, 'rb' )
+            stderr = ''
+            buffsize = 1048576
+            try:
+                while True:
+                    stderr += tmp_stderr.read( buffsize )
+                    if not stderr or len( stderr ) % buffsize != 0:
+                        break
+            except OverflowError:
+                pass
+            tmp_stderr.close()
+            if returncode != 0:
+                raise Exception, stderr
+        except Exception, e:
+            if os.path.exists( tmp_index_dir ):
+                shutil.rmtree( tmp_index_dir )
+            stop_err( 'Error indexing reference sequence\n' + str( e ) )
+    opts = ''
+    #using genome reference from history
+    if options.genome_index:
+        index_path = options.genome_index
+        sx1 = 0
+    #Aligning with reference
+    if options.input2:
+    	cmd_index2 = 'novoalign -f %s %s -d %s '  %( options.input1,options.input2,index_path) 
+    else:
+	cmd_index2 = 'novoalign -f %s -d %s '  %( options.input1,index_path) 
+    # Selected settimgs
+    if options.settings == "full":
+    	opts = ' -t %s -g %s -x %s -n %s -H %s -h %s -i %s %s,%s > %s' %(options.align,options.open,options.extend,options.trunc,options.hclip,options.repeat,options.pam,options.insert,options.sd,options.output)
+    else:
+    	opts = '  > %s' %(options.output)
+    cmd_index2 +=  opts
+    print cmd_index2
+    try:
+        tmp_out = tempfile.NamedTemporaryFile().name
+        tmp_stdout = open( tmp_out, 'wb' )
+        tmp_err = tempfile.NamedTemporaryFile().name
+        tmp_stderr = open( tmp_err, 'wb' )
+        proc = subprocess.Popen( args=cmd_index2, shell=True, cwd=".", stdout=tmp_stdout, stderr=tmp_stderr )
+        pid = proc.pid
+        with file('/proc/%s/smaps' % pid) as fp:
+       		sx2= sum([int(x) for x in re.findall('^Pss:\s+(\d+)',fp.read(),re.M)])
+       	sx = sx1 + sx2
+       	print cmd_index2
+        print "memory usage (KiBs)", sx
+        returncode = proc.wait()
+        tmp_stderr.close()
+        # get stderr, allowing for case where it's very large
+        tmp_stderr = open( tmp_err, 'rb' )
+        stderr = ''
+        buffsize = 1048576
+        try:
+            while True:
+                stderr += tmp_stderr.read( buffsize )
+                if not stderr or len( stderr ) % buffsize != 0:
+                    break
+        except OverflowError:
+            pass
+        tmp_stdout.close()
+        tmp_stderr.close()
+        if returncode != 0:
+            raise Exception, stderr
+        end =  time.time() 
+        print "end time", end
+        elapsed = end - start  
+        print ("Time elapsed: {0:4f}sec",format(elapsed)) 
+    except Exception, e:
+        stop_err( 'Error in Novoalign:\n' + str( e ) ) 
+if __name__=="__main__": __main__()