annotate novocraft_wrapper.py @ 6:a6848bc30dfb draft default tip

Uploaded
author subazini
date Wed, 17 Dec 2014 10:27:41 -0500
parents c43f9a578e05
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
1 #!/usr/bin/env python
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
2
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
3 import optparse, os, shutil, subprocess, sys, tempfile, fileinput,re
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
4 import time
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
5 def stop_err( msg ):
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
6 sys.stderr.write( "%s\n" % msg )
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
7 sys.exit()
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
8
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
9 def __main__():
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
10 #Parse Command Line
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
11 start = time.time()
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
12 print "start time", start
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
13 parser = optparse.OptionParser()
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
14 # Wrapper options.
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
15 parser.add_option( '-0', '--output', dest='output' )
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
16 parser.add_option( '-1', '--input1', dest='input1', help='The (forward or single-end) reads file in Sanger FASTQ format' )
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
17 parser.add_option( '-2', '--input2', dest='input2', help='The reverse reads file in Sanger FASTQ format' )
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
18 parser.add_option( '-Q', '--Quality', dest='quality', help='' )
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
19 parser.add_option( '-t', '--align', dest='align' )
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
20 parser.add_option( '-g', '--open', dest='open' )
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
21 parser.add_option( '-x', '--extend', dest='extend' )
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
22 parser.add_option( '-n', '--trunc', dest='trunc' )
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
23 parser.add_option( '-r', '--refer', dest='genome_reference' )
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
24 parser.add_option( '-f', '--ref1', dest='genome_index' )
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
25 parser.add_option( '-k', '--kmer', dest='kmer' )
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
26 parser.add_option( '-s', '--step', dest='step' )
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
27 parser.add_option( '-l', '--qual', dest='quality' )
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
28 parser.add_option( '-m', '--repeat', dest='repeat' )
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
29 parser.add_option( '-H', '--hclip', dest='hclip' )
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
30 parser.add_option( '-p', '--pam', dest='pam' )
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
31 parser.add_option( '-d', '--sd', dest='sd' )
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
32 parser.add_option( '-i', '--insert', dest='insert' )
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
33 parser.add_option( '-e', '--settings', dest='settings' )
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
34 (options, args) = parser.parse_args()
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
35 print options.genome_reference
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
36 print options.output
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
37 print options.input1
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
38 print options.input2
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
39
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
40 tmp_index_dir = tempfile.mkdtemp()
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
41 #indexing genome
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
42 if options.genome_reference:
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
43 index_path = os.path.join( tmp_index_dir, '.'.join( os.path.split( options.genome_reference )[1].split( '.' )[:-1] ) )
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
44 try:
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
45 os.link( options.genome_reference, index_path + '.fa' )
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
46 Mtest= os.link( options.genome_reference, index_path + '.fa' )
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
47 print Mtest
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
48 except:
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
49 pass
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
50 cmd_index = 'novoindex %s %s' % ( index_path, options.genome_reference)
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
51 if options.settings == "full":
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
52 opts = ' -k %s -s %s ' %(options.kmer,options.step)
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
53 cmd_index += opts
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
54 print cmd_index
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
55 try:
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
56 tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
57 tmp_stderr = open( tmp, 'wb' )
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
58 proc = subprocess.Popen( args=cmd_index, shell=True, cwd=tmp_index_dir, stderr=tmp_stderr.fileno() )
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
59 pid = proc.pid
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
60 with file('/proc/%s/smaps' % pid) as fp:
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
61 sx1= sum([int(x) for x in re.findall('^Pss:\s+(\d+)',fp.read(),re.M)])
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
62 returncode = proc.wait()
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
63 tmp_stderr.close()
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
64 # get stderr, allowing for case where it's very large
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
65 tmp_stderr = open( tmp, 'rb' )
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
66 stderr = ''
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
67 buffsize = 1048576
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
68 try:
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
69 while True:
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
70 stderr += tmp_stderr.read( buffsize )
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
71 if not stderr or len( stderr ) % buffsize != 0:
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
72 break
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
73 except OverflowError:
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
74 pass
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
75 tmp_stderr.close()
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
76 if returncode != 0:
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
77 raise Exception, stderr
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
78 except Exception, e:
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
79 if os.path.exists( tmp_index_dir ):
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
80 shutil.rmtree( tmp_index_dir )
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
81 stop_err( 'Error indexing reference sequence\n' + str( e ) )
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
82 opts = ''
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
83 #using genome reference from history
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
84 if options.genome_index:
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
85 index_path = options.genome_index
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
86 sx1 = 0
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
87 #Aligning with reference
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
88 if options.input2:
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
89 cmd_index2 = 'novoalign -f %s %s -d %s ' %( options.input1,options.input2,index_path)
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
90 else:
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
91 cmd_index2 = 'novoalign -f %s -d %s ' %( options.input1,index_path)
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
92 # Selected settimgs
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
93 if options.settings == "full":
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
94 opts = ' -t %s -g %s -x %s -n %s -H %s -h %s -i %s %s,%s > %s' %(options.align,options.open,options.extend,options.trunc,options.hclip,options.repeat,options.pam,options.insert,options.sd,options.output)
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
95 else:
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
96 opts = ' > %s' %(options.output)
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
97 cmd_index2 += opts
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
98 print cmd_index2
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
99 try:
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
100 tmp_out = tempfile.NamedTemporaryFile().name
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
101 tmp_stdout = open( tmp_out, 'wb' )
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
102 tmp_err = tempfile.NamedTemporaryFile().name
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
103 tmp_stderr = open( tmp_err, 'wb' )
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
104 proc = subprocess.Popen( args=cmd_index2, shell=True, cwd=".", stdout=tmp_stdout, stderr=tmp_stderr )
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
105 pid = proc.pid
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
106 with file('/proc/%s/smaps' % pid) as fp:
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
107 sx2= sum([int(x) for x in re.findall('^Pss:\s+(\d+)',fp.read(),re.M)])
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
108 sx = sx1 + sx2
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
109 print cmd_index2
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
110 print "memory usage (KiBs)", sx
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
111 returncode = proc.wait()
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
112 tmp_stderr.close()
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
113 # get stderr, allowing for case where it's very large
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
114 tmp_stderr = open( tmp_err, 'rb' )
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
115 stderr = ''
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
116 buffsize = 1048576
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
117 try:
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
118 while True:
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
119 stderr += tmp_stderr.read( buffsize )
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
120 if not stderr or len( stderr ) % buffsize != 0:
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
121 break
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
122 except OverflowError:
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
123 pass
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
124 tmp_stdout.close()
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
125 tmp_stderr.close()
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
126 if returncode != 0:
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
127 raise Exception, stderr
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
128 end = time.time()
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
129 print "end time", end
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
130 elapsed = end - start
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
131 print ("Time elapsed: {0:4f}sec",format(elapsed))
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
132 except Exception, e:
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
133 stop_err( 'Error in Novoalign:\n' + str( e ) )
c43f9a578e05 Uploaded
subazini
parents:
diff changeset
134 if __name__=="__main__": __main__()