3
|
1 #!/usr/bin/env python
|
|
2
|
|
3 import optparse, os, shutil, subprocess, sys, tempfile, fileinput,re
|
|
4 import time
|
|
5 def stop_err( msg ):
|
|
6 sys.stderr.write( "%s\n" % msg )
|
|
7 sys.exit()
|
|
8
|
|
9 def __main__():
|
|
10 #Parse Command Line
|
|
11 start = time.time()
|
|
12 print "start time", start
|
|
13 parser = optparse.OptionParser()
|
|
14 # Wrapper options.
|
|
15 parser.add_option( '-0', '--output', dest='output' )
|
|
16 parser.add_option( '-1', '--input1', dest='input1', help='The (forward or single-end) reads file in Sanger FASTQ format' )
|
|
17 parser.add_option( '-2', '--input2', dest='input2', help='The reverse reads file in Sanger FASTQ format' )
|
|
18 parser.add_option( '-Q', '--Quality', dest='quality', help='' )
|
|
19 parser.add_option( '-t', '--align', dest='align' )
|
|
20 parser.add_option( '-g', '--open', dest='open' )
|
|
21 parser.add_option( '-x', '--extend', dest='extend' )
|
|
22 parser.add_option( '-n', '--trunc', dest='trunc' )
|
|
23 parser.add_option( '-r', '--refer', dest='genome_reference' )
|
|
24 parser.add_option( '-f', '--ref1', dest='genome_index' )
|
|
25 parser.add_option( '-k', '--kmer', dest='kmer' )
|
|
26 parser.add_option( '-s', '--step', dest='step' )
|
|
27 parser.add_option( '-l', '--qual', dest='quality' )
|
|
28 parser.add_option( '-m', '--repeat', dest='repeat' )
|
|
29 parser.add_option( '-H', '--hclip', dest='hclip' )
|
|
30 parser.add_option( '-p', '--pam', dest='pam' )
|
|
31 parser.add_option( '-d', '--sd', dest='sd' )
|
|
32 parser.add_option( '-i', '--insert', dest='insert' )
|
|
33 parser.add_option( '-e', '--settings', dest='settings' )
|
|
34 (options, args) = parser.parse_args()
|
|
35 print options.genome_reference
|
|
36 print options.output
|
|
37 print options.input1
|
|
38 print options.input2
|
|
39
|
|
40 tmp_index_dir = tempfile.mkdtemp()
|
|
41 #indexing genome
|
|
42 if options.genome_reference:
|
|
43 index_path = os.path.join( tmp_index_dir, '.'.join( os.path.split( options.genome_reference )[1].split( '.' )[:-1] ) )
|
|
44 try:
|
|
45 os.link( options.genome_reference, index_path + '.fa' )
|
|
46 Mtest= os.link( options.genome_reference, index_path + '.fa' )
|
|
47 print Mtest
|
|
48 except:
|
|
49 pass
|
|
50 cmd_index = 'novoindex %s %s' % ( index_path, options.genome_reference)
|
|
51 if options.settings == "full":
|
|
52 opts = ' -k %s -s %s ' %(options.kmer,options.step)
|
|
53 cmd_index += opts
|
|
54 print cmd_index
|
|
55 try:
|
|
56 tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name
|
|
57 tmp_stderr = open( tmp, 'wb' )
|
|
58 proc = subprocess.Popen( args=cmd_index, shell=True, cwd=tmp_index_dir, stderr=tmp_stderr.fileno() )
|
|
59 pid = proc.pid
|
|
60 with file('/proc/%s/smaps' % pid) as fp:
|
|
61 sx1= sum([int(x) for x in re.findall('^Pss:\s+(\d+)',fp.read(),re.M)])
|
|
62 returncode = proc.wait()
|
|
63 tmp_stderr.close()
|
|
64 # get stderr, allowing for case where it's very large
|
|
65 tmp_stderr = open( tmp, 'rb' )
|
|
66 stderr = ''
|
|
67 buffsize = 1048576
|
|
68 try:
|
|
69 while True:
|
|
70 stderr += tmp_stderr.read( buffsize )
|
|
71 if not stderr or len( stderr ) % buffsize != 0:
|
|
72 break
|
|
73 except OverflowError:
|
|
74 pass
|
|
75 tmp_stderr.close()
|
|
76 if returncode != 0:
|
|
77 raise Exception, stderr
|
|
78 except Exception, e:
|
|
79 if os.path.exists( tmp_index_dir ):
|
|
80 shutil.rmtree( tmp_index_dir )
|
|
81 stop_err( 'Error indexing reference sequence\n' + str( e ) )
|
|
82 opts = ''
|
|
83 #using genome reference from history
|
|
84 if options.genome_index:
|
|
85 index_path = options.genome_index
|
|
86 sx1 = 0
|
|
87 #Aligning with reference
|
|
88 if options.input2:
|
|
89 cmd_index2 = 'novoalign -f %s %s -d %s ' %( options.input1,options.input2,index_path)
|
|
90 else:
|
|
91 cmd_index2 = 'novoalign -f %s -d %s ' %( options.input1,index_path)
|
|
92 # Selected settimgs
|
|
93 if options.settings == "full":
|
|
94 opts = ' -t %s -g %s -x %s -n %s -H %s -h %s -i %s %s,%s > %s' %(options.align,options.open,options.extend,options.trunc,options.hclip,options.repeat,options.pam,options.insert,options.sd,options.output)
|
|
95 else:
|
|
96 opts = ' > %s' %(options.output)
|
|
97 cmd_index2 += opts
|
|
98 print cmd_index2
|
|
99 try:
|
|
100 tmp_out = tempfile.NamedTemporaryFile().name
|
|
101 tmp_stdout = open( tmp_out, 'wb' )
|
|
102 tmp_err = tempfile.NamedTemporaryFile().name
|
|
103 tmp_stderr = open( tmp_err, 'wb' )
|
|
104 proc = subprocess.Popen( args=cmd_index2, shell=True, cwd=".", stdout=tmp_stdout, stderr=tmp_stderr )
|
|
105 pid = proc.pid
|
|
106 with file('/proc/%s/smaps' % pid) as fp:
|
|
107 sx2= sum([int(x) for x in re.findall('^Pss:\s+(\d+)',fp.read(),re.M)])
|
|
108 sx = sx1 + sx2
|
|
109 print cmd_index2
|
|
110 print "memory usage (KiBs)", sx
|
|
111 returncode = proc.wait()
|
|
112 tmp_stderr.close()
|
|
113 # get stderr, allowing for case where it's very large
|
|
114 tmp_stderr = open( tmp_err, 'rb' )
|
|
115 stderr = ''
|
|
116 buffsize = 1048576
|
|
117 try:
|
|
118 while True:
|
|
119 stderr += tmp_stderr.read( buffsize )
|
|
120 if not stderr or len( stderr ) % buffsize != 0:
|
|
121 break
|
|
122 except OverflowError:
|
|
123 pass
|
|
124 tmp_stdout.close()
|
|
125 tmp_stderr.close()
|
|
126 if returncode != 0:
|
|
127 raise Exception, stderr
|
|
128 end = time.time()
|
|
129 print "end time", end
|
|
130 elapsed = end - start
|
|
131 print ("Time elapsed: {0:4f}sec",format(elapsed))
|
|
132 except Exception, e:
|
|
133 stop_err( 'Error in Novoalign:\n' + str( e ) )
|
|
134 if __name__=="__main__": __main__()
|