Mercurial > repos > subazini > ngsaligners
comparison novocraft_wrapper.py @ 3:c43f9a578e05 draft
Uploaded
author | subazini |
---|---|
date | Wed, 17 Dec 2014 10:17:19 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
2:55b3da30e4e5 | 3:c43f9a578e05 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 import optparse, os, shutil, subprocess, sys, tempfile, fileinput,re | |
4 import time | |
5 def stop_err( msg ): | |
6 sys.stderr.write( "%s\n" % msg ) | |
7 sys.exit() | |
8 | |
9 def __main__(): | |
10 #Parse Command Line | |
11 start = time.time() | |
12 print "start time", start | |
13 parser = optparse.OptionParser() | |
14 # Wrapper options. | |
15 parser.add_option( '-0', '--output', dest='output' ) | |
16 parser.add_option( '-1', '--input1', dest='input1', help='The (forward or single-end) reads file in Sanger FASTQ format' ) | |
17 parser.add_option( '-2', '--input2', dest='input2', help='The reverse reads file in Sanger FASTQ format' ) | |
18 parser.add_option( '-Q', '--Quality', dest='quality', help='' ) | |
19 parser.add_option( '-t', '--align', dest='align' ) | |
20 parser.add_option( '-g', '--open', dest='open' ) | |
21 parser.add_option( '-x', '--extend', dest='extend' ) | |
22 parser.add_option( '-n', '--trunc', dest='trunc' ) | |
23 parser.add_option( '-r', '--refer', dest='genome_reference' ) | |
24 parser.add_option( '-f', '--ref1', dest='genome_index' ) | |
25 parser.add_option( '-k', '--kmer', dest='kmer' ) | |
26 parser.add_option( '-s', '--step', dest='step' ) | |
27 parser.add_option( '-l', '--qual', dest='quality' ) | |
28 parser.add_option( '-m', '--repeat', dest='repeat' ) | |
29 parser.add_option( '-H', '--hclip', dest='hclip' ) | |
30 parser.add_option( '-p', '--pam', dest='pam' ) | |
31 parser.add_option( '-d', '--sd', dest='sd' ) | |
32 parser.add_option( '-i', '--insert', dest='insert' ) | |
33 parser.add_option( '-e', '--settings', dest='settings' ) | |
34 (options, args) = parser.parse_args() | |
35 print options.genome_reference | |
36 print options.output | |
37 print options.input1 | |
38 print options.input2 | |
39 | |
40 tmp_index_dir = tempfile.mkdtemp() | |
41 #indexing genome | |
42 if options.genome_reference: | |
43 index_path = os.path.join( tmp_index_dir, '.'.join( os.path.split( options.genome_reference )[1].split( '.' )[:-1] ) ) | |
44 try: | |
45 os.link( options.genome_reference, index_path + '.fa' ) | |
46 Mtest= os.link( options.genome_reference, index_path + '.fa' ) | |
47 print Mtest | |
48 except: | |
49 pass | |
50 cmd_index = 'novoindex %s %s' % ( index_path, options.genome_reference) | |
51 if options.settings == "full": | |
52 opts = ' -k %s -s %s ' %(options.kmer,options.step) | |
53 cmd_index += opts | |
54 print cmd_index | |
55 try: | |
56 tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name | |
57 tmp_stderr = open( tmp, 'wb' ) | |
58 proc = subprocess.Popen( args=cmd_index, shell=True, cwd=tmp_index_dir, stderr=tmp_stderr.fileno() ) | |
59 pid = proc.pid | |
60 with file('/proc/%s/smaps' % pid) as fp: | |
61 sx1= sum([int(x) for x in re.findall('^Pss:\s+(\d+)',fp.read(),re.M)]) | |
62 returncode = proc.wait() | |
63 tmp_stderr.close() | |
64 # get stderr, allowing for case where it's very large | |
65 tmp_stderr = open( tmp, 'rb' ) | |
66 stderr = '' | |
67 buffsize = 1048576 | |
68 try: | |
69 while True: | |
70 stderr += tmp_stderr.read( buffsize ) | |
71 if not stderr or len( stderr ) % buffsize != 0: | |
72 break | |
73 except OverflowError: | |
74 pass | |
75 tmp_stderr.close() | |
76 if returncode != 0: | |
77 raise Exception, stderr | |
78 except Exception, e: | |
79 if os.path.exists( tmp_index_dir ): | |
80 shutil.rmtree( tmp_index_dir ) | |
81 stop_err( 'Error indexing reference sequence\n' + str( e ) ) | |
82 opts = '' | |
83 #using genome reference from history | |
84 if options.genome_index: | |
85 index_path = options.genome_index | |
86 sx1 = 0 | |
87 #Aligning with reference | |
88 if options.input2: | |
89 cmd_index2 = 'novoalign -f %s %s -d %s ' %( options.input1,options.input2,index_path) | |
90 else: | |
91 cmd_index2 = 'novoalign -f %s -d %s ' %( options.input1,index_path) | |
92 # Selected settimgs | |
93 if options.settings == "full": | |
94 opts = ' -t %s -g %s -x %s -n %s -H %s -h %s -i %s %s,%s > %s' %(options.align,options.open,options.extend,options.trunc,options.hclip,options.repeat,options.pam,options.insert,options.sd,options.output) | |
95 else: | |
96 opts = ' > %s' %(options.output) | |
97 cmd_index2 += opts | |
98 print cmd_index2 | |
99 try: | |
100 tmp_out = tempfile.NamedTemporaryFile().name | |
101 tmp_stdout = open( tmp_out, 'wb' ) | |
102 tmp_err = tempfile.NamedTemporaryFile().name | |
103 tmp_stderr = open( tmp_err, 'wb' ) | |
104 proc = subprocess.Popen( args=cmd_index2, shell=True, cwd=".", stdout=tmp_stdout, stderr=tmp_stderr ) | |
105 pid = proc.pid | |
106 with file('/proc/%s/smaps' % pid) as fp: | |
107 sx2= sum([int(x) for x in re.findall('^Pss:\s+(\d+)',fp.read(),re.M)]) | |
108 sx = sx1 + sx2 | |
109 print cmd_index2 | |
110 print "memory usage (KiBs)", sx | |
111 returncode = proc.wait() | |
112 tmp_stderr.close() | |
113 # get stderr, allowing for case where it's very large | |
114 tmp_stderr = open( tmp_err, 'rb' ) | |
115 stderr = '' | |
116 buffsize = 1048576 | |
117 try: | |
118 while True: | |
119 stderr += tmp_stderr.read( buffsize ) | |
120 if not stderr or len( stderr ) % buffsize != 0: | |
121 break | |
122 except OverflowError: | |
123 pass | |
124 tmp_stdout.close() | |
125 tmp_stderr.close() | |
126 if returncode != 0: | |
127 raise Exception, stderr | |
128 end = time.time() | |
129 print "end time", end | |
130 elapsed = end - start | |
131 print ("Time elapsed: {0:4f}sec",format(elapsed)) | |
132 except Exception, e: | |
133 stop_err( 'Error in Novoalign:\n' + str( e ) ) | |
134 if __name__=="__main__": __main__() |