annotate stampy_wrapper.py @ 6:a6848bc30dfb draft default tip

Uploaded
author subazini
date Wed, 17 Dec 2014 10:27:41 -0500
parents 55b3da30e4e5
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
1 #!/usr/bin/env python
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
2
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
3 import optparse, os, shutil, subprocess, sys, tempfile, fileinput, commands
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
4 import time
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
5 import re
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
6 import psutil
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
7 def stop_err( msg ):
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
8 sys.stderr.write( "%s\n" % msg )
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
9 sys.exit()
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
10
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
11 def __main__():
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
12 #Parse Command Line
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
13 start = time.time()
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
14 #print "start time", start
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
15 parser = optparse.OptionParser()
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
16 parser.add_option( '-s','--species',dest='species')
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
17 parser.add_option( '-a', '--assembly', dest='assembly' )
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
18 parser.add_option( '-G', '--genome_index', dest='genome_index' )
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
19 parser.add_option( '-e', '--genome', dest='genome')
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
20 parser.add_option( '-D', '--indexes-path', dest='index_path', help='Indexes directory; location of .stidx and .sthash files.' )
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
21
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
22 # Wrapper options.
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
23 parser.add_option( '-O', '--output', dest='output' )
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
24 parser.add_option( '-1', '--input1', dest='input1', help='The (forward or single-end) reads file in Sanger FASTQ format' )
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
25 parser.add_option( '-2', '--input2', dest='input2', help='The reverse reads file in Sanger FASTQ format' )
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
26 parser.add_option( '-g', '--use_genome', dest='use_genome' )
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
27 parser.add_option( '-H', '--hashbuild', dest='build_hash' )
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
28 parser.add_option( '-M', '--map', dest='map', action="store_true" )
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
29
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
30 # Read group options.
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
31 parser.add_option( '-d', '--sd', dest='sd' )
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
32 parser.add_option( '-i', '--insert', dest='insert' )
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
33 parser.add_option( '-r', '--subrate', dest='subrate' )
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
34
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
35 parser.add_option( '-t', '--settings', dest='settings' )
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
36
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
37
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
38 (options, args) = parser.parse_args()
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
39
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
40 # Creat genome index if necessary.
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
41 tmp_index_dir = tempfile.mkdtemp()
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
42
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
43 # Genome index from history
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
44 if options.genome_index:
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
45 index_path = os.path.join( tmp_index_dir, '.'.join( os.path.split( options.genome_index )[1].split( '.' )[:-1] ) )
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
46 try:
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
47 os.link( options.genome_index, index_path + '.fa' )
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
48 #print options.genome_index
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
49 except:
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
50 pass
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
51 # Indexing genome
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
52 cmd_index = 'stampy.py --species=%s --assembly=%s -G %s %s' %(options.species,options.assembly,options.genome_index,options.genome)
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
53 #print cmd_index
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
54 #print options.genome
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
55
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
56 try:
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
57 tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
58 tmp_stderr = open( tmp, 'wb' )
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
59 proc=subprocess.Popen(cmd_index,shell=True)
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
60 returncode = proc.wait()
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
61 tmp_stderr.close()
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
62 tmp_stderr = open( tmp, 'rb' )
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
63 stderr = ''
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
64 buffsize = 1048576
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
65 try:
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
66 while True:
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
67 stderr += tmp_stderr.read( buffsize )
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
68 if not stderr or len( stderr ) % buffsize != 0:
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
69 break
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
70 except OverflowError:
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
71 pass
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
72 tmp_stderr.close()
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
73
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
74 except Exception, e:
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
75 if os.path.exists( tmp_index_dir ):
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
76 shutil.rmtree( tmp_index_dir )
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
77 stop_err( 'Error indexing reference sequence\n' + str( e ) )
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
78 else:
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
79 index_path = options.index_path
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
80 try:
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
81 tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir).name
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
82 tmp_stderr = open( tmp, 'wb' )
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
83 # Hashing genome
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
84 cmd_index2 = 'stampy.py -g %s -H %s > %s' %(options.genome_index,options.genome_index,options.output)
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
85 #print cmd_index2
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
86 proc=subprocess.Popen(cmd_index2, shell=True)
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
87 returncode = proc.wait()
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
88 #print proc.returncode
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
89
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
90 except Exception, e:
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
91 if os.path.exists( tmp_index_dir ):
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
92 shutil.rmtree( tmp_index_dir )
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
93 cmd_index3 = 'stampy.py -g %s -h %s' %(options.genome_index,options.genome_index)
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
94 if options.settings == "full":
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
95 opts = ' --insertsize=%s --insertsd=%s --substitutionrate=%s -M %s %s > %s' %(options.insert,options.sd,options.subrate,options.input1,options.input2,options.output)
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
96 else:
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
97 opts = '-M %s %s > %s' %(options.input1,options.input2,options.output)
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
98 cmd_index3 += opts
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
99 try:
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
100 tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
101 tmp_stderr = open( tmp, 'wb' )
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
102 #print cmd_index3
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
103 proc=subprocess.call(cmd_index3, shell=True)
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
104 except Exception, e:
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
105 if os.path.exists( tmp_index_dir ):
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
106 shutil.rmtree( tmp_index_dir )
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
107
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
108 end = time.time()
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
109 #print "end time", end
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
110 elapsed = end - start
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
111 #print ("Time elapsed: {0:4f}sec",format(elapsed))
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
112
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
113 if __name__=="__main__": __main__()
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
114
55b3da30e4e5 Uploaded
subazini
parents:
diff changeset
115