annotate tools/smalt_wrapper.py @ 0:747433a6de00 draft

Initial commit.
author cjav
date Wed, 13 Feb 2013 13:27:44 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
747433a6de00 Initial commit.
cjav
parents:
diff changeset
1 #!/usr/bin/env python
747433a6de00 Initial commit.
cjav
parents:
diff changeset
2
747433a6de00 Initial commit.
cjav
parents:
diff changeset
3 """
747433a6de00 Initial commit.
cjav
parents:
diff changeset
4 Runs Smalt on single-end or paired-end data.
747433a6de00 Initial commit.
cjav
parents:
diff changeset
5 Produces a SAM file containing the mappings.
747433a6de00 Initial commit.
cjav
parents:
diff changeset
6 Works with Smalt version 0.7.1.
747433a6de00 Initial commit.
cjav
parents:
diff changeset
7
747433a6de00 Initial commit.
cjav
parents:
diff changeset
8 usage: smalt_wrapper.py [options]
747433a6de00 Initial commit.
cjav
parents:
diff changeset
9
747433a6de00 Initial commit.
cjav
parents:
diff changeset
10 See below for options
747433a6de00 Initial commit.
cjav
parents:
diff changeset
11 """
747433a6de00 Initial commit.
cjav
parents:
diff changeset
12
747433a6de00 Initial commit.
cjav
parents:
diff changeset
13 import optparse, os, shutil, subprocess, sys, tempfile
747433a6de00 Initial commit.
cjav
parents:
diff changeset
14
747433a6de00 Initial commit.
cjav
parents:
diff changeset
15 def stop_err( msg ):
747433a6de00 Initial commit.
cjav
parents:
diff changeset
16 sys.stderr.write( '%s\n' % msg )
747433a6de00 Initial commit.
cjav
parents:
diff changeset
17 sys.exit()
747433a6de00 Initial commit.
cjav
parents:
diff changeset
18
747433a6de00 Initial commit.
cjav
parents:
diff changeset
19 def __main__():
747433a6de00 Initial commit.
cjav
parents:
diff changeset
20 #Parse Command Line
747433a6de00 Initial commit.
cjav
parents:
diff changeset
21 parser = optparse.OptionParser()
747433a6de00 Initial commit.
cjav
parents:
diff changeset
22 parser.add_option( '-t', '--threads', dest='threads', help='The number of threads to use' )
747433a6de00 Initial commit.
cjav
parents:
diff changeset
23 parser.add_option( '-r', '--ref', dest='ref', help='The reference genome to use or index' )
747433a6de00 Initial commit.
cjav
parents:
diff changeset
24 parser.add_option( '-f', '--input1', dest='fastq', help='The (forward) fastq file to use for the mapping' )
747433a6de00 Initial commit.
cjav
parents:
diff changeset
25 parser.add_option( '-F', '--input2', dest='rfastq', help='The reverse fastq file to use for mapping if paired-end data' )
747433a6de00 Initial commit.
cjav
parents:
diff changeset
26 parser.add_option( '-u', '--output', dest='output', help='The file to save the output (SAM format)' )
747433a6de00 Initial commit.
cjav
parents:
diff changeset
27 parser.add_option( '-g', '--genAlignType', dest='genAlignType', help='The type of pairing (single or paired)' )
747433a6de00 Initial commit.
cjav
parents:
diff changeset
28 parser.add_option( '-p', '--params', dest='params', help='Parameter setting to use (pre_set or full)' )
747433a6de00 Initial commit.
cjav
parents:
diff changeset
29 parser.add_option( '-s', '--fileSource', dest='fileSource', help='Whether to use a previously indexed reference sequence or one form history (indexed or history)' )
747433a6de00 Initial commit.
cjav
parents:
diff changeset
30 parser.add_option( '-x', '--exhaustiveSearch', dest='exhaustiveSearch', help='This flag triggers a more exhaustive search for alignments at the cost of decreased speed' )
747433a6de00 Initial commit.
cjav
parents:
diff changeset
31 parser.add_option( '-c', '--minCover', dest='minCover', help='Only consider mappings where the k-mer word seeds cover the query read to a minimum extent' )
747433a6de00 Initial commit.
cjav
parents:
diff changeset
32 parser.add_option( '-d', '--scorDiff', dest='scorDiff', help='Set a threshold of the Smith-Waterman alignment score relative to the maximum score' )
747433a6de00 Initial commit.
cjav
parents:
diff changeset
33 parser.add_option( '-i', '--insertMax', dest='insertMax', help='Maximum insert size (Only in paired-end mode)' )
747433a6de00 Initial commit.
cjav
parents:
diff changeset
34 parser.add_option( '-j', '--insertMin', dest='insertMin', help='Minimum insert size (Only in paired-end mode)' )
747433a6de00 Initial commit.
cjav
parents:
diff changeset
35 parser.add_option( '-l', '--pairTyp', dest='pairTyp', help='Type of read pair library, can be either pe, mp or pp' )
747433a6de00 Initial commit.
cjav
parents:
diff changeset
36 parser.add_option( '-m', '--minScor', dest='minScor', help='Sets an absolute threshold of the Smith-Waterman scores' )
747433a6de00 Initial commit.
cjav
parents:
diff changeset
37 parser.add_option( '-a', '--partialAlignments', dest='partialAlignments', help='Report partial alignments if they are complementary on the read (split reads)' )
747433a6de00 Initial commit.
cjav
parents:
diff changeset
38 parser.add_option( '-q', '--minBasq', dest='minBasq', help='Sets a base quality threshold (0 <= minbasq <= 10, default 0)' )
747433a6de00 Initial commit.
cjav
parents:
diff changeset
39 parser.add_option( '-e', '--seed', dest='seed', help='If <seed> >= 0 report an alignment selected at random where there are multiple mappings with the same best alignment score. With <seed> = 0 (default) a seed is derived from the current calendar time. If <seed> < 0 reads with multiple best mappings are reported as "not mapped".' )
747433a6de00 Initial commit.
cjav
parents:
diff changeset
40 parser.add_option( '-w', '--complexityWeighted', dest='complexityWeighted', help='Smith-Waterman scores are complexity weighted' )
747433a6de00 Initial commit.
cjav
parents:
diff changeset
41 parser.add_option( '-y', '--minId', dest='minId', help='Sets an identity threshold for a mapping to be reported' )
747433a6de00 Initial commit.
cjav
parents:
diff changeset
42 parser.add_option( '-D', '--dbkey', dest='dbkey', help='Dbkey for reference genome' )
747433a6de00 Initial commit.
cjav
parents:
diff changeset
43 parser.add_option( '-X', '--do_not_build_index', dest='do_not_build_index', action='store_true', help="Don't build index" )
747433a6de00 Initial commit.
cjav
parents:
diff changeset
44 parser.add_option( '-H', '--suppressHeader', dest='suppressHeader', help='Suppress header' )
747433a6de00 Initial commit.
cjav
parents:
diff changeset
45 (options, args) = parser.parse_args()
747433a6de00 Initial commit.
cjav
parents:
diff changeset
46
747433a6de00 Initial commit.
cjav
parents:
diff changeset
47 # output version # of tool
747433a6de00 Initial commit.
cjav
parents:
diff changeset
48 try:
747433a6de00 Initial commit.
cjav
parents:
diff changeset
49 tmp = tempfile.NamedTemporaryFile().name
747433a6de00 Initial commit.
cjav
parents:
diff changeset
50 tmp_stdout = open( tmp, 'wb' )
747433a6de00 Initial commit.
cjav
parents:
diff changeset
51 proc = subprocess.Popen( args='smalt 2>&1', shell=True, stdout=tmp_stdout )
747433a6de00 Initial commit.
cjav
parents:
diff changeset
52 tmp_stdout.close()
747433a6de00 Initial commit.
cjav
parents:
diff changeset
53 returncode = proc.wait()
747433a6de00 Initial commit.
cjav
parents:
diff changeset
54 stdout = None
747433a6de00 Initial commit.
cjav
parents:
diff changeset
55 for line in open( tmp_stdout.name, 'rb' ):
747433a6de00 Initial commit.
cjav
parents:
diff changeset
56 if line.lower().find( 'version' ) >= 0:
747433a6de00 Initial commit.
cjav
parents:
diff changeset
57 stdout = line.strip()
747433a6de00 Initial commit.
cjav
parents:
diff changeset
58 break
747433a6de00 Initial commit.
cjav
parents:
diff changeset
59 if stdout:
747433a6de00 Initial commit.
cjav
parents:
diff changeset
60 sys.stdout.write( 'SMALT %s\n' % stdout )
747433a6de00 Initial commit.
cjav
parents:
diff changeset
61 else:
747433a6de00 Initial commit.
cjav
parents:
diff changeset
62 raise Exception
747433a6de00 Initial commit.
cjav
parents:
diff changeset
63 except:
747433a6de00 Initial commit.
cjav
parents:
diff changeset
64 sys.stdout.write( 'Could not determine SMALT version\n' )
747433a6de00 Initial commit.
cjav
parents:
diff changeset
65
747433a6de00 Initial commit.
cjav
parents:
diff changeset
66 fastq = options.fastq
747433a6de00 Initial commit.
cjav
parents:
diff changeset
67 if options.rfastq:
747433a6de00 Initial commit.
cjav
parents:
diff changeset
68 rfastq = options.rfastq
747433a6de00 Initial commit.
cjav
parents:
diff changeset
69
747433a6de00 Initial commit.
cjav
parents:
diff changeset
70 # make temp directory for placement of indices
747433a6de00 Initial commit.
cjav
parents:
diff changeset
71 tmp_index_dir = tempfile.mkdtemp()
747433a6de00 Initial commit.
cjav
parents:
diff changeset
72 tmp_dir = tempfile.mkdtemp()
747433a6de00 Initial commit.
cjav
parents:
diff changeset
73 # index if necessary
747433a6de00 Initial commit.
cjav
parents:
diff changeset
74 if options.fileSource == 'history' and not options.do_not_build_index:
747433a6de00 Initial commit.
cjav
parents:
diff changeset
75 ref_file = tempfile.NamedTemporaryFile( dir=tmp_index_dir )
747433a6de00 Initial commit.
cjav
parents:
diff changeset
76 ref_file_name = ref_file.name
747433a6de00 Initial commit.
cjav
parents:
diff changeset
77 ref_file.close()
747433a6de00 Initial commit.
cjav
parents:
diff changeset
78 os.symlink( options.ref, ref_file_name )
747433a6de00 Initial commit.
cjav
parents:
diff changeset
79 cmd1 = 'smalt index %s %s' % ( ref_file_name, ref_file_name )
747433a6de00 Initial commit.
cjav
parents:
diff changeset
80 try:
747433a6de00 Initial commit.
cjav
parents:
diff changeset
81 tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name
747433a6de00 Initial commit.
cjav
parents:
diff changeset
82 tmp_stderr = open( tmp, 'wb' )
747433a6de00 Initial commit.
cjav
parents:
diff changeset
83 proc = subprocess.Popen( args=cmd1, shell=True, cwd=tmp_index_dir, stderr=tmp_stderr.fileno() )
747433a6de00 Initial commit.
cjav
parents:
diff changeset
84 returncode = proc.wait()
747433a6de00 Initial commit.
cjav
parents:
diff changeset
85 tmp_stderr.close()
747433a6de00 Initial commit.
cjav
parents:
diff changeset
86 # get stderr, allowing for case where it's very large
747433a6de00 Initial commit.
cjav
parents:
diff changeset
87 tmp_stderr = open( tmp, 'rb' )
747433a6de00 Initial commit.
cjav
parents:
diff changeset
88 stderr = ''
747433a6de00 Initial commit.
cjav
parents:
diff changeset
89 buffsize = 1048576
747433a6de00 Initial commit.
cjav
parents:
diff changeset
90 try:
747433a6de00 Initial commit.
cjav
parents:
diff changeset
91 while True:
747433a6de00 Initial commit.
cjav
parents:
diff changeset
92 stderr += tmp_stderr.read( buffsize )
747433a6de00 Initial commit.
cjav
parents:
diff changeset
93 if not stderr or len( stderr ) % buffsize != 0:
747433a6de00 Initial commit.
cjav
parents:
diff changeset
94 break
747433a6de00 Initial commit.
cjav
parents:
diff changeset
95 except OverflowError:
747433a6de00 Initial commit.
cjav
parents:
diff changeset
96 pass
747433a6de00 Initial commit.
cjav
parents:
diff changeset
97 tmp_stderr.close()
747433a6de00 Initial commit.
cjav
parents:
diff changeset
98 if returncode != 0:
747433a6de00 Initial commit.
cjav
parents:
diff changeset
99 raise Exception, stderr
747433a6de00 Initial commit.
cjav
parents:
diff changeset
100 except Exception, e:
747433a6de00 Initial commit.
cjav
parents:
diff changeset
101 # clean up temp dirs
747433a6de00 Initial commit.
cjav
parents:
diff changeset
102 if os.path.exists( tmp_index_dir ):
747433a6de00 Initial commit.
cjav
parents:
diff changeset
103 shutil.rmtree( tmp_index_dir )
747433a6de00 Initial commit.
cjav
parents:
diff changeset
104 if os.path.exists( tmp_dir ):
747433a6de00 Initial commit.
cjav
parents:
diff changeset
105 shutil.rmtree( tmp_dir )
747433a6de00 Initial commit.
cjav
parents:
diff changeset
106 stop_err( 'Error indexing reference sequence. ' + str( e ) )
747433a6de00 Initial commit.
cjav
parents:
diff changeset
107 else:
747433a6de00 Initial commit.
cjav
parents:
diff changeset
108 ref_file_name = options.ref
747433a6de00 Initial commit.
cjav
parents:
diff changeset
109
747433a6de00 Initial commit.
cjav
parents:
diff changeset
110 # set up aligning and generate aligning command options
747433a6de00 Initial commit.
cjav
parents:
diff changeset
111 if options.params == 'pre_set':
747433a6de00 Initial commit.
cjav
parents:
diff changeset
112 aligning_cmds = '-n %s ' % ( options.threads )
747433a6de00 Initial commit.
cjav
parents:
diff changeset
113 gen_alignment_cmds = ''
747433a6de00 Initial commit.
cjav
parents:
diff changeset
114 else:
747433a6de00 Initial commit.
cjav
parents:
diff changeset
115 if options.exhaustiveSearch == 'true':
747433a6de00 Initial commit.
cjav
parents:
diff changeset
116 exhaustiveSearch = '-x'
747433a6de00 Initial commit.
cjav
parents:
diff changeset
117 minCover = '-c %s' % options.minCover
747433a6de00 Initial commit.
cjav
parents:
diff changeset
118 else:
747433a6de00 Initial commit.
cjav
parents:
diff changeset
119 exhaustiveSearch = ''
747433a6de00 Initial commit.
cjav
parents:
diff changeset
120 minCover = ''
747433a6de00 Initial commit.
cjav
parents:
diff changeset
121 if options.partialAlignments == 'true':
747433a6de00 Initial commit.
cjav
parents:
diff changeset
122 partialAlignments = '-x'
747433a6de00 Initial commit.
cjav
parents:
diff changeset
123 else:
747433a6de00 Initial commit.
cjav
parents:
diff changeset
124 partialAlignments = ''
747433a6de00 Initial commit.
cjav
parents:
diff changeset
125 if options.complexityWeighted == 'true':
747433a6de00 Initial commit.
cjav
parents:
diff changeset
126 complexityWeighted = '-w'
747433a6de00 Initial commit.
cjav
parents:
diff changeset
127 else:
747433a6de00 Initial commit.
cjav
parents:
diff changeset
128 complexityWeighted = ''
747433a6de00 Initial commit.
cjav
parents:
diff changeset
129 aligning_cmds = '-d %s -m %s -q %s -r %s -y %s' % \
747433a6de00 Initial commit.
cjav
parents:
diff changeset
130 ( options.scorDiff, options.minScor, options.minBasq, options.seed, options.minId )
747433a6de00 Initial commit.
cjav
parents:
diff changeset
131 if options.genAlignType == 'paired':
747433a6de00 Initial commit.
cjav
parents:
diff changeset
132 gen_alignment_cmds = '-i %s -j %s -l %s' % ( options.insertMax, options.insertMin, options.pairTyp )
747433a6de00 Initial commit.
cjav
parents:
diff changeset
133 else:
747433a6de00 Initial commit.
cjav
parents:
diff changeset
134 gen_alignment_cmds = ''
747433a6de00 Initial commit.
cjav
parents:
diff changeset
135 # prepare actual aligning and generate aligning commands
747433a6de00 Initial commit.
cjav
parents:
diff changeset
136 if options.genAlignType == 'paired':
747433a6de00 Initial commit.
cjav
parents:
diff changeset
137 cmd = 'smalt map %s %s -o %s %s %s ' % ( aligning_cmds, gen_alignment_cmds, options.output, ref_file_name, fastq, rfastq )
747433a6de00 Initial commit.
cjav
parents:
diff changeset
138 else:
747433a6de00 Initial commit.
cjav
parents:
diff changeset
139 cmd = 'smalt map %s -o %s %s %s ' % ( aligning_cmds, options.output, ref_file_name, fastq )
747433a6de00 Initial commit.
cjav
parents:
diff changeset
140 # perform alignments
747433a6de00 Initial commit.
cjav
parents:
diff changeset
141 buffsize = 1048576
747433a6de00 Initial commit.
cjav
parents:
diff changeset
142 try:
747433a6de00 Initial commit.
cjav
parents:
diff changeset
143 # need to nest try-except in try-finally to handle 2.4
747433a6de00 Initial commit.
cjav
parents:
diff changeset
144 try:
747433a6de00 Initial commit.
cjav
parents:
diff changeset
145 # align
747433a6de00 Initial commit.
cjav
parents:
diff changeset
146 try:
747433a6de00 Initial commit.
cjav
parents:
diff changeset
147 tmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name
747433a6de00 Initial commit.
cjav
parents:
diff changeset
148 tmp_stderr = open( tmp, 'wb' )
747433a6de00 Initial commit.
cjav
parents:
diff changeset
149 proc = subprocess.Popen( args=cmd, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() )
747433a6de00 Initial commit.
cjav
parents:
diff changeset
150 returncode = proc.wait()
747433a6de00 Initial commit.
cjav
parents:
diff changeset
151 tmp_stderr.close()
747433a6de00 Initial commit.
cjav
parents:
diff changeset
152 # get stderr, allowing for case where it's very large
747433a6de00 Initial commit.
cjav
parents:
diff changeset
153 tmp_stderr = open( tmp, 'rb' )
747433a6de00 Initial commit.
cjav
parents:
diff changeset
154 stderr = ''
747433a6de00 Initial commit.
cjav
parents:
diff changeset
155 try:
747433a6de00 Initial commit.
cjav
parents:
diff changeset
156 while True:
747433a6de00 Initial commit.
cjav
parents:
diff changeset
157 stderr += tmp_stderr.read( buffsize )
747433a6de00 Initial commit.
cjav
parents:
diff changeset
158 if not stderr or len( stderr ) % buffsize != 0:
747433a6de00 Initial commit.
cjav
parents:
diff changeset
159 break
747433a6de00 Initial commit.
cjav
parents:
diff changeset
160 except OverflowError:
747433a6de00 Initial commit.
cjav
parents:
diff changeset
161 pass
747433a6de00 Initial commit.
cjav
parents:
diff changeset
162 tmp_stderr.close()
747433a6de00 Initial commit.
cjav
parents:
diff changeset
163 if returncode != 0:
747433a6de00 Initial commit.
cjav
parents:
diff changeset
164 raise Exception, stderr
747433a6de00 Initial commit.
cjav
parents:
diff changeset
165 except Exception, e:
747433a6de00 Initial commit.
cjav
parents:
diff changeset
166 raise Exception, 'Error aligning sequence. ' + str( e )
747433a6de00 Initial commit.
cjav
parents:
diff changeset
167 # remove header if necessary
747433a6de00 Initial commit.
cjav
parents:
diff changeset
168 if options.suppressHeader == 'true':
747433a6de00 Initial commit.
cjav
parents:
diff changeset
169 tmp_out = tempfile.NamedTemporaryFile( dir=tmp_dir)
747433a6de00 Initial commit.
cjav
parents:
diff changeset
170 tmp_out_name = tmp_out.name
747433a6de00 Initial commit.
cjav
parents:
diff changeset
171 tmp_out.close()
747433a6de00 Initial commit.
cjav
parents:
diff changeset
172 try:
747433a6de00 Initial commit.
cjav
parents:
diff changeset
173 shutil.move( options.output, tmp_out_name )
747433a6de00 Initial commit.
cjav
parents:
diff changeset
174 except Exception, e:
747433a6de00 Initial commit.
cjav
parents:
diff changeset
175 raise Exception, 'Error moving output file before removing headers. ' + str( e )
747433a6de00 Initial commit.
cjav
parents:
diff changeset
176 fout = file( options.output, 'w' )
747433a6de00 Initial commit.
cjav
parents:
diff changeset
177 for line in file( tmp_out.name, 'r' ):
747433a6de00 Initial commit.
cjav
parents:
diff changeset
178 if not ( line.startswith( '@HD' ) or line.startswith( '@SQ' ) or line.startswith( '@RG' ) or line.startswith( '@PG' ) or line.startswith( '@CO' ) ):
747433a6de00 Initial commit.
cjav
parents:
diff changeset
179 fout.write( line )
747433a6de00 Initial commit.
cjav
parents:
diff changeset
180 fout.close()
747433a6de00 Initial commit.
cjav
parents:
diff changeset
181 # check that there are results in the output file
747433a6de00 Initial commit.
cjav
parents:
diff changeset
182 if os.path.getsize( options.output ) > 0:
747433a6de00 Initial commit.
cjav
parents:
diff changeset
183 sys.stdout.write( 'SMALT run on %s-end data' % options.genAlignType )
747433a6de00 Initial commit.
cjav
parents:
diff changeset
184 else:
747433a6de00 Initial commit.
cjav
parents:
diff changeset
185 raise Exception, 'The output file is empty. You may simply have no matches, or there may be an error with your input file or settings.'
747433a6de00 Initial commit.
cjav
parents:
diff changeset
186 except Exception, e:
747433a6de00 Initial commit.
cjav
parents:
diff changeset
187 stop_err( 'The alignment failed.\n' + str( e ) )
747433a6de00 Initial commit.
cjav
parents:
diff changeset
188 finally:
747433a6de00 Initial commit.
cjav
parents:
diff changeset
189 # clean up temp dir
747433a6de00 Initial commit.
cjav
parents:
diff changeset
190 if os.path.exists( tmp_index_dir ):
747433a6de00 Initial commit.
cjav
parents:
diff changeset
191 shutil.rmtree( tmp_index_dir )
747433a6de00 Initial commit.
cjav
parents:
diff changeset
192 if os.path.exists( tmp_dir ):
747433a6de00 Initial commit.
cjav
parents:
diff changeset
193 shutil.rmtree( tmp_dir )
747433a6de00 Initial commit.
cjav
parents:
diff changeset
194
747433a6de00 Initial commit.
cjav
parents:
diff changeset
195 if __name__=="__main__": __main__()