comparison tools/sr_mapping/bowtie_wrapper.py @ 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:9071e359b9a3
1 #!/usr/bin/env python
2
3 """
4 Runs Bowtie on single-end or paired-end data.
5 For use with Bowtie v. 0.12.7
6
7 usage: bowtie_wrapper.py [options]
8 -t, --threads=t: The number of threads to run
9 -o, --output=o: The output file
10 --output_unmapped_reads=: File name for unmapped reads (single-end)
11 --output_unmapped_reads_l=: File name for unmapped reads (left, paired-end)
12 --output_unmapped_reads_r=: File name for unmapped reads (right, paired-end)
13 --output_suppressed_reads=: File name for suppressed reads because of max setting (single-end)
14 --output_suppressed_reads_l=: File name for suppressed reads because of max setting (left, paired-end)
15 --output_suppressed_reads_r=: File name for suppressed reads because of max setting (right, paired-end)
16 -i, --input1=i: The (forward or single-end) reads file in Sanger FASTQ format
17 -I, --input2=I: The reverse reads file in Sanger FASTQ format
18 -4, --dataType=4: The type of data (SOLiD or Solexa)
19 -2, --paired=2: Whether the data is single- or paired-end
20 -g, --genomeSource=g: The type of reference provided
21 -r, --ref=r: The reference genome to use or index
22 -s, --skip=s: Skip the first n reads
23 -a, --alignLimit=a: Only align the first n reads
24 -T, --trimH=T: Trim n bases from high-quality (left) end of each read before alignment
25 -L, --trimL=L: Trim n bases from low-quality (right) end of each read before alignment
26 -m, --mismatchSeed=m: Maximum number of mismatches permitted in the seed
27 -M, --mismatchQual=M: Maximum permitted total of quality values at mismatched read positions
28 -l, --seedLen=l: Seed length
29 -n, --rounding=n: Whether or not to round to the nearest 10 and saturating at 30
30 -P, --maqSoapAlign=P: Choose MAQ- or SOAP-like alignment policy
31 -w, --tryHard=: Whether or not to try as hard as possible to find valid alignments when they exist
32 -v, --valAlign=v: Report up to n valid arguments per read
33 -V, --allValAligns=V: Whether or not to report all valid alignments per read
34 -G, --suppressAlign=G: Suppress all alignments for a read if more than n reportable alignments exist
35 -b, --best=b: Whether or not to make Bowtie guarantee that reported singleton alignments are 'best' in terms of stratum and in terms of the quality values at the mismatched positions
36 -B, --maxBacktracks=B: Maximum number of backtracks permitted when aligning a read
37 -R, --strata=R: Whether or not to report only those alignments that fall in the best stratum if many valid alignments exist and are reportable
38 -j, --minInsert=j: Minimum insert size for valid paired-end alignments
39 -J, --maxInsert=J: Maximum insert size for valid paired-end alignments
40 -O, --mateOrient=O: The upstream/downstream mate orientation for valid paired-end alignment against the forward reference strand
41 -A, --maxAlignAttempt=A: Maximum number of attempts Bowtie will make to match an alignment for one mate with an alignment for the opposite mate
42 -f, --forwardAlign=f: Whether or not to attempt to align the forward reference strand
43 -E, --reverseAlign=E: Whether or not to attempt to align the reverse-complement reference strand
44 -F, --offrate=F: Override the offrate of the index to n
45 -8, --snpphred=8: SNP penalty on Phred scale
46 -6, --snpfrac=6: Fraction of sites expected to be SNP sites
47 -7, --keepends=7: Keep extreme-end nucleotides and qualities
48 -S, --seed=S: Seed for pseudo-random number generator
49 -C, --params=C: Whether to use default or specified parameters
50 -u, --iautoB=u: Automatic or specified behavior
51 -K, --ipacked=K: Whether or not to use a packed representation for DNA strings
52 -Q, --ibmax=Q: Maximum number of suffixes allowed in a block
53 -Y, --ibmaxdivn=Y: Maximum number of suffixes allowed in a block as a fraction of the length of the reference
54 -D, --idcv=D: The period for the difference-cover sample
55 -U, --inodc=U: Whether or not to disable the use of the difference-cover sample
56 -y, --inoref=y: Whether or not to build the part of the reference index used only in paired-end alignment
57 -z, --ioffrate=z: How many rows get marked during annotation of some or all of the Burrows-Wheeler rows
58 -W, --iftab=W: The size of the lookup table used to calculate an initial Burrows-Wheeler range with respect to the first n characters of the query
59 -X, --intoa=X: Whether or not to convert Ns in the reference sequence to As
60 -N, --iendian=N: Endianness to use when serializing integers to the index file
61 -Z, --iseed=Z: Seed for the pseudorandom number generator
62 -c, --icutoff=c: Number of first bases of the reference sequence to index
63 -x, --indexSettings=x: Whether or not indexing options are to be set
64 -H, --suppressHeader=H: Suppress header
65 --do_not_build_index: Flag to specify that provided file is already indexed and to just use 'as is'
66 """
67
68 import optparse, os, shutil, subprocess, sys, tempfile
69
70 #Allow more than Sanger encoded variants
71 DEFAULT_ASCII_ENCODING = '--phred33-quals'
72 GALAXY_FORMAT_TO_QUALITY_SCORE_ENCODING_ARG = { 'fastqsanger':'--phred33-quals', 'fastqillumina':'--phred64-quals', 'fastqsolexa':'--solexa-quals' }
73 #FIXME: Integer quality scores are supported only when the '--integer-quals' argument is specified to bowtie; this is not currently able to be set in the tool/wrapper/config
74
75 def stop_err( msg ):
76 sys.stderr.write( '%s\n' % msg )
77 sys.exit()
78
79 def __main__():
80 #Parse Command Line
81 parser = optparse.OptionParser()
82 parser.add_option( '-t', '--threads', dest='threads', help='The number of threads to run' )
83 parser.add_option( '-o', '--output', dest='output', help='The output file' )
84 parser.add_option( '', '--output_unmapped_reads', dest='output_unmapped_reads', help='File name for unmapped reads (single-end)' )
85 parser.add_option( '', '--output_unmapped_reads_l', dest='output_unmapped_reads_l', help='File name for unmapped reads (left, paired-end)' )
86 parser.add_option( '', '--output_unmapped_reads_r', dest='output_unmapped_reads_r', help='File name for unmapped reads (right, paired-end)' )
87 parser.add_option( '', '--output_suppressed_reads', dest='output_suppressed_reads', help='File name for suppressed reads because of max setting (single-end)' )
88 parser.add_option( '', '--output_suppressed_reads_l', dest='output_suppressed_reads_l', help='File name for suppressed reads because of max setting (left, paired-end)' )
89 parser.add_option( '', '--output_suppressed_reads_r', dest='output_suppressed_reads_r', help='File name for suppressed reads because of max setting (right, paired-end)' )
90 parser.add_option( '-4', '--dataType', dest='dataType', help='The type of data (SOLiD or Solexa)' )
91 parser.add_option( '-i', '--input1', dest='input1', help='The (forward or single-end) reads file in Sanger FASTQ format' )
92 parser.add_option( '-I', '--input2', dest='input2', help='The reverse reads file in Sanger FASTQ format' )
93 parser.add_option( '-2', '--paired', dest='paired', help='Whether the data is single- or paired-end' )
94 parser.add_option( '-g', '--genomeSource', dest='genomeSource', help='The type of reference provided' )
95 parser.add_option( '-r', '--ref', dest='ref', help='The reference genome to use or index' )
96 parser.add_option( '-s', '--skip', dest='skip', help='Skip the first n reads' )
97 parser.add_option( '-a', '--alignLimit', dest='alignLimit', help='Only align the first n reads' )
98 parser.add_option( '-T', '--trimH', dest='trimH', help='Trim n bases from high-quality (left) end of each read before alignment' )
99 parser.add_option( '-L', '--trimL', dest='trimL', help='Trim n bases from low-quality (right) end of each read before alignment' )
100 parser.add_option( '-m', '--mismatchSeed', dest='mismatchSeed', help='Maximum number of mismatches permitted in the seed' )
101 parser.add_option( '-M', '--mismatchQual', dest='mismatchQual', help='Maximum permitted total of quality values at mismatched read positions' )
102 parser.add_option( '-l', '--seedLen', dest='seedLen', help='Seed length' )
103 parser.add_option( '-n', '--rounding', dest='rounding', help='Whether or not to round to the nearest 10 and saturating at 30' )
104 parser.add_option( '-P', '--maqSoapAlign', dest='maqSoapAlign', help='Choose MAQ- or SOAP-like alignment policy' )
105 parser.add_option( '-w', '--tryHard', dest='tryHard', help='Whether or not to try as hard as possible to find valid alignments when they exist' )
106 parser.add_option( '-v', '--valAlign', dest='valAlign', help='Report up to n valid arguments per read' )
107 parser.add_option( '-V', '--allValAligns', dest='allValAligns', help='Whether or not to report all valid alignments per read' )
108 parser.add_option( '-G', '--suppressAlign', dest='suppressAlign', help='Suppress all alignments for a read if more than n reportable alignments exist' )
109 parser.add_option( '-b', '--best', dest='best', help="Whether or not to make Bowtie guarantee that reported singleton alignments are 'best' in terms of stratum and in terms of the quality values at the mismatched positions" )
110 parser.add_option( '-B', '--maxBacktracks', dest='maxBacktracks', help='Maximum number of backtracks permitted when aligning a read' )
111 parser.add_option( '-R', '--strata', dest='strata', help='Whether or not to report only those alignments that fall in the best stratum if many valid alignments exist and are reportable' )
112 parser.add_option( '-j', '--minInsert', dest='minInsert', help='Minimum insert size for valid paired-end alignments' )
113 parser.add_option( '-J', '--maxInsert', dest='maxInsert', help='Maximum insert size for valid paired-end alignments' )
114 parser.add_option( '-O', '--mateOrient', dest='mateOrient', help='The upstream/downstream mate orientation for valid paired-end alignment against the forward reference strand' )
115 parser.add_option( '-A', '--maxAlignAttempt', dest='maxAlignAttempt', help='Maximum number of attempts Bowtie will make to match an alignment for one mate with an alignment for the opposite mate' )
116 parser.add_option( '-f', '--forwardAlign', dest='forwardAlign', help='Whether or not to attempt to align the forward reference strand' )
117 parser.add_option( '-E', '--reverseAlign', dest='reverseAlign', help='Whether or not to attempt to align the reverse-complement reference strand' )
118 parser.add_option( '-F', '--offrate', dest='offrate', help='Override the offrate of the index to n' )
119 parser.add_option( '-S', '--seed', dest='seed', help='Seed for pseudo-random number generator' )
120 parser.add_option( '-8', '--snpphred', dest='snpphred', help='SNP penalty on Phred scale' )
121 parser.add_option( '-6', '--snpfrac', dest='snpfrac', help='Fraction of sites expected to be SNP sites' )
122 parser.add_option( '-7', '--keepends', dest='keepends', help='Keep extreme-end nucleotides and qualities' )
123 parser.add_option( '-C', '--params', dest='params', help='Whether to use default or specified parameters' )
124 parser.add_option( '-u', '--iautoB', dest='iautoB', help='Automatic or specified behavior' )
125 parser.add_option( '-K', '--ipacked', dest='ipacked', help='Whether or not to use a packed representation for DNA strings' )
126 parser.add_option( '-Q', '--ibmax', dest='ibmax', help='Maximum number of suffixes allowed in a block' )
127 parser.add_option( '-Y', '--ibmaxdivn', dest='ibmaxdivn', help='Maximum number of suffixes allowed in a block as a fraction of the length of the reference' )
128 parser.add_option( '-D', '--idcv', dest='idcv', help='The period for the difference-cover sample' )
129 parser.add_option( '-U', '--inodc', dest='inodc', help='Whether or not to disable the use of the difference-cover sample' )
130 parser.add_option( '-y', '--inoref', dest='inoref', help='Whether or not to build the part of the reference index used only in paired-end alignment' )
131 parser.add_option( '-z', '--ioffrate', dest='ioffrate', help='How many rows get marked during annotation of some or all of the Burrows-Wheeler rows' )
132 parser.add_option( '-W', '--iftab', dest='iftab', help='The size of the lookup table used to calculate an initial Burrows-Wheeler range with respect to the first n characters of the query' )
133 parser.add_option( '-X', '--intoa', dest='intoa', help='Whether or not to convert Ns in the reference sequence to As' )
134 parser.add_option( '-N', '--iendian', dest='iendian', help='Endianness to use when serializing integers to the index file' )
135 parser.add_option( '-Z', '--iseed', dest='iseed', help='Seed for the pseudorandom number generator' )
136 parser.add_option( '-c', '--icutoff', dest='icutoff', help='Number of first bases of the reference sequence to index' )
137 parser.add_option( '-x', '--indexSettings', dest='index_settings', help='Whether or not indexing options are to be set' )
138 parser.add_option( '-H', '--suppressHeader', dest='suppressHeader', help='Suppress header' )
139 parser.add_option( '--galaxy_input_format', dest='galaxy_input_format', default="fastqsanger", help='galaxy input format' )
140 parser.add_option( '--do_not_build_index', dest='do_not_build_index', action="store_true", default=False, help='Flag to specify that provided file is already indexed, use as is' )
141 (options, args) = parser.parse_args()
142 stdout = ''
143
144 # make temp directory for placement of indices and copy reference file there if necessary
145 tmp_index_dir = tempfile.mkdtemp()
146 # get type of data (solid or solexa)
147 if options.dataType == 'solid':
148 colorspace = '-C'
149 else:
150 colorspace = ''
151 # index if necessary
152 if options.genomeSource == 'history' and not options.do_not_build_index:
153 # set up commands
154 if options.index_settings =='indexPreSet':
155 indexing_cmds = '%s' % colorspace
156 else:
157 try:
158 if options.iautoB and options.iautoB == 'set':
159 iautoB = '--noauto'
160 else:
161 iautoB = ''
162 if options. ipacked and options.ipacked == 'packed':
163 ipacked = '--packed'
164 else:
165 ipacked = ''
166 if options.ibmax and int( options.ibmax ) >= 1:
167 ibmax = '--bmax %s' % options.ibmax
168 else:
169 ibmax = ''
170 if options.ibmaxdivn and int( options.ibmaxdivn ) >= 0:
171 ibmaxdivn = '--bmaxdivn %s' % options.ibmaxdivn
172 else:
173 ibmaxdivn = ''
174 if options.idcv and int( options.idcv ) > 0:
175 idcv = '--dcv %s' % options.idcv
176 else:
177 idcv = ''
178 if options.inodc and options.inodc == 'nodc':
179 inodc = '--nodc'
180 else:
181 inodc = ''
182 if options.inoref and options.inoref == 'noref':
183 inoref = '--noref'
184 else:
185 inoref = ''
186 if options.iftab and int( options.iftab ) >= 0:
187 iftab = '--ftabchars %s' % options.iftab
188 else:
189 iftab = ''
190 if options.intoa and options.intoa == 'yes':
191 intoa = '--ntoa'
192 else:
193 intoa = ''
194 if options.iendian and options.iendian == 'big':
195 iendian = '--big'
196 else:
197 iendian = '--little'
198 if options.iseed and int( options.iseed ) > 0:
199 iseed = '--seed %s' % options.iseed
200 else:
201 iseed = ''
202 if options.icutoff and int( options.icutoff ) > 0:
203 icutoff = '--cutoff %s' % options.icutoff
204 else:
205 icutoff = ''
206 indexing_cmds = '%s %s %s %s %s %s %s --offrate %s %s %s %s %s %s %s' % \
207 ( iautoB, ipacked, ibmax, ibmaxdivn, idcv, inodc,
208 inoref, options.ioffrate, iftab, intoa, iendian,
209 iseed, icutoff, colorspace )
210 except ValueError, e:
211 # clean up temp dir
212 if os.path.exists( tmp_index_dir ):
213 shutil.rmtree( tmp_index_dir )
214 stop_err( "Something is wrong with the indexing parameters and the indexing and alignment could not be run. Make sure you don't have any non-numeric values where they should be numeric.\n" + str( e ) )
215 ref_file = tempfile.NamedTemporaryFile( dir=tmp_index_dir )
216 ref_file_name = ref_file.name
217 ref_file.close()
218 os.symlink( options.ref, ref_file_name )
219 cmd1 = 'bowtie-build %s -f %s %s' % ( indexing_cmds, ref_file_name, ref_file_name )
220 try:
221 tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name
222 tmp_stderr = open( tmp, 'wb' )
223 proc = subprocess.Popen( args=cmd1, shell=True, cwd=tmp_index_dir, stderr=tmp_stderr.fileno() )
224 returncode = proc.wait()
225 tmp_stderr.close()
226 # get stderr, allowing for case where it's very large
227 tmp_stderr = open( tmp, 'rb' )
228 stderr = ''
229 buffsize = 1048576
230 try:
231 while True:
232 stderr += tmp_stderr.read( buffsize )
233 if not stderr or len( stderr ) % buffsize != 0:
234 break
235 except OverflowError:
236 pass
237 tmp_stderr.close()
238 if returncode != 0:
239 raise Exception, stderr
240 except Exception, e:
241 # clean up temp dir
242 if os.path.exists( tmp_index_dir ):
243 shutil.rmtree( tmp_index_dir )
244 stop_err( 'Error indexing reference sequence\n' + str( e ) )
245 stdout += 'File indexed. '
246 else:
247 ref_file_name = options.ref
248 # set up aligning and generate aligning command options
249 # automatically set threads in both cases
250 tmp_suppressed_file_name = None
251 tmp_unmapped_file_name = None
252 if options.suppressHeader == 'true':
253 suppressHeader = '--sam-nohead'
254 else:
255 suppressHeader = ''
256 if options.maxInsert and int( options.maxInsert ) > 0:
257 maxInsert = '-X %s' % options.maxInsert
258 else:
259 maxInsert = ''
260 if options.mateOrient:
261 mateOrient = '--%s' % options.mateOrient
262 else:
263 mateOrient = ''
264 quality_score_encoding = GALAXY_FORMAT_TO_QUALITY_SCORE_ENCODING_ARG.get( options.galaxy_input_format, DEFAULT_ASCII_ENCODING )
265 if options.params == 'preSet':
266 aligning_cmds = '-q %s %s -p %s -S %s %s %s ' % \
267 ( maxInsert, mateOrient, options.threads, suppressHeader, colorspace, quality_score_encoding )
268 else:
269 try:
270 if options.skip and int( options.skip ) > 0:
271 skip = '-s %s' % options.skip
272 else:
273 skip = ''
274 if options.alignLimit and int( options.alignLimit ) >= 0:
275 alignLimit = '-u %s' % options.alignLimit
276 else:
277 alignLimit = ''
278 if options.trimH and int( options.trimH ) > 0:
279 trimH = '-5 %s' % options.trimH
280 else:
281 trimH = ''
282 if options.trimL and int( options.trimL ) > 0:
283 trimL = '-3 %s' % options.trimL
284 else:
285 trimL = ''
286 if options.maqSoapAlign != '-1' and int( options.maqSoapAlign ) >= 0:
287 maqSoapAlign = '-v %s' % options.maqSoapAlign
288 else:
289 maqSoapAlign = ''
290 if options.mismatchSeed and (options.mismatchSeed == '0' or options.mismatchSeed == '1' \
291 or options.mismatchSeed == '2' or options.mismatchSeed == '3'):
292 mismatchSeed = '-n %s' % options.mismatchSeed
293 else:
294 mismatchSeed = ''
295 if options.mismatchQual and int( options.mismatchQual ) >= 0:
296 mismatchQual = '-e %s' % options.mismatchQual
297 else:
298 mismatchQual = ''
299 if options.seedLen and int( options.seedLen ) >= 5:
300 seedLen = '-l %s' % options.seedLen
301 else:
302 seedLen = ''
303 if options.rounding == 'noRound':
304 rounding = '--nomaqround'
305 else:
306 rounding = ''
307 if options.minInsert and int( options.minInsert ) > 0:
308 minInsert = '-I %s' % options.minInsert
309 else:
310 minInsert = ''
311 if options.maxAlignAttempt and int( options.maxAlignAttempt ) >= 0:
312 maxAlignAttempt = '--pairtries %s' % options.maxAlignAttempt
313 else:
314 maxAlignAttempt = ''
315 if options.forwardAlign == 'noForward':
316 forwardAlign = '--nofw'
317 else:
318 forwardAlign = ''
319 if options.reverseAlign == 'noReverse':
320 reverseAlign = '--norc'
321 else:
322 reverseAlign = ''
323 if options.maxBacktracks and int( options.maxBacktracks ) > 0 and \
324 ( options.mismatchSeed == '2' or options.mismatchSeed == '3' ):
325 maxBacktracks = '--maxbts %s' % options.maxBacktracks
326 else:
327 maxBacktracks = ''
328 if options.tryHard == 'doTryHard':
329 tryHard = '-y'
330 else:
331 tryHard = ''
332 if options.valAlign and int( options.valAlign ) >= 0:
333 valAlign = '-k %s' % options.valAlign
334 else:
335 valAlign = ''
336 if options.allValAligns == 'doAllValAligns':
337 allValAligns = '-a'
338 else:
339 allValAligns = ''
340 if options.suppressAlign and int( options.suppressAlign ) >= 0:
341 suppressAlign = '-m %s' % options.suppressAlign
342 else:
343 suppressAlign = ''
344 if options.best == 'doBest':
345 best = '--best'
346 else:
347 best = ''
348 if options.strata == 'doStrata':
349 strata = '--strata'
350 else:
351 strata = ''
352 if options.offrate and int( options.offrate ) >= 0:
353 offrate = '-o %s' % options.offrate
354 else:
355 offrate = ''
356 if options.seed and int( options.seed ) >= 0:
357 seed = '--seed %s' % options.seed
358 else:
359 seed = ''
360 if options.paired == 'paired':
361 if options.output_unmapped_reads_l and options.output_unmapped_reads_r:
362 tmp_unmapped_file = tempfile.NamedTemporaryFile( dir=tmp_index_dir, suffix='.fastq' )
363 tmp_unmapped_file_name = tmp_unmapped_file.name
364 tmp_unmapped_file.close()
365 output_unmapped_reads = '--un %s' % tmp_unmapped_file_name
366 else:
367 output_unmapped_reads = ''
368 if options.output_suppressed_reads:
369 tmp_suppressed_file = tempfile.NamedTemporaryFile( dir=tmp_index_dir, suffix='.fastq' )
370 tmp_suppressed_file_name = tmp_suppressed_file.name
371 tmp_suppressed_file.close()
372 output_suppressed_reads = '--max %s' % tmp_suppressed_file_name
373 else:
374 output_suppressed_reads = ''
375 else:
376 if options.output_unmapped_reads:
377 output_unmapped_reads = '--un %s' % options.output_unmapped_reads
378 else:
379 output_unmapped_reads = ''
380 if options.output_suppressed_reads:
381 output_suppressed_reads = '--max %s' % options.output_suppressed_reads
382 else:
383 output_suppressed_reads = ''
384 snpfrac = ''
385 if options.snpphred and int( options.snpphred ) >= 0:
386 snpphred = '--snpphred %s' % options.snpphred
387 else:
388 snpphred = ''
389 if options.snpfrac and float( options.snpfrac ) >= 0:
390 snpfrac = '--snpfrac %s' % options.snpfrac
391 if options.keepends and options.keepends == 'doKeepends':
392 keepends = '--col-keepends'
393 else:
394 keepends = ''
395 aligning_cmds = '-q %s %s -p %s -S %s %s %s %s %s %s %s %s %s %s %s %s ' \
396 '%s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s ' % \
397 ( maxInsert, mateOrient, options.threads, suppressHeader,
398 colorspace, skip, alignLimit, trimH, trimL, maqSoapAlign,
399 mismatchSeed, mismatchQual, seedLen, rounding, minInsert,
400 maxAlignAttempt, forwardAlign, reverseAlign, maxBacktracks,
401 tryHard, valAlign, allValAligns, suppressAlign, best,
402 strata, offrate, seed, snpphred, snpfrac, keepends,
403 output_unmapped_reads, output_suppressed_reads,
404 quality_score_encoding )
405 except ValueError, e:
406 # clean up temp dir
407 if os.path.exists( tmp_index_dir ):
408 shutil.rmtree( tmp_index_dir )
409 stop_err( 'Something is wrong with the alignment parameters and the alignment could not be run\n' + str( e ) )
410 try:
411 # have to nest try-except in try-finally to handle 2.4
412 try:
413 # prepare actual mapping commands
414 if options.paired == 'paired':
415 cmd2 = 'bowtie %s %s -1 %s -2 %s > %s' % ( aligning_cmds, ref_file_name, options.input1, options.input2, options.output )
416 else:
417 cmd2 = 'bowtie %s %s %s > %s' % ( aligning_cmds, ref_file_name, options.input1, options.output )
418 # align
419 tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name
420 tmp_stderr = open( tmp, 'wb' )
421 proc = subprocess.Popen( args=cmd2, shell=True, cwd=tmp_index_dir, stderr=tmp_stderr.fileno() )
422 returncode = proc.wait()
423 tmp_stderr.close()
424 # get stderr, allowing for case where it's very large
425 tmp_stderr = open( tmp, 'rb' )
426 stderr = ''
427 buffsize = 1048576
428 try:
429 while True:
430 stderr += tmp_stderr.read( buffsize )
431 if not stderr or len( stderr ) % buffsize != 0:
432 break
433 except OverflowError:
434 pass
435 tmp_stderr.close()
436 if returncode != 0:
437 raise Exception, stderr
438 # get suppressed and unmapped reads output files in place if appropriate
439 if options.paired == 'paired' and tmp_suppressed_file_name and \
440 options.output_suppressed_reads_l and options.output_suppressed_reads_r:
441 try:
442 left = tmp_suppressed_file_name.replace( '.fastq', '_1.fastq' )
443 right = tmp_suppressed_file_name.replace( '.fastq', '_1.fastq' )
444 shutil.move( left, options.output_suppressed_reads_l )
445 shutil.move( right, options.output_suppressed_reads_r )
446 except Exception, e:
447 sys.stdout.write( 'Error producing the suppressed output file.\n' )
448 if options.paired == 'paired' and tmp_unmapped_file_name and \
449 options.output_unmapped_reads_l and options.output_unmapped_reads_r:
450 try:
451 left = tmp_unmapped_file_name.replace( '.fastq', '_1.fastq' )
452 right = tmp_unmapped_file_name.replace( '.fastq', '_2.fastq' )
453 shutil.move( left, options.output_unmapped_reads_l )
454 shutil.move( right, options.output_unmapped_reads_r )
455 except Exception, e:
456 sys.stdout.write( 'Error producing the unmapped output file.\n' )
457 # check that there are results in the output file
458 if os.path.getsize( options.output ) == 0:
459 raise Exception, 'The output file is empty, there may be an error with your input file or settings.'
460 except Exception, e:
461 stop_err( 'Error aligning sequence. ' + str( e ) )
462 finally:
463 # clean up temp dir
464 if os.path.exists( tmp_index_dir ):
465 shutil.rmtree( tmp_index_dir )
466 stdout += 'Sequence file aligned.\n'
467 sys.stdout.write( stdout )
468
469 if __name__=="__main__": __main__()