comparison bowtie_genome_wrapper/bowtie_genomic_wrapper.py @ 0:71f778b04f6e draft

Uploaded
author jackcurragh
date Tue, 22 Mar 2022 12:28:40 +0000
parents
children 5b1395ac1501
comparison
equal deleted inserted replaced
-1:000000000000 0:71f778b04f6e
1 #!/usr/bin/env python
2
3 """
4 Runs Bowtie on single-end or paired-end data.
5
6 usage: bowtie_wrapper.py [options]
7 -t, --threads=t: The number of threads to run
8 -o, --output=o: The output file
9 --output_unmapped_reads=: File name for unmapped reads (single-end)
10 --output_unmapped_reads_l=: File name for unmapped reads (left, paired-end)
11 --output_unmapped_reads_r=: File name for unmapped reads (right, paired-end)
12 --output_suppressed_reads=: File name for suppressed reads because of max setting (single-end)
13 --output_suppressed_reads_l=: File name for suppressed reads because of max setting (left, paired-end)
14 --output_suppressed_reads_r=: File name for suppressed reads because of max setting (right, paired-end)
15 --output_mapping_stats=: File name for mapping statistics (output on stderr by bowtie)
16 -i, --input1=i: The (forward or single-end) reads file in Sanger FASTQ format
17 -I, --input2=I: The reverse reads file in Sanger FASTQ format
18 -4, --dataType=4: The type of data (SOLiD or Solexa)
19 -2, --paired=2: Whether the data is single- or paired-end
20 -g, --genomeSource=g: The type of reference provided
21 -r, --ref=r: The reference genome to use or index
22 -s, --skip=s: Skip the first n reads
23 -a, --alignLimit=a: Only align the first n reads
24 -T, --trimH=T: Trim n bases from high-quality (left) end of each read before alignment
25 -L, --trimL=L: Trim n bases from low-quality (right) end of each read before alignment
26 -m, --mismatchSeed=m: Maximum number of mismatches permitted in the seed
27 -M, --mismatchQual=M: Maximum permitted total of quality values at mismatched read positions
28 -l, --seedLen=l: Seed length
29 -n, --rounding=n: Whether or not to round to the nearest 10 and saturating at 30
30 -P, --maxMismatches=P: Maximum number of mismatches for -v alignment mode
31 -w, --tryHard=: Whether or not to try as hard as possible to find valid alignments when they exist
32 -V, --allValAligns=V: Whether or not to report all valid alignments per read or pair
33 -v, --valAlign=v: Report up to n valid alignments per read or pair
34 -G, --suppressAlign=G: Suppress all alignments for a read if more than n reportable alignments exist
35 -b, --best=b: Whether or not to make Bowtie guarantee that reported singleton alignments are 'best' in terms of stratum and in terms of the quality values at the mismatched positions
36 -B, --maxBacktracks=B: Maximum number of backtracks permitted when aligning a read
37 -R, --strata=R: Whether or not to report only those alignments that fall in the best stratum if many valid alignments exist and are reportable
38 -j, --minInsert=j: Minimum insert size for valid paired-end alignments
39 -J, --maxInsert=J: Maximum insert size for valid paired-end alignments
40 -O, --mateOrient=O: The upstream/downstream mate orientation for valid paired-end alignment against the forward reference strand
41 -A, --maxAlignAttempt=A: Maximum number of attempts Bowtie will make to match an alignment for one mate with an alignment for the opposite mate
42 -f, --forwardAlign=f: Whether or not to attempt to align the forward reference strand
43 -E, --reverseAlign=E: Whether or not to attempt to align the reverse-complement reference strand
44 -F, --offrate=F: Override the offrate of the index to n
45 -8, --snpphred=8: SNP penalty on Phred scale
46 -6, --snpfrac=6: Fraction of sites expected to be SNP sites
47 -7, --keepends=7: Keep extreme-end nucleotides and qualities
48 -S, --seed=S: Seed for pseudo-random number generator
49 -C, --params=C: Whether to use default or specified parameters
50 -u, --iautoB=u: Automatic or specified behavior
51 -K, --ipacked=K: Whether or not to use a packed representation for DNA strings
52 -Q, --ibmax=Q: Maximum number of suffixes allowed in a block
53 -Y, --ibmaxdivn=Y: Maximum number of suffixes allowed in a block as a fraction of the length of the reference
54 -D, --idcv=D: The period for the difference-cover sample
55 -U, --inodc=U: Whether or not to disable the use of the difference-cover sample
56 -y, --inoref=y: Whether or not to build the part of the reference index used only in paired-end alignment
57 -z, --ioffrate=z: How many rows get marked during annotation of some or all of the Burrows-Wheeler rows
58 -W, --iftab=W: The size of the lookup table used to calculate an initial Burrows-Wheeler range with respect to the first n characters of the query
59 -X, --intoa=X: Whether or not to convert Ns in the reference sequence to As
60 -N, --iendian=N: Endianness to use when serializing integers to the index file
61 -Z, --iseed=Z: Seed for the pseudorandom number generator
62 -x, --indexSettings=x: Whether or not indexing options are to be set
63 -H, --suppressHeader=H: Suppress header
64 --do_not_build_index: Flag to specify that provided file is already indexed and to just use 'as is'
65 """
66
67 import optparse
68 import os
69 import shutil
70 import subprocess
71 import sys
72 import tempfile
73
74 # Allow more than Sanger encoded variants
75 DEFAULT_ASCII_ENCODING = '--phred33-quals'
76 GALAXY_FORMAT_TO_QUALITY_SCORE_ENCODING_ARG = {'fastqsanger': '--phred33-quals', 'fastqillumina': '--phred64-quals', 'fastqsolexa': '--solexa-quals'}
77 # FIXME: Integer quality scores are supported only when the '--integer-quals' argument is specified to bowtie; this is not currently able to be set in the tool/wrapper/config
78
79
80 def stop_err( msg ):
81 sys.exit('%s\n' % msg)
82
83
84 def __main__():
85 parser = optparse.OptionParser()
86 parser.add_option( '-t', '--threads', dest='threads', help='The number of threads to run' )
87 parser.add_option( '-o', '--output', dest='output', help='The output file' )
88 parser.add_option( '', '--output_unmapped_reads', dest='output_unmapped_reads', help='File name for unmapped reads (single-end)' )
89 parser.add_option( '', '--output_unmapped_reads_l', dest='output_unmapped_reads_l', help='File name for unmapped reads (left, paired-end)' )
90 parser.add_option( '', '--output_unmapped_reads_r', dest='output_unmapped_reads_r', help='File name for unmapped reads (right, paired-end)' )
91 parser.add_option( '', '--output_suppressed_reads', dest='output_suppressed_reads', help='File name for suppressed reads because of max setting (single-end)' )
92 parser.add_option( '', '--output_suppressed_reads_l', dest='output_suppressed_reads_l', help='File name for suppressed reads because of max setting (left, paired-end)' )
93 parser.add_option( '', '--output_suppressed_reads_r', dest='output_suppressed_reads_r', help='File name for suppressed reads because of max setting (right, paired-end)' )
94 parser.add_option( '', '--output_mapping_stats', dest='output_mapping_stats', help='File for mapping statistics (i.e. stderr from bowtie)' )
95 parser.add_option( '-4', '--dataType', dest='dataType', help='The type of data (SOLiD or Solexa)' )
96 parser.add_option( '-i', '--input1', dest='input1', help='The (forward or single-end) reads file in Sanger FASTQ format' )
97 parser.add_option( '-I', '--input2', dest='input2', help='The reverse reads file in Sanger FASTQ format' )
98 parser.add_option( '-2', '--paired', dest='paired', help='Whether the data is single- or paired-end' )
99 parser.add_option( '-g', '--genomeSource', dest='genomeSource', help='The type of reference provided' )
100 parser.add_option( '-r', '--ref', dest='ref', help='The reference genome to use or index' )
101 parser.add_option( '-s', '--skip', dest='skip', help='Skip the first n reads' )
102 parser.add_option( '-a', '--alignLimit', dest='alignLimit', help='Only align the first n reads' )
103 parser.add_option( '-T', '--trimH', dest='trimH', help='Trim n bases from high-quality (left) end of each read before alignment' )
104 parser.add_option( '-L', '--trimL', dest='trimL', help='Trim n bases from low-quality (right) end of each read before alignment' )
105 parser.add_option( '-m', '--mismatchSeed', dest='mismatchSeed', help='Maximum number of mismatches permitted in the seed' )
106 parser.add_option( '-M', '--mismatchQual', dest='mismatchQual', help='Maximum permitted total of quality values at mismatched read positions' )
107 parser.add_option( '-l', '--seedLen', dest='seedLen', help='Seed length' )
108 parser.add_option( '-n', '--rounding', dest='rounding', help='Whether or not to round to the nearest 10 and saturating at 30' )
109 parser.add_option( '-P', '--maxMismatches', dest='maxMismatches', help='Maximum number of mismatches for -v alignment mode' )
110 parser.add_option( '-w', '--tryHard', dest='tryHard', help='Whether or not to try as hard as possible to find valid alignments when they exist' )
111 parser.add_option( '-V', '--allValAligns', dest='allValAligns', help='Whether or not to report all valid alignments per read or pair' )
112 parser.add_option( '-v', '--valAlign', dest='valAlign', help='Report up to n valid alignments per read or pair' )
113 parser.add_option( '-G', '--suppressAlign', dest='suppressAlign', help='Suppress all alignments for a read if more than n reportable alignments exist' )
114 parser.add_option( '-b', '--best', dest='best', help="Whether or not to make Bowtie guarantee that reported singleton alignments are 'best' in terms of stratum and in terms of the quality values at the mismatched positions" )
115 parser.add_option( '-B', '--maxBacktracks', dest='maxBacktracks', help='Maximum number of backtracks permitted when aligning a read' )
116 parser.add_option( '-R', '--strata', dest='strata', help='Whether or not to report only those alignments that fall in the best stratum if many valid alignments exist and are reportable' )
117 parser.add_option( '-j', '--minInsert', dest='minInsert', help='Minimum insert size for valid paired-end alignments' )
118 parser.add_option( '-J', '--maxInsert', dest='maxInsert', help='Maximum insert size for valid paired-end alignments' )
119 parser.add_option( '-O', '--mateOrient', dest='mateOrient', help='The upstream/downstream mate orientation for valid paired-end alignment against the forward reference strand' )
120 parser.add_option( '-A', '--maxAlignAttempt', dest='maxAlignAttempt', help='Maximum number of attempts Bowtie will make to match an alignment for one mate with an alignment for the opposite mate' )
121 parser.add_option( '-f', '--forwardAlign', dest='forwardAlign', help='Whether or not to attempt to align the forward reference strand' )
122 parser.add_option( '-E', '--reverseAlign', dest='reverseAlign', help='Whether or not to attempt to align the reverse-complement reference strand' )
123 parser.add_option( '-F', '--offrate', dest='offrate', help='Override the offrate of the index to n' )
124 parser.add_option( '-S', '--seed', dest='seed', help='Seed for pseudo-random number generator' )
125 parser.add_option( '-8', '--snpphred', dest='snpphred', help='SNP penalty on Phred scale' )
126 parser.add_option( '-6', '--snpfrac', dest='snpfrac', help='Fraction of sites expected to be SNP sites' )
127 parser.add_option( '-7', '--keepends', dest='keepends', help='Keep extreme-end nucleotides and qualities' )
128 parser.add_option( '-C', '--params', dest='params', help='Whether to use default or specified parameters' )
129 parser.add_option( '-u', '--iautoB', dest='iautoB', help='Automatic or specified behavior' )
130 parser.add_option( '-K', '--ipacked', dest='ipacked', help='Whether or not to use a packed representation for DNA strings' )
131 parser.add_option( '-Q', '--ibmax', dest='ibmax', help='Maximum number of suffixes allowed in a block' )
132 parser.add_option( '-Y', '--ibmaxdivn', dest='ibmaxdivn', help='Maximum number of suffixes allowed in a block as a fraction of the length of the reference' )
133 parser.add_option( '-D', '--idcv', dest='idcv', help='The period for the difference-cover sample' )
134 parser.add_option( '-U', '--inodc', dest='inodc', help='Whether or not to disable the use of the difference-cover sample' )
135 parser.add_option( '-y', '--inoref', dest='inoref', help='Whether or not to build the part of the reference index used only in paired-end alignment' )
136 parser.add_option( '-z', '--ioffrate', dest='ioffrate', help='How many rows get marked during annotation of some or all of the Burrows-Wheeler rows' )
137 parser.add_option( '-W', '--iftab', dest='iftab', help='The size of the lookup table used to calculate an initial Burrows-Wheeler range with respect to the first n characters of the query' )
138 parser.add_option( '-X', '--intoa', dest='intoa', help='Whether or not to convert Ns in the reference sequence to As' )
139 parser.add_option( '-N', '--iendian', dest='iendian', help='Endianness to use when serializing integers to the index file' )
140 parser.add_option( '-Z', '--iseed', dest='iseed', help='Seed for the pseudorandom number generator' )
141 parser.add_option( '-x', '--indexSettings', dest='index_settings', help='Whether or not indexing options are to be set' )
142 parser.add_option( '-H', '--suppressHeader', dest='suppressHeader', help='Suppress header' )
143 parser.add_option( '--galaxy_input_format', dest='galaxy_input_format', default="fastqsanger", help='galaxy input format' )
144 parser.add_option( '--do_not_build_index', dest='do_not_build_index', action="store_true", default=False, help='Flag to specify that provided file is already indexed, use as is' )
145 (options, args) = parser.parse_args()
146 if options.mismatchSeed and options.maxMismatches:
147 parser.error("options --mismatchSeed and --maxMismatches are mutually exclusive")
148 stdout = ''
149
150 # make temp directory for placement of indices and copy reference file there if necessary
151 tmp_index_dir = tempfile.mkdtemp()
152 # get type of data (solid or solexa)
153 if options.dataType == 'solid':
154 colorspace = '-C'
155 else:
156 colorspace = ''
157 # index if necessary
158 if options.genomeSource == 'history' and not options.do_not_build_index:
159 # set up commands
160 if options.index_settings == 'indexPreSet':
161 indexing_cmds = '%s' % colorspace
162 else:
163 try:
164 if options.iautoB and options.iautoB == 'set':
165 iautoB = '--noauto'
166 else:
167 iautoB = ''
168 if options.ipacked and options.ipacked == 'packed':
169 ipacked = '--packed'
170 else:
171 ipacked = ''
172 if options.ibmax and int( options.ibmax ) >= 1:
173 ibmax = '--bmax %s' % options.ibmax
174 else:
175 ibmax = ''
176 if options.ibmaxdivn and int( options.ibmaxdivn ) >= 0:
177 ibmaxdivn = '--bmaxdivn %s' % options.ibmaxdivn
178 else:
179 ibmaxdivn = ''
180 if options.idcv and int( options.idcv ) >= 3:
181 idcv = '--dcv %s' % options.idcv
182 else:
183 idcv = ''
184 if options.inodc and options.inodc == 'nodc':
185 inodc = '--nodc'
186 else:
187 inodc = ''
188 if options.inoref and options.inoref == 'noref':
189 inoref = '--noref'
190 else:
191 inoref = ''
192 if options.iftab and int( options.iftab ) >= 1:
193 iftab = '--ftabchars %s' % options.iftab
194 else:
195 iftab = ''
196 if options.intoa and options.intoa == 'yes':
197 intoa = '--ntoa'
198 else:
199 intoa = ''
200 if options.iendian and options.iendian == 'big':
201 iendian = '--big'
202 else:
203 iendian = '--little'
204 if options.iseed and int( options.iseed ) > 0:
205 iseed = '--seed %s' % options.iseed
206 else:
207 iseed = ''
208 indexing_cmds = '%s %s %s %s %s %s %s --offrate %s %s %s %s %s %s' % \
209 ( iautoB, ipacked, ibmax, ibmaxdivn, idcv, inodc,
210 inoref, options.ioffrate, iftab, intoa, iendian,
211 iseed, colorspace )
212 except ValueError as e:
213 # clean up temp dir
214 if os.path.exists( tmp_index_dir ):
215 shutil.rmtree( tmp_index_dir )
216 stop_err( "Something is wrong with the indexing parameters and the indexing and alignment could not be run. Make sure you don't have any non-numeric values where they should be numeric.\n" + str( e ) )
217 ref_file = tempfile.NamedTemporaryFile( dir=tmp_index_dir )
218 ref_file_name = ref_file.name
219 ref_file.close()
220 os.symlink( options.ref, ref_file_name )
221 cmd1 = 'bowtie-build %s -f %s %s' % ( indexing_cmds, ref_file_name, ref_file_name )
222 try:
223 tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name
224 with open(tmp, 'w') as tmp_stderr:
225 returncode = subprocess.call(args=cmd1, shell=True, cwd=tmp_index_dir, stderr=tmp_stderr.fileno())
226 if returncode != 0:
227 # get stderr, allowing for case where it's very large
228 stderr = ''
229 buffsize = 1048576
230 with open(tmp, 'r') as tmp_stderr:
231 try:
232 while True:
233 stderr += tmp_stderr.read(buffsize)
234 if not stderr or len(stderr) % buffsize != 0:
235 break
236 except OverflowError:
237 pass
238 raise Exception(stderr)
239 except Exception as e:
240 # clean up temp dir
241 if os.path.exists( tmp_index_dir ):
242 shutil.rmtree( tmp_index_dir )
243 stop_err( 'Error indexing reference sequence\n' + str( e ) )
244 stdout += 'File indexed. '
245 else:
246 ref_file_name = options.ref
247 # set up aligning and generate aligning command options
248 # automatically set threads in both cases
249 tmp_suppressed_file_name = None
250 tmp_unmapped_file_name = None
251 if options.suppressHeader == 'true':
252 suppressHeader = '--sam-nohead'
253 else:
254 suppressHeader = ''
255 if options.maxInsert and int( options.maxInsert ) > 0:
256 maxInsert = '-X %s' % options.maxInsert
257 else:
258 maxInsert = ''
259 if options.mateOrient:
260 mateOrient = '--%s' % options.mateOrient
261 else:
262 mateOrient = ''
263 quality_score_encoding = GALAXY_FORMAT_TO_QUALITY_SCORE_ENCODING_ARG.get( options.galaxy_input_format, DEFAULT_ASCII_ENCODING )
264 if options.params == 'preSet':
265 aligning_cmds = '-q %s %s -p %s -S %s %s %s ' % \
266 ( maxInsert, mateOrient, options.threads, suppressHeader, colorspace, quality_score_encoding )
267 else:
268 try:
269 if options.skip and int( options.skip ) > 0:
270 skip = '-s %s' % options.skip
271 else:
272 skip = ''
273 if options.alignLimit and int( options.alignLimit ) >= 0:
274 alignLimit = '-u %s' % options.alignLimit
275 else:
276 alignLimit = ''
277 if options.trimH and int( options.trimH ) > 0:
278 trimH = '-5 %s' % options.trimH
279 else:
280 trimH = ''
281 if options.trimL and int( options.trimL ) > 0:
282 trimL = '-3 %s' % options.trimL
283 else:
284 trimL = ''
285 if options.maxMismatches and (options.maxMismatches == '0' or options.maxMismatches == '1' or
286 options.maxMismatches == '2' or options.maxMismatches == '3'):
287 maxMismatches = '-v %s' % options.maxMismatches
288 else:
289 maxMismatches = ''
290 if options.mismatchSeed and (options.mismatchSeed == '0' or options.mismatchSeed == '1' or
291 options.mismatchSeed == '2' or options.mismatchSeed == '3'):
292 mismatchSeed = '-n %s' % options.mismatchSeed
293 else:
294 mismatchSeed = ''
295 if options.mismatchQual and int( options.mismatchQual ) >= 1:
296 mismatchQual = '-e %s' % options.mismatchQual
297 else:
298 mismatchQual = ''
299 if options.seedLen and int( options.seedLen ) >= 5:
300 seedLen = '-l %s' % options.seedLen
301 else:
302 seedLen = ''
303 if options.rounding == 'noRound':
304 rounding = '--nomaqround'
305 else:
306 rounding = ''
307 if options.minInsert and int( options.minInsert ) > 0:
308 minInsert = '-I %s' % options.minInsert
309 else:
310 minInsert = ''
311 if options.maxAlignAttempt and int( options.maxAlignAttempt ) >= 0:
312 maxAlignAttempt = '--pairtries %s' % options.maxAlignAttempt
313 else:
314 maxAlignAttempt = ''
315 if options.forwardAlign == 'noForward':
316 forwardAlign = '--nofw'
317 else:
318 forwardAlign = ''
319 if options.reverseAlign == 'noReverse':
320 reverseAlign = '--norc'
321 else:
322 reverseAlign = ''
323 if options.maxBacktracks and int( options.maxBacktracks ) > 0 and \
324 ( options.mismatchSeed == '2' or options.mismatchSeed == '3' ):
325 maxBacktracks = '--maxbts %s' % options.maxBacktracks
326 else:
327 maxBacktracks = ''
328 if options.tryHard == 'doTryHard':
329 tryHard = '-y'
330 else:
331 tryHard = ''
332 if options.valAlign and int( options.valAlign ) >= 0:
333 valAlign = '-k %s' % options.valAlign
334 else:
335 valAlign = ''
336 if options.allValAligns == 'doAllValAligns':
337 allValAligns = '-a'
338 else:
339 allValAligns = ''
340 if options.suppressAlign and int( options.suppressAlign ) >= 0:
341 suppressAlign = '-m %s' % options.suppressAlign
342 else:
343 suppressAlign = ''
344 if options.best == 'doBest':
345 best = '--best'
346 else:
347 best = ''
348 if options.strata == 'doStrata':
349 strata = '--strata'
350 else:
351 strata = ''
352 if options.offrate and int( options.offrate ) >= 0:
353 offrate = '-o %s' % options.offrate
354 else:
355 offrate = ''
356 if options.seed and int( options.seed ) >= 0:
357 seed = '--seed %s' % options.seed
358 else:
359 seed = ''
360 if options.paired == 'paired':
361 if options.output_unmapped_reads_l and options.output_unmapped_reads_r:
362 tmp_unmapped_file = tempfile.NamedTemporaryFile( dir=tmp_index_dir, suffix='.fastq' )
363 tmp_unmapped_file_name = tmp_unmapped_file.name
364 tmp_unmapped_file.close()
365 output_unmapped_reads = '--un %s' % tmp_unmapped_file_name
366 else:
367 output_unmapped_reads = ''
368 if options.output_suppressed_reads:
369 tmp_suppressed_file = tempfile.NamedTemporaryFile( dir=tmp_index_dir, suffix='.fastq' )
370 tmp_suppressed_file_name = tmp_suppressed_file.name
371 tmp_suppressed_file.close()
372 output_suppressed_reads = '--max %s' % tmp_suppressed_file_name
373 else:
374 output_suppressed_reads = ''
375 else:
376 if options.output_unmapped_reads:
377 output_unmapped_reads = '--un %s' % options.output_unmapped_reads
378 else:
379 output_unmapped_reads = ''
380 if options.output_suppressed_reads:
381 output_suppressed_reads = '--max %s' % options.output_suppressed_reads
382 else:
383 output_suppressed_reads = ''
384 snpfrac = ''
385 if options.snpphred and int( options.snpphred ) >= 0:
386 snpphred = '--snpphred %s' % options.snpphred
387 else:
388 snpphred = ''
389 if options.snpfrac and float( options.snpfrac ) >= 0:
390 snpfrac = '--snpfrac %s' % options.snpfrac
391 if options.keepends and options.keepends == 'doKeepends':
392 keepends = '--col-keepends'
393 else:
394 keepends = ''
395 aligning_cmds = '-q %s %s -p %s -S %s %s %s %s %s %s %s %s %s %s %s %s ' \
396 '%s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s ' % \
397 ( maxInsert, mateOrient, options.threads, suppressHeader,
398 colorspace, skip, alignLimit, trimH, trimL, maxMismatches,
399 mismatchSeed, mismatchQual, seedLen, rounding, minInsert,
400 maxAlignAttempt, forwardAlign, reverseAlign, maxBacktracks,
401 tryHard, valAlign, allValAligns, suppressAlign, best,
402 strata, offrate, seed, snpphred, snpfrac, keepends,
403 output_unmapped_reads, output_suppressed_reads,
404 quality_score_encoding )
405 except ValueError as e:
406 # clean up temp dir
407 if os.path.exists( tmp_index_dir ):
408 shutil.rmtree( tmp_index_dir )
409 stop_err( 'Something is wrong with the alignment parameters and the alignment could not be run\n' + str( e ) )
410 try:
411 # have to nest try-except in try-finally to handle 2.4
412 try:
413 # prepare actual mapping commands
414 if options.paired == 'paired':
415 cmd2 = 'bowtie %s %s -1 %s -2 %s > %s' % ( aligning_cmds, ref_file_name, options.input1, options.input2, options.output )
416 else:
417 cmd2 = 'bowtie %s %s %s > %s' % ( aligning_cmds, ref_file_name, options.input1, options.output )
418 # align
419 tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name
420 with open(tmp, 'w') as tmp_stderr:
421 returncode = subprocess.call(args=cmd2, shell=True, cwd=tmp_index_dir, stderr=tmp_stderr.fileno())
422 # get stderr, allowing for case where it's very large
423 stderr = ''
424 buffsize = 1048576
425 with open(tmp, 'r') as tmp_stderr:
426 try:
427 while True:
428 stderr += tmp_stderr.read(buffsize)
429 if not stderr or len(stderr) % buffsize != 0:
430 break
431 except OverflowError:
432 pass
433 if returncode != 0:
434 raise Exception(stderr)
435 elif options.output_mapping_stats is not None:
436 # Write stderr (containing the mapping statistics) to a named file
437 with open(options.output_mapping_stats, 'w') as mapping_stats:
438 mapping_stats.write( stderr )
439 # get suppressed and unmapped reads output files in place if appropriate
440 if options.paired == 'paired' and tmp_suppressed_file_name and \
441 options.output_suppressed_reads_l and options.output_suppressed_reads_r:
442 try:
443 left = tmp_suppressed_file_name.replace( '.fastq', '_1.fastq' )
444 right = tmp_suppressed_file_name.replace( '.fastq', '_1.fastq' )
445 shutil.move( left, options.output_suppressed_reads_l )
446 shutil.move( right, options.output_suppressed_reads_r )
447 except Exception as e:
448 sys.stdout.write( 'Error producing the suppressed output file.\n' )
449 if options.paired == 'paired' and tmp_unmapped_file_name and \
450 options.output_unmapped_reads_l and options.output_unmapped_reads_r:
451 try:
452 left = tmp_unmapped_file_name.replace( '.fastq', '_1.fastq' )
453 right = tmp_unmapped_file_name.replace( '.fastq', '_2.fastq' )
454 shutil.move( left, options.output_unmapped_reads_l )
455 shutil.move( right, options.output_unmapped_reads_r )
456 except Exception as e:
457 sys.stdout.write( 'Error producing the unmapped output file.\n' )
458 # check that there are results in the output file
459 if os.path.getsize( options.output ) == 0:
460 raise Exception('The output file is empty, there may be an error with your input file or settings.')
461 except Exception as e:
462 stop_err( 'Error aligning sequence. ' + str( e ) )
463 finally:
464 # clean up temp dir
465 if os.path.exists( tmp_index_dir ):
466 shutil.rmtree( tmp_index_dir )
467 stdout += 'Sequence file aligned.\n'
468 sys.stdout.write( stdout )
469
470
471 if __name__ == "__main__":
472 __main__()