1
|
1 #!/usr/bin/env python
|
|
2
|
|
3 """
|
|
4 Runs Bowtie on single-end or paired-end data.
|
|
5
|
|
6 usage: bowtie_wrapper.py [options]
|
|
7 -t, --threads=t: The number of threads to run
|
|
8 -o, --output=o: The output file
|
|
9 --output_unmapped_reads=: File name for unmapped reads (single-end)
|
|
10 --output_unmapped_reads_l=: File name for unmapped reads (left, paired-end)
|
|
11 --output_unmapped_reads_r=: File name for unmapped reads (right, paired-end)
|
|
12 --output_suppressed_reads=: File name for suppressed reads because of max setting (single-end)
|
|
13 --output_suppressed_reads_l=: File name for suppressed reads because of max setting (left, paired-end)
|
|
14 --output_suppressed_reads_r=: File name for suppressed reads because of max setting (right, paired-end)
|
|
15 --output_mapping_stats=: File name for mapping statistics (output on stderr by bowtie)
|
|
16 -i, --input1=i: The (forward or single-end) reads file in Sanger FASTQ format
|
|
17 -I, --input2=I: The reverse reads file in Sanger FASTQ format
|
|
18 -4, --dataType=4: The type of data (SOLiD or Solexa)
|
|
19 -2, --paired=2: Whether the data is single- or paired-end
|
|
20 -g, --genomeSource=g: The type of reference provided
|
|
21 -r, --ref=r: The reference genome to use or index
|
|
22 -s, --skip=s: Skip the first n reads
|
|
23 -a, --alignLimit=a: Only align the first n reads
|
|
24 -T, --trimH=T: Trim n bases from high-quality (left) end of each read before alignment
|
|
25 -L, --trimL=L: Trim n bases from low-quality (right) end of each read before alignment
|
|
26 -m, --mismatchSeed=m: Maximum number of mismatches permitted in the seed
|
|
27 -M, --mismatchQual=M: Maximum permitted total of quality values at mismatched read positions
|
|
28 -l, --seedLen=l: Seed length
|
|
29 -n, --rounding=n: Whether or not to round to the nearest 10 and saturating at 30
|
|
30 -P, --maxMismatches=P: Maximum number of mismatches for -v alignment mode
|
|
31 -w, --tryHard=: Whether or not to try as hard as possible to find valid alignments when they exist
|
|
32 -V, --allValAligns=V: Whether or not to report all valid alignments per read or pair
|
|
33 -v, --valAlign=v: Report up to n valid alignments per read or pair
|
|
34 -G, --suppressAlign=G: Suppress all alignments for a read if more than n reportable alignments exist
|
|
35 -b, --best=b: Whether or not to make Bowtie guarantee that reported singleton alignments are 'best' in terms of stratum and in terms of the quality values at the mismatched positions
|
|
36 -B, --maxBacktracks=B: Maximum number of backtracks permitted when aligning a read
|
|
37 -R, --strata=R: Whether or not to report only those alignments that fall in the best stratum if many valid alignments exist and are reportable
|
|
38 -j, --minInsert=j: Minimum insert size for valid paired-end alignments
|
|
39 -J, --maxInsert=J: Maximum insert size for valid paired-end alignments
|
|
40 -O, --mateOrient=O: The upstream/downstream mate orientation for valid paired-end alignment against the forward reference strand
|
|
41 -A, --maxAlignAttempt=A: Maximum number of attempts Bowtie will make to match an alignment for one mate with an alignment for the opposite mate
|
|
42 -f, --forwardAlign=f: Whether or not to attempt to align the forward reference strand
|
|
43 -E, --reverseAlign=E: Whether or not to attempt to align the reverse-complement reference strand
|
|
44 -F, --offrate=F: Override the offrate of the index to n
|
|
45 -8, --snpphred=8: SNP penalty on Phred scale
|
|
46 -6, --snpfrac=6: Fraction of sites expected to be SNP sites
|
|
47 -7, --keepends=7: Keep extreme-end nucleotides and qualities
|
|
48 -S, --seed=S: Seed for pseudo-random number generator
|
|
49 -C, --params=C: Whether to use default or specified parameters
|
|
50 -u, --iautoB=u: Automatic or specified behavior
|
|
51 -K, --ipacked=K: Whether or not to use a packed representation for DNA strings
|
|
52 -Q, --ibmax=Q: Maximum number of suffixes allowed in a block
|
|
53 -Y, --ibmaxdivn=Y: Maximum number of suffixes allowed in a block as a fraction of the length of the reference
|
|
54 -D, --idcv=D: The period for the difference-cover sample
|
|
55 -U, --inodc=U: Whether or not to disable the use of the difference-cover sample
|
|
56 -y, --inoref=y: Whether or not to build the part of the reference index used only in paired-end alignment
|
|
57 -z, --ioffrate=z: How many rows get marked during annotation of some or all of the Burrows-Wheeler rows
|
|
58 -W, --iftab=W: The size of the lookup table used to calculate an initial Burrows-Wheeler range with respect to the first n characters of the query
|
|
59 -X, --intoa=X: Whether or not to convert Ns in the reference sequence to As
|
|
60 -N, --iendian=N: Endianness to use when serializing integers to the index file
|
|
61 -Z, --iseed=Z: Seed for the pseudorandom number generator
|
|
62 -x, --indexSettings=x: Whether or not indexing options are to be set
|
|
63 -H, --suppressHeader=H: Suppress header
|
|
64 --do_not_build_index: Flag to specify that provided file is already indexed and to just use 'as is'
|
|
65 """
|
|
66
|
|
67 import optparse
|
|
68 import os
|
|
69 import shutil
|
|
70 import subprocess
|
|
71 import sys
|
|
72 import tempfile
|
|
73
|
|
74 # Allow more than Sanger encoded variants
|
|
75 DEFAULT_ASCII_ENCODING = '--phred33-quals'
|
|
76 GALAXY_FORMAT_TO_QUALITY_SCORE_ENCODING_ARG = {'fastqsanger': '--phred33-quals', 'fastqillumina': '--phred64-quals', 'fastqsolexa': '--solexa-quals'}
|
|
77 # FIXME: Integer quality scores are supported only when the '--integer-quals' argument is specified to bowtie; this is not currently able to be set in the tool/wrapper/config
|
|
78
|
|
79
|
|
80 def stop_err( msg ):
|
|
81 sys.exit('%s\n' % msg)
|
|
82
|
|
83
|
|
84 def __main__():
|
|
85 parser = optparse.OptionParser()
|
|
86 parser.add_option( '-t', '--threads', dest='threads', help='The number of threads to run' )
|
|
87 parser.add_option( '-o', '--output', dest='output', help='The output file' )
|
|
88 parser.add_option( '', '--output_unmapped_reads', dest='output_unmapped_reads', help='File name for unmapped reads (single-end)' )
|
|
89 parser.add_option( '', '--output_unmapped_reads_l', dest='output_unmapped_reads_l', help='File name for unmapped reads (left, paired-end)' )
|
|
90 parser.add_option( '', '--output_unmapped_reads_r', dest='output_unmapped_reads_r', help='File name for unmapped reads (right, paired-end)' )
|
|
91 parser.add_option( '', '--output_suppressed_reads', dest='output_suppressed_reads', help='File name for suppressed reads because of max setting (single-end)' )
|
|
92 parser.add_option( '', '--output_suppressed_reads_l', dest='output_suppressed_reads_l', help='File name for suppressed reads because of max setting (left, paired-end)' )
|
|
93 parser.add_option( '', '--output_suppressed_reads_r', dest='output_suppressed_reads_r', help='File name for suppressed reads because of max setting (right, paired-end)' )
|
|
94 parser.add_option( '', '--output_mapping_stats', dest='output_mapping_stats', help='File for mapping statistics (i.e. stderr from bowtie)' )
|
|
95 parser.add_option( '-4', '--dataType', dest='dataType', help='The type of data (SOLiD or Solexa)' )
|
|
96 parser.add_option( '-i', '--input1', dest='input1', help='The (forward or single-end) reads file in Sanger FASTQ format' )
|
|
97 parser.add_option( '-I', '--input2', dest='input2', help='The reverse reads file in Sanger FASTQ format' )
|
|
98 parser.add_option( '-2', '--paired', dest='paired', help='Whether the data is single- or paired-end' )
|
|
99 parser.add_option( '-g', '--genomeSource', dest='genomeSource', help='The type of reference provided' )
|
|
100 parser.add_option( '-r', '--ref', dest='ref', help='The reference genome to use or index' )
|
|
101 parser.add_option( '-s', '--skip', dest='skip', help='Skip the first n reads' )
|
|
102 parser.add_option( '-a', '--alignLimit', dest='alignLimit', help='Only align the first n reads' )
|
|
103 parser.add_option( '-T', '--trimH', dest='trimH', help='Trim n bases from high-quality (left) end of each read before alignment' )
|
|
104 parser.add_option( '-L', '--trimL', dest='trimL', help='Trim n bases from low-quality (right) end of each read before alignment' )
|
|
105 parser.add_option( '-m', '--mismatchSeed', dest='mismatchSeed', help='Maximum number of mismatches permitted in the seed' )
|
|
106 parser.add_option( '-M', '--mismatchQual', dest='mismatchQual', help='Maximum permitted total of quality values at mismatched read positions' )
|
|
107 parser.add_option( '-l', '--seedLen', dest='seedLen', help='Seed length' )
|
|
108 parser.add_option( '-n', '--rounding', dest='rounding', help='Whether or not to round to the nearest 10 and saturating at 30' )
|
|
109 parser.add_option( '-P', '--maxMismatches', dest='maxMismatches', help='Maximum number of mismatches for -v alignment mode' )
|
|
110 parser.add_option( '-w', '--tryHard', dest='tryHard', help='Whether or not to try as hard as possible to find valid alignments when they exist' )
|
|
111 parser.add_option( '-V', '--allValAligns', dest='allValAligns', help='Whether or not to report all valid alignments per read or pair' )
|
|
112 parser.add_option( '-v', '--valAlign', dest='valAlign', help='Report up to n valid alignments per read or pair' )
|
|
113 parser.add_option( '-G', '--suppressAlign', dest='suppressAlign', help='Suppress all alignments for a read if more than n reportable alignments exist' )
|
|
114 parser.add_option( '-b', '--best', dest='best', help="Whether or not to make Bowtie guarantee that reported singleton alignments are 'best' in terms of stratum and in terms of the quality values at the mismatched positions" )
|
|
115 parser.add_option( '-B', '--maxBacktracks', dest='maxBacktracks', help='Maximum number of backtracks permitted when aligning a read' )
|
|
116 parser.add_option( '-R', '--strata', dest='strata', help='Whether or not to report only those alignments that fall in the best stratum if many valid alignments exist and are reportable' )
|
|
117 parser.add_option( '-j', '--minInsert', dest='minInsert', help='Minimum insert size for valid paired-end alignments' )
|
|
118 parser.add_option( '-J', '--maxInsert', dest='maxInsert', help='Maximum insert size for valid paired-end alignments' )
|
|
119 parser.add_option( '-O', '--mateOrient', dest='mateOrient', help='The upstream/downstream mate orientation for valid paired-end alignment against the forward reference strand' )
|
|
120 parser.add_option( '-A', '--maxAlignAttempt', dest='maxAlignAttempt', help='Maximum number of attempts Bowtie will make to match an alignment for one mate with an alignment for the opposite mate' )
|
|
121 parser.add_option( '-f', '--forwardAlign', dest='forwardAlign', help='Whether or not to attempt to align the forward reference strand' )
|
|
122 parser.add_option( '-E', '--reverseAlign', dest='reverseAlign', help='Whether or not to attempt to align the reverse-complement reference strand' )
|
|
123 parser.add_option( '-F', '--offrate', dest='offrate', help='Override the offrate of the index to n' )
|
|
124 parser.add_option( '-S', '--seed', dest='seed', help='Seed for pseudo-random number generator' )
|
|
125 parser.add_option( '-8', '--snpphred', dest='snpphred', help='SNP penalty on Phred scale' )
|
|
126 parser.add_option( '-6', '--snpfrac', dest='snpfrac', help='Fraction of sites expected to be SNP sites' )
|
|
127 parser.add_option( '-7', '--keepends', dest='keepends', help='Keep extreme-end nucleotides and qualities' )
|
|
128 parser.add_option( '-C', '--params', dest='params', help='Whether to use default or specified parameters' )
|
|
129 parser.add_option( '-u', '--iautoB', dest='iautoB', help='Automatic or specified behavior' )
|
|
130 parser.add_option( '-K', '--ipacked', dest='ipacked', help='Whether or not to use a packed representation for DNA strings' )
|
|
131 parser.add_option( '-Q', '--ibmax', dest='ibmax', help='Maximum number of suffixes allowed in a block' )
|
|
132 parser.add_option( '-Y', '--ibmaxdivn', dest='ibmaxdivn', help='Maximum number of suffixes allowed in a block as a fraction of the length of the reference' )
|
|
133 parser.add_option( '-D', '--idcv', dest='idcv', help='The period for the difference-cover sample' )
|
|
134 parser.add_option( '-U', '--inodc', dest='inodc', help='Whether or not to disable the use of the difference-cover sample' )
|
|
135 parser.add_option( '-y', '--inoref', dest='inoref', help='Whether or not to build the part of the reference index used only in paired-end alignment' )
|
|
136 parser.add_option( '-z', '--ioffrate', dest='ioffrate', help='How many rows get marked during annotation of some or all of the Burrows-Wheeler rows' )
|
|
137 parser.add_option( '-W', '--iftab', dest='iftab', help='The size of the lookup table used to calculate an initial Burrows-Wheeler range with respect to the first n characters of the query' )
|
|
138 parser.add_option( '-X', '--intoa', dest='intoa', help='Whether or not to convert Ns in the reference sequence to As' )
|
|
139 parser.add_option( '-N', '--iendian', dest='iendian', help='Endianness to use when serializing integers to the index file' )
|
|
140 parser.add_option( '-Z', '--iseed', dest='iseed', help='Seed for the pseudorandom number generator' )
|
|
141 parser.add_option( '-x', '--indexSettings', dest='index_settings', help='Whether or not indexing options are to be set' )
|
|
142 parser.add_option( '-H', '--suppressHeader', dest='suppressHeader', help='Suppress header' )
|
|
143 parser.add_option( '--galaxy_input_format', dest='galaxy_input_format', default="fastqsanger", help='galaxy input format' )
|
|
144 parser.add_option( '--do_not_build_index', dest='do_not_build_index', action="store_true", default=False, help='Flag to specify that provided file is already indexed, use as is' )
|
|
145 (options, args) = parser.parse_args()
|
|
146 if options.mismatchSeed and options.maxMismatches:
|
|
147 parser.error("options --mismatchSeed and --maxMismatches are mutually exclusive")
|
|
148 stdout = ''
|
|
149
|
|
150 # make temp directory for placement of indices and copy reference file there if necessary
|
|
151 tmp_index_dir = tempfile.mkdtemp()
|
|
152 # get type of data (solid or solexa)
|
|
153 if options.dataType == 'solid':
|
|
154 colorspace = '-C'
|
|
155 else:
|
|
156 colorspace = ''
|
|
157 # index if necessary
|
|
158 if options.genomeSource == 'history' and not options.do_not_build_index:
|
|
159 # set up commands
|
|
160 if options.index_settings == 'indexPreSet':
|
|
161 indexing_cmds = '%s' % colorspace
|
|
162 else:
|
|
163 try:
|
|
164 if options.iautoB and options.iautoB == 'set':
|
|
165 iautoB = '--noauto'
|
|
166 else:
|
|
167 iautoB = ''
|
|
168 if options.ipacked and options.ipacked == 'packed':
|
|
169 ipacked = '--packed'
|
|
170 else:
|
|
171 ipacked = ''
|
|
172 if options.ibmax and int( options.ibmax ) >= 1:
|
|
173 ibmax = '--bmax %s' % options.ibmax
|
|
174 else:
|
|
175 ibmax = ''
|
|
176 if options.ibmaxdivn and int( options.ibmaxdivn ) >= 0:
|
|
177 ibmaxdivn = '--bmaxdivn %s' % options.ibmaxdivn
|
|
178 else:
|
|
179 ibmaxdivn = ''
|
|
180 if options.idcv and int( options.idcv ) >= 3:
|
|
181 idcv = '--dcv %s' % options.idcv
|
|
182 else:
|
|
183 idcv = ''
|
|
184 if options.inodc and options.inodc == 'nodc':
|
|
185 inodc = '--nodc'
|
|
186 else:
|
|
187 inodc = ''
|
|
188 if options.inoref and options.inoref == 'noref':
|
|
189 inoref = '--noref'
|
|
190 else:
|
|
191 inoref = ''
|
|
192 if options.iftab and int( options.iftab ) >= 1:
|
|
193 iftab = '--ftabchars %s' % options.iftab
|
|
194 else:
|
|
195 iftab = ''
|
|
196 if options.intoa and options.intoa == 'yes':
|
|
197 intoa = '--ntoa'
|
|
198 else:
|
|
199 intoa = ''
|
|
200 if options.iendian and options.iendian == 'big':
|
|
201 iendian = '--big'
|
|
202 else:
|
|
203 iendian = '--little'
|
|
204 if options.iseed and int( options.iseed ) > 0:
|
|
205 iseed = '--seed %s' % options.iseed
|
|
206 else:
|
|
207 iseed = ''
|
|
208 indexing_cmds = '%s %s %s %s %s %s %s --offrate %s %s %s %s %s %s' % \
|
|
209 ( iautoB, ipacked, ibmax, ibmaxdivn, idcv, inodc,
|
|
210 inoref, options.ioffrate, iftab, intoa, iendian,
|
|
211 iseed, colorspace )
|
|
212 except ValueError as e:
|
|
213 # clean up temp dir
|
|
214 if os.path.exists( tmp_index_dir ):
|
|
215 shutil.rmtree( tmp_index_dir )
|
|
216 stop_err( "Something is wrong with the indexing parameters and the indexing and alignment could not be run. Make sure you don't have any non-numeric values where they should be numeric.\n" + str( e ) )
|
|
217 ref_file = tempfile.NamedTemporaryFile( dir=tmp_index_dir )
|
|
218 ref_file_name = ref_file.name
|
|
219 ref_file.close()
|
|
220 os.symlink( options.ref, ref_file_name )
|
|
221 cmd1 = 'bowtie-build %s -f %s %s' % ( indexing_cmds, ref_file_name, ref_file_name )
|
|
222 try:
|
|
223 tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name
|
|
224 with open(tmp, 'w') as tmp_stderr:
|
|
225 returncode = subprocess.call(args=cmd1, shell=True, cwd=tmp_index_dir, stderr=tmp_stderr.fileno())
|
|
226 if returncode != 0:
|
|
227 # get stderr, allowing for case where it's very large
|
|
228 stderr = ''
|
|
229 buffsize = 1048576
|
|
230 with open(tmp, 'r') as tmp_stderr:
|
|
231 try:
|
|
232 while True:
|
|
233 stderr += tmp_stderr.read(buffsize)
|
|
234 if not stderr or len(stderr) % buffsize != 0:
|
|
235 break
|
|
236 except OverflowError:
|
|
237 pass
|
|
238 raise Exception(stderr)
|
|
239 except Exception as e:
|
|
240 # clean up temp dir
|
|
241 if os.path.exists( tmp_index_dir ):
|
|
242 shutil.rmtree( tmp_index_dir )
|
|
243 stop_err( 'Error indexing reference sequence\n' + str( e ) )
|
|
244 stdout += 'File indexed. '
|
|
245 else:
|
|
246 ref_file_name = options.ref
|
|
247 # set up aligning and generate aligning command options
|
|
248 # automatically set threads in both cases
|
|
249 tmp_suppressed_file_name = None
|
|
250 tmp_unmapped_file_name = None
|
|
251 if options.suppressHeader == 'true':
|
|
252 suppressHeader = '--sam-nohead'
|
|
253 else:
|
|
254 suppressHeader = ''
|
|
255 if options.maxInsert and int( options.maxInsert ) > 0:
|
|
256 maxInsert = '-X %s' % options.maxInsert
|
|
257 else:
|
|
258 maxInsert = ''
|
|
259 if options.mateOrient:
|
|
260 mateOrient = '--%s' % options.mateOrient
|
|
261 else:
|
|
262 mateOrient = ''
|
|
263 quality_score_encoding = GALAXY_FORMAT_TO_QUALITY_SCORE_ENCODING_ARG.get( options.galaxy_input_format, DEFAULT_ASCII_ENCODING )
|
|
264 if options.params == 'preSet':
|
|
265 aligning_cmds = '-q %s %s -p %s -S %s %s %s ' % \
|
|
266 ( maxInsert, mateOrient, options.threads, suppressHeader, colorspace, quality_score_encoding )
|
|
267 else:
|
|
268 try:
|
|
269 if options.skip and int( options.skip ) > 0:
|
|
270 skip = '-s %s' % options.skip
|
|
271 else:
|
|
272 skip = ''
|
|
273 if options.alignLimit and int( options.alignLimit ) >= 0:
|
|
274 alignLimit = '-u %s' % options.alignLimit
|
|
275 else:
|
|
276 alignLimit = ''
|
|
277 if options.trimH and int( options.trimH ) > 0:
|
|
278 trimH = '-5 %s' % options.trimH
|
|
279 else:
|
|
280 trimH = ''
|
|
281 if options.trimL and int( options.trimL ) > 0:
|
|
282 trimL = '-3 %s' % options.trimL
|
|
283 else:
|
|
284 trimL = ''
|
|
285 if options.maxMismatches and (options.maxMismatches == '0' or options.maxMismatches == '1' or
|
|
286 options.maxMismatches == '2' or options.maxMismatches == '3'):
|
|
287 maxMismatches = '-v %s' % options.maxMismatches
|
|
288 else:
|
|
289 maxMismatches = ''
|
|
290 if options.mismatchSeed and (options.mismatchSeed == '0' or options.mismatchSeed == '1' or
|
|
291 options.mismatchSeed == '2' or options.mismatchSeed == '3'):
|
|
292 mismatchSeed = '-n %s' % options.mismatchSeed
|
|
293 else:
|
|
294 mismatchSeed = ''
|
|
295 if options.mismatchQual and int( options.mismatchQual ) >= 1:
|
|
296 mismatchQual = '-e %s' % options.mismatchQual
|
|
297 else:
|
|
298 mismatchQual = ''
|
|
299 if options.seedLen and int( options.seedLen ) >= 5:
|
|
300 seedLen = '-l %s' % options.seedLen
|
|
301 else:
|
|
302 seedLen = ''
|
|
303 if options.rounding == 'noRound':
|
|
304 rounding = '--nomaqround'
|
|
305 else:
|
|
306 rounding = ''
|
|
307 if options.minInsert and int( options.minInsert ) > 0:
|
|
308 minInsert = '-I %s' % options.minInsert
|
|
309 else:
|
|
310 minInsert = ''
|
|
311 if options.maxAlignAttempt and int( options.maxAlignAttempt ) >= 0:
|
|
312 maxAlignAttempt = '--pairtries %s' % options.maxAlignAttempt
|
|
313 else:
|
|
314 maxAlignAttempt = ''
|
|
315 if options.forwardAlign == 'noForward':
|
|
316 forwardAlign = '--nofw'
|
|
317 else:
|
|
318 forwardAlign = ''
|
|
319 if options.reverseAlign == 'noReverse':
|
|
320 reverseAlign = '--norc'
|
|
321 else:
|
|
322 reverseAlign = ''
|
|
323 if options.maxBacktracks and int( options.maxBacktracks ) > 0 and \
|
|
324 ( options.mismatchSeed == '2' or options.mismatchSeed == '3' ):
|
|
325 maxBacktracks = '--maxbts %s' % options.maxBacktracks
|
|
326 else:
|
|
327 maxBacktracks = ''
|
|
328 if options.tryHard == 'doTryHard':
|
|
329 tryHard = '-y'
|
|
330 else:
|
|
331 tryHard = ''
|
|
332 if options.valAlign and int( options.valAlign ) >= 0:
|
|
333 valAlign = '-k %s' % options.valAlign
|
|
334 else:
|
|
335 valAlign = ''
|
|
336 if options.allValAligns == 'doAllValAligns':
|
|
337 allValAligns = '-a'
|
|
338 else:
|
|
339 allValAligns = ''
|
|
340 if options.suppressAlign and int( options.suppressAlign ) >= 0:
|
|
341 suppressAlign = '-m %s' % options.suppressAlign
|
|
342 else:
|
|
343 suppressAlign = ''
|
|
344 if options.best == 'doBest':
|
|
345 best = '--best'
|
|
346 else:
|
|
347 best = ''
|
|
348 if options.strata == 'doStrata':
|
|
349 strata = '--strata'
|
|
350 else:
|
|
351 strata = ''
|
|
352 if options.offrate and int( options.offrate ) >= 0:
|
|
353 offrate = '-o %s' % options.offrate
|
|
354 else:
|
|
355 offrate = ''
|
|
356 if options.seed and int( options.seed ) >= 0:
|
|
357 seed = '--seed %s' % options.seed
|
|
358 else:
|
|
359 seed = ''
|
|
360 if options.paired == 'paired':
|
|
361 if options.output_unmapped_reads_l and options.output_unmapped_reads_r:
|
|
362 tmp_unmapped_file = tempfile.NamedTemporaryFile( dir=tmp_index_dir, suffix='.fastq' )
|
|
363 tmp_unmapped_file_name = tmp_unmapped_file.name
|
|
364 tmp_unmapped_file.close()
|
|
365 output_unmapped_reads = '--un %s' % tmp_unmapped_file_name
|
|
366 else:
|
|
367 output_unmapped_reads = ''
|
|
368 if options.output_suppressed_reads:
|
|
369 tmp_suppressed_file = tempfile.NamedTemporaryFile( dir=tmp_index_dir, suffix='.fastq' )
|
|
370 tmp_suppressed_file_name = tmp_suppressed_file.name
|
|
371 tmp_suppressed_file.close()
|
|
372 output_suppressed_reads = '--max %s' % tmp_suppressed_file_name
|
|
373 else:
|
|
374 output_suppressed_reads = ''
|
|
375 else:
|
|
376 if options.output_unmapped_reads:
|
|
377 output_unmapped_reads = '--un %s' % options.output_unmapped_reads
|
|
378 else:
|
|
379 output_unmapped_reads = ''
|
|
380 if options.output_suppressed_reads:
|
|
381 output_suppressed_reads = '--max %s' % options.output_suppressed_reads
|
|
382 else:
|
|
383 output_suppressed_reads = ''
|
|
384 snpfrac = ''
|
|
385 if options.snpphred and int( options.snpphred ) >= 0:
|
|
386 snpphred = '--snpphred %s' % options.snpphred
|
|
387 else:
|
|
388 snpphred = ''
|
|
389 if options.snpfrac and float( options.snpfrac ) >= 0:
|
|
390 snpfrac = '--snpfrac %s' % options.snpfrac
|
|
391 if options.keepends and options.keepends == 'doKeepends':
|
|
392 keepends = '--col-keepends'
|
|
393 else:
|
|
394 keepends = ''
|
|
395 aligning_cmds = '-q %s %s -p %s -S %s %s %s %s %s %s %s %s %s %s %s %s ' \
|
|
396 '%s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s ' % \
|
|
397 ( maxInsert, mateOrient, options.threads, suppressHeader,
|
|
398 colorspace, skip, alignLimit, trimH, trimL, maxMismatches,
|
|
399 mismatchSeed, mismatchQual, seedLen, rounding, minInsert,
|
|
400 maxAlignAttempt, forwardAlign, reverseAlign, maxBacktracks,
|
|
401 tryHard, valAlign, allValAligns, suppressAlign, best,
|
|
402 strata, offrate, seed, snpphred, snpfrac, keepends,
|
|
403 output_unmapped_reads, output_suppressed_reads,
|
|
404 quality_score_encoding )
|
|
405 except ValueError as e:
|
|
406 # clean up temp dir
|
|
407 if os.path.exists( tmp_index_dir ):
|
|
408 shutil.rmtree( tmp_index_dir )
|
|
409 stop_err( 'Something is wrong with the alignment parameters and the alignment could not be run\n' + str( e ) )
|
|
410 try:
|
|
411 # have to nest try-except in try-finally to handle 2.4
|
|
412 try:
|
|
413 # prepare actual mapping commands
|
|
414 if options.paired == 'paired':
|
8
|
415 # cmd2 = 'bowtie %s %s -1 %s -2 %s > %s | samtools view -b -S > %s' % ( aligning_cmds, ref_file_name, options.input1, options.input2, options.output, options.output )
|
|
416 cmd2 = 'bowtie %s %s -1 %s -2 %s > %s ' % ( aligning_cmds, ref_file_name, options.input1, options.input2, options.output )
|
|
417
|
1
|
418 else:
|
8
|
419 # cmd2 = 'bowtie %s %s %s > %s | samtools view -b -S > %s' % ( aligning_cmds, ref_file_name, options.input1, options.output, options.output )
|
|
420 cmd2 = 'bowtie %s %s %s > %s ' % ( aligning_cmds, ref_file_name, options.input1, options.output )
|
|
421
|
1
|
422 # align
|
|
423 tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name
|
|
424 with open(tmp, 'w') as tmp_stderr:
|
|
425 returncode = subprocess.call(args=cmd2, shell=True, cwd=tmp_index_dir, stderr=tmp_stderr.fileno())
|
|
426 # get stderr, allowing for case where it's very large
|
|
427 stderr = ''
|
|
428 buffsize = 1048576
|
|
429 with open(tmp, 'r') as tmp_stderr:
|
|
430 try:
|
|
431 while True:
|
|
432 stderr += tmp_stderr.read(buffsize)
|
|
433 if not stderr or len(stderr) % buffsize != 0:
|
|
434 break
|
|
435 except OverflowError:
|
|
436 pass
|
|
437 if returncode != 0:
|
|
438 raise Exception(stderr)
|
|
439 elif options.output_mapping_stats is not None:
|
|
440 # Write stderr (containing the mapping statistics) to a named file
|
|
441 with open(options.output_mapping_stats, 'w') as mapping_stats:
|
|
442 mapping_stats.write( stderr )
|
|
443 # get suppressed and unmapped reads output files in place if appropriate
|
|
444 if options.paired == 'paired' and tmp_suppressed_file_name and \
|
|
445 options.output_suppressed_reads_l and options.output_suppressed_reads_r:
|
|
446 try:
|
|
447 left = tmp_suppressed_file_name.replace( '.fastq', '_1.fastq' )
|
|
448 right = tmp_suppressed_file_name.replace( '.fastq', '_1.fastq' )
|
|
449 shutil.move( left, options.output_suppressed_reads_l )
|
|
450 shutil.move( right, options.output_suppressed_reads_r )
|
|
451 except Exception as e:
|
|
452 sys.stdout.write( 'Error producing the suppressed output file.\n' )
|
|
453 if options.paired == 'paired' and tmp_unmapped_file_name and \
|
|
454 options.output_unmapped_reads_l and options.output_unmapped_reads_r:
|
|
455 try:
|
|
456 left = tmp_unmapped_file_name.replace( '.fastq', '_1.fastq' )
|
|
457 right = tmp_unmapped_file_name.replace( '.fastq', '_2.fastq' )
|
|
458 shutil.move( left, options.output_unmapped_reads_l )
|
|
459 shutil.move( right, options.output_unmapped_reads_r )
|
|
460 except Exception as e:
|
|
461 sys.stdout.write( 'Error producing the unmapped output file.\n' )
|
|
462 # check that there are results in the output file
|
|
463 if os.path.getsize( options.output ) == 0:
|
|
464 raise Exception('The output file is empty, there may be an error with your input file or settings.')
|
|
465 except Exception as e:
|
|
466 stop_err( 'Error aligning sequence. ' + str( e ) )
|
|
467 finally:
|
|
468 # clean up temp dir
|
|
469 if os.path.exists( tmp_index_dir ):
|
|
470 shutil.rmtree( tmp_index_dir )
|
|
471 stdout += 'Sequence file aligned.\n'
|
|
472 sys.stdout.write( stdout )
|
|
473
|
|
474
|
|
475 if __name__ == "__main__":
|
|
476 __main__()
|