0
|
1 #!/usr/bin/env python
|
|
2
|
|
3 """
|
|
4 Runs Bowtie on single-end or paired-end data.
|
|
5 For use with Bowtie v. 0.12.7
|
|
6
|
|
7 usage: bowtie_wrapper.py [options]
|
|
8 -t, --threads=t: The number of threads to run
|
|
9 -o, --output=o: The output file
|
|
10 --output_unmapped_reads=: File name for unmapped reads (single-end)
|
|
11 --output_unmapped_reads_l=: File name for unmapped reads (left, paired-end)
|
|
12 --output_unmapped_reads_r=: File name for unmapped reads (right, paired-end)
|
|
13 --output_suppressed_reads=: File name for suppressed reads because of max setting (single-end)
|
|
14 --output_suppressed_reads_l=: File name for suppressed reads because of max setting (left, paired-end)
|
|
15 --output_suppressed_reads_r=: File name for suppressed reads because of max setting (right, paired-end)
|
|
16 -i, --input1=i: The (forward or single-end) reads file in Sanger FASTQ format
|
|
17 -I, --input2=I: The reverse reads file in Sanger FASTQ format
|
|
18 -4, --dataType=4: The type of data (SOLiD or Solexa)
|
|
19 -2, --paired=2: Whether the data is single- or paired-end
|
|
20 -g, --genomeSource=g: The type of reference provided
|
|
21 -r, --ref=r: The reference genome to use or index
|
|
22 -s, --skip=s: Skip the first n reads
|
|
23 -a, --alignLimit=a: Only align the first n reads
|
|
24 -T, --trimH=T: Trim n bases from high-quality (left) end of each read before alignment
|
|
25 -L, --trimL=L: Trim n bases from low-quality (right) end of each read before alignment
|
|
26 -m, --mismatchSeed=m: Maximum number of mismatches permitted in the seed
|
|
27 -M, --mismatchQual=M: Maximum permitted total of quality values at mismatched read positions
|
|
28 -l, --seedLen=l: Seed length
|
|
29 -n, --rounding=n: Whether or not to round to the nearest 10 and saturating at 30
|
|
30 -P, --maxMismatches=P: Maximum number of mismatches for -v alignment mode
|
|
31 -w, --tryHard=: Whether or not to try as hard as possible to find valid alignments when they exist
|
|
32 -V, --allValAligns=V: Whether or not to report all valid alignments per read or pair
|
|
33 -v, --valAlign=v: Report up to n valid alignments per read or pair
|
|
34 -G, --suppressAlign=G: Suppress all alignments for a read if more than n reportable alignments exist
|
|
35 -b, --best=b: Whether or not to make Bowtie guarantee that reported singleton alignments are 'best' in terms of stratum and in terms of the quality values at the mismatched positions
|
|
36 -B, --maxBacktracks=B: Maximum number of backtracks permitted when aligning a read
|
|
37 -R, --strata=R: Whether or not to report only those alignments that fall in the best stratum if many valid alignments exist and are reportable
|
|
38 -j, --minInsert=j: Minimum insert size for valid paired-end alignments
|
|
39 -J, --maxInsert=J: Maximum insert size for valid paired-end alignments
|
|
40 -O, --mateOrient=O: The upstream/downstream mate orientation for valid paired-end alignment against the forward reference strand
|
|
41 -A, --maxAlignAttempt=A: Maximum number of attempts Bowtie will make to match an alignment for one mate with an alignment for the opposite mate
|
|
42 -f, --forwardAlign=f: Whether or not to attempt to align the forward reference strand
|
|
43 -E, --reverseAlign=E: Whether or not to attempt to align the reverse-complement reference strand
|
|
44 -F, --offrate=F: Override the offrate of the index to n
|
|
45 -8, --snpphred=8: SNP penalty on Phred scale
|
|
46 -6, --snpfrac=6: Fraction of sites expected to be SNP sites
|
|
47 -7, --keepends=7: Keep extreme-end nucleotides and qualities
|
|
48 -S, --seed=S: Seed for pseudo-random number generator
|
|
49 -C, --params=C: Whether to use default or specified parameters
|
|
50 -u, --iautoB=u: Automatic or specified behavior
|
|
51 -K, --ipacked=K: Whether or not to use a packed representation for DNA strings
|
|
52 -Q, --ibmax=Q: Maximum number of suffixes allowed in a block
|
|
53 -Y, --ibmaxdivn=Y: Maximum number of suffixes allowed in a block as a fraction of the length of the reference
|
|
54 -D, --idcv=D: The period for the difference-cover sample
|
|
55 -U, --inodc=U: Whether or not to disable the use of the difference-cover sample
|
|
56 -y, --inoref=y: Whether or not to build the part of the reference index used only in paired-end alignment
|
|
57 -z, --ioffrate=z: How many rows get marked during annotation of some or all of the Burrows-Wheeler rows
|
|
58 -W, --iftab=W: The size of the lookup table used to calculate an initial Burrows-Wheeler range with respect to the first n characters of the query
|
|
59 -X, --intoa=X: Whether or not to convert Ns in the reference sequence to As
|
|
60 -N, --iendian=N: Endianness to use when serializing integers to the index file
|
|
61 -Z, --iseed=Z: Seed for the pseudorandom number generator
|
|
62 -x, --indexSettings=x: Whether or not indexing options are to be set
|
|
63 -H, --suppressHeader=H: Suppress header
|
|
64 --do_not_build_index: Flag to specify that provided file is already indexed and to just use 'as is'
|
|
65 """
|
|
66
|
|
67 import optparse, os, shutil, subprocess, sys, tempfile
|
|
68
|
|
69 #Allow more than Sanger encoded variants
|
|
70 DEFAULT_ASCII_ENCODING = '--phred33-quals'
|
|
71 GALAXY_FORMAT_TO_QUALITY_SCORE_ENCODING_ARG = { 'fastqsanger':'--phred33-quals', 'fastqillumina':'--phred64-quals', 'fastqsolexa':'--solexa-quals' }
|
|
72 #FIXME: Integer quality scores are supported only when the '--integer-quals' argument is specified to bowtie; this is not currently able to be set in the tool/wrapper/config
|
|
73
|
|
74 def stop_err( msg ):
|
|
75 sys.stderr.write( '%s\n' % msg )
|
|
76 sys.exit()
|
|
77
|
|
78 def __main__():
|
|
79 #Parse Command Line
|
|
80 parser = optparse.OptionParser()
|
|
81 parser.add_option( '-t', '--threads', dest='threads', help='The number of threads to run' )
|
|
82 parser.add_option( '-o', '--output', dest='output', help='The output file' )
|
|
83 parser.add_option( '', '--output_unmapped_reads', dest='output_unmapped_reads', help='File name for unmapped reads (single-end)' )
|
|
84 parser.add_option( '', '--output_unmapped_reads_l', dest='output_unmapped_reads_l', help='File name for unmapped reads (left, paired-end)' )
|
|
85 parser.add_option( '', '--output_unmapped_reads_r', dest='output_unmapped_reads_r', help='File name for unmapped reads (right, paired-end)' )
|
|
86 parser.add_option( '', '--output_suppressed_reads', dest='output_suppressed_reads', help='File name for suppressed reads because of max setting (single-end)' )
|
|
87 parser.add_option( '', '--output_suppressed_reads_l', dest='output_suppressed_reads_l', help='File name for suppressed reads because of max setting (left, paired-end)' )
|
|
88 parser.add_option( '', '--output_suppressed_reads_r', dest='output_suppressed_reads_r', help='File name for suppressed reads because of max setting (right, paired-end)' )
|
|
89 parser.add_option( '-4', '--dataType', dest='dataType', help='The type of data (SOLiD or Solexa)' )
|
|
90 parser.add_option( '-i', '--input1', dest='input1', help='The (forward or single-end) reads file in Sanger FASTQ or FASTA format' )
|
|
91
|
|
92
|
|
93
|
|
94 parser.add_option( '--filetype', dest='filetype', help='The filetype of your input reads - FASTA (f) or FASTQ (q)' )
|
|
95 parser.add_option( '--outtype', dest='outtype', help='The filetype of your output (nothing for map or -S for SAM)' )
|
|
96
|
|
97
|
|
98 parser.add_option( '-I', '--input2', dest='input2', help='The reverse reads file in Sanger FASTQ format' )
|
|
99 parser.add_option( '-2', '--paired', dest='paired', help='Whether the data is single- or paired-end' )
|
|
100 parser.add_option( '-g', '--genomeSource', dest='genomeSource', help='The type of reference provided' )
|
|
101 parser.add_option( '-r', '--ref', dest='ref', help='The reference genome to use or index' )
|
|
102 parser.add_option( '-s', '--skip', dest='skip', help='Skip the first n reads' )
|
|
103 parser.add_option( '-a', '--alignLimit', dest='alignLimit', help='Only align the first n reads' )
|
|
104 parser.add_option( '-T', '--trimH', dest='trimH', help='Trim n bases from high-quality (left) end of each read before alignment' )
|
|
105 parser.add_option( '-L', '--trimL', dest='trimL', help='Trim n bases from low-quality (right) end of each read before alignment' )
|
|
106 parser.add_option( '-m', '--mismatchSeed', dest='mismatchSeed', help='Maximum number of mismatches permitted in the seed' )
|
|
107 parser.add_option( '-M', '--mismatchQual', dest='mismatchQual', help='Maximum permitted total of quality values at mismatched read positions' )
|
|
108 parser.add_option( '-l', '--seedLen', dest='seedLen', help='Seed length' )
|
|
109 parser.add_option( '-n', '--rounding', dest='rounding', help='Whether or not to round to the nearest 10 and saturating at 30' )
|
|
110 parser.add_option( '-P', '--maxMismatches', dest='maxMismatches', help='Maximum number of mismatches for -v alignment mode' )
|
|
111 parser.add_option( '-w', '--tryHard', dest='tryHard', help='Whether or not to try as hard as possible to find valid alignments when they exist' )
|
|
112 parser.add_option( '-V', '--allValAligns', dest='allValAligns', help='Whether or not to report all valid alignments per read or pair' )
|
|
113 parser.add_option( '-v', '--valAlign', dest='valAlign', help='Report up to n valid alignments per read or pair' )
|
|
114 parser.add_option( '-G', '--suppressAlign', dest='suppressAlign', help='Suppress all alignments for a read if more than n reportable alignments exist' )
|
|
115 parser.add_option( '-b', '--best', dest='best', help="Whether or not to make Bowtie guarantee that reported singleton alignments are 'best' in terms of stratum and in terms of the quality values at the mismatched positions" )
|
|
116 parser.add_option( '-B', '--maxBacktracks', dest='maxBacktracks', help='Maximum number of backtracks permitted when aligning a read' )
|
|
117 parser.add_option( '-R', '--strata', dest='strata', help='Whether or not to report only those alignments that fall in the best stratum if many valid alignments exist and are reportable' )
|
|
118 parser.add_option( '-j', '--minInsert', dest='minInsert', help='Minimum insert size for valid paired-end alignments' )
|
|
119 parser.add_option( '-J', '--maxInsert', dest='maxInsert', help='Maximum insert size for valid paired-end alignments' )
|
|
120 parser.add_option( '-O', '--mateOrient', dest='mateOrient', help='The upstream/downstream mate orientation for valid paired-end alignment against the forward reference strand' )
|
|
121 parser.add_option( '-A', '--maxAlignAttempt', dest='maxAlignAttempt', help='Maximum number of attempts Bowtie will make to match an alignment for one mate with an alignment for the opposite mate' )
|
|
122 parser.add_option( '-f', '--forwardAlign', dest='forwardAlign', help='Whether or not to attempt to align the forward reference strand' )
|
|
123 parser.add_option( '-E', '--reverseAlign', dest='reverseAlign', help='Whether or not to attempt to align the reverse-complement reference strand' )
|
|
124 parser.add_option( '-F', '--offrate', dest='offrate', help='Override the offrate of the index to n' )
|
|
125 parser.add_option( '-S', '--seed', dest='seed', help='Seed for pseudo-random number generator' )
|
|
126 parser.add_option( '-8', '--snpphred', dest='snpphred', help='SNP penalty on Phred scale' )
|
|
127 parser.add_option( '-6', '--snpfrac', dest='snpfrac', help='Fraction of sites expected to be SNP sites' )
|
|
128 parser.add_option( '-7', '--keepends', dest='keepends', help='Keep extreme-end nucleotides and qualities' )
|
|
129 parser.add_option( '-C', '--params', dest='params', help='Whether to use default or specified parameters' )
|
|
130 parser.add_option( '-u', '--iautoB', dest='iautoB', help='Automatic or specified behavior' )
|
|
131 parser.add_option( '-K', '--ipacked', dest='ipacked', help='Whether or not to use a packed representation for DNA strings' )
|
|
132 parser.add_option( '-Q', '--ibmax', dest='ibmax', help='Maximum number of suffixes allowed in a block' )
|
|
133 parser.add_option( '-Y', '--ibmaxdivn', dest='ibmaxdivn', help='Maximum number of suffixes allowed in a block as a fraction of the length of the reference' )
|
|
134 parser.add_option( '-D', '--idcv', dest='idcv', help='The period for the difference-cover sample' )
|
|
135 parser.add_option( '-U', '--inodc', dest='inodc', help='Whether or not to disable the use of the difference-cover sample' )
|
|
136 parser.add_option( '-y', '--inoref', dest='inoref', help='Whether or not to build the part of the reference index used only in paired-end alignment' )
|
|
137 parser.add_option( '-z', '--ioffrate', dest='ioffrate', help='How many rows get marked during annotation of some or all of the Burrows-Wheeler rows' )
|
|
138 parser.add_option( '-W', '--iftab', dest='iftab', help='The size of the lookup table used to calculate an initial Burrows-Wheeler range with respect to the first n characters of the query' )
|
|
139 parser.add_option( '-X', '--intoa', dest='intoa', help='Whether or not to convert Ns in the reference sequence to As' )
|
|
140 parser.add_option( '-N', '--iendian', dest='iendian', help='Endianness to use when serializing integers to the index file' )
|
|
141 parser.add_option( '-Z', '--iseed', dest='iseed', help='Seed for the pseudorandom number generator' )
|
|
142 parser.add_option( '-x', '--indexSettings', dest='index_settings', help='Whether or not indexing options are to be set' )
|
|
143 parser.add_option( '-H', '--suppressHeader', dest='suppressHeader', help='Suppress header' )
|
|
144 parser.add_option( '--galaxy_input_format', dest='galaxy_input_format', default="fastqsanger", help='galaxy input format' )
|
|
145 parser.add_option( '--do_not_build_index', dest='do_not_build_index', action="store_true", default=False, help='Flag to specify that provided file is already indexed, use as is' )
|
|
146 (options, args) = parser.parse_args()
|
|
147 if options.mismatchSeed and options.maxMismatches:
|
|
148 parser.error("options --mismatchSeed and --maxMismatches are mutually exclusive")
|
|
149 stdout = ''
|
|
150
|
|
151 # make temp directory for placement of indices and copy reference file there if necessary
|
|
152 tmp_index_dir = tempfile.mkdtemp()
|
|
153 # get type of data (solid or solexa)
|
|
154 if options.dataType == 'solid':
|
|
155 colorspace = '-C'
|
|
156 else:
|
|
157 colorspace = ''
|
|
158 # index if necessary
|
|
159 if options.genomeSource == 'history' and not options.do_not_build_index:
|
|
160 # set up commands
|
|
161 if options.index_settings =='indexPreSet':
|
|
162 indexing_cmds = '%s' % colorspace
|
|
163 else:
|
|
164 try:
|
|
165 if options.iautoB and options.iautoB == 'set':
|
|
166 iautoB = '--noauto'
|
|
167 else:
|
|
168 iautoB = ''
|
|
169 if options.ipacked and options.ipacked == 'packed':
|
|
170 ipacked = '--packed'
|
|
171 else:
|
|
172 ipacked = ''
|
|
173 if options.ibmax and int( options.ibmax ) >= 1:
|
|
174 ibmax = '--bmax %s' % options.ibmax
|
|
175 else:
|
|
176 ibmax = ''
|
|
177 if options.ibmaxdivn and int( options.ibmaxdivn ) >= 0:
|
|
178 ibmaxdivn = '--bmaxdivn %s' % options.ibmaxdivn
|
|
179 else:
|
|
180 ibmaxdivn = ''
|
|
181 if options.idcv and int( options.idcv ) >= 3:
|
|
182 idcv = '--dcv %s' % options.idcv
|
|
183 else:
|
|
184 idcv = ''
|
|
185 if options.inodc and options.inodc == 'nodc':
|
|
186 inodc = '--nodc'
|
|
187 else:
|
|
188 inodc = ''
|
|
189 if options.inoref and options.inoref == 'noref':
|
|
190 inoref = '--noref'
|
|
191 else:
|
|
192 inoref = ''
|
|
193 if options.iftab and int( options.iftab ) >= 1:
|
|
194 iftab = '--ftabchars %s' % options.iftab
|
|
195 else:
|
|
196 iftab = ''
|
|
197 if options.intoa and options.intoa == 'yes':
|
|
198 intoa = '--ntoa'
|
|
199 else:
|
|
200 intoa = ''
|
|
201 if options.iendian and options.iendian == 'big':
|
|
202 iendian = '--big'
|
|
203 else:
|
|
204 iendian = '--little'
|
|
205 if options.iseed and int( options.iseed ) > 0:
|
|
206 iseed = '--seed %s' % options.iseed
|
|
207 else:
|
|
208 iseed = ''
|
|
209 indexing_cmds = '%s %s %s %s %s %s %s --offrate %s %s %s %s %s %s' % \
|
|
210 ( iautoB, ipacked, ibmax, ibmaxdivn, idcv, inodc,
|
|
211 inoref, options.ioffrate, iftab, intoa, iendian,
|
|
212 iseed, colorspace )
|
|
213 except ValueError, e:
|
|
214 # clean up temp dir
|
|
215 if os.path.exists( tmp_index_dir ):
|
|
216 shutil.rmtree( tmp_index_dir )
|
|
217 stop_err( "Something is wrong with the indexing parameters and the indexing and alignment could not be run. Make sure you don't have any non-numeric values where they should be numeric.\n" + str( e ) )
|
|
218 ref_file = tempfile.NamedTemporaryFile( dir=tmp_index_dir )
|
|
219 ref_file_name = ref_file.name
|
|
220 ref_file.close()
|
|
221 os.symlink( options.ref, ref_file_name )
|
|
222 cmd1 = 'bowtie-build %s -f %s %s' % ( indexing_cmds, ref_file_name, ref_file_name )
|
|
223 try:
|
|
224 tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name
|
|
225 tmp_stderr = open( tmp, 'wb' )
|
|
226 proc = subprocess.Popen( args=cmd1, shell=True, cwd=tmp_index_dir, stderr=tmp_stderr.fileno() )
|
|
227 returncode = proc.wait()
|
|
228 tmp_stderr.close()
|
|
229 # get stderr, allowing for case where it's very large
|
|
230 tmp_stderr = open( tmp, 'rb' )
|
|
231 stderr = ''
|
|
232 buffsize = 1048576
|
|
233 try:
|
|
234 while True:
|
|
235 stderr += tmp_stderr.read( buffsize )
|
|
236 if not stderr or len( stderr ) % buffsize != 0:
|
|
237 break
|
|
238 except OverflowError:
|
|
239 pass
|
|
240 tmp_stderr.close()
|
|
241 if returncode != 0:
|
|
242 raise Exception, stderr
|
|
243 except Exception, e:
|
|
244 # clean up temp dir
|
|
245 if os.path.exists( tmp_index_dir ):
|
|
246 shutil.rmtree( tmp_index_dir )
|
|
247 stop_err( 'Error indexing reference sequence\n' + str( e ) )
|
|
248 stdout += 'File indexed. '
|
|
249 else:
|
|
250 ref_file_name = options.ref
|
|
251 # set up aligning and generate aligning command options
|
|
252 # automatically set threads in both cases
|
|
253 tmp_suppressed_file_name = None
|
|
254 tmp_unmapped_file_name = None
|
|
255 if options.suppressHeader == 'true':
|
|
256 suppressHeader = '--sam-nohead'
|
|
257 else:
|
|
258 suppressHeader = ''
|
|
259 if options.maxInsert and int( options.maxInsert ) > 0:
|
|
260 maxInsert = '-X %s' % options.maxInsert
|
|
261 else:
|
|
262 maxInsert = ''
|
|
263 if options.mateOrient:
|
|
264 mateOrient = '--%s' % options.mateOrient
|
|
265 else:
|
|
266 mateOrient = ''
|
|
267 quality_score_encoding = GALAXY_FORMAT_TO_QUALITY_SCORE_ENCODING_ARG.get( options.galaxy_input_format, DEFAULT_ASCII_ENCODING )
|
|
268 if options.params == 'preSet':
|
|
269 aligning_cmds = '-f %s %s -p %s -S %s %s %s ' % \
|
|
270 ( maxInsert, mateOrient, options.threads, suppressHeader, colorspace, quality_score_encoding )
|
|
271
|
|
272 else:
|
|
273 try:
|
|
274 if options.skip and int( options.skip ) > 0:
|
|
275 skip = '-s %s' % options.skip
|
|
276 else:
|
|
277 skip = ''
|
|
278 if options.alignLimit and int( options.alignLimit ) >= 0:
|
|
279 alignLimit = '-u %s' % options.alignLimit
|
|
280 else:
|
|
281 alignLimit = ''
|
|
282 if options.trimH and int( options.trimH ) > 0:
|
|
283 trimH = '-5 %s' % options.trimH
|
|
284 else:
|
|
285 trimH = ''
|
|
286 if options.trimL and int( options.trimL ) > 0:
|
|
287 trimL = '-3 %s' % options.trimL
|
|
288 else:
|
|
289 trimL = ''
|
|
290 if options.maxMismatches and (options.maxMismatches == '0' or options.maxMismatches == '1' \
|
|
291 or options.maxMismatches == '2' or options.maxMismatches == '3'):
|
|
292 maxMismatches = '-v %s' % options.maxMismatches
|
|
293 else:
|
|
294 maxMismatches = ''
|
|
295 if options.mismatchSeed and (options.mismatchSeed == '0' or options.mismatchSeed == '1' \
|
|
296 or options.mismatchSeed == '2' or options.mismatchSeed == '3'):
|
|
297 mismatchSeed = '-n %s' % options.mismatchSeed
|
|
298 else:
|
|
299 mismatchSeed = ''
|
|
300 if options.mismatchQual and int( options.mismatchQual ) >= 1:
|
|
301 mismatchQual = '-e %s' % options.mismatchQual
|
|
302 else:
|
|
303 mismatchQual = ''
|
|
304 if options.seedLen and int( options.seedLen ) >= 5:
|
|
305 seedLen = '-l %s' % options.seedLen
|
|
306 else:
|
|
307 seedLen = ''
|
|
308 if options.rounding == 'noRound':
|
|
309 rounding = '--nomaqround'
|
|
310 else:
|
|
311 rounding = ''
|
|
312 if options.minInsert and int( options.minInsert ) > 0:
|
|
313 minInsert = '-I %s' % options.minInsert
|
|
314 else:
|
|
315 minInsert = ''
|
|
316 if options.maxAlignAttempt and int( options.maxAlignAttempt ) >= 0:
|
|
317 maxAlignAttempt = '--pairtries %s' % options.maxAlignAttempt
|
|
318 else:
|
|
319 maxAlignAttempt = ''
|
|
320 if options.forwardAlign == 'noForward':
|
|
321 forwardAlign = '--nofw'
|
|
322 else:
|
|
323 forwardAlign = ''
|
|
324 if options.reverseAlign == 'noReverse':
|
|
325 reverseAlign = '--norc'
|
|
326 else:
|
|
327 reverseAlign = ''
|
|
328 if options.maxBacktracks and int( options.maxBacktracks ) > 0 and \
|
|
329 ( options.mismatchSeed == '2' or options.mismatchSeed == '3' ):
|
|
330 maxBacktracks = '--maxbts %s' % options.maxBacktracks
|
|
331 else:
|
|
332 maxBacktracks = ''
|
|
333 if options.tryHard == 'doTryHard':
|
|
334 tryHard = '-y'
|
|
335 else:
|
|
336 tryHard = ''
|
|
337 if options.valAlign and int( options.valAlign ) >= 0:
|
|
338 valAlign = '-k %s' % options.valAlign
|
|
339 else:
|
|
340 valAlign = ''
|
|
341 if options.allValAligns == 'doAllValAligns':
|
|
342 allValAligns = '-a'
|
|
343 else:
|
|
344 allValAligns = ''
|
|
345 if options.suppressAlign and int( options.suppressAlign ) >= 0:
|
|
346 suppressAlign = '-m %s' % options.suppressAlign
|
|
347 else:
|
|
348 suppressAlign = ''
|
|
349 if options.best == 'doBest':
|
|
350 best = '--best'
|
|
351 else:
|
|
352 best = ''
|
|
353 if options.strata == 'doStrata':
|
|
354 strata = '--strata'
|
|
355 else:
|
|
356 strata = ''
|
|
357 if options.offrate and int( options.offrate ) >= 0:
|
|
358 offrate = '-o %s' % options.offrate
|
|
359 else:
|
|
360 offrate = ''
|
|
361 if options.seed and int( options.seed ) >= 0:
|
|
362 seed = '--seed %s' % options.seed
|
|
363 else:
|
|
364 seed = ''
|
|
365 if options.paired == 'paired':
|
|
366 if options.output_unmapped_reads_l and options.output_unmapped_reads_r:
|
|
367 tmp_unmapped_file = tempfile.NamedTemporaryFile( dir=tmp_index_dir, suffix='.fastq' )
|
|
368 tmp_unmapped_file_name = tmp_unmapped_file.name
|
|
369 tmp_unmapped_file.close()
|
|
370 output_unmapped_reads = '--un %s' % tmp_unmapped_file_name
|
|
371 else:
|
|
372 output_unmapped_reads = ''
|
|
373 if options.output_suppressed_reads:
|
|
374 tmp_suppressed_file = tempfile.NamedTemporaryFile( dir=tmp_index_dir, suffix='.fastq' )
|
|
375 tmp_suppressed_file_name = tmp_suppressed_file.name
|
|
376 tmp_suppressed_file.close()
|
|
377 output_suppressed_reads = '--max %s' % tmp_suppressed_file_name
|
|
378 else:
|
|
379 output_suppressed_reads = ''
|
|
380 else:
|
|
381 if options.output_unmapped_reads:
|
|
382 output_unmapped_reads = '--un %s' % options.output_unmapped_reads
|
|
383 else:
|
|
384 output_unmapped_reads = ''
|
|
385 if options.output_suppressed_reads:
|
|
386 output_suppressed_reads = '--max %s' % options.output_suppressed_reads
|
|
387 else:
|
|
388 output_suppressed_reads = ''
|
|
389 snpfrac = ''
|
|
390 if options.snpphred and int( options.snpphred ) >= 0:
|
|
391 snpphred = '--snpphred %s' % options.snpphred
|
|
392 else:
|
|
393 snpphred = ''
|
|
394 if options.snpfrac and float( options.snpfrac ) >= 0:
|
|
395 snpfrac = '--snpfrac %s' % options.snpfrac
|
|
396 if options.keepends and options.keepends == 'doKeepends':
|
|
397 keepends = '--col-keepends'
|
|
398 else:
|
|
399 keepends = ''
|
|
400
|
|
401
|
|
402
|
|
403
|
|
404
|
|
405 if options.filetype == 'f':
|
|
406 filetype = '-f'
|
|
407 else:
|
|
408 filetype = '-q'
|
|
409
|
|
410 if options.outtype == 'S':
|
|
411 outtype = '-S'
|
|
412 else:
|
|
413 outtype = ''
|
|
414
|
|
415
|
|
416 aligning_cmds = '%s %s %s -p %s %s %s %s %s %s %s %s %s %s %s %s %s %s ' \
|
|
417 '%s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s ' % \
|
|
418 ( filetype, maxInsert, mateOrient, options.threads, outtype, suppressHeader,
|
|
419 colorspace, skip, alignLimit, trimH, trimL, maxMismatches,
|
|
420 mismatchSeed, mismatchQual, seedLen, rounding, minInsert,
|
|
421 maxAlignAttempt, forwardAlign, reverseAlign, maxBacktracks,
|
|
422 tryHard, valAlign, allValAligns, suppressAlign, best,
|
|
423 strata, offrate, seed, snpphred, snpfrac, keepends,
|
|
424 output_unmapped_reads, output_suppressed_reads,
|
|
425 quality_score_encoding )
|
|
426
|
|
427
|
|
428
|
|
429
|
|
430
|
|
431
|
|
432 except ValueError, e:
|
|
433 # clean up temp dir
|
|
434 if os.path.exists( tmp_index_dir ):
|
|
435 shutil.rmtree( tmp_index_dir )
|
|
436 stop_err( 'Something is wrong with the alignment parameters and the alignment could not be run\n' + str( e ) )
|
|
437 try:
|
|
438 # have to nest try-except in try-finally to handle 2.4
|
|
439 try:
|
|
440 # prepare actual mapping commands
|
|
441 if options.paired == 'paired':
|
|
442 cmd2 = 'bowtie %s %s -1 %s -2 %s > %s' % ( aligning_cmds, ref_file_name, options.input1, options.input2, options.output )
|
|
443 else:
|
|
444 cmd2 = 'bowtie %s %s %s > %s' % ( aligning_cmds, ref_file_name, options.input1, options.output )
|
|
445 # align
|
|
446 tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name
|
|
447 tmp_stderr = open( tmp, 'wb' )
|
|
448 proc = subprocess.Popen( args=cmd2, shell=True, cwd=tmp_index_dir, stderr=tmp_stderr.fileno() )
|
|
449 returncode = proc.wait()
|
|
450 tmp_stderr.close()
|
|
451 # get stderr, allowing for case where it's very large
|
|
452 tmp_stderr = open( tmp, 'rb' )
|
|
453 stderr = ''
|
|
454 buffsize = 1048576
|
|
455 try:
|
|
456 while True:
|
|
457 stderr += tmp_stderr.read( buffsize )
|
|
458 if not stderr or len( stderr ) % buffsize != 0:
|
|
459 break
|
|
460 except OverflowError:
|
|
461 pass
|
|
462 tmp_stderr.close()
|
|
463 if returncode != 0:
|
|
464 raise Exception, stderr
|
|
465 # get suppressed and unmapped reads output files in place if appropriate
|
|
466 if options.paired == 'paired' and tmp_suppressed_file_name and \
|
|
467 options.output_suppressed_reads_l and options.output_suppressed_reads_r:
|
|
468 try:
|
|
469 left = tmp_suppressed_file_name.replace( '.fastq', '_1.fastq' )
|
|
470 right = tmp_suppressed_file_name.replace( '.fastq', '_1.fastq' )
|
|
471 shutil.move( left, options.output_suppressed_reads_l )
|
|
472 shutil.move( right, options.output_suppressed_reads_r )
|
|
473 except Exception, e:
|
|
474 sys.stdout.write( 'Error producing the suppressed output file.\n' )
|
|
475 if options.paired == 'paired' and tmp_unmapped_file_name and \
|
|
476 options.output_unmapped_reads_l and options.output_unmapped_reads_r:
|
|
477 try:
|
|
478 left = tmp_unmapped_file_name.replace( '.fastq', '_1.fastq' )
|
|
479 right = tmp_unmapped_file_name.replace( '.fastq', '_2.fastq' )
|
|
480 shutil.move( left, options.output_unmapped_reads_l )
|
|
481 shutil.move( right, options.output_unmapped_reads_r )
|
|
482 except Exception, e:
|
|
483 sys.stdout.write( 'Error producing the unmapped output file.\n' )
|
|
484 # check that there are results in the output file
|
|
485 if os.path.getsize( options.output ) == 0:
|
|
486 raise Exception, 'The output file is empty, there may be an error with your input file or settings.'
|
|
487 except Exception, e:
|
|
488 stop_err( 'Error aligning sequence. ' + str( e ) )
|
|
489 finally:
|
|
490 # clean up temp dir
|
|
491 if os.path.exists( tmp_index_dir ):
|
|
492 shutil.rmtree( tmp_index_dir )
|
|
493 stdout += 'Sequence file aligned.\n'
|
|
494 sys.stdout.write( stdout )
|
|
495
|
|
496 if __name__ == "__main__":
|
|
497 __main__()
|