Mercurial > repos > peterjc > seq_filter_by_mapping
comparison tools/seq_filter_by_mapping/seq_filter_by_mapping.py @ 3:481b0a925e66 draft
v0.0.6 Python 3 compatible print function
author | peterjc |
---|---|
date | Wed, 17 May 2017 09:24:01 -0400 |
parents | 48e71dfd51b3 |
children | f82868a026ea |
comparison (legend: equal, deleted, inserted, replaced)
2:48e71dfd51b3 | 3:481b0a925e66 |
---|---|
16 (formerly the Scottish Crop Research Institute, SCRI), UK. All rights reserved. | 16 (formerly the Scottish Crop Research Institute, SCRI), UK. All rights reserved. |
17 See accompanying text file for licence details (MIT license). | 17 See accompanying text file for licence details (MIT license). |
18 | 18 |
19 Use -v or --version to get the version, -h or --help for help. | 19 Use -v or --version to get the version, -h or --help for help. |
20 """ | 20 """ |
21 | |
22 from __future__ import print_function | |
21 | 23 |
22 import os | 24 import os |
23 import re | 25 import re |
24 import subprocess | 26 import subprocess |
25 import sys | 27 import sys |
60 help="Show version and quit") | 62 help="Show version and quit") |
61 | 63 |
62 options, args = parser.parse_args() | 64 options, args = parser.parse_args() |
63 | 65 |
64 if options.version: | 66 if options.version: |
65 print "v0.0.5" | 67 print("v0.0.6") |
66 sys.exit(0) | 68 sys.exit(0) |
67 | 69 |
68 in_file = options.input | 70 in_file = options.input |
69 seq_format = options.format | 71 seq_format = options.format |
70 out_positive_file = options.output_positive | 72 out_positive_file = options.output_positive |
249 # Galaxy now requires Python 2.5+ so can use with statements, | 251 # Galaxy now requires Python 2.5+ so can use with statements, |
250 with open(in_file) as in_handle: | 252 with open(in_file) as in_handle: |
251 # Doing the if statement outside the loop for speed | 253 # Doing the if statement outside the loop for speed |
252 # (with the downside of three very similar loops). | 254 # (with the downside of three very similar loops). |
253 if pos_file is not None and neg_file is not None: | 255 if pos_file is not None and neg_file is not None: |
254 print "Generating two FASTA files" | 256 print("Generating two FASTA files") |
255 with open(pos_file, "w") as pos_handle: | 257 with open(pos_file, "w") as pos_handle: |
256 with open(neg_file, "w") as neg_handle: | 258 with open(neg_file, "w") as neg_handle: |
257 for identifier, record in crude_fasta_iterator(in_handle): | 259 for identifier, record in crude_fasta_iterator(in_handle): |
258 if clean_name(identifier) in wanted: | 260 if clean_name(identifier) in wanted: |
259 pos_handle.write(record) | 261 pos_handle.write(record) |
260 pos_count += 1 | 262 pos_count += 1 |
261 else: | 263 else: |
262 neg_handle.write(record) | 264 neg_handle.write(record) |
263 neg_count += 1 | 265 neg_count += 1 |
264 elif pos_file is not None: | 266 elif pos_file is not None: |
265 print "Generating matching FASTA file" | 267 print("Generating matching FASTA file") |
266 with open(pos_file, "w") as pos_handle: | 268 with open(pos_file, "w") as pos_handle: |
267 for identifier, record in crude_fasta_iterator(in_handle): | 269 for identifier, record in crude_fasta_iterator(in_handle): |
268 if clean_name(identifier) in wanted: | 270 if clean_name(identifier) in wanted: |
269 pos_handle.write(record) | 271 pos_handle.write(record) |
270 pos_count += 1 | 272 pos_count += 1 |
271 else: | 273 else: |
272 neg_count += 1 | 274 neg_count += 1 |
273 else: | 275 else: |
274 print "Generating non-matching FASTA file" | 276 print("Generating non-matching FASTA file") |
275 assert neg_file is not None | 277 assert neg_file is not None |
276 with open(neg_file, "w") as neg_handle: | 278 with open(neg_file, "w") as neg_handle: |
277 for identifier, record in crude_fasta_iterator(in_handle): | 279 for identifier, record in crude_fasta_iterator(in_handle): |
278 if clean_name(identifier) in wanted: | 280 if clean_name(identifier) in wanted: |
279 pos_count += 1 | 281 pos_count += 1 |
287 """FASTQ filter.""" | 289 """FASTQ filter.""" |
288 from Bio.SeqIO.QualityIO import FastqGeneralIterator | 290 from Bio.SeqIO.QualityIO import FastqGeneralIterator |
289 pos_count = neg_count = 0 | 291 pos_count = neg_count = 0 |
290 handle = open(in_file, "r") | 292 handle = open(in_file, "r") |
291 if pos_file is not None and neg_file is not None: | 293 if pos_file is not None and neg_file is not None: |
292 print "Generating two FASTQ files" | 294 print("Generating two FASTQ files") |
293 positive_handle = open(pos_file, "w") | 295 positive_handle = open(pos_file, "w") |
294 negative_handle = open(neg_file, "w") | 296 negative_handle = open(neg_file, "w") |
295 print in_file | 297 print(in_file) |
296 for title, seq, qual in FastqGeneralIterator(handle): | 298 for title, seq, qual in FastqGeneralIterator(handle): |
297 # print("%s --> %s" % (title, clean_name(title.split(None, 1)[0]))) | 299 # print("%s --> %s" % (title, clean_name(title.split(None, 1)[0]))) |
298 if clean_name(title.split(None, 1)[0]) in wanted: | 300 if clean_name(title.split(None, 1)[0]) in wanted: |
299 positive_handle.write("@%s\n%s\n+\n%s\n" % (title, seq, qual)) | 301 positive_handle.write("@%s\n%s\n+\n%s\n" % (title, seq, qual)) |
300 pos_count += 1 | 302 pos_count += 1 |
302 negative_handle.write("@%s\n%s\n+\n%s\n" % (title, seq, qual)) | 304 negative_handle.write("@%s\n%s\n+\n%s\n" % (title, seq, qual)) |
303 neg_count += 1 | 305 neg_count += 1 |
304 positive_handle.close() | 306 positive_handle.close() |
305 negative_handle.close() | 307 negative_handle.close() |
306 elif pos_file is not None: | 308 elif pos_file is not None: |
307 print "Generating matching FASTQ file" | 309 print("Generating matching FASTQ file") |
308 positive_handle = open(pos_file, "w") | 310 positive_handle = open(pos_file, "w") |
309 for title, seq, qual in FastqGeneralIterator(handle): | 311 for title, seq, qual in FastqGeneralIterator(handle): |
310 if clean_name(title.split(None, 1)[0]) in wanted: | 312 if clean_name(title.split(None, 1)[0]) in wanted: |
311 positive_handle.write("@%s\n%s\n+\n%s\n" % (title, seq, qual)) | 313 positive_handle.write("@%s\n%s\n+\n%s\n" % (title, seq, qual)) |
312 pos_count += 1 | 314 pos_count += 1 |
313 else: | 315 else: |
314 neg_count += 1 | 316 neg_count += 1 |
315 positive_handle.close() | 317 positive_handle.close() |
316 elif neg_file is not None: | 318 elif neg_file is not None: |
317 print "Generating non-matching FASTQ file" | 319 print("Generating non-matching FASTQ file") |
318 negative_handle = open(neg_file, "w") | 320 negative_handle = open(neg_file, "w") |
319 for title, seq, qual in FastqGeneralIterator(handle): | 321 for title, seq, qual in FastqGeneralIterator(handle): |
320 if clean_name(title.split(None, 1)[0]) in wanted: | 322 if clean_name(title.split(None, 1)[0]) in wanted: |
321 pos_count += 1 | 323 pos_count += 1 |
322 else: | 324 else: |