comparison tools/seq_filter_by_mapping/seq_filter_by_mapping.py @ 3:481b0a925e66 draft

v0.0.6 Python 3 compatible print function
author peterjc
date Wed, 17 May 2017 09:24:01 -0400
parents 48e71dfd51b3
children f82868a026ea
comparison
equal deleted inserted replaced
2:48e71dfd51b3 3:481b0a925e66
16 (formerly the Scottish Crop Research Institute, SCRI), UK. All rights reserved. 16 (formerly the Scottish Crop Research Institute, SCRI), UK. All rights reserved.
17 See accompanying text file for licence details (MIT license). 17 See accompanying text file for licence details (MIT license).
18 18
19 Use -v or --version to get the version, -h or --help for help. 19 Use -v or --version to get the version, -h or --help for help.
20 """ 20 """
21
22 from __future__ import print_function
21 23
22 import os 24 import os
23 import re 25 import re
24 import subprocess 26 import subprocess
25 import sys 27 import sys
60 help="Show version and quit") 62 help="Show version and quit")
61 63
62 options, args = parser.parse_args() 64 options, args = parser.parse_args()
63 65
64 if options.version: 66 if options.version:
65 print "v0.0.5" 67 print("v0.0.6")
66 sys.exit(0) 68 sys.exit(0)
67 69
68 in_file = options.input 70 in_file = options.input
69 seq_format = options.format 71 seq_format = options.format
70 out_positive_file = options.output_positive 72 out_positive_file = options.output_positive
249 # Galaxy now requires Python 2.5+ so can use with statements, 251 # Galaxy now requires Python 2.5+ so can use with statements,
250 with open(in_file) as in_handle: 252 with open(in_file) as in_handle:
251 # Doing the if statement outside the loop for speed 253 # Doing the if statement outside the loop for speed
252 # (with the downside of three very similar loops). 254 # (with the downside of three very similar loops).
253 if pos_file is not None and neg_file is not None: 255 if pos_file is not None and neg_file is not None:
254 print "Generating two FASTA files" 256 print("Generating two FASTA files")
255 with open(pos_file, "w") as pos_handle: 257 with open(pos_file, "w") as pos_handle:
256 with open(neg_file, "w") as neg_handle: 258 with open(neg_file, "w") as neg_handle:
257 for identifier, record in crude_fasta_iterator(in_handle): 259 for identifier, record in crude_fasta_iterator(in_handle):
258 if clean_name(identifier) in wanted: 260 if clean_name(identifier) in wanted:
259 pos_handle.write(record) 261 pos_handle.write(record)
260 pos_count += 1 262 pos_count += 1
261 else: 263 else:
262 neg_handle.write(record) 264 neg_handle.write(record)
263 neg_count += 1 265 neg_count += 1
264 elif pos_file is not None: 266 elif pos_file is not None:
265 print "Generating matching FASTA file" 267 print("Generating matching FASTA file")
266 with open(pos_file, "w") as pos_handle: 268 with open(pos_file, "w") as pos_handle:
267 for identifier, record in crude_fasta_iterator(in_handle): 269 for identifier, record in crude_fasta_iterator(in_handle):
268 if clean_name(identifier) in wanted: 270 if clean_name(identifier) in wanted:
269 pos_handle.write(record) 271 pos_handle.write(record)
270 pos_count += 1 272 pos_count += 1
271 else: 273 else:
272 neg_count += 1 274 neg_count += 1
273 else: 275 else:
274 print "Generating non-matching FASTA file" 276 print("Generating non-matching FASTA file")
275 assert neg_file is not None 277 assert neg_file is not None
276 with open(neg_file, "w") as neg_handle: 278 with open(neg_file, "w") as neg_handle:
277 for identifier, record in crude_fasta_iterator(in_handle): 279 for identifier, record in crude_fasta_iterator(in_handle):
278 if clean_name(identifier) in wanted: 280 if clean_name(identifier) in wanted:
279 pos_count += 1 281 pos_count += 1
287 """FASTQ filter.""" 289 """FASTQ filter."""
288 from Bio.SeqIO.QualityIO import FastqGeneralIterator 290 from Bio.SeqIO.QualityIO import FastqGeneralIterator
289 pos_count = neg_count = 0 291 pos_count = neg_count = 0
290 handle = open(in_file, "r") 292 handle = open(in_file, "r")
291 if pos_file is not None and neg_file is not None: 293 if pos_file is not None and neg_file is not None:
292 print "Generating two FASTQ files" 294 print("Generating two FASTQ files")
293 positive_handle = open(pos_file, "w") 295 positive_handle = open(pos_file, "w")
294 negative_handle = open(neg_file, "w") 296 negative_handle = open(neg_file, "w")
295 print in_file 297 print(in_file)
296 for title, seq, qual in FastqGeneralIterator(handle): 298 for title, seq, qual in FastqGeneralIterator(handle):
297 # print("%s --> %s" % (title, clean_name(title.split(None, 1)[0]))) 299 # print("%s --> %s" % (title, clean_name(title.split(None, 1)[0])))
298 if clean_name(title.split(None, 1)[0]) in wanted: 300 if clean_name(title.split(None, 1)[0]) in wanted:
299 positive_handle.write("@%s\n%s\n+\n%s\n" % (title, seq, qual)) 301 positive_handle.write("@%s\n%s\n+\n%s\n" % (title, seq, qual))
300 pos_count += 1 302 pos_count += 1
302 negative_handle.write("@%s\n%s\n+\n%s\n" % (title, seq, qual)) 304 negative_handle.write("@%s\n%s\n+\n%s\n" % (title, seq, qual))
303 neg_count += 1 305 neg_count += 1
304 positive_handle.close() 306 positive_handle.close()
305 negative_handle.close() 307 negative_handle.close()
306 elif pos_file is not None: 308 elif pos_file is not None:
307 print "Generating matching FASTQ file" 309 print("Generating matching FASTQ file")
308 positive_handle = open(pos_file, "w") 310 positive_handle = open(pos_file, "w")
309 for title, seq, qual in FastqGeneralIterator(handle): 311 for title, seq, qual in FastqGeneralIterator(handle):
310 if clean_name(title.split(None, 1)[0]) in wanted: 312 if clean_name(title.split(None, 1)[0]) in wanted:
311 positive_handle.write("@%s\n%s\n+\n%s\n" % (title, seq, qual)) 313 positive_handle.write("@%s\n%s\n+\n%s\n" % (title, seq, qual))
312 pos_count += 1 314 pos_count += 1
313 else: 315 else:
314 neg_count += 1 316 neg_count += 1
315 positive_handle.close() 317 positive_handle.close()
316 elif neg_file is not None: 318 elif neg_file is not None:
317 print "Generating non-matching FASTQ file" 319 print("Generating non-matching FASTQ file")
318 negative_handle = open(neg_file, "w") 320 negative_handle = open(neg_file, "w")
319 for title, seq, qual in FastqGeneralIterator(handle): 321 for title, seq, qual in FastqGeneralIterator(handle):
320 if clean_name(title.split(None, 1)[0]) in wanted: 322 if clean_name(title.split(None, 1)[0]) in wanted:
321 pos_count += 1 323 pos_count += 1
322 else: 324 else: