comparison pick_plasmids_containing_genes.py @ 0:62019f5116f8 draft

"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/tree/master/tools/pick_plasmids_containing_genes commit af16cdb29dcdc9ad2ff1effb5cf4e23a8c98a764"
author public-health-bioinformatics
date Tue, 12 Nov 2019 22:20:54 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:62019f5116f8
1 #!/usr/bin/env python
2
3 from __future__ import print_function
4
5 import argparse
6 import csv
7 import errno
8 import os
9 import re
10 import shutil
11
12
def parse_screen_file(screen_file):
    """Load the tab-separated screening file as a list of row mappings.

    The first line of the file supplies the column names. Every following
    line becomes one mapping from column name to field value, and the
    mappings are returned in file order.
    """
    with open(screen_file) as handle:
        return list(csv.DictReader(handle, delimiter="\t", quotechar='"'))
20
21
def _ensure_outdir(outdir):
    """Create outdir (and any missing parents); an existing directory is fine."""
    try:
        os.makedirs(outdir)
    except OSError as exc:
        if not (exc.errno == errno.EEXIST and os.path.isdir(outdir)):
            raise


def _read_report_hits(report_path):
    """Return the (GENE, SEQUENCE) pair of every data row in the abricate report."""
    hits = []
    with open(report_path, 'r') as f:
        for row in csv.DictReader(f, delimiter="\t", quotechar='"'):
            # Concatenated reports repeat the header line; skip those copies.
            if row['#FILE'] == '#FILE':
                continue
            hits.append((row['GENE'], row['SEQUENCE']))
    return hits


def _index_plasmids_by_header(plasmids):
    """Map each FASTA header (text after '>') to the plasmid files containing it."""
    index = {}
    for plasmid in plasmids:
        with open(plasmid, 'r') as f:
            for line in f:
                if not line.startswith('>'):
                    continue
                owners = index.setdefault(line.rstrip()[1:], [])
                if plasmid not in owners:
                    owners.append(plasmid)
    return index


def main(args):
    """Copy every plasmid FASTA that carries a gene of interest into args.outdir.

    Reads these attributes from ``args``:

    * ``outdir`` -- directory that receives the copies; created if missing.
    * ``abricate_report_screening_file`` -- TSV whose ``regex`` column holds
      the patterns to look for.
    * ``concatenated_abricate_reports`` -- TSV with ``#FILE``, ``GENE`` and
      ``SEQUENCE`` columns; repeated header lines are ignored.
    * ``plasmids`` -- iterable of FASTA paths (``None`` is treated as empty).

    A report row is a hit when any screening regex is found (``re.search``)
    in its ``GENE`` value. A plasmid is selected when one of its header lines,
    stripped of trailing whitespace, equals ``'>'`` followed by the hit's
    ``SEQUENCE`` value. A tab-separated table (header ``file``,
    ``gene_detected``) is printed to stdout with one row per selected plasmid;
    each plasmid is reported and copied at most once.

    Raises:
        OSError: if the output directory cannot be created or a file cannot
            be read or copied.
        KeyError: if a required column is missing from either TSV.
    """
    _ensure_outdir(args.outdir)

    screen = parse_screen_file(args.abricate_report_screening_file)
    print("\t".join(["file", "gene_detected"]))

    # Compile each pattern once instead of once per report row.
    patterns = [re.compile(gene['regex']) for gene in screen]

    # Read the report a single time. Rewinding a shared reader for every gene
    # is fragile and fails with StopIteration on an empty report.
    hits = _read_report_hits(args.concatenated_abricate_reports)

    # Unique contigs, kept in order of first detection (screening-file order
    # first, report order second) so the output order stays stable.
    contigs_with_genes_of_interest = []
    seen_contigs = set()
    for pattern in patterns:
        for gene_name, contig in hits:
            if contig not in seen_contigs and pattern.search(gene_name):
                seen_contigs.add(contig)
                contigs_with_genes_of_interest.append(contig)

    # Scan every plasmid file once rather than once per contig.
    plasmids_by_header = _index_plasmids_by_header(args.plasmids or [])

    # copy the corresponding plasmid fasta files into outdir
    copied = set()
    for contig in contigs_with_genes_of_interest:
        for plasmid in plasmids_by_header.get(contig, ()):
            if plasmid in copied:
                continue
            copied.add(plasmid)
            print("\t".join([plasmid, "True"]))
            destination = os.path.join(args.outdir, os.path.basename(plasmid))
            # With the default outdir (".") the plasmid may already be in
            # place; copying a file onto itself would raise.
            if os.path.realpath(destination) != os.path.realpath(plasmid):
                shutil.copy2(plasmid, args.outdir)
60
61
if __name__ == '__main__':
    # Command-line entry point: parse the options and hand them to main().
    parser = argparse.ArgumentParser(
        description="Copy plasmid assemblies that contain genes of interest "
                    "into an output directory."
    )
    parser.add_argument("--plasmids", nargs='+', help="plasmid assemblies (fasta)")
    # Both report files are opened unconditionally, so require them here and
    # fail with a usage message rather than a TypeError from open(None).
    parser.add_argument("--concatenated_abricate_reports", required=True,
                        help="abricate reports (tsv)")
    parser.add_argument("--abricate_report_screening_file", required=True,
                        help="screening file (tsv) with a 'regex' column; each regex "
                             "is searched for in the GENE column of the abricate reports")
    parser.add_argument("--outdir", dest="outdir", default=".", help="Output directory")
    args = parser.parse_args()
    main(args)