Mercurial > repos > public-health-bioinformatics > pick_plasmids_containing_genes
comparison pick_plasmids_containing_genes.py @ 0:62019f5116f8 draft
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/tree/master/tools/pick_plasmids_containing_genes commit af16cdb29dcdc9ad2ff1effb5cf4e23a8c98a764"
author | public-health-bioinformatics |
---|---|
date | Tue, 12 Nov 2019 22:20:54 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:62019f5116f8 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 from __future__ import print_function | |
4 | |
5 import argparse | |
6 import csv | |
7 import errno | |
8 import os | |
9 import re | |
10 import shutil | |
11 | |
12 | |
def parse_screen_file(screen_file):
    """Load the tab-separated screening table as a list of row dicts.

    The first line of ``screen_file`` supplies the column names; each
    following line becomes one dict keyed by those names. Fields may be
    quoted with double quotes.
    """
    with open(screen_file) as handle:
        return list(csv.DictReader(handle, delimiter="\t", quotechar='"'))
20 | |
21 | |
def main(args):
    """Copy the plasmid fasta files that carry at least one gene of interest.

    The screening file supplies one regular expression per row (``regex``
    column). Each expression is searched against the ``GENE`` column of the
    concatenated abricate report, and the ``SEQUENCE`` value of every
    matching row is collected. Every plasmid file containing a line equal
    to ``>`` + one of those sequence names is copied into ``args.outdir``
    and reported on stdout as a tab-separated ``file``/``gene_detected`` row.
    Each plasmid is reported and copied at most once.

    Args:
        args: namespace providing ``outdir``,
            ``abricate_report_screening_file``,
            ``concatenated_abricate_reports`` and ``plasmids``.

    Raises:
        OSError: if the output directory cannot be created (other than
            because it already exists as a directory).
    """
    # create output directory; an already existing directory is acceptable
    try:
        os.mkdir(args.outdir)
    except OSError as exc:
        if not (exc.errno == errno.EEXIST and os.path.isdir(args.outdir)):
            raise

    # parse screening file
    screen = parse_screen_file(args.abricate_report_screening_file)

    print("\t".join(["file", "gene_detected"]))

    # Read the report once instead of rewinding the file for every gene.
    # Rewinding and then calling next() on the reader raised StopIteration
    # when the report was empty.
    with open(args.concatenated_abricate_reports, 'r') as f:
        abricate_report_reader = csv.DictReader(f, delimiter="\t", quotechar='"')
        # A concatenated report repeats its header line; drop those rows.
        report_rows = [
            row for row in abricate_report_reader
            if row['#FILE'] != '#FILE'
        ]

    # Collect matching contigs in first-seen order (gene-major, as before),
    # without duplicates, so a contig hit by several genes counts once.
    contigs_with_genes_of_interest = []
    seen_contigs = set()
    for gene in screen:
        pattern = re.compile(gene['regex'])
        for row in report_rows:
            if pattern.search(row['GENE']):
                contig = row['SEQUENCE']
                if contig not in seen_contigs:
                    seen_contigs.add(contig)
                    contigs_with_genes_of_interest.append(contig)

    if not contigs_with_genes_of_interest:
        # Nothing matched, so the plasmid files do not need to be opened.
        return

    # Tolerate a missing --plasmids option (argparse leaves it as None).
    plasmids = args.plasmids or []

    # Scan each plasmid file a single time, remembering its header lines.
    headers_by_plasmid = {}
    for plasmid in plasmids:
        with open(plasmid, 'r') as f:
            headers_by_plasmid[plasmid] = set(
                line.rstrip() for line in f if line.startswith('>')
            )

    # copy the corresponding plasmid fasta files into outdir
    copied = set()
    for contig in contigs_with_genes_of_interest:
        header = '>' + contig
        for plasmid in plasmids:
            if plasmid in copied or header not in headers_by_plasmid[plasmid]:
                continue
            print("\t".join([plasmid, "True"]))
            shutil.copy2(plasmid, args.outdir)
            copied.add(plasmid)
60 | |
61 | |
if __name__ == '__main__':
    # Command-line entry point: parse the options and hand them to main().
    parser = argparse.ArgumentParser()
    # default=[] keeps the option optional while giving main() an iterable.
    parser.add_argument("--plasmids", nargs='+', default=[], help="plasmid assemblies (fasta)")
    # Both input files are opened unconditionally, so require them up front
    # rather than failing later with a TypeError from open(None).
    parser.add_argument("--concatenated_abricate_reports", required=True, help="abricate reports (tsv)")
    parser.add_argument("--abricate_report_screening_file", required=True,
                        help="screening file (tsv) with a 'regex' column matched against the abricate GENE column")
    parser.add_argument("--outdir", dest="outdir", default=".", help="Output directory")
    args = parser.parse_args()
    main(args)