annotate pick_plasmids_containing_genes.py @ 1:c9129ecc609d draft default tip

"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/tree/master/tools/pick_plasmids_containing_genes commit e732b47aa50e35cd4cb70df263d21c5987cae697"
author public-health-bioinformatics
date Thu, 19 Dec 2019 18:22:33 -0500
parents 62019f5116f8
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
62019f5116f8 "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/tree/master/tools/pick_plasmids_containing_genes commit af16cdb29dcdc9ad2ff1effb5cf4e23a8c98a764"
public-health-bioinformatics
parents:
diff changeset
1 #!/usr/bin/env python
62019f5116f8 "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/tree/master/tools/pick_plasmids_containing_genes commit af16cdb29dcdc9ad2ff1effb5cf4e23a8c98a764"
public-health-bioinformatics
parents:
diff changeset
2
62019f5116f8 "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/tree/master/tools/pick_plasmids_containing_genes commit af16cdb29dcdc9ad2ff1effb5cf4e23a8c98a764"
public-health-bioinformatics
parents:
diff changeset
3 from __future__ import print_function
62019f5116f8 "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/tree/master/tools/pick_plasmids_containing_genes commit af16cdb29dcdc9ad2ff1effb5cf4e23a8c98a764"
public-health-bioinformatics
parents:
diff changeset
4
62019f5116f8 "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/tree/master/tools/pick_plasmids_containing_genes commit af16cdb29dcdc9ad2ff1effb5cf4e23a8c98a764"
public-health-bioinformatics
parents:
diff changeset
5 import argparse
62019f5116f8 "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/tree/master/tools/pick_plasmids_containing_genes commit af16cdb29dcdc9ad2ff1effb5cf4e23a8c98a764"
public-health-bioinformatics
parents:
diff changeset
6 import csv
62019f5116f8 "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/tree/master/tools/pick_plasmids_containing_genes commit af16cdb29dcdc9ad2ff1effb5cf4e23a8c98a764"
public-health-bioinformatics
parents:
diff changeset
7 import errno
62019f5116f8 "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/tree/master/tools/pick_plasmids_containing_genes commit af16cdb29dcdc9ad2ff1effb5cf4e23a8c98a764"
public-health-bioinformatics
parents:
diff changeset
8 import os
62019f5116f8 "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/tree/master/tools/pick_plasmids_containing_genes commit af16cdb29dcdc9ad2ff1effb5cf4e23a8c98a764"
public-health-bioinformatics
parents:
diff changeset
9 import re
62019f5116f8 "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/tree/master/tools/pick_plasmids_containing_genes commit af16cdb29dcdc9ad2ff1effb5cf4e23a8c98a764"
public-health-bioinformatics
parents:
diff changeset
10 import shutil
62019f5116f8 "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/tree/master/tools/pick_plasmids_containing_genes commit af16cdb29dcdc9ad2ff1effb5cf4e23a8c98a764"
public-health-bioinformatics
parents:
diff changeset
11
62019f5116f8 "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/tree/master/tools/pick_plasmids_containing_genes commit af16cdb29dcdc9ad2ff1effb5cf4e23a8c98a764"
public-health-bioinformatics
parents:
diff changeset
12
62019f5116f8 "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/tree/master/tools/pick_plasmids_containing_genes commit af16cdb29dcdc9ad2ff1effb5cf4e23a8c98a764"
public-health-bioinformatics
parents:
diff changeset
def parse_screen_file(screen_file):
    """Read a tab-separated screening file into a list of row dicts.

    The first line of the file supplies the column names; each following
    line becomes one dict keyed by those names.

    Args:
        screen_file: Path to the tab-delimited screening file.

    Returns:
        A list with one dict per data row, in file order.
    """
    with open(screen_file) as handle:
        return list(csv.DictReader(handle, delimiter="\t", quotechar='"'))
62019f5116f8 "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/tree/master/tools/pick_plasmids_containing_genes commit af16cdb29dcdc9ad2ff1effb5cf4e23a8c98a764"
public-health-bioinformatics
parents:
diff changeset
20
62019f5116f8 "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/tree/master/tools/pick_plasmids_containing_genes commit af16cdb29dcdc9ad2ff1effb5cf4e23a8c98a764"
public-health-bioinformatics
parents:
diff changeset
21
62019f5116f8 "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/tree/master/tools/pick_plasmids_containing_genes commit af16cdb29dcdc9ad2ff1effb5cf4e23a8c98a764"
public-health-bioinformatics
parents:
diff changeset
def _contigs_matching_screen(report_path, screen):
    """Return SEQUENCE values of report rows whose GENE matches a screen regex.

    The report is read once. Rows whose '#FILE' field is the literal string
    '#FILE' are skipped: they are header lines repeated inside a concatenated
    report. Each contig is returned once, ordered by first match (screening
    entries in order, then report rows in order).

    Args:
        report_path: Path to the tab-separated, concatenated abricate report.
        screen: Iterable of dicts, each with a 'regex' key.

    Returns:
        A list of unique contig names.
    """
    # Compile once per screening entry instead of once per (entry, row) pair.
    patterns = [re.compile(gene['regex']) for gene in screen]

    with open(report_path, 'r') as f:
        reader = csv.DictReader(f, delimiter="\t", quotechar='"')
        rows = [row for row in reader if row['#FILE'] != '#FILE']

    contigs = []
    seen = set()
    for pattern in patterns:
        for row in rows:
            if not pattern.search(row['GENE']):
                continue
            contig = row['SEQUENCE']
            if contig not in seen:
                seen.add(contig)
                contigs.append(contig)
    return contigs


def _fasta_header_lines(fasta_path):
    """Return the set of right-stripped lines starting with '>' in a file."""
    with open(fasta_path, 'r') as f:
        return set(line.rstrip() for line in f if line.startswith('>'))


def main(args):
    """Copy plasmid FASTA files that contain a contig carrying a screened gene.

    Prints a tab-separated table to stdout: a 'file'/'gene_detected' header,
    then one line per copied plasmid file.

    Args:
        args: Namespace with the attributes
            outdir: directory to create (if needed) and copy plasmids into;
            abricate_report_screening_file: TSV with a 'regex' column;
            concatenated_abricate_reports: TSV with '#FILE', 'GENE' and
                'SEQUENCE' columns;
            plasmids: list of plasmid FASTA paths.

    Raises:
        OSError: if the output directory cannot be created and does not
            already exist as a directory.
    """
    # create output directory
    try:
        os.mkdir(args.outdir)
    except OSError as exc:
        # An already-existing directory is fine; anything else is an error.
        if not (exc.errno == errno.EEXIST and os.path.isdir(args.outdir)):
            raise

    # parse screening file
    screen = parse_screen_file(args.abricate_report_screening_file)

    print("\t".join(["file", "gene_detected"]))

    # parse all abricate reports and determine which contigs carry genes of interest
    contigs_with_genes_of_interest = _contigs_matching_screen(
        args.concatenated_abricate_reports, screen)
    if not contigs_with_genes_of_interest:
        # Nothing matched, so the plasmid files are never opened.
        return

    # Read each plasmid's header lines once rather than once per contig.
    plasmid_headers = [(plasmid, _fasta_header_lines(plasmid))
                       for plasmid in args.plasmids]

    # copy the corresponding plasmid fasta files into outdir
    copied = set()
    for contig in contigs_with_genes_of_interest:
        header = '>' + contig
        for plasmid, headers in plasmid_headers:
            # A plasmid is reported and copied at most once, even when
            # several of its contigs match.
            if plasmid in copied or header not in headers:
                continue
            print("\t".join([plasmid, "True"]))
            shutil.copy2(plasmid, args.outdir)
            copied.add(plasmid)
62019f5116f8 "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/tree/master/tools/pick_plasmids_containing_genes commit af16cdb29dcdc9ad2ff1effb5cf4e23a8c98a764"
public-health-bioinformatics
parents:
diff changeset
60
62019f5116f8 "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/tree/master/tools/pick_plasmids_containing_genes commit af16cdb29dcdc9ad2ff1effb5cf4e23a8c98a764"
public-health-bioinformatics
parents:
diff changeset
61
62019f5116f8 "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/tree/master/tools/pick_plasmids_containing_genes commit af16cdb29dcdc9ad2ff1effb5cf4e23a8c98a764"
public-health-bioinformatics
parents:
diff changeset
# Command-line entry point: parse the options and hand them to main().
if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description="Copy plasmid assemblies whose contigs carry a gene "
                    "matching a screening regex into an output directory.")
    parser.add_argument("--plasmids", nargs='+', help="plasmid assemblies (fasta)")
    parser.add_argument("--concatenated_abricate_reports", help="abricate reports (tsv)")
    parser.add_argument(
        "--abricate_report_screening_file",
        help="screening file (tsv) with a 'regex' column; each regex is "
             "searched against the GENE column of the abricate reports")
    parser.add_argument("--outdir", default=".", help="Output directory")
    args = parser.parse_args()
    main(args)