Mercurial > repos > public-health-bioinformatics > pick_plasmids_containing_genes
annotate pick_plasmids_containing_genes.py @ 1:c9129ecc609d draft default tip
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/tree/master/tools/pick_plasmids_containing_genes commit e732b47aa50e35cd4cb70df263d21c5987cae697"
author | public-health-bioinformatics |
---|---|
date | Thu, 19 Dec 2019 18:22:33 -0500 |
parents | 62019f5116f8 |
children |
rev | line source |
---|---|
0
62019f5116f8
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/tree/master/tools/pick_plasmids_containing_genes commit af16cdb29dcdc9ad2ff1effb5cf4e23a8c98a764"
public-health-bioinformatics
parents:
diff
changeset
|
1 #!/usr/bin/env python |
62019f5116f8
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/tree/master/tools/pick_plasmids_containing_genes commit af16cdb29dcdc9ad2ff1effb5cf4e23a8c98a764"
public-health-bioinformatics
parents:
diff
changeset
|
2 |
62019f5116f8
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/tree/master/tools/pick_plasmids_containing_genes commit af16cdb29dcdc9ad2ff1effb5cf4e23a8c98a764"
public-health-bioinformatics
parents:
diff
changeset
|
3 from __future__ import print_function |
62019f5116f8
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/tree/master/tools/pick_plasmids_containing_genes commit af16cdb29dcdc9ad2ff1effb5cf4e23a8c98a764"
public-health-bioinformatics
parents:
diff
changeset
|
4 |
62019f5116f8
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/tree/master/tools/pick_plasmids_containing_genes commit af16cdb29dcdc9ad2ff1effb5cf4e23a8c98a764"
public-health-bioinformatics
parents:
diff
changeset
|
5 import argparse |
62019f5116f8
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/tree/master/tools/pick_plasmids_containing_genes commit af16cdb29dcdc9ad2ff1effb5cf4e23a8c98a764"
public-health-bioinformatics
parents:
diff
changeset
|
6 import csv |
62019f5116f8
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/tree/master/tools/pick_plasmids_containing_genes commit af16cdb29dcdc9ad2ff1effb5cf4e23a8c98a764"
public-health-bioinformatics
parents:
diff
changeset
|
7 import errno |
62019f5116f8
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/tree/master/tools/pick_plasmids_containing_genes commit af16cdb29dcdc9ad2ff1effb5cf4e23a8c98a764"
public-health-bioinformatics
parents:
diff
changeset
|
8 import os |
62019f5116f8
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/tree/master/tools/pick_plasmids_containing_genes commit af16cdb29dcdc9ad2ff1effb5cf4e23a8c98a764"
public-health-bioinformatics
parents:
diff
changeset
|
9 import re |
62019f5116f8
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/tree/master/tools/pick_plasmids_containing_genes commit af16cdb29dcdc9ad2ff1effb5cf4e23a8c98a764"
public-health-bioinformatics
parents:
diff
changeset
|
10 import shutil |
62019f5116f8
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/tree/master/tools/pick_plasmids_containing_genes commit af16cdb29dcdc9ad2ff1effb5cf4e23a8c98a764"
public-health-bioinformatics
parents:
diff
changeset
|
11 |
62019f5116f8
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/tree/master/tools/pick_plasmids_containing_genes commit af16cdb29dcdc9ad2ff1effb5cf4e23a8c98a764"
public-health-bioinformatics
parents:
diff
changeset
|
12 |
62019f5116f8
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/tree/master/tools/pick_plasmids_containing_genes commit af16cdb29dcdc9ad2ff1effb5cf4e23a8c98a764"
public-health-bioinformatics
parents:
diff
changeset
|
def parse_screen_file(screen_file):
    """Load the gene screening table into a list of row dictionaries.

    The file is read as tab-delimited text with '"' as the quote character.
    Its first line supplies the column names used as keys in every row.

    :param screen_file: path to the screening file
    :return: list with one dict per data row, in file order
    """
    with open(screen_file) as handle:
        # Materialize the rows before the file is closed.
        return list(csv.DictReader(handle, delimiter="\t", quotechar='"'))
62019f5116f8
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/tree/master/tools/pick_plasmids_containing_genes commit af16cdb29dcdc9ad2ff1effb5cf4e23a8c98a764"
public-health-bioinformatics
parents:
diff
changeset
|
20 |
62019f5116f8
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/tree/master/tools/pick_plasmids_containing_genes commit af16cdb29dcdc9ad2ff1effb5cf4e23a8c98a764"
public-health-bioinformatics
parents:
diff
changeset
|
21 |
62019f5116f8
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/tree/master/tools/pick_plasmids_containing_genes commit af16cdb29dcdc9ad2ff1effb5cf4e23a8c98a764"
public-health-bioinformatics
parents:
diff
changeset
|
def _find_contigs_of_interest(report_path, patterns):
    """Return the set of SEQUENCE values whose GENE matches any pattern.

    :param report_path: path to the tab-delimited, concatenated abricate report
    :param patterns: compiled regular expressions to search GENE values with
    :return: set of values from the SEQUENCE column of matching rows
    """
    contigs = set()
    with open(report_path, 'r') as f:
        reader = csv.DictReader(f, delimiter="\t", quotechar='"')
        for row in reader:
            # Concatenated reports repeat the header line; skip the copies.
            if row['#FILE'] == '#FILE':
                continue
            gene_name = row['GENE']
            if any(pattern.search(gene_name) for pattern in patterns):
                contigs.add(row['SEQUENCE'])
    return contigs


def _fasta_has_contig(fasta_path, contigs):
    """Tell whether any header line of a fasta file names one of the contigs.

    A header matches when the text after '>' equals a contig name, or when
    its first whitespace-delimited token does.
    NOTE(review): the token comparison assumes the report's SEQUENCE column
    holds only the sequence ID without the description -- confirm against
    abricate's output.

    :param fasta_path: path to a fasta file
    :param contigs: set of contig names to look for
    :return: True if at least one header matches, otherwise False
    """
    with open(fasta_path, 'r') as f:
        for line in f:
            if not line.startswith('>'):
                continue
            header = line.rstrip()[1:]
            if header in contigs:
                return True
            tokens = header.split(None, 1)
            if tokens and tokens[0] in contigs:
                return True
    return False


def main(args):
    """Copy plasmid fasta files that carry a screened gene into args.outdir.

    Reads the screening file (each row's 'regex' column is a pattern), scans
    the concatenated abricate report for rows whose GENE matches any pattern,
    and copies every plasmid file that has a header naming one of the matching
    SEQUENCE values. A tab-separated report with the columns 'file' and
    'gene_detected' is printed to stdout, one row per copied plasmid.

    :param args: namespace with the attributes outdir,
        abricate_report_screening_file, concatenated_abricate_reports
        and plasmids
    :raises OSError: if the output directory cannot be created
    """
    # create output directory (an already existing directory is fine)
    try:
        os.mkdir(args.outdir)
    except OSError as exc:
        if exc.errno != errno.EEXIST or not os.path.isdir(args.outdir):
            raise

    # parse screening file
    screen = parse_screen_file(args.abricate_report_screening_file)
    print("\t".join(["file", "gene_detected"]))

    # Compile each pattern once instead of once per report row.
    patterns = [re.compile(gene['regex']) for gene in screen]

    # A set removes duplicates, so a plasmid with several gene hits is
    # reported and copied only once.
    contigs_with_genes_of_interest = _find_contigs_of_interest(
        args.concatenated_abricate_reports, patterns)

    # copy the corresponding plasmid fasta files into outdir;
    # each plasmid file is read a single time
    for plasmid in args.plasmids or []:
        if not _fasta_has_contig(plasmid, contigs_with_genes_of_interest):
            continue
        print("\t".join([plasmid, "True"]))
        destination = os.path.join(args.outdir, os.path.basename(plasmid))
        # Copying a file onto itself raises an error, so skip plasmids
        # that already live in outdir.
        if os.path.realpath(destination) != os.path.realpath(plasmid):
            shutil.copy2(plasmid, args.outdir)
62019f5116f8
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/tree/master/tools/pick_plasmids_containing_genes commit af16cdb29dcdc9ad2ff1effb5cf4e23a8c98a764"
public-health-bioinformatics
parents:
diff
changeset
|
60 |
62019f5116f8
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/tree/master/tools/pick_plasmids_containing_genes commit af16cdb29dcdc9ad2ff1effb5cf4e23a8c98a764"
public-health-bioinformatics
parents:
diff
changeset
|
61 |
62019f5116f8
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/tree/master/tools/pick_plasmids_containing_genes commit af16cdb29dcdc9ad2ff1effb5cf4e23a8c98a764"
public-health-bioinformatics
parents:
diff
changeset
|
if __name__ == '__main__':
    # Command-line entry point: collect the input paths and hand them to main().
    parser = argparse.ArgumentParser(
        description="Copy plasmid assemblies that contain genes of interest "
                    "into an output directory.")
    # default=[] keeps a run without --plasmids from iterating over None.
    parser.add_argument("--plasmids", nargs='+', default=[],
                        help="plasmid assemblies (fasta)")
    # Both input files are opened unconditionally, so fail early with a
    # usage message instead of a TypeError from open(None).
    parser.add_argument("--concatenated_abricate_reports", required=True,
                        help="abricate reports (tsv)")
    parser.add_argument("--abricate_report_screening_file", required=True,
                        help="tab-delimited screening file with a 'regex' column (tsv)")
    parser.add_argument("--outdir", dest="outdir", default=".", help="Output directory")
    args = parser.parse_args()
    main(args)