Mercurial > repos > public-health-bioinformatics > screen_abricate_report
annotate screen_abricate_report.py @ 2:912a3a3dc082 draft
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/screen_abricate_report commit 2ec76aac2fcf466fc16091bfff8b7cb83fd92467"
author | public-health-bioinformatics |
---|---|
date | Thu, 19 Dec 2019 20:31:11 -0500 |
parents | 4f963b583186 |
children | 2262e531c50b |
rev | line source |
---|---|
0
b2d56a44a872
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/screen_abricate_report commit 1d569fc27b052d1982d82ca19455caaff6386f46"
public-health-bioinformatics
parents:
diff
changeset
|
1 #!/usr/bin/env python |
b2d56a44a872
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/screen_abricate_report commit 1d569fc27b052d1982d82ca19455caaff6386f46"
public-health-bioinformatics
parents:
diff
changeset
|
2 |
b2d56a44a872
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/screen_abricate_report commit 1d569fc27b052d1982d82ca19455caaff6386f46"
public-health-bioinformatics
parents:
diff
changeset
|
3 from __future__ import print_function |
b2d56a44a872
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/screen_abricate_report commit 1d569fc27b052d1982d82ca19455caaff6386f46"
public-health-bioinformatics
parents:
diff
changeset
|
4 |
b2d56a44a872
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/screen_abricate_report commit 1d569fc27b052d1982d82ca19455caaff6386f46"
public-health-bioinformatics
parents:
diff
changeset
|
5 import argparse |
1
4f963b583186
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/screen_abricate_report commit c27095ceaf23aa2c910e0ea866db9ec8d3e7816b"
public-health-bioinformatics
parents:
0
diff
changeset
|
6 import csv |
0
b2d56a44a872
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/screen_abricate_report commit 1d569fc27b052d1982d82ca19455caaff6386f46"
public-health-bioinformatics
parents:
diff
changeset
|
7 import re |
1
4f963b583186
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/screen_abricate_report commit c27095ceaf23aa2c910e0ea866db9ec8d3e7816b"
public-health-bioinformatics
parents:
0
diff
changeset
|
8 |
0
b2d56a44a872
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/screen_abricate_report commit 1d569fc27b052d1982d82ca19455caaff6386f46"
public-health-bioinformatics
parents:
diff
changeset
|
9 |
b2d56a44a872
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/screen_abricate_report commit 1d569fc27b052d1982d82ca19455caaff6386f46"
public-health-bioinformatics
parents:
diff
changeset
|
def parse_screen_file(screen_file):
    """Load every record of a tab-separated screening file.

    The first line of the file is used as the header; each following line
    becomes one mapping of column name to value.

    Args:
        screen_file: Path to the tab-separated screening file.

    Returns:
        A list with one mapping per data line, in file order. The list is
        empty when the file has no data lines.
    """
    with open(screen_file) as handle:
        records = list(csv.DictReader(handle, delimiter="\t", quotechar='"'))
    return records
b2d56a44a872
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/screen_abricate_report commit 1d569fc27b052d1982d82ca19455caaff6386f46"
public-health-bioinformatics
parents:
diff
changeset
|
17 |
1
4f963b583186
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/screen_abricate_report commit c27095ceaf23aa2c910e0ea866db9ec8d3e7816b"
public-health-bioinformatics
parents:
0
diff
changeset
|
18 |
0
b2d56a44a872
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/screen_abricate_report commit 1d569fc27b052d1982d82ca19455caaff6386f46"
public-health-bioinformatics
parents:
diff
changeset
|
def get_fieldnames(input_file):
    """Return the column names from the header line of a tab-separated file.

    The names are taken from the header itself rather than from the first
    data row, so a file that contains only a header (for example a report
    with no hits) is handled, and the names keep their order in the file.

    Args:
        input_file: Path to the tab-separated file.

    Returns:
        A list of column names in header order, or an empty list when the
        file is completely empty.
    """
    with open(input_file) as f:
        reader = csv.DictReader(f, delimiter="\t", quotechar='"')
        # DictReader reads the header on first access to ``fieldnames``;
        # the value is None when the file has no lines at all.
        fieldnames = reader.fieldnames
    return list(fieldnames) if fieldnames is not None else []
1
4f963b583186
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/screen_abricate_report commit c27095ceaf23aa2c910e0ea866db9ec8d3e7816b"
public-health-bioinformatics
parents:
0
diff
changeset
|
25 |
4f963b583186
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/screen_abricate_report commit c27095ceaf23aa2c910e0ea866db9ec8d3e7816b"
public-health-bioinformatics
parents:
0
diff
changeset
|
26 |
0
b2d56a44a872
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/screen_abricate_report commit 1d569fc27b052d1982d82ca19455caaff6386f46"
public-health-bioinformatics
parents:
diff
changeset
|
def main(args):
    """Screen an abricate report for the genes listed in a screening file.

    For every entry of the screening file, each abricate report row whose
    ``GENE`` value matches the entry's ``regex`` (via ``re.search``) is
    copied to the screened report, and one line recording whether at least
    one row matched is written to the gene detection status file. A report
    row that matches several screening entries is written once per entry.

    Args:
        args: Parsed command-line arguments providing the attributes
            ``abricate_report`` (input tsv with a ``GENE`` column),
            ``screening_file`` (input tsv with ``gene_name`` and ``regex``
            columns), ``screened_report`` (output tsv path) and
            ``gene_detection_status`` (output tsv path).
    """
    screen = parse_screen_file(args.screening_file)
    gene_detection_status_fieldnames = ['gene_name', 'detected']
    with open(args.abricate_report, 'r') as f1, \
            open(args.screened_report, 'w') as f2, \
            open(args.gene_detection_status, 'w') as f3:
        abricate_report_reader = csv.DictReader(f1, delimiter="\t", quotechar='"')
        screened_report_writer = csv.DictWriter(f2, delimiter="\t", quotechar='"',
                                                fieldnames=abricate_report_reader.fieldnames)
        gene_detection_status_writer = csv.DictWriter(f3, delimiter="\t", quotechar='"',
                                                      fieldnames=gene_detection_status_fieldnames)
        screened_report_writer.writeheader()
        gene_detection_status_writer.writeheader()

        # Read the report once. Rewinding the file for each gene would make
        # the DictReader (whose fieldnames are already set) hand back the
        # header line as if it were a data row on every later pass.
        abricate_report_rows = list(abricate_report_reader)

        for gene in screen:
            # Compile once per gene instead of once per report row.
            gene_pattern = re.compile(gene['regex'])
            gene_detection_status = {
                'gene_name': gene['gene_name'],
                'detected': False
            }
            for abricate_report_row in abricate_report_rows:
                if gene_pattern.search(abricate_report_row['GENE']):
                    gene_detection_status['detected'] = True
                    screened_report_writer.writerow(abricate_report_row)
            gene_detection_status_writer.writerow(gene_detection_status)
0
b2d56a44a872
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/screen_abricate_report commit 1d569fc27b052d1982d82ca19455caaff6386f46"
public-health-bioinformatics
parents:
diff
changeset
|
52 |
b2d56a44a872
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/screen_abricate_report commit 1d569fc27b052d1982d82ca19455caaff6386f46"
public-health-bioinformatics
parents:
diff
changeset
|
53 |
b2d56a44a872
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/screen_abricate_report commit 1d569fc27b052d1982d82ca19455caaff6386f46"
public-health-bioinformatics
parents:
diff
changeset
|
if __name__ == '__main__':
    # Command-line entry point: parse the report/screening/output paths and
    # hand them to main().
    parser = argparse.ArgumentParser()
    parser.add_argument("abricate_report", help="Input: Abricate report to screen (tsv)")
    # main() opens all three of the following paths unconditionally, so they
    # are marked required to get a usage error instead of a failure in open().
    parser.add_argument("--screening_file", required=True,
                        help="Input: List of genes to screen for (tsv)")
    # The help texts rely on implicit concatenation of adjacent string
    # literals; a comma between them would build a tuple, which argparse
    # cannot format and which makes -h/--help raise TypeError.
    parser.add_argument("--screened_report", required=True,
                        help=("Output: Screened abricate report "
                              "including only genes of interest (tsv)"))
    parser.add_argument("--gene_detection_status", required=True,
                        help=("Output: detection status for all genes "
                              "listed in the screening file (tsv)"))
    args = parser.parse_args()
    main(args)