annotate screen_abricate_report.py @ 0:b2d56a44a872 draft

"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/screen_abricate_report commit 1d569fc27b052d1982d82ca19455caaff6386f46"
author public-health-bioinformatics
date Thu, 31 Oct 2019 15:43:15 -0400
parents
children 4f963b583186
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
b2d56a44a872 "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/screen_abricate_report commit 1d569fc27b052d1982d82ca19455caaff6386f46"
public-health-bioinformatics
parents:
diff changeset
1 #!/usr/bin/env python
b2d56a44a872 "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/screen_abricate_report commit 1d569fc27b052d1982d82ca19455caaff6386f46"
public-health-bioinformatics
parents:
diff changeset
2
b2d56a44a872 "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/screen_abricate_report commit 1d569fc27b052d1982d82ca19455caaff6386f46"
public-health-bioinformatics
parents:
diff changeset
3 from __future__ import print_function
b2d56a44a872 "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/screen_abricate_report commit 1d569fc27b052d1982d82ca19455caaff6386f46"
public-health-bioinformatics
parents:
diff changeset
4
b2d56a44a872 "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/screen_abricate_report commit 1d569fc27b052d1982d82ca19455caaff6386f46"
public-health-bioinformatics
parents:
diff changeset
5 import argparse
b2d56a44a872 "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/screen_abricate_report commit 1d569fc27b052d1982d82ca19455caaff6386f46"
public-health-bioinformatics
parents:
diff changeset
6 import os
b2d56a44a872 "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/screen_abricate_report commit 1d569fc27b052d1982d82ca19455caaff6386f46"
public-health-bioinformatics
parents:
diff changeset
7 import re
b2d56a44a872 "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/screen_abricate_report commit 1d569fc27b052d1982d82ca19455caaff6386f46"
public-health-bioinformatics
parents:
diff changeset
8 import sys
b2d56a44a872 "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/screen_abricate_report commit 1d569fc27b052d1982d82ca19455caaff6386f46"
public-health-bioinformatics
parents:
diff changeset
9 import csv
b2d56a44a872 "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/screen_abricate_report commit 1d569fc27b052d1982d82ca19455caaff6386f46"
public-health-bioinformatics
parents:
diff changeset
10 from pprint import pprint
b2d56a44a872 "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/screen_abricate_report commit 1d569fc27b052d1982d82ca19455caaff6386f46"
public-health-bioinformatics
parents:
diff changeset
11
b2d56a44a872 "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/screen_abricate_report commit 1d569fc27b052d1982d82ca19455caaff6386f46"
public-health-bioinformatics
parents:
diff changeset
def parse_screen_file(screen_file):
    """Load the gene screening table from a tab-separated file.

    Args:
        screen_file: Path to a tab-separated file whose first line is a
            header row.

    Returns:
        A list with one dict per data row, keyed by the header's column
        names. The list is empty when the file has no data rows.
    """
    with open(screen_file) as handle:
        # DictReader consumes the header line itself, so materialising the
        # reader yields only the data rows.
        return list(csv.DictReader(handle, delimiter="\t", quotechar='"'))
b2d56a44a872 "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/screen_abricate_report commit 1d569fc27b052d1982d82ca19455caaff6386f46"
public-health-bioinformatics
parents:
diff changeset
19
b2d56a44a872 "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/screen_abricate_report commit 1d569fc27b052d1982d82ca19455caaff6386f46"
public-health-bioinformatics
parents:
diff changeset
def get_fieldnames(input_file):
    """Return the column names from the header row of a tab-separated file.

    The names are taken from the header line itself, in file order, so the
    file does not need to contain any data rows.

    Args:
        input_file: Path to a tab-separated file whose first line is a
            header row.

    Returns:
        A list of column names; an empty list when the file has no header
        line at all.
    """
    with open(input_file) as f:
        reader = csv.DictReader(f, delimiter="\t", quotechar='"')
        # DictReader.fieldnames reads only the header line. Taking the keys
        # of the first data row instead raised StopIteration on header-only
        # files (e.g. a report with no hits).
        return list(reader.fieldnames or [])
b2d56a44a872 "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/screen_abricate_report commit 1d569fc27b052d1982d82ca19455caaff6386f46"
public-health-bioinformatics
parents:
diff changeset
26
b2d56a44a872 "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/screen_abricate_report commit 1d569fc27b052d1982d82ca19455caaff6386f46"
public-health-bioinformatics
parents:
diff changeset
def main(args):
    """Screen an abricate report for the genes listed in a screening file.

    For every gene in the screening file, each report row whose ``GENE``
    value matches the gene's ``regex`` (via ``re.search``) is copied to the
    screened report, and one ``gene_name``/``detected`` row is written to the
    gene detection status file. Matching rows are written grouped by
    screening gene, in screening-file order; a row matching several genes is
    written once per matching gene.

    Args:
        args: Parsed arguments providing these attributes:
            screening_file: path to a TSV with ``gene_name`` and ``regex``
                columns.
            abricate_report: path to the input TSV report; must have a
                ``GENE`` column.
            screened_report: output path for the matching report rows.
            gene_detection_status: output path for the per-gene status table.
    """
    screen = parse_screen_file(args.screening_file)
    gene_detection_status_fieldnames = ['gene_name', 'detected']
    with open(args.abricate_report, 'r') as f1, \
            open(args.screened_report, 'w') as f2, \
            open(args.gene_detection_status, 'w') as f3:
        abricate_report_reader = csv.DictReader(f1, delimiter="\t", quotechar='"')
        screened_report_writer = csv.DictWriter(
            f2, delimiter="\t", quotechar='"',
            # fieldnames is None for a completely empty report; fall back to
            # no columns so writeheader() does not fail.
            fieldnames=abricate_report_reader.fieldnames or [])
        gene_detection_status_writer = csv.DictWriter(
            f3, delimiter="\t", quotechar='"',
            fieldnames=gene_detection_status_fieldnames)
        screened_report_writer.writeheader()
        gene_detection_status_writer.writeheader()

        # Read the report rows once. Rewinding the file under a live
        # DictReader makes it return the header line as a data row on every
        # later pass, which permissive regexes could then match.
        abricate_report_rows = list(abricate_report_reader)

        for gene in screen:
            gene_regex = re.compile(gene['regex'])
            gene_detection_status = {
                'gene_name': gene['gene_name'],
                'detected': False
            }
            for abricate_report_row in abricate_report_rows:
                if gene_regex.search(abricate_report_row['GENE']):
                    gene_detection_status['detected'] = True
                    screened_report_writer.writerow(abricate_report_row)
            gene_detection_status_writer.writerow(gene_detection_status)
b2d56a44a872 "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/screen_abricate_report commit 1d569fc27b052d1982d82ca19455caaff6386f46"
public-health-bioinformatics
parents:
diff changeset
49
b2d56a44a872 "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/screen_abricate_report commit 1d569fc27b052d1982d82ca19455caaff6386f46"
public-health-bioinformatics
parents:
diff changeset
50
b2d56a44a872 "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/screen_abricate_report commit 1d569fc27b052d1982d82ca19455caaff6386f46"
public-health-bioinformatics
parents:
diff changeset
if __name__ == '__main__':
    # Command-line entry point: parse the report/screening paths and run the
    # screen.
    parser = argparse.ArgumentParser()
    parser.add_argument("abricate_report", help="Input: Abricate report to screen (tsv)")
    # main() opens all three of these paths unconditionally, so mark them
    # required: a missing option then produces a usage error instead of a
    # TypeError from open(None).
    parser.add_argument("--screening_file", required=True, help="Input: List of genes to screen for (tsv)")
    parser.add_argument("--screened_report", required=True, help="Output: Screened abricate report including only genes of interest (tsv)")
    parser.add_argument("--gene_detection_status", required=True, help="Output: detection status for all genes listed in the screening file (tsv)")
    args = parser.parse_args()
    main(args)