Mercurial > repos > public-health-bioinformatics > screen_abricate_report
comparison screen_abricate_report.py @ 3:2262e531c50b draft
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/screen_abricate_report commit 50a464c3e6f87ca8d2c874842cbcee370d8aa9c4"
author | public-health-bioinformatics |
---|---|
date | Thu, 02 Jan 2020 17:14:34 -0500 |
parents | 4f963b583186 |
children | 22247b1a59d5 |
comparison
legend: equal, deleted, inserted, replaced
2:912a3a3dc082 | 3:2262e531c50b |
---|---|
3 from __future__ import print_function | 3 from __future__ import print_function |
4 | 4 |
5 import argparse | 5 import argparse |
6 import csv | 6 import csv |
7 import re | 7 import re |
8 | |
9 | |
10 class Range(object): | |
11 """ | |
12 Used to limit the min_coverage and min_identity args to range 0.0 - 100.0 | |
13 """ | |
14 def __init__(self, start, end): | |
15 self.start = start | |
16 self.end = end | |
17 | |
18 def __eq__(self, other): | |
19 return self.start <= other <= self.end | |
20 | |
21 def __contains__(self, item): | |
22 return self.__eq__(item) | |
23 | |
24 def __iter__(self): | |
25 yield self | |
26 | |
27 def __repr__(self): | |
28 return str(self.start) + " - " + str(self.end) | |
8 | 29 |
9 | 30 |
10 def parse_screen_file(screen_file): | 31 def parse_screen_file(screen_file): |
11 screen = [] | 32 screen = [] |
12 with open(screen_file) as f: | 33 with open(screen_file) as f: |
20 with open(input_file) as f: | 41 with open(input_file) as f: |
21 reader = csv.DictReader(f, delimiter="\t", quotechar='"') | 42 reader = csv.DictReader(f, delimiter="\t", quotechar='"') |
22 row = next(reader) | 43 row = next(reader) |
23 fieldnames = row.keys() | 44 fieldnames = row.keys() |
24 return fieldnames | 45 return fieldnames |
46 | |
47 | |
48 def detect_gene(abricate_report_row, regex, min_coverage, min_identity): | |
49 gene_of_interest = bool(re.search(regex, abricate_report_row['GENE'])) | |
50 sufficient_coverage = float(abricate_report_row['%COVERAGE']) >= min_coverage | |
51 sufficient_identity = float(abricate_report_row['%IDENTITY']) >= min_identity | |
52 if gene_of_interest and sufficient_coverage and sufficient_identity: | |
53 return True | |
54 else: | |
55 return False | |
25 | 56 |
26 | 57 |
27 def main(args): | 58 def main(args): |
28 screen = parse_screen_file(args.screening_file) | 59 screen = parse_screen_file(args.screening_file) |
29 gene_detection_status_fieldnames = ['gene_name', 'detected'] | 60 gene_detection_status_fieldnames = ['gene_name', 'detected'] |
42 gene_detection_status = { | 73 gene_detection_status = { |
43 'gene_name': gene['gene_name'], | 74 'gene_name': gene['gene_name'], |
44 'detected': False | 75 'detected': False |
45 } | 76 } |
46 for abricate_report_row in abricate_report_reader: | 77 for abricate_report_row in abricate_report_reader: |
47 if re.search(gene['regex'], abricate_report_row['GENE']): | 78 if detect_gene(abricate_report_row, gene['regex'], args.min_coverage, args.min_identity): |
48 gene_detection_status['detected'] = True | 79 gene_detection_status['detected'] = True |
49 screened_report_writer.writerow(abricate_report_row) | 80 screened_report_writer.writerow(abricate_report_row) |
50 gene_detection_status_writer.writerow(gene_detection_status) | 81 gene_detection_status_writer.writerow(gene_detection_status) |
51 f1.seek(0) # return file pointer to start of abricate report | 82 f1.seek(0) # return file pointer to start of abricate report |
83 next(abricate_report_reader) | |
52 | 84 |
53 | 85 |
54 if __name__ == '__main__': | 86 if __name__ == '__main__': |
55 parser = argparse.ArgumentParser() | 87 parser = argparse.ArgumentParser() |
56 parser.add_argument("abricate_report", help="Input: Abricate report to screen (tsv)") | 88 parser.add_argument("abricate_report", help="Input: Abricate report to screen (tsv)") |
57 parser.add_argument("--screening_file", help="Input: List of genes to screen for (tsv)") | 89 parser.add_argument("--screening_file", help="Input: List of genes to screen for (tsv)") |
58 parser.add_argument("--screened_report", help=("Output: Screened abricate report ", | 90 parser.add_argument("--screened_report", |
59 "including only genes of interest (tsv)")) | 91 help=("Output: Screened abricate report, including only genes of interest (tsv)")) |
60 parser.add_argument("--gene_detection_status", help=("Output: detection status for all genes ", | 92 parser.add_argument("--gene_detection_status", |
61 "listed in the screening file (tsv)")) | 93 help=("Output: detection status for all genes listed in the screening file (tsv)")) |
94 parser.add_argument("--min_coverage", type=float, default=90.0, | |
95 choices=Range(0.0, 100.0), help=("Minimum percent coverage")) | |
96 parser.add_argument("--min_identity", type=float, default=90.0, | |
97 choices=Range(0.0, 100.0), help=("Minimum percent identity")) | |
62 args = parser.parse_args() | 98 args = parser.parse_args() |
63 main(args) | 99 main(args) |