comparison screen_abricate_report.py @ 3:2262e531c50b draft

"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/screen_abricate_report commit 50a464c3e6f87ca8d2c874842cbcee370d8aa9c4"
author public-health-bioinformatics
date Thu, 02 Jan 2020 17:14:34 -0500
parents 4f963b583186
children 22247b1a59d5
comparison
equal deleted inserted replaced
2:912a3a3dc082 3:2262e531c50b
3 from __future__ import print_function 3 from __future__ import print_function
4 4
5 import argparse 5 import argparse
6 import csv 6 import csv
7 import re 7 import re
8
9
class Range(object):
    """
    Closed numeric interval intended for use as an argparse ``choices`` value.

    Used to limit the min_coverage and min_identity args to range 0.0 - 100.0

    argparse validates an argument with ``value in choices`` and iterates over
    ``choices`` when it renders the allowed values in usage/error text, so this
    class implements ``__contains__`` for the bounds check and ``__iter__`` /
    ``__repr__`` so that the allowed values are displayed as ``start - end``.
    """
    def __init__(self, start, end):
        # Both bounds are inclusive.
        self.start = start
        self.end = end

    def __contains__(self, item):
        """Return True if ``item`` lies within [start, end]."""
        try:
            return self.start <= item <= self.end
        except TypeError:
            # Values that cannot be ordered against the bounds (e.g. None or a
            # string on Python 3) are simply not in the range.
            return False

    def __eq__(self, other):
        """
        Compare with another Range by endpoints; for any other value, report
        whether that value falls inside this range.

        The second behaviour is kept so that ``value in [Range(a, b)]`` (a list
        wrapping the range, which relies on ``==``) keeps working.
        """
        if isinstance(other, Range):
            return self.start == other.start and self.end == other.end
        return self.__contains__(other)

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so define it explicitly.
        return not self.__eq__(other)

    def __iter__(self):
        # A single "choice" (the range itself) so that it is rendered through
        # __repr__ rather than as an enumeration of values.
        yield self

    def __repr__(self):
        return "{} - {}".format(self.start, self.end)
8 29
9 30
10 def parse_screen_file(screen_file): 31 def parse_screen_file(screen_file):
11 screen = [] 32 screen = []
12 with open(screen_file) as f: 33 with open(screen_file) as f:
20 with open(input_file) as f: 41 with open(input_file) as f:
21 reader = csv.DictReader(f, delimiter="\t", quotechar='"') 42 reader = csv.DictReader(f, delimiter="\t", quotechar='"')
22 row = next(reader) 43 row = next(reader)
23 fieldnames = row.keys() 44 fieldnames = row.keys()
24 return fieldnames 45 return fieldnames
46
47
def detect_gene(abricate_report_row, regex, min_coverage, min_identity):
    """
    Decide whether one abricate report row counts as a detection of a gene.

    A row is a detection when its 'GENE' value matches ``regex`` (via
    ``re.search``) and its '%COVERAGE' and '%IDENTITY' values, converted to
    float, are each greater than or equal to the corresponding minimum.

    Returns True or False.
    """
    # All three checks are always evaluated, in this order, so a non-numeric
    # coverage/identity value raises ValueError even if the gene name does not
    # match.
    name_matches = re.search(regex, abricate_report_row['GENE']) is not None
    coverage_ok = min_coverage <= float(abricate_report_row['%COVERAGE'])
    identity_ok = min_identity <= float(abricate_report_row['%IDENTITY'])
    return name_matches and coverage_ok and identity_ok
25 56
26 57
27 def main(args): 58 def main(args):
28 screen = parse_screen_file(args.screening_file) 59 screen = parse_screen_file(args.screening_file)
29 gene_detection_status_fieldnames = ['gene_name', 'detected'] 60 gene_detection_status_fieldnames = ['gene_name', 'detected']
42 gene_detection_status = { 73 gene_detection_status = {
43 'gene_name': gene['gene_name'], 74 'gene_name': gene['gene_name'],
44 'detected': False 75 'detected': False
45 } 76 }
46 for abricate_report_row in abricate_report_reader: 77 for abricate_report_row in abricate_report_reader:
47 if re.search(gene['regex'], abricate_report_row['GENE']): 78 if detect_gene(abricate_report_row, gene['regex'], args.min_coverage, args.min_identity):
48 gene_detection_status['detected'] = True 79 gene_detection_status['detected'] = True
49 screened_report_writer.writerow(abricate_report_row) 80 screened_report_writer.writerow(abricate_report_row)
50 gene_detection_status_writer.writerow(gene_detection_status) 81 gene_detection_status_writer.writerow(gene_detection_status)
51 f1.seek(0) # return file pointer to start of abricate report 82 f1.seek(0) # return file pointer to start of abricate report
83 next(abricate_report_reader)
52 84
53 85
if __name__ == '__main__':
    # Command-line entry point: declare the arguments, parse them, and hand
    # the resulting namespace to main().
    parser = argparse.ArgumentParser()

    # Inputs
    parser.add_argument(
        "abricate_report",
        help="Input: Abricate report to screen (tsv)"
    )
    parser.add_argument(
        "--screening_file",
        help="Input: List of genes to screen for (tsv)"
    )

    # Outputs
    parser.add_argument(
        "--screened_report",
        help="Output: Screened abricate report, including only genes of interest (tsv)"
    )
    parser.add_argument(
        "--gene_detection_status",
        help="Output: detection status for all genes listed in the screening file (tsv)"
    )

    # Detection thresholds, each a percentage restricted to 0.0 - 100.0
    parser.add_argument(
        "--min_coverage",
        type=float,
        default=90.0,
        choices=Range(0.0, 100.0),
        help="Minimum percent coverage"
    )
    parser.add_argument(
        "--min_identity",
        type=float,
        default=90.0,
        choices=Range(0.0, 100.0),
        help="Minimum percent identity"
    )

    args = parser.parse_args()
    main(args)