screen_abricate_report: screen_abricate

comparison screen_abricate_report.py @ 3:2262e531c50b draft

"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/screen_abricate_report commit 50a464c3e6f87ca8d2c874842cbcee370d8aa9c4"

author	public-health-bioinformatics
date	Thu, 02 Jan 2020 17:14:34 -0500
parents	4f963b583186
children	22247b1a59d5

comparison

equal deleted inserted replaced

-:912a3a3dc082
+:2262e531c50b
 from __future__ import print_function
 import argparse
 import csv
 import re
+class Range(object):
+"""
+Used to limit the min_coverage and min_identity args to range 0.0 - 100.0
+"""
+def __init__(self, start, end):
+self.start = start
+self.end = end
+def __eq__(self, other):
+return self.start <= other <= self.end
+def __contains__(self, item):
+return self.__eq__(item)
+def __iter__(self):
+yield self
+def __repr__(self):
+return str(self.start) + " - " + str(self.end)
 def parse_screen_file(screen_file):
 screen = []
 with open(screen_file) as f:
 with open(input_file) as f:
 reader = csv.DictReader(f, delimiter="\t", quotechar='"')
 row = next(reader)
 fieldnames = row.keys()
 return fieldnames
+def detect_gene(abricate_report_row, regex, min_coverage, min_identity):
+gene_of_interest = bool(re.search(regex, abricate_report_row['GENE']))
+sufficient_coverage = float(abricate_report_row['%COVERAGE']) >= min_coverage
+sufficient_identity = float(abricate_report_row['%IDENTITY']) >= min_identity
+if gene_of_interest and sufficient_coverage and sufficient_identity:
+return True
+else:
+return False
 def main(args):
 screen = parse_screen_file(args.screening_file)
 gene_detection_status_fieldnames = ['gene_name', 'detected']
 gene_detection_status = {
 'gene_name': gene['gene_name'],
 'detected': False
 }
 for abricate_report_row in abricate_report_reader:
-if re.search(gene['regex'], abricate_report_row['GENE']):
+if detect_gene(abricate_report_row, gene['regex'], args.min_coverage, args.min_identity):
 gene_detection_status['detected'] = True
 screened_report_writer.writerow(abricate_report_row)
 gene_detection_status_writer.writerow(gene_detection_status)
 f1.seek(0)  # return file pointer to start of abricate report
+next(abricate_report_reader)
 if __name__ == '__main__':
 parser = argparse.ArgumentParser()
 parser.add_argument("abricate_report", help="Input: Abricate report to screen (tsv)")
 parser.add_argument("--screening_file", help="Input: List of genes to screen for (tsv)")
-parser.add_argument("--screened_report", help=("Output: Screened abricate report ",
+parser.add_argument("--screened_report",
-"including only genes of interest (tsv)"))
+help=("Output: Screened abricate report, including only genes of interest (tsv)"))
-parser.add_argument("--gene_detection_status", help=("Output: detection status for all genes ",
+parser.add_argument("--gene_detection_status",
-"listed in the screening file (tsv)"))
+help=("Output: detection status for all genes listed in the screening file (tsv)"))
+parser.add_argument("--min_coverage", type=float,  default=90.0,
+choices=Range(0.0, 100.0), help=("Minimum percent coverage"))
+parser.add_argument("--min_identity", type=float, default=90.0,
+choices=Range(0.0, 100.0), help=("Minimum percent identity"))
 args = parser.parse_args()
 main(args)

Mercurial > repos > public-health-bioinformatics > screen_abricate_report

comparison screen_abricate_report.py @ 3:2262e531c50b draft