# HG changeset patch # User estrain # Date 1512325691 18000 # Node ID efc202baae1fc49d49231d92d3971e950feda352 planemo upload diff -r 000000000000 -r efc202baae1f scoreProfiles.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scoreProfiles.py Sun Dec 03 13:28:11 2017 -0500 @@ -0,0 +1,82 @@ +#!/usr/bin/env + +## Generate basic summary stats for SRST2 (v2) allele score output. Generate summaries for each profile defined in the database +## author: errol strain, estrain@gmail.com + +from argparse import (ArgumentParser, FileType) +import sys +import glob +from decimal import Decimal + +def parse_args(): + "Parse the input arguments, use '-h' for help." + + parser = ArgumentParser(description='Generate Summary Scores for SRST2 Allele Score Output') + + # Read inputs + parser.add_argument('--mlst_definitions', type=str, required=True, nargs=1, help='MLST Definitions') + parser.add_argument('--output', type=str, required=True, nargs=1, help='MLST Definitions') + parser.add_argument('--profile_cov', type=str, required=False, help='Minimum Average Coverage to Report ST Profile',default="98") + parser.add_argument('--profile_max_mismatch', type=str, required=False, help='Maximum Number of Mismatches (SNPs)', default="1") + + return parser.parse_args() + +args =parse_args() + +allHash = {} + +# Read in Profile Database +profiles = open(args.mlst_definitions[0],"r") +output = open(args.output[0],"w") + +# Minimum mean percent coverage for reporting profile +min_per=float(args.profile_cov) +# Maximum mean mismatch for reporting profile +max_mis=float(args.profile_max_mismatch) + +# Read in Allele Scores +# Scores should be in srts2*.scores file +# Column 0:Allele, 1:Score, 2:Avg Depth, 5:% Coverage, 7:Mismatches, 8:Indels +scoreFile = open(glob.glob("srst2*.scores")[0],"r") +scoreFile.readline() + +for line in scoreFile.readlines() : + els = line.split("\t") + vals = els[0].split("_") + allHash.update({els[0]:line}) + + +# Allele names in first row +als = profiles.readline().rstrip() +filehead = als + str("\tMean_Score\tMean_Depth\tMean_%_Coverage\tTotal_Mismatches\tTotal_Indels\n") +output.write(filehead) + +genes = als.split("\t") + +for line in profiles.readlines() : + line = line.rstrip() + els = line.split("\t") + alleles = [] + for i in range(1,len(genes)) : + alleles.append(genes[i] + "_" + els[i]) + mscore=0 + mdepth=0 + mcover=0 + mmisma=0 + mindel=0 + for i in alleles : + if i in allHash : + vals=str(allHash[i]).split("\t") + mscore+=float(vals[1]) + mdepth+=float(vals[2]) + mcover+=float(vals[5]) + mmisma+=int(vals[7]) + mindel+=int(vals[8]) + + mscore=round(Decimal(mscore/(len(genes)-1)),5) + mdepth=round(Decimal(mdepth/(len(genes)-1)),2) + mcover=round(Decimal(mcover/(len(genes)-1)),2) + + if mmisma<=max_mis and mcover>=min_per : + output.write(line + "\t" + str(mscore) + "\t" + str(mdepth) + "\t" + str(mcover) + "\t" + str(mmisma) + "\t" + str(mindel) + "\n") + diff -r 000000000000 -r efc202baae1f srst2v2.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/srst2v2.xml Sun Dec 03 13:28:11 2017 -0500 @@ -0,0 +1,168 @@ + + + srst2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + job_type['job'] == "mlst" + + + job_type['job'] == "mlst" + + + job_type['job'] == "mlst" + + + job_type['job'] == "gene" + + + + + + + + + @misc{pope_dashnow_zobel_holt_raven_schultz_inouye_tomita_2014, + title={SRST2: Rapid genomic surveillance for public health and hospital microbiology labs}, + url={https://genomemedicine.biomedcentral.com/articles/10.1186/s13073-014-0090-6}, + journal={Genome Medicine}, publisher={BioMed Central}, + author={Pope, Bernard J and Dashnow, Harriet and Zobel, Justin and Holt, Kathryn E and Raven, Lesley-Ann and Schultz, Mark B and Inouye, Michael and Tomita, Takehiro}, + year={2014}, month={Nov}} , + } + +