| 0 | 1 #!/usr/bin/env | 
|  | 2 | 
|  | 3 ## Generate basic summary stats for SRST2 (v2) allele score output. Generate summaries for each profile defined in the database | 
|  | 4 ## author: errol strain, estrain@gmail.com | 
|  | 5 | 
|  | 6 from argparse import (ArgumentParser, FileType) | 
|  | 7 import sys | 
|  | 8 import glob | 
|  | 9 from decimal import Decimal | 
|  | 10 | 
|  | 11 def parse_args(): | 
|  | 12   "Parse the input arguments, use '-h' for help." | 
|  | 13 | 
|  | 14   parser = ArgumentParser(description='Generate Summary Scores for SRST2 Allele Score Output') | 
|  | 15 | 
|  | 16   # Read inputs | 
|  | 17   parser.add_argument('--mlst_definitions', type=str, required=True, nargs=1, help='MLST Definitions') | 
|  | 18   parser.add_argument('--output', type=str, required=True, nargs=1, help='MLST Definitions') | 
|  | 19   parser.add_argument('--profile_cov', type=str, required=False, help='Minimum Average Coverage to Report ST Profile',default="98") | 
|  | 20   parser.add_argument('--profile_max_mismatch', type=str, required=False, help='Maximum Number of Mismatches (SNPs)', default="1") | 
|  | 21 | 
|  | 22   return parser.parse_args() | 
|  | 23 | 
|  | 24 args =parse_args() | 
|  | 25 | 
|  | 26 allHash = {} | 
|  | 27 | 
|  | 28 # Read in Profile Database | 
|  | 29 profiles = open(args.mlst_definitions[0],"r") | 
|  | 30 output = open(args.output[0],"w") | 
|  | 31 | 
|  | 32 # Minimum mean percent coverage for reporting profile | 
|  | 33 min_per=float(args.profile_cov) | 
|  | 34 # Maximum mean mismatch for reporting profile | 
|  | 35 max_mis=float(args.profile_max_mismatch) | 
|  | 36 | 
|  | 37 # Read in Allele Scores | 
|  | 38 # Scores should be in srts2*.scores file | 
|  | 39 # Column 0:Allele, 1:Score, 2:Avg Depth, 5:% Coverage, 7:Mismatches, 8:Indels | 
|  | 40 scoreFile = open(glob.glob("srst2*.scores")[0],"r") | 
|  | 41 scoreFile.readline() | 
|  | 42 | 
|  | 43 for line in scoreFile.readlines() : | 
|  | 44   els = line.split("\t") | 
|  | 45   vals = els[0].split("_") | 
|  | 46   allHash.update({els[0]:line}) | 
|  | 47 | 
|  | 48 | 
|  | 49 # Allele names in first row | 
|  | 50 als = profiles.readline().rstrip() | 
|  | 51 filehead = als + str("\tMean_Score\tMean_Depth\tMean_%_Coverage\tTotal_Mismatches\tTotal_Indels\n") | 
|  | 52 output.write(filehead) | 
|  | 53 | 
|  | 54 genes = als.split("\t") | 
|  | 55 | 
|  | 56 for line in profiles.readlines() : | 
|  | 57   line = line.rstrip() | 
|  | 58   els = line.split("\t") | 
|  | 59   alleles = [] | 
|  | 60   for i in range(1,len(genes)) : | 
|  | 61     alleles.append(genes[i] + "_" + els[i]) | 
|  | 62   mscore=0 | 
|  | 63   mdepth=0 | 
|  | 64   mcover=0 | 
|  | 65   mmisma=0 | 
|  | 66   mindel=0 | 
|  | 67   for i in alleles : | 
|  | 68     if i in allHash : | 
|  | 69       vals=str(allHash[i]).split("\t") | 
|  | 70       mscore+=float(vals[1]) | 
|  | 71       mdepth+=float(vals[2]) | 
|  | 72       mcover+=float(vals[5]) | 
|  | 73       mmisma+=int(vals[7]) | 
|  | 74       mindel+=int(vals[8]) | 
|  | 75 | 
|  | 76   mscore=round(Decimal(mscore/(len(genes)-1)),5) | 
|  | 77   mdepth=round(Decimal(mdepth/(len(genes)-1)),2) | 
|  | 78   mcover=round(Decimal(mcover/(len(genes)-1)),2) | 
|  | 79 | 
|  | 80   if mmisma<=max_mis and mcover>=min_per : | 
|  | 81     output.write(line + "\t" + str(mscore) + "\t" + str(mdepth) + "\t" + str(mcover) + "\t" + str(mmisma) + "\t" + str(mindel) + "\n") | 
|  | 82 |