Mercurial > repos > estrain > srst2
comparison scoreProfiles.py @ 0:efc202baae1f draft
planemo upload
author | estrain |
---|---|
date | Sun, 03 Dec 2017 13:28:11 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:efc202baae1f |
---|---|
1 #!/usr/bin/env | |
2 | |
3 ## Generate basic summary stats for SRST2 (v2) allele score output. Generate summaries for each profile defined in the database | |
4 ## author: errol strain, estrain@gmail.com | |
5 | |
6 from argparse import (ArgumentParser, FileType) | |
7 import sys | |
8 import glob | |
9 from decimal import Decimal | |
10 | |
11 def parse_args(): | |
12 "Parse the input arguments, use '-h' for help." | |
13 | |
14 parser = ArgumentParser(description='Generate Summary Scores for SRST2 Allele Score Output') | |
15 | |
16 # Read inputs | |
17 parser.add_argument('--mlst_definitions', type=str, required=True, nargs=1, help='MLST Definitions') | |
18 parser.add_argument('--output', type=str, required=True, nargs=1, help='MLST Definitions') | |
19 parser.add_argument('--profile_cov', type=str, required=False, help='Minimum Average Coverage to Report ST Profile',default="98") | |
20 parser.add_argument('--profile_max_mismatch', type=str, required=False, help='Maximum Number of Mismatches (SNPs)', default="1") | |
21 | |
22 return parser.parse_args() | |
23 | |
24 args =parse_args() | |
25 | |
26 allHash = {} | |
27 | |
28 # Read in Profile Database | |
29 profiles = open(args.mlst_definitions[0],"r") | |
30 output = open(args.output[0],"w") | |
31 | |
32 # Minimum mean percent coverage for reporting profile | |
33 min_per=float(args.profile_cov) | |
34 # Maximum mean mismatch for reporting profile | |
35 max_mis=float(args.profile_max_mismatch) | |
36 | |
37 # Read in Allele Scores | |
38 # Scores should be in srts2*.scores file | |
39 # Column 0:Allele, 1:Score, 2:Avg Depth, 5:% Coverage, 7:Mismatches, 8:Indels | |
40 scoreFile = open(glob.glob("srst2*.scores")[0],"r") | |
41 scoreFile.readline() | |
42 | |
43 for line in scoreFile.readlines() : | |
44 els = line.split("\t") | |
45 vals = els[0].split("_") | |
46 allHash.update({els[0]:line}) | |
47 | |
48 | |
49 # Allele names in first row | |
50 als = profiles.readline().rstrip() | |
51 filehead = als + str("\tMean_Score\tMean_Depth\tMean_%_Coverage\tTotal_Mismatches\tTotal_Indels\n") | |
52 output.write(filehead) | |
53 | |
54 genes = als.split("\t") | |
55 | |
56 for line in profiles.readlines() : | |
57 line = line.rstrip() | |
58 els = line.split("\t") | |
59 alleles = [] | |
60 for i in range(1,len(genes)) : | |
61 alleles.append(genes[i] + "_" + els[i]) | |
62 mscore=0 | |
63 mdepth=0 | |
64 mcover=0 | |
65 mmisma=0 | |
66 mindel=0 | |
67 for i in alleles : | |
68 if i in allHash : | |
69 vals=str(allHash[i]).split("\t") | |
70 mscore+=float(vals[1]) | |
71 mdepth+=float(vals[2]) | |
72 mcover+=float(vals[5]) | |
73 mmisma+=int(vals[7]) | |
74 mindel+=int(vals[8]) | |
75 | |
76 mscore=round(Decimal(mscore/(len(genes)-1)),5) | |
77 mdepth=round(Decimal(mdepth/(len(genes)-1)),2) | |
78 mcover=round(Decimal(mcover/(len(genes)-1)),2) | |
79 | |
80 if mmisma<=max_mis and mcover>=min_per : | |
81 output.write(line + "\t" + str(mscore) + "\t" + str(mdepth) + "\t" + str(mcover) + "\t" + str(mmisma) + "\t" + str(mindel) + "\n") | |
82 |