comparison sucos_max.py @ 2:8161c08627bf draft

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit 6f1ee2812cca091561a2b2e464498dae2f913b8d"
author bgruening
date Thu, 19 Mar 2020 07:13:36 -0400
parents f8f53668d5a2
children a3c1e2eea7d3
comparison
equal deleted inserted replaced
1:2e67eea82ff7 2:8161c08627bf
37 import sucos, utils 37 import sucos, utils
38 import argparse, gzip, os 38 import argparse, gzip, os
39 from rdkit import Chem 39 from rdkit import Chem
40 40
41 41
42 def process(inputfilename, clusterfilenames, outputfilename, mode): 42 def process(inputfilename, clusterfilenames, outputfilename):
43
44 all_clusters = {} 43 all_clusters = {}
45 for filename in clusterfilenames: 44 for filename in clusterfilenames:
46 cluster = [] 45 cluster = []
47 cluster_file = utils.open_file_for_reading(filename) 46 cluster_file = utils.open_file_for_reading(filename)
48 suppl = Chem.ForwardSDMolSupplier(cluster_file) 47 suppl = Chem.ForwardSDMolSupplier(cluster_file)
77 try: 76 try:
78 query_features = sucos.getRawFeatures(mol) 77 query_features = sucos.getRawFeatures(mol)
79 except: 78 except:
80 utils.log("WARNING: failed to generate features for molecule", mol_num, "in input") 79 utils.log("WARNING: failed to generate features for molecule", mol_num, "in input")
81 continue 80 continue
82 scores = [0, 0, 0] 81 scores_max = [0, 0, 0]
82 scores_cum = [0, 0, 0]
83 for clusterfilename in all_clusters: 83 for clusterfilename in all_clusters:
84 cluster = all_clusters[clusterfilename] 84 cluster = all_clusters[clusterfilename]
85 index = 0 85 index = 0
86 for entry in cluster: 86 for entry in cluster:
87 hit = entry[0] 87 hit = entry[0]
88 ref_features = entry[1] 88 ref_features = entry[1]
89 index += 1 89 index += 1
90 comparisons += 1 90 comparisons += 1
91 sucos_score, fm_score, vol_score = sucos.get_SucosScore(hit, mol, 91 sucos_score, fm_score, vol_score = sucos.get_SucosScore(hit, mol,
92 tani=False, ref_features=ref_features, query_features=query_features) 92 tani=False, ref_features=ref_features,
93 if mode == 'max': 93 query_features=query_features)
94 if sucos_score > scores[0]:
95 scores[0] = sucos_score
96 scores[1] = fm_score
97 scores[2] = vol_score
98 cluster_name = clusterfilename
99 cluster_index = index
100 elif mode == 'cum':
101 scores[0] += sucos_score
102 scores[1] += fm_score
103 scores[2] += vol_score
104 else:
105 raise ValueError("Invalid mode: " + mode)
106 94
107 if scores[0] > 0: 95 if sucos_score > scores_max[0]:
108 if mode == 'max': 96 scores_max[0] = sucos_score
109 cluster_file_name_only = cluster_name.split(os.sep)[-1] 97 scores_max[1] = fm_score
110 #utils.log("Max SuCOS:", scores[0], "FM:", scores[1], "P:", scores[2],"File:", cluster_file_name_only, "Index:", cluster_index) 98 scores_max[2] = vol_score
111 mol.SetDoubleProp("Max_SuCOS_Score", scores[0]) 99 cluster_name = clusterfilename
112 mol.SetDoubleProp("Max_SuCOS_FeatureMap_Score", scores[1]) 100 cluster_index = index
113 mol.SetDoubleProp("Max_SuCOS_Protrude_Score", scores[2])
114 mol.SetProp("Max_SuCOS_Cluster", cluster_file_name_only)
115 mol.SetIntProp("Max_SuCOS_Index", cluster_index)
116 101
117 else: 102 scores_cum[0] += sucos_score
118 #utils.log("Cum SuCOS:", scores[0], "FM:", scores[1], "P:", scores[2]) 103 scores_cum[1] += fm_score
119 mol.SetDoubleProp("Cum_SuCOS_Score", scores[0]) 104 scores_cum[2] += vol_score
120 mol.SetDoubleProp("Cum_SuCOS_FeatureMap_Score", scores[1])
121 mol.SetDoubleProp("Cum_SuCOS_Protrude_Score", scores[2])
122 105
123 writer.write(mol) 106 if scores_max[0] > 0:
107 cluster_file_name_only = cluster_name.split(os.sep)[-1]
108 # utils.log("Max SuCOS:", scores[0], "FM:", scores[1], "P:", scores[2],"File:", cluster_file_name_only, "Index:", cluster_index)
109 mol.SetDoubleProp("Max_SuCOS_Score", scores_max[0])
110 mol.SetDoubleProp("Max_SuCOS_FeatureMap_Score", scores_max[1])
111 mol.SetDoubleProp("Max_SuCOS_Protrude_Score", scores_max[2])
112 mol.SetProp("Max_SuCOS_Cluster", cluster_file_name_only)
113 mol.SetIntProp("Max_SuCOS_Index", cluster_index)
124 114
125 else: 115 # utils.log("Cum SuCOS:", scores[0], "FM:", scores[1], "P:", scores[2])
126 utils.log("Molecule", mol_num, "did not overlay. Omitting from results") 116 mol.SetDoubleProp("Cum_SuCOS_Score", scores_cum[0])
117 mol.SetDoubleProp("Cum_SuCOS_FeatureMap_Score", scores_cum[1])
118 mol.SetDoubleProp("Cum_SuCOS_Protrude_Score", scores_cum[2])
127 119
120 writer.write(mol)
128 121
129 input_file.close() 122 input_file.close()
130 writer.flush() 123 writer.flush()
131 writer.close() 124 writer.close()
132 output_file.close() 125 output_file.close()
138 131
139 def main(): 132 def main():
140 parser = argparse.ArgumentParser(description='Max SuCOS scores with RDKit') 133 parser = argparse.ArgumentParser(description='Max SuCOS scores with RDKit')
141 parser.add_argument('-i', '--input', help='Input file to score in SDF format. Can be gzipped (*.gz).') 134 parser.add_argument('-i', '--input', help='Input file to score in SDF format. Can be gzipped (*.gz).')
142 parser.add_argument('-o', '--output', help='Output file in SDF format. Can be gzipped (*.gz).') 135 parser.add_argument('-o', '--output', help='Output file in SDF format. Can be gzipped (*.gz).')
143 parser.add_argument('-m', '--mode', choices=['max', 'cum'],
144 default='max', help='Score mode: max = best score, cum = sum of all scores')
145 parser.add_argument('clusters', nargs='*', help="One or more SDF files with the clustered hits") 136 parser.add_argument('clusters', nargs='*', help="One or more SDF files with the clustered hits")
146 137
147 args = parser.parse_args() 138 args = parser.parse_args()
148 utils.log("Max SuCOS Args: ", args) 139 utils.log("Max SuCOS Args: ", args)
149 140
150 process(args.input, args.clusters, args.output, args.mode) 141 process(args.input, args.clusters, args.output)
151 142
152 143
153 if __name__ == "__main__": 144 if __name__ == "__main__":
154 main() 145 main()