comparison sucos_max.py @ 4:791c86130585 draft

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit c35334ca80c87a5078da1a6df85b34e23b80d837"
author bgruening
date Wed, 15 Apr 2020 09:26:30 -0400
parents bd12f4b4c3a8
children b8725fec8c7b
comparison
equal deleted inserted replaced
3:bd12f4b4c3a8 4:791c86130585
37 import sucos, utils 37 import sucos, utils
38 import argparse, gzip, os 38 import argparse, gzip, os
39 from rdkit import Chem 39 from rdkit import Chem
40 40
41 41
42 def process(inputfilename, clusterfilenames, outputfilename): 42 def process(inputfilename, clusterfilenames, outputfilename, filter_value, filter_field):
43 all_clusters = {} 43 all_clusters = {}
44 for filename in clusterfilenames: 44 for filename in clusterfilenames:
45 cluster = [] 45 cluster = []
46 cluster_file = utils.open_file_for_reading(filename) 46 cluster_file = utils.open_file_for_reading(filename)
47 suppl = Chem.ForwardSDMolSupplier(cluster_file) 47 suppl = Chem.ForwardSDMolSupplier(cluster_file)
78 except: 78 except:
79 utils.log("WARNING: failed to generate features for molecule", mol_num, "in input") 79 utils.log("WARNING: failed to generate features for molecule", mol_num, "in input")
80 continue 80 continue
81 scores_max = [0, 0, 0] 81 scores_max = [0, 0, 0]
82 scores_cum = [0, 0, 0] 82 scores_cum = [0, 0, 0]
83 cluster_name = None
83 for clusterfilename in all_clusters: 84 for clusterfilename in all_clusters:
84 cluster = all_clusters[clusterfilename] 85 cluster = all_clusters[clusterfilename]
85 index = 0 86 index = 0
86 for entry in cluster: 87 for entry in cluster:
87 hit = entry[0] 88 hit = entry[0]
102 scores_cum[0] += sucos_score 103 scores_cum[0] += sucos_score
103 scores_cum[1] += fm_score 104 scores_cum[1] += fm_score
104 scores_cum[2] += vol_score 105 scores_cum[2] += vol_score
105 106
106 107
107 cluster_file_name_only = cluster_name.split(os.sep)[-1]
108
109 # utils.log("Max SuCOS:", scores[0], "FM:", scores[1], "P:", scores[2],"File:", cluster_file_name_only, "Index:", cluster_index) 108 # utils.log("Max SuCOS:", scores[0], "FM:", scores[1], "P:", scores[2],"File:", cluster_file_name_only, "Index:", cluster_index)
110 mol.SetDoubleProp("Max_SuCOS_Score", scores_max[0] if scores_max[0] > 0 else 0) 109 mol.SetDoubleProp("Max_SuCOS_Score", scores_max[0] if scores_max[0] > 0 else 0)
111 mol.SetDoubleProp("Max_SuCOS_FeatureMap_Score", scores_max[1] if scores_max[1] > 0 else 0) 110 mol.SetDoubleProp("Max_SuCOS_FeatureMap_Score", scores_max[1] if scores_max[1] > 0 else 0)
112 mol.SetDoubleProp("Max_SuCOS_Protrude_Score", scores_max[2] if scores_max[2] > 0 else 0) 111 mol.SetDoubleProp("Max_SuCOS_Protrude_Score", scores_max[2] if scores_max[2] > 0 else 0)
113 mol.SetProp("Max_SuCOS_Cluster", cluster_file_name_only) 112
114 mol.SetIntProp("Max_SuCOS_Index", cluster_index) 113 if cluster_name:
114 cluster_file_name_only = cluster_name.split(os.sep)[-1]
115 mol.SetProp("Max_SuCOS_Cluster", cluster_file_name_only)
116 mol.SetIntProp("Max_SuCOS_Index", cluster_index)
115 117
116 # utils.log("Cum SuCOS:", scores[0], "FM:", scores[1], "P:", scores[2]) 118 # utils.log("Cum SuCOS:", scores[0], "FM:", scores[1], "P:", scores[2])
117 mol.SetDoubleProp("Cum_SuCOS_Score", scores_cum[0] if scores_cum[0] > 0 else 0) 119 mol.SetDoubleProp("Cum_SuCOS_Score", scores_cum[0] if scores_cum[0] > 0 else 0)
118 mol.SetDoubleProp("Cum_SuCOS_FeatureMap_Score", scores_cum[1] if scores_cum[1] > 0 else 0) 120 mol.SetDoubleProp("Cum_SuCOS_FeatureMap_Score", scores_cum[1] if scores_cum[1] > 0 else 0)
119 mol.SetDoubleProp("Cum_SuCOS_Protrude_Score", scores_cum[2] if scores_cum[2] > 0 else 0) 121 mol.SetDoubleProp("Cum_SuCOS_Protrude_Score", scores_cum[2] if scores_cum[2] > 0 else 0)
120 122
121 writer.write(mol) 123 if filter_value and filter_field:
124 if mol.HasProp(filter_field):
125 val = mol.GetDoubleProp(filter_field)
126 if val > filter_value:
127 writer.write(mol)
128 else:
129 writer.write(mol)
122 130
123 input_file.close() 131 input_file.close()
124 writer.flush() 132 writer.flush()
125 writer.close() 133 writer.close()
126 output_file.close() 134 output_file.close()
133 def main(): 141 def main():
134 parser = argparse.ArgumentParser(description='Max SuCOS scores with RDKit') 142 parser = argparse.ArgumentParser(description='Max SuCOS scores with RDKit')
135 parser.add_argument('-i', '--input', help='Input file to score in SDF format. Can be gzipped (*.gz).') 143 parser.add_argument('-i', '--input', help='Input file to score in SDF format. Can be gzipped (*.gz).')
136 parser.add_argument('-o', '--output', help='Output file in SDF format. Can be gzipped (*.gz).') 144 parser.add_argument('-o', '--output', help='Output file in SDF format. Can be gzipped (*.gz).')
137 parser.add_argument('clusters', nargs='*', help="One or more SDF files with the clustered hits") 145 parser.add_argument('clusters', nargs='*', help="One or more SDF files with the clustered hits")
146 parser.add_argument('--filter-value', type=float, help='Filter out values with scores less than this.')
147 parser.add_argument('--filter-field', help='Field to use to filter values.')
138 148
139 args = parser.parse_args() 149 args = parser.parse_args()
140 utils.log("Max SuCOS Args: ", args) 150 utils.log("Max SuCOS Args: ", args)
141 151
142 process(args.input, args.clusters, args.output) 152 process(args.input, args.clusters, args.output, args.filter_value, args.filter_field)
143 153
144 154
145 if __name__ == "__main__": 155 if __name__ == "__main__":
146 main() 156 main()