Mercurial > repos > bgruening > sucos_max_score
changeset 2:2f110aef9b53 draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit 6f1ee2812cca091561a2b2e464498dae2f913b8d"
author | bgruening |
---|---|
date | Thu, 19 Mar 2020 09:43:31 -0400 |
parents | 8eab6d2b7bdf |
children | bf99565cec1f |
files | sucos.py sucos_max.py sucos_max.xml |
diffstat | 3 files changed, 56 insertions(+), 71 deletions(-) [+] |
line wrap: on
line diff
--- a/sucos.py Fri Oct 11 18:25:27 2019 -0400 +++ b/sucos.py Thu Mar 19 09:43:31 2020 -0400 @@ -109,17 +109,21 @@ fm_score = get_FeatureMapScore(ref_features, query_features, tani, score_mode) fm_score = np.clip(fm_score, 0, 1) - if tani: - tani_sim = 1 - float(rdShapeHelpers.ShapeTanimotoDist(ref_mol, query_mol)) - tani_sim = np.clip(tani_sim, 0, 1) - SuCOS_score = 0.5*fm_score + 0.5*tani_sim - return SuCOS_score, fm_score, tani_sim - else: - protrude_dist = rdShapeHelpers.ShapeProtrudeDist(ref_mol, query_mol, allowReordering=False) - protrude_dist = np.clip(protrude_dist, 0, 1) - protrude_val = 1.0 - protrude_dist - SuCOS_score = 0.5 * fm_score + 0.5 * protrude_val - return SuCOS_score, fm_score, protrude_val + try : + if tani: + tani_sim = 1 - float(rdShapeHelpers.ShapeTanimotoDist(ref_mol, query_mol)) + tani_sim = np.clip(tani_sim, 0, 1) + SuCOS_score = 0.5*fm_score + 0.5*tani_sim + return SuCOS_score, fm_score, tani_sim + else: + protrude_dist = rdShapeHelpers.ShapeProtrudeDist(ref_mol, query_mol, allowReordering=False) + protrude_dist = np.clip(protrude_dist, 0, 1) + protrude_val = 1.0 - protrude_dist + SuCOS_score = 0.5 * fm_score + 0.5 * protrude_val + return SuCOS_score, fm_score, protrude_val + except: + utils.log("Failed to calculate SuCOS scores. Returning 0,0,0") + return 0, 0, 0 def process(refmol_filename, inputs_filename, outputs_filename, refmol_index=None, refmol_format=None, tani=False, score_mode=FeatMaps.FeatMapScoreMode.All):
--- a/sucos_max.py Fri Oct 11 18:25:27 2019 -0400 +++ b/sucos_max.py Thu Mar 19 09:43:31 2020 -0400 @@ -39,8 +39,7 @@ from rdkit import Chem -def process(inputfilename, clusterfilenames, outputfilename, mode): - +def process(inputfilename, clusterfilenames, outputfilename): all_clusters = {} for filename in clusterfilenames: cluster = [] @@ -79,7 +78,8 @@ except: utils.log("WARNING: failed to generate features for molecule", mol_num, "in input") continue - scores = [0, 0, 0] + scores_max = [0, 0, 0] + scores_cum = [0, 0, 0] for clusterfilename in all_clusters: cluster = all_clusters[clusterfilename] index = 0 @@ -89,42 +89,35 @@ index += 1 comparisons += 1 sucos_score, fm_score, vol_score = sucos.get_SucosScore(hit, mol, - tani=False, ref_features=ref_features, query_features=query_features) - if mode == 'max': - if sucos_score > scores[0]: - scores[0] = sucos_score - scores[1] = fm_score - scores[2] = vol_score - cluster_name = clusterfilename - cluster_index = index - elif mode == 'cum': - scores[0] += sucos_score - scores[1] += fm_score - scores[2] += vol_score - else: - raise ValueError("Invalid mode: " + mode) + tani=False, ref_features=ref_features, + query_features=query_features) + + if sucos_score > scores_max[0]: + scores_max[0] = sucos_score + scores_max[1] = fm_score + scores_max[2] = vol_score + cluster_name = clusterfilename + cluster_index = index + + scores_cum[0] += sucos_score + scores_cum[1] += fm_score + scores_cum[2] += vol_score - if scores[0] > 0: - if mode == 'max': - cluster_file_name_only = cluster_name.split(os.sep)[-1] - #utils.log("Max SuCOS:", scores[0], "FM:", scores[1], "P:", scores[2],"File:", cluster_file_name_only, "Index:", cluster_index) - mol.SetDoubleProp("Max_SuCOS_Score", scores[0]) - mol.SetDoubleProp("Max_SuCOS_FeatureMap_Score", scores[1]) - mol.SetDoubleProp("Max_SuCOS_Protrude_Score", scores[2]) - mol.SetProp("Max_SuCOS_Cluster", cluster_file_name_only) - mol.SetIntProp("Max_SuCOS_Index", cluster_index) + if scores_max[0] > 0: + cluster_file_name_only = cluster_name.split(os.sep)[-1] + # utils.log("Max SuCOS:", scores[0], "FM:", scores[1], "P:", scores[2],"File:", cluster_file_name_only, "Index:", cluster_index) + mol.SetDoubleProp("Max_SuCOS_Score", scores_max[0]) + mol.SetDoubleProp("Max_SuCOS_FeatureMap_Score", scores_max[1]) + mol.SetDoubleProp("Max_SuCOS_Protrude_Score", scores_max[2]) + mol.SetProp("Max_SuCOS_Cluster", cluster_file_name_only) + mol.SetIntProp("Max_SuCOS_Index", cluster_index) - else: - #utils.log("Cum SuCOS:", scores[0], "FM:", scores[1], "P:", scores[2]) - mol.SetDoubleProp("Cum_SuCOS_Score", scores[0]) - mol.SetDoubleProp("Cum_SuCOS_FeatureMap_Score", scores[1]) - mol.SetDoubleProp("Cum_SuCOS_Protrude_Score", scores[2]) + # utils.log("Cum SuCOS:", scores[0], "FM:", scores[1], "P:", scores[2]) + mol.SetDoubleProp("Cum_SuCOS_Score", scores_cum[0]) + mol.SetDoubleProp("Cum_SuCOS_FeatureMap_Score", scores_cum[1]) + mol.SetDoubleProp("Cum_SuCOS_Protrude_Score", scores_cum[2]) - writer.write(mol) - - else: - utils.log("Molecule", mol_num, "did not overlay. Omitting from results") - + writer.write(mol) input_file.close() writer.flush() @@ -140,15 +133,13 @@ parser = argparse.ArgumentParser(description='Max SuCOS scores with RDKit') parser.add_argument('-i', '--input', help='Input file to score in SDF format. Can be gzipped (*.gz).') parser.add_argument('-o', '--output', help='Output file in SDF format. Can be gzipped (*.gz).') - parser.add_argument('-m', '--mode', choices=['max', 'cum'], - default='max', help='Score mode: max = best score, cum = sum of all scores') parser.add_argument('clusters', nargs='*', help="One or more SDF files with the clustered hits") args = parser.parse_args() utils.log("Max SuCOS Args: ", args) - process(args.input, args.clusters, args.output, args.mode) + process(args.input, args.clusters, args.output) if __name__ == "__main__": - main() \ No newline at end of file + main()
--- a/sucos_max.xml Fri Oct 11 18:25:27 2019 -0400 +++ b/sucos_max.xml Thu Mar 19 09:43:31 2020 -0400 @@ -1,4 +1,4 @@ -<tool id="sucos_max_score" name="Max SuCOS score" version="0.1.1"> +<tool id="sucos_max_score" name="Max SuCOS score" version="0.2.0"> <description>- determine maximum SuCOS score of ligands against clustered fragment hits</description> <macros> <import>sucos_macros.xml</import> @@ -8,7 +8,6 @@ python '$__tool_directory__/sucos_max.py' -i '$input' -o '$output' - -m $mode #for $cluster in $clusters '$cluster' #end for @@ -16,10 +15,6 @@ <inputs> <param name="input" type="data" format="sdf" label="Ligands to be scored" help="Input in SDF format." /> <param name="clusters" type="data" format="sdf" multiple="true" label="Set of clusters to score against" help="Clusters in SDF format." /> - <param name="mode" type="select" value="max" label="Mode"> - <option value="max">Max score</option> - <option value="cum">Cumulative score</option> - </param> </inputs> <outputs> <data format="sdf" name="output" label="The scored ligands"/> @@ -28,19 +23,9 @@ <test> <param name="input" ftype="sdf" value="sucos_cluster.sdf"/> <param name="clusters" ftype="sdf" value="cluster1.sdf,cluster2.sdf,cluster3.sdf,cluster4.sdf,cluster5.sdf,cluster6.sdf"/> - <param name="mode" value="max"/> <output name="output" ftype="sdf"> <assert_contents> <has_text text="Max_SuCOS_Score" /> - </assert_contents> - </output> - </test> - <test> - <param name="input" ftype="sdf" value="sucos_cluster.sdf"/> - <param name="clusters" ftype="sdf" value="cluster1.sdf,cluster2.sdf,cluster3.sdf,cluster4.sdf,cluster5.sdf,cluster6.sdf"/> - <param name="mode" value="cum"/> - <output name="output" ftype="sdf"> - <assert_contents> <has_text text="Cum_SuCOS_Score" /> </assert_contents> </output> @@ -73,11 +58,16 @@ **Output** -The same SD file as the input ligands with a "Max_SuCOS_Score" property added containing the best (maximum) SuCOS score -along with the "Max_SuCOS_FeatureMap_Score" and "Max_SuCOS_Tanimoto_Score" of that comparison. +The same SD file as the input ligands with the following properties added: -In addition, the "Max_SuCOS_Cluster" field shows the name of the cluster file that contained the molecule with this best -score and the "Max_SuCOS_Index" shows the index (first record is index 1) of that molecule in the file. +* Max_SuCOS_Score - the best (maximum) SuCOS score +* Max_SuCOS_FeatureMap_Score - the corresponding FeatureMap score +* Max_SuCOS_Protrude_Score - the corresponding Protrude score +* Max_SuCOS_Cluster - the file name of the cluster that contained the max score +* Max_SuCOS_Index - the index of the cluster that contained the max score (first record is index 1) +* Cum_SuCOS_Score - the cumulative SuCOS score for all overlays (the sum of the individual scores) +* Cum_SuCOS_FeatureMap_Score - the corresponding FeatureMap score +* Cum_SuCOS_Protrude_Score - the corresponding Protrude score ]]></help> <expand macro="citations"/>