Mercurial > repos > bgruening > sucos_clustering
diff sucos_max.py @ 6:b8725fec8c7b draft default tip
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit 05dc325ce687441e5d3bdbdedcc0e3529cd5e070"
author | bgruening |
---|---|
date | Wed, 14 Apr 2021 09:30:48 +0000 |
parents | 791c86130585 |
children |
line wrap: on
line diff
--- a/sucos_max.py Tue Jul 28 08:48:16 2020 -0400 +++ b/sucos_max.py Wed Apr 14 09:30:48 2021 +0000 @@ -34,12 +34,17 @@ Publication: https://doi.org/10.26434/chemrxiv.8100203.v1 """ -import sucos, utils -import argparse, gzip, os +import argparse +import os + +import sucos +import utils from rdkit import Chem -def process(inputfilename, clusterfilenames, outputfilename, filter_value, filter_field): +def process( + inputfilename, clusterfilenames, outputfilename, filter_value, filter_field +): all_clusters = {} for filename in clusterfilenames: cluster = [] @@ -49,13 +54,20 @@ for mol in suppl: i += 1 if not mol: - utils.log("WARNING: failed to generate molecule", i, "in cluster", filename) + utils.log( + "WARNING: failed to generate molecule", i, "in cluster", filename + ) continue try: features = sucos.getRawFeatures(mol) cluster.append((mol, features)) - except: - utils.log("WARNING: failed to generate features for molecule", i, "in cluster", filename) + except Exception: + utils.log( + "WARNING: failed to generate features for molecule", + i, + "in cluster", + filename, + ) cluster_file.close() all_clusters[filename] = cluster @@ -75,8 +87,10 @@ continue try: query_features = sucos.getRawFeatures(mol) - except: - utils.log("WARNING: failed to generate features for molecule", mol_num, "in input") + except Exception: + utils.log( + "WARNING: failed to generate features for molecule", mol_num, "in input" + ) continue scores_max = [0, 0, 0] scores_cum = [0, 0, 0] @@ -89,9 +103,13 @@ ref_features = entry[1] index += 1 comparisons += 1 - sucos_score, fm_score, vol_score = sucos.get_SucosScore(hit, mol, - tani=False, ref_features=ref_features, - query_features=query_features) + sucos_score, fm_score, vol_score = sucos.get_SucosScore( + hit, + mol, + tani=False, + ref_features=ref_features, + query_features=query_features, + ) if sucos_score > scores_max[0]: scores_max[0] = sucos_score @@ -104,11 +122,14 @@ scores_cum[1] += fm_score scores_cum[2] += vol_score - # utils.log("Max SuCOS:", scores[0], "FM:", scores[1], "P:", scores[2],"File:", cluster_file_name_only, "Index:", cluster_index) mol.SetDoubleProp("Max_SuCOS_Score", scores_max[0] if scores_max[0] > 0 else 0) - mol.SetDoubleProp("Max_SuCOS_FeatureMap_Score", scores_max[1] if scores_max[1] > 0 else 0) - mol.SetDoubleProp("Max_SuCOS_Protrude_Score", scores_max[2] if scores_max[2] > 0 else 0) + mol.SetDoubleProp( + "Max_SuCOS_FeatureMap_Score", scores_max[1] if scores_max[1] > 0 else 0 + ) + mol.SetDoubleProp( + "Max_SuCOS_Protrude_Score", scores_max[2] if scores_max[2] > 0 else 0 + ) if cluster_name: cluster_file_name_only = cluster_name.split(os.sep)[-1] @@ -117,8 +138,12 @@ # utils.log("Cum SuCOS:", scores[0], "FM:", scores[1], "P:", scores[2]) mol.SetDoubleProp("Cum_SuCOS_Score", scores_cum[0] if scores_cum[0] > 0 else 0) - mol.SetDoubleProp("Cum_SuCOS_FeatureMap_Score", scores_cum[1] if scores_cum[1] > 0 else 0) - mol.SetDoubleProp("Cum_SuCOS_Protrude_Score", scores_cum[2] if scores_cum[2] > 0 else 0) + mol.SetDoubleProp( + "Cum_SuCOS_FeatureMap_Score", scores_cum[1] if scores_cum[1] > 0 else 0 + ) + mol.SetDoubleProp( + "Cum_SuCOS_Protrude_Score", scores_cum[2] if scores_cum[2] > 0 else 0 + ) if filter_value and filter_field: if mol.HasProp(filter_field): @@ -136,20 +161,35 @@ utils.log("Completed", comparisons, "comparisons") -### start main execution ######################################### +# start main execution ######################################### + def main(): - parser = argparse.ArgumentParser(description='Max SuCOS scores with RDKit') - parser.add_argument('-i', '--input', help='Input file to score in SDF format. Can be gzipped (*.gz).') - parser.add_argument('-o', '--output', help='Output file in SDF format. Can be gzipped (*.gz).') - parser.add_argument('clusters', nargs='*', help="One or more SDF files with the clustered hits") - parser.add_argument('--filter-value', type=float, help='Filter out values with scores less than this.') - parser.add_argument('--filter-field', help='Field to use to filter values.') + parser = argparse.ArgumentParser(description="Max SuCOS scores with RDKit") + parser.add_argument( + "-i", + "--input", + help="Input file to score in SDF format. Can be gzipped (*.gz).", + ) + parser.add_argument( + "-o", "--output", help="Output file in SDF format. Can be gzipped (*.gz)." + ) + parser.add_argument( + "clusters", nargs="*", help="One or more SDF files with the clustered hits" + ) + parser.add_argument( + "--filter-value", + type=float, + help="Filter out values with scores less than this.", + ) + parser.add_argument("--filter-field", help="Field to use to filter values.") args = parser.parse_args() utils.log("Max SuCOS Args: ", args) - process(args.input, args.clusters, args.output, args.filter_value, args.filter_field) + process( + args.input, args.clusters, args.output, args.filter_value, args.filter_field + ) if __name__ == "__main__":