Mercurial > repos > bgruening > rdconf
diff rdkit_descriptors.py @ 0:5c501eb8d56c draft default tip
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
author | bgruening |
---|---|
date | Sat, 04 Dec 2021 16:39:31 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rdkit_descriptors.py Sat Dec 04 16:39:31 2021 +0000 @@ -0,0 +1,121 @@ +#!/usr/bin/env python + +import argparse +import inspect +import sys + +from rdkit import Chem +from rdkit.Chem import Descriptors + + +def get_supplier(infile, format="smiles"): + """ + Returns a generator over a SMILES or InChI file. Every element is of RDKit + molecule and has its original string as _Name property. + """ + with open(infile) as handle: + for line in handle: + line = line.strip() + if format == "smiles": + mol = Chem.MolFromSmiles(line, sanitize=True) + elif format == "inchi": + mol = Chem.inchi.MolFromInchi( + line, + sanitize=True, + removeHs=True, + logLevel=None, + treatWarningAsError=False, + ) + if mol is None: + yield False + else: + mol.SetProp("_Name", line.split("\t")[0]) + yield mol + + +def get_rdkit_descriptor_functions(): + """ + Returns all descriptor functions under the Chem.Descriptors Module as tuple of (name, function) + """ + ret = [ + (name, f) + for name, f in inspect.getmembers(Descriptors) + if inspect.isfunction(f) and not name.startswith("_") + ] + # some which are not in the official Descriptors module we need to add manually + ret.extend([("FormalCharge", Chem.GetFormalCharge), ("SSSR", Chem.GetSSSR)]) + ret.sort() + return ret + + +def descriptors(mol, functions): + """ + Calculates the descriptors of a given molecule. + """ + for name, function in functions: + yield (name, function(mol)) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("-i", "--infile", required=True, help="Path to the input file.") + parser.add_argument("--iformat", help="Specify the input file format.") + + parser.add_argument( + "-o", + "--outfile", + type=argparse.FileType("w+"), + default=sys.stdout, + help="path to the result file, default is stdout", + ) + + parser.add_argument( + "-s", + "--select", + default=None, + help="select a subset of comma-separated descriptors to use", + ) + + parser.add_argument( + "--header", + dest="header", + action="store_true", + default=False, + help="Write header line.", + ) + + args = parser.parse_args() + + if args.iformat == "sdf": + supplier = Chem.SDMolSupplier(args.infile) + elif args.iformat == "smi": + supplier = get_supplier(args.infile, format="smiles") + elif args.iformat == "inchi": + supplier = get_supplier(args.infile, format="inchi") + elif args.iformat == "pdb": + supplier = [Chem.MolFromPDBFile(args.infile)] + elif args.iformat == "mol2": + supplier = [Chem.MolFromMol2File(args.infile)] + + functions = get_rdkit_descriptor_functions() + if args.select and args.select != "None": + selected = args.select.split(",") + functions = [(name, f) for name, f in functions if name in selected] + + if args.header: + args.outfile.write( + "%s\n" % "\t".join(["MoleculeID"] + [name for name, f in functions]) + ) + + for mol in supplier: + if not mol: + continue + descs = descriptors(mol, functions) + try: + molecule_id = mol.GetProp("_Name") + except KeyError: + molecule_id = Chem.MolToSmiles(mol) + args.outfile.write( + "%s\n" + % "\t".join([molecule_id] + [str(round(res, 6)) for name, res in descs]) + )