# HG changeset patch # User bgruening # Date 1613566783 0 # Node ID a1c53f0533b05147b432f48dd86886c203050901 # Parent cf725c82c86503c77a9f3429e49228198364a68d "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 4d0bfcf37bfbedafc7ff0672dfe452766ca8a606" diff -r cf725c82c865 -r a1c53f0533b0 dimorphite_dl.py --- a/dimorphite_dl.py Tue Jul 28 08:43:19 2020 -0400 +++ b/dimorphite_dl.py Wed Feb 17 12:59:43 2021 +0000 @@ -1,3 +1,4 @@ +# flake8: noqa # Copyright 2018 Jacob D. Durrant # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -18,7 +19,6 @@ """ from __future__ import print_function -import copy import os import argparse import sys diff -r cf725c82c865 -r a1c53f0533b0 rdkit_descriptors.py --- a/rdkit_descriptors.py Tue Jul 28 08:43:19 2020 -0400 +++ b/rdkit_descriptors.py Wed Feb 17 12:59:43 2021 +0000 @@ -1,44 +1,49 @@ #!/usr/bin/env python -from rdkit.Chem import Descriptors -from rdkit import Chem -import sys, os, re import argparse import inspect +import sys -def get_supplier( infile, format = 'smiles' ): +from rdkit import Chem +from rdkit.Chem import Descriptors + + +def get_supplier(infile, format='smiles'): """ - Returns a generator over a SMILES or InChI file. Every element is of RDKit + Returns a generator over a SMILES or InChI file. Every element is of RDKit molecule and has its original string as _Name property. """ with open(infile) as handle: for line in handle: line = line.strip() if format == 'smiles': - mol = Chem.MolFromSmiles( line, sanitize=True ) + mol = Chem.MolFromSmiles(line, sanitize=True) elif format == 'inchi': - mol = Chem.inchi.MolFromInchi( line, sanitize=True, removeHs=True, logLevel=None, treatWarningAsError=False ) + mol = Chem.inchi.MolFromInchi(line, sanitize=True, removeHs=True, logLevel=None, treatWarningAsError=False) if mol is None: yield False else: - mol.SetProp( '_Name', line.split('\t')[0] ) + mol.SetProp('_Name', line.split('\t')[0]) yield mol + def get_rdkit_descriptor_functions(): """ Returns all descriptor functions under the Chem.Descriptors Module as tuple of (name, function) """ - ret = [ (name, f) for name, f in inspect.getmembers( Descriptors ) if inspect.isfunction( f ) and not name.startswith( '_' ) ] + ret = [(name, f) for name, f in inspect.getmembers(Descriptors) if inspect.isfunction(f) and not name.startswith('_')] + # some which are not in the official Descriptors module we need to add manually + ret.extend([('FormalCharge', Chem.GetFormalCharge), ('SSSR', Chem.GetSSSR)]) ret.sort() return ret -def descriptors( mol, functions ): +def descriptors(mol, functions): """ Calculates the descriptors of a given molecule. """ for name, function in functions: - yield (name, function( mol )) + yield (name, function(mol)) if __name__ == "__main__": @@ -46,31 +51,44 @@ parser.add_argument('-i', '--infile', required=True, help='Path to the input file.') parser.add_argument("--iformat", help="Specify the input file format.") - parser.add_argument('-o', '--outfile', type=argparse.FileType('w+'), - default=sys.stdout, help="path to the result file, default it sdtout") + parser.add_argument('-o', '--outfile', type=argparse.FileType('w+'), + default=sys.stdout, + help="path to the result file, default is stdout") + + parser.add_argument('-s', '--select', default=None, + help="select a subset of comma-separated descriptors to use") parser.add_argument("--header", dest="header", action="store_true", - default=False, - help="Write header line.") + default=False, + help="Write header line.") args = parser.parse_args() if args.iformat == 'sdf': - supplier = Chem.SDMolSupplier( args.infile ) - elif args.iformat =='smi': - supplier = get_supplier( args.infile, format = 'smiles' ) + supplier = Chem.SDMolSupplier(args.infile) + elif args.iformat == 'smi': + supplier = get_supplier(args.infile, format='smiles') elif args.iformat == 'inchi': - supplier = get_supplier( args.infile, format = 'inchi' ) + supplier = get_supplier(args.infile, format='inchi') + elif args.iformat == 'pdb': + supplier = [Chem.MolFromPDBFile(args.infile)] + elif args.iformat == 'mol2': + supplier = [Chem.MolFromMol2File(args.infile)] functions = get_rdkit_descriptor_functions() + if args.select and args.select != 'None': + selected = args.select.split(',') + functions = [(name, f) for name, f in functions if name in selected] if args.header: - args.outfile.write( '%s\n' % '\t'.join( ['MoleculeID'] + [name for name, f in functions] ) ) + args.outfile.write('%s\n' % '\t'.join(['MoleculeID'] + [name for name, f in functions])) for mol in supplier: if not mol: continue - descs = descriptors( mol, functions ) - molecule_id = mol.GetProp("_Name") - args.outfile.write( "%s\n" % '\t'.join( [molecule_id]+ [str(round(res, 6)) for name, res in descs] ) ) - + descs = descriptors(mol, functions) + try: + molecule_id = mol.GetProp("_Name") + except KeyError: + molecule_id = Chem.MolToSmiles(mol) + args.outfile.write("%s\n" % '\t'.join([molecule_id] + [str(round(res, 6)) for name, res in descs])) diff -r cf725c82c865 -r a1c53f0533b0 rdkit_descriptors.xml --- a/rdkit_descriptors.xml Tue Jul 28 08:43:19 2020 -0400 +++ b/rdkit_descriptors.xml Wed Feb 17 12:59:43 2021 +0000 @@ -1,9 +1,9 @@ + calculated with RDKit 2020.03.4 - 0 + 1 - calculated with RDKit rdkit @@ -14,14 +14,222 @@ -i '${infile}' --iformat '${infile.ext}' -o '${outfile}' + #if $select_multiple: + -s '${select_multiple}' + #end if $header ]]> - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -32,6 +240,18 @@ + + + + + + + + + + + +