Mercurial > repos > bgruening > enumerate_charges
comparison rdconf.py @ 5:67ee76f0e497 draft default tip
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
| author | bgruening |
|---|---|
| date | Sat, 04 Dec 2021 16:40:23 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 4:bbbf5fb356dd | 5:67ee76f0e497 |
|---|---|
| 1 #!/usr/bin/python3 | |
| 2 | |
| 3 import gzip | |
| 4 import os | |
| 5 import sys | |
| 6 from optparse import OptionParser | |
| 7 | |
| 8 from rdkit.Chem import AllChem as Chem | |
| 9 | |
| 10 """ | |
| 11 This script was originally written by David Koes, University of Pittsburgh: | |
| 12 https://github.com/dkoes/rdkit-scripts/blob/master/rdconf.py | |
| 13 It is licensed under the MIT licence. | |
| 14 | |
| 15 Given a smiles file, generate 3D conformers in output sdf. | |
| 16 Energy minimizes and filters conformers to meet energy window and rms constraints. | |
| 17 | |
| 18 Some time ago I compared this to alternative conformer generators and | |
| 19 it was quite competitive (especially after RDKit's UFF implementation | |
| 20 added OOP terms). | |
| 21 """ | |
| 22 | |
| 23 | |
| 24 # convert smiles to sdf | |
| 25 def getRMS(mol, c1, c2): | |
| 26 rms = Chem.GetBestRMS(mol, mol, c1, c2) | |
| 27 return rms | |
| 28 | |
| 29 | |
| 30 parser = OptionParser(usage="Usage: %prog [options] <input>.smi <output>.sdf") | |
| 31 parser.add_option( | |
| 32 "--maxconfs", | |
| 33 dest="maxconfs", | |
| 34 action="store", | |
| 35 help="maximum number of conformers to generate per a molecule (default 20)", | |
| 36 default="20", | |
| 37 type="int", | |
| 38 metavar="CNT", | |
| 39 ) | |
| 40 parser.add_option( | |
| 41 "--sample_multiplier", | |
| 42 dest="sample", | |
| 43 action="store", | |
| 44 help="sample N*maxconfs conformers and choose the maxconformers with lowest energy (default 1)", | |
| 45 default="1", | |
| 46 type="float", | |
| 47 metavar="N", | |
| 48 ) | |
| 49 parser.add_option( | |
| 50 "--seed", | |
| 51 dest="seed", | |
| 52 action="store", | |
| 53 help="random seed (default 9162006)", | |
| 54 default="9162006", | |
| 55 type="int", | |
| 56 metavar="s", | |
| 57 ) | |
| 58 parser.add_option( | |
| 59 "--rms_threshold", | |
| 60 dest="rms", | |
| 61 action="store", | |
| 62 help="filter based on rms (default 0.7)", | |
| 63 default="0.7", | |
| 64 type="float", | |
| 65 metavar="R", | |
| 66 ) | |
| 67 parser.add_option( | |
| 68 "--energy_window", | |
| 69 dest="energy", | |
| 70 action="store", | |
| 71 help="filter based on energy difference with lowest energy conformer", | |
| 72 default="10", | |
| 73 type="float", | |
| 74 metavar="E", | |
| 75 ) | |
| 76 parser.add_option( | |
| 77 "-v", | |
| 78 "--verbose", | |
| 79 dest="verbose", | |
| 80 action="store_true", | |
| 81 default=False, | |
| 82 help="verbose output", | |
| 83 ) | |
| 84 parser.add_option( | |
| 85 "--mmff", | |
| 86 dest="mmff", | |
| 87 action="store_true", | |
| 88 default=False, | |
| 89 help="use MMFF forcefield instead of UFF", | |
| 90 ) | |
| 91 parser.add_option( | |
| 92 "--nomin", | |
| 93 dest="nomin", | |
| 94 action="store_true", | |
| 95 default=False, | |
| 96 help="don't perform energy minimization (bad idea)", | |
| 97 ) | |
| 98 parser.add_option( | |
| 99 "--etkdg", | |
| 100 dest="etkdg", | |
| 101 action="store_true", | |
| 102 default=False, | |
| 103 help="use new ETKDG knowledge-based method instead of distance geometry", | |
| 104 ) | |
| 105 | |
| 106 | |
| 107 (options, args) = parser.parse_args() | |
| 108 | |
| 109 if len(args) < 2: | |
| 110 parser.error("Need input and output") | |
| 111 sys.exit(-1) | |
| 112 | |
| 113 input = args[0] | |
| 114 output = args[1] | |
| 115 smifile = open(input) | |
| 116 if options.verbose: | |
| 117 print("Generating a maximum of", options.maxconfs, "per a mol") | |
| 118 | |
| 119 if options.etkdg and not Chem.ETKDG: | |
| 120 print("ETKDB does not appear to be implemented. Please upgrade RDKit.") | |
| 121 sys.exit(1) | |
| 122 | |
| 123 split = os.path.splitext(output) | |
| 124 if split[1] == ".gz": | |
| 125 outf = gzip.open(output, "wt+") | |
| 126 output = split[0] # strip .gz | |
| 127 else: | |
| 128 outf = open(output, "w+") | |
| 129 | |
| 130 | |
| 131 if os.path.splitext(output)[1] == ".pdb": | |
| 132 sdwriter = Chem.PDBWriter(outf) | |
| 133 else: | |
| 134 sdwriter = Chem.SDWriter(outf) | |
| 135 | |
| 136 if sdwriter is None: | |
| 137 print("Could not open ".output) | |
| 138 sys.exit(-1) | |
| 139 | |
| 140 for line in smifile: | |
| 141 toks = line.split() | |
| 142 smi = toks[0] | |
| 143 name = " ".join(toks[1:]) | |
| 144 | |
| 145 pieces = smi.split(".") | |
| 146 if len(pieces) > 1: | |
| 147 smi = max(pieces, key=len) # take largest component by length | |
| 148 print("Taking largest component: %s\t%s" % (smi, name)) | |
| 149 | |
| 150 mol = Chem.MolFromSmiles(smi) | |
| 151 if mol is not None: | |
| 152 if options.verbose: | |
| 153 print(smi) | |
| 154 try: | |
| 155 Chem.SanitizeMol(mol) | |
| 156 mol = Chem.AddHs(mol) | |
| 157 mol.SetProp("_Name", name) | |
| 158 | |
| 159 if options.etkdg: | |
| 160 cids = Chem.EmbedMultipleConfs( | |
| 161 mol, int(options.sample * options.maxconfs), Chem.ETKDG() | |
| 162 ) | |
| 163 else: | |
| 164 cids = Chem.EmbedMultipleConfs( | |
| 165 mol, int(options.sample * options.maxconfs), randomSeed=options.seed | |
| 166 ) | |
| 167 if options.verbose: | |
| 168 print(len(cids), "conformers found") | |
| 169 cenergy = [] | |
| 170 for conf in cids: | |
| 171 # not passing confID only minimizes the first conformer | |
| 172 if options.nomin: | |
| 173 cenergy.append(conf) | |
| 174 elif options.mmff: | |
| 175 converged = Chem.MMFFOptimizeMolecule(mol, confId=conf) | |
| 176 mp = Chem.MMFFGetMoleculeProperties(mol) | |
| 177 cenergy.append( | |
| 178 Chem.MMFFGetMoleculeForceField( | |
| 179 mol, mp, confId=conf | |
| 180 ).CalcEnergy() | |
| 181 ) | |
| 182 else: | |
| 183 converged = not Chem.UFFOptimizeMolecule(mol, confId=conf) | |
| 184 cenergy.append( | |
| 185 Chem.UFFGetMoleculeForceField(mol, confId=conf).CalcEnergy() | |
| 186 ) | |
| 187 if options.verbose: | |
| 188 print("Convergence of conformer", conf, converged) | |
| 189 | |
| 190 mol = Chem.RemoveHs(mol) | |
| 191 sortedcids = sorted(cids, key=lambda cid: cenergy[cid]) | |
| 192 if len(sortedcids) > 0: | |
| 193 mine = cenergy[sortedcids[0]] | |
| 194 else: | |
| 195 mine = 0 | |
| 196 if options.rms == 0: | |
| 197 cnt = 0 | |
| 198 for conf in sortedcids: | |
| 199 if cnt >= options.maxconfs: | |
| 200 break | |
| 201 if (options.energy < 0) or cenergy[conf] - mine <= options.energy: | |
| 202 sdwriter.write(mol, conf) | |
| 203 cnt += 1 | |
| 204 else: | |
| 205 written = {} | |
| 206 for conf in sortedcids: | |
| 207 if len(written) >= options.maxconfs: | |
| 208 break | |
| 209 # check rmsd | |
| 210 passed = True | |
| 211 for seenconf in written.keys(): | |
| 212 rms = getRMS(mol, seenconf, conf) | |
| 213 if (rms < options.rms) or ( | |
| 214 options.energy > 0 and cenergy[conf] - mine > options.energy | |
| 215 ): | |
| 216 passed = False | |
| 217 break | |
| 218 if passed: | |
| 219 written[conf] = True | |
| 220 sdwriter.write(mol, conf) | |
| 221 except (KeyboardInterrupt, SystemExit): | |
| 222 raise | |
| 223 except Exception as e: | |
| 224 print("Exception", e) | |
| 225 else: | |
| 226 print("ERROR:", smi) | |
| 227 | |
| 228 sdwriter.close() | |
| 229 outf.close() |
