Mercurial > repos > bgruening > ctb_rdkit_descriptors
comparison dimorphite_dl.py @ 9:0993ac4f4a23 draft default tip
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
| author | bgruening | 
|---|---|
| date | Sat, 04 Dec 2021 16:40:00 +0000 | 
| parents | a1c53f0533b0 | 
| children | 
   comparison
  equal
  deleted
  inserted
  replaced
| 8:a1c53f0533b0 | 9:0993ac4f4a23 | 
|---|---|
| 17 This script identifies and enumerates the possible protonation sites of SMILES | 17 This script identifies and enumerates the possible protonation sites of SMILES | 
| 18 strings. | 18 strings. | 
| 19 """ | 19 """ | 
| 20 | 20 | 
| 21 from __future__ import print_function | 21 from __future__ import print_function | 
| 22 | |
| 23 import argparse | |
| 22 import os | 24 import os | 
| 23 import argparse | |
| 24 import sys | 25 import sys | 
| 25 | 26 | 
| 26 try: | 27 try: | 
| 27 # Python2 | 28 # Python2 | 
| 28 from StringIO import StringIO | 29 from StringIO import StringIO | 
| 41 | 42 | 
| 42 try: | 43 try: | 
| 43 import rdkit | 44 import rdkit | 
| 44 from rdkit import Chem | 45 from rdkit import Chem | 
| 45 from rdkit.Chem import AllChem | 46 from rdkit.Chem import AllChem | 
| 46 except: | 47 except Exception: | 
| 47 msg = "Dimorphite-DL requires RDKit. See https://www.rdkit.org/" | 48 msg = "Dimorphite-DL requires RDKit. See https://www.rdkit.org/" | 
| 48 print(msg) | 49 print(msg) | 
| 49 raise Exception(msg) | 50 raise Exception(msg) | 
| 51 | |
| 50 | 52 | 
| 51 def main(params=None): | 53 def main(params=None): | 
| 52 """The main definition run when you call the script from the commandline. | 54 """The main definition run when you call the script from the commandline. | 
| 53 | 55 | 
| 54 :param params: The parameters to use. Entirely optional. If absent, | 56 :param params: The parameters to use. Entirely optional. If absent, | 
| 82 if "output_file" in args and args["output_file"] is not None: | 84 if "output_file" in args and args["output_file"] is not None: | 
| 83 # An output file was specified, so write to that. | 85 # An output file was specified, so write to that. | 
| 84 with open(args["output_file"], "w") as file: | 86 with open(args["output_file"], "w") as file: | 
| 85 for protonated_smi in Protonate(args): | 87 for protonated_smi in Protonate(args): | 
| 86 file.write(protonated_smi + "\n") | 88 file.write(protonated_smi + "\n") | 
| 87 elif "return_as_list" in args and args["return_as_list"] == True: | 89 elif "return_as_list" in args and args["return_as_list"]: | 
| 88 return list(Protonate(args)) | 90 return list(Protonate(args)) | 
| 89 else: | 91 else: | 
| 90 # No output file specified. Just print it to the screen. | 92 # No output file specified. Just print it to the screen. | 
| 91 for protonated_smi in Protonate(args): | 93 for protonated_smi in Protonate(args): | 
| 92 print(protonated_smi) | 94 print(protonated_smi) | 
| 93 | 95 | 
| 96 | |
| 94 class MyParser(argparse.ArgumentParser): | 97 class MyParser(argparse.ArgumentParser): | 
| 95 """Overwrite default parse so it displays help file on error. See | 98 """Overwrite default parse so it displays help file on error. See | 
| 96 https://stackoverflow.com/questions/4042452/display-help-message-with-python-argparse-when-script-is-called-without-any-argu""" | 99 https://stackoverflow.com/questions/4042452/display-help-message-with-python-argparse-when-script-is-called-without-any-argu""" | 
| 97 | 100 | 
| 98 def error(self, message): | 101 def error(self, message): | 
| 115 print("") | 118 print("") | 
| 116 | 119 | 
| 117 if file is None: | 120 if file is None: | 
| 118 file = sys.stdout | 121 file = sys.stdout | 
| 119 self._print_message(self.format_help(), file) | 122 self._print_message(self.format_help(), file) | 
| 120 print(""" | 123 print( | 
| 124 """ | |
| 121 examples: | 125 examples: | 
| 122 python dimorphite_dl.py --smiles_file sample_molecules.smi | 126 python dimorphite_dl.py --smiles_file sample_molecules.smi | 
| 123 python dimorphite_dl.py --smiles "CCC(=O)O" --min_ph -3.0 --max_ph -2.0 | 127 python dimorphite_dl.py --smiles "CCC(=O)O" --min_ph -3.0 --max_ph -2.0 | 
| 124 python dimorphite_dl.py --smiles "CCCN" --min_ph -3.0 --max_ph -2.0 --output_file output.smi | 128 python dimorphite_dl.py --smiles "CCCN" --min_ph -3.0 --max_ph -2.0 --output_file output.smi | 
| 125 python dimorphite_dl.py --smiles_file sample_molecules.smi --pka_precision 2.0 --label_states | 129 python dimorphite_dl.py --smiles_file sample_molecules.smi --pka_precision 2.0 --label_states | 
| 126 python dimorphite_dl.py --test""") | 130 python dimorphite_dl.py --test""" | 
| 131 ) | |
| 127 print("") | 132 print("") | 
| 133 | |
| 128 | 134 | 
| 129 class ArgParseFuncs: | 135 class ArgParseFuncs: | 
| 130 """A namespace for storing functions that are useful for processing | 136 """A namespace for storing functions that are useful for processing | 
| 131 command-line arguments. To keep things organized.""" | 137 command-line arguments. To keep things organized.""" | 
| 132 | 138 | 
| 135 """Gets the arguments from the command line. | 141 """Gets the arguments from the command line. | 
| 136 | 142 | 
| 137 :return: A parser object. | 143 :return: A parser object. | 
| 138 """ | 144 """ | 
| 139 | 145 | 
| 140 parser = MyParser(description="Dimorphite 1.2: Creates models of " + | 146 parser = MyParser( | 
| 141 "appropriately protonated small moleucles. " + | 147 description="Dimorphite 1.2: Creates models of " | 
| 142 "Apache 2.0 License. Copyright 2018 Jacob D. " + | 148 + "appropriately protonated small moleucles. " | 
| 143 "Durrant.") | 149 + "Apache 2.0 License. Copyright 2018 Jacob D. " | 
| 144 parser.add_argument('--min_ph', metavar='MIN', type=float, default=6.4, | 150 + "Durrant." | 
| 145 help='minimum pH to consider (default: 6.4)') | 151 ) | 
| 146 parser.add_argument('--max_ph', metavar='MAX', type=float, default=8.4, | 152 parser.add_argument( | 
| 147 help='maximum pH to consider (default: 8.4)') | 153 "--min_ph", | 
| 148 parser.add_argument('--pka_precision', metavar='PRE', type=float, default=1.0, | 154 metavar="MIN", | 
| 149 help='pKa precision factor (number of standard devations, default: 1.0)') | 155 type=float, | 
| 150 parser.add_argument('--smiles', metavar='SMI', type=str, | 156 default=6.4, | 
| 151 help='SMILES string to protonate') | 157 help="minimum pH to consider (default: 6.4)", | 
| 152 parser.add_argument('--smiles_file', metavar="FILE", type=str, | 158 ) | 
| 153 help='file that contains SMILES strings to protonate') | 159 parser.add_argument( | 
| 154 parser.add_argument('--output_file', metavar="FILE", type=str, | 160 "--max_ph", | 
| 155 help='output file to write protonated SMILES (optional)') | 161 metavar="MAX", | 
| 156 parser.add_argument('--label_states', action="store_true", | 162 type=float, | 
| 157 help='label protonated SMILES with target state ' + \ | 163 default=8.4, | 
| 158 '(i.e., "DEPROTONATED", "PROTONATED", or "BOTH").') | 164 help="maximum pH to consider (default: 8.4)", | 
| 159 parser.add_argument('--test', action="store_true", | 165 ) | 
| 160 help='run unit tests (for debugging)') | 166 parser.add_argument( | 
| 167 "--pka_precision", | |
| 168 metavar="PRE", | |
| 169 type=float, | |
| 170 default=1.0, | |
| 171 help="pKa precision factor (number of standard devations, default: 1.0)", | |
| 172 ) | |
| 173 parser.add_argument( | |
| 174 "--smiles", metavar="SMI", type=str, help="SMILES string to protonate" | |
| 175 ) | |
| 176 parser.add_argument( | |
| 177 "--smiles_file", | |
| 178 metavar="FILE", | |
| 179 type=str, | |
| 180 help="file that contains SMILES strings to protonate", | |
| 181 ) | |
| 182 parser.add_argument( | |
| 183 "--output_file", | |
| 184 metavar="FILE", | |
| 185 type=str, | |
| 186 help="output file to write protonated SMILES (optional)", | |
| 187 ) | |
| 188 parser.add_argument( | |
| 189 "--label_states", | |
| 190 action="store_true", | |
| 191 help="label protonated SMILES with target state " | |
| 192 + '(i.e., "DEPROTONATED", "PROTONATED", or "BOTH").', | |
| 193 ) | |
| 194 parser.add_argument( | |
| 195 "--test", action="store_true", help="run unit tests (for debugging)" | |
| 196 ) | |
| 161 | 197 | 
| 162 return parser | 198 return parser | 
| 163 | 199 | 
| 164 @staticmethod | 200 @staticmethod | 
| 165 def clean_args(args): | 201 def clean_args(args): | 
| 168 :param args: A dictionary containing the arguments. | 204 :param args: A dictionary containing the arguments. | 
| 169 :type args: dict | 205 :type args: dict | 
| 170 :raises Exception: No SMILES in params. | 206 :raises Exception: No SMILES in params. | 
| 171 """ | 207 """ | 
| 172 | 208 | 
| 173 defaults = {'min_ph' : 6.4, | 209 defaults = { | 
| 174 'max_ph' : 8.4, | 210 "min_ph": 6.4, | 
| 175 'pka_precision' : 1.0, | 211 "max_ph": 8.4, | 
| 176 'label_states' : False, | 212 "pka_precision": 1.0, | 
| 177 'test' : False} | 213 "label_states": False, | 
| 214 "test": False, | |
| 215 } | |
| 178 | 216 | 
| 179 for key in defaults: | 217 for key in defaults: | 
| 180 if key not in args: | 218 if key not in args: | 
| 181 args[key] = defaults[key] | 219 args[key] = defaults[key] | 
| 182 | 220 | 
| 192 | 230 | 
| 193 # If the user provides a smiles string, turn it into a file-like StringIO | 231 # If the user provides a smiles string, turn it into a file-like StringIO | 
| 194 # object. | 232 # object. | 
| 195 if "smiles" in args: | 233 if "smiles" in args: | 
| 196 if isinstance(args["smiles"], str): | 234 if isinstance(args["smiles"], str): | 
| 197 args["smiles_file"] = StringIO(args["smiles"]) | 235 args["smiles_file"] = StringIO(args["smiles"]) | 
| 198 | 236 | 
| 199 args["smiles_and_data"] = LoadSMIFile(args["smiles_file"]) | 237 args["smiles_and_data"] = LoadSMIFile(args["smiles_file"]) | 
| 200 | 238 | 
| 201 return args | 239 return args | 
| 240 | |
| 202 | 241 | 
| 203 class UtilFuncs: | 242 class UtilFuncs: | 
| 204 """A namespace to store functions for manipulating mol objects. To keep | 243 """A namespace to store functions for manipulating mol objects. To keep | 
| 205 things organized.""" | 244 things organized.""" | 
| 206 | 245 | 
| 213 :return: The neutralized Mol object. | 252 :return: The neutralized Mol object. | 
| 214 """ | 253 """ | 
| 215 | 254 | 
| 216 # Get the reaction data | 255 # Get the reaction data | 
| 217 rxn_data = [ | 256 rxn_data = [ | 
| 218 ['[Ov1-1:1]', '[Ov2+0:1]-[H]'], # To handle O- bonded to only one atom (add hydrogen). | 257 [ | 
| 219 ['[#7v4+1:1]-[H]', '[#7v3+0:1]'], # To handle N+ bonded to a hydrogen (remove hydrogen). | 258 "[Ov1-1:1]", | 
| 220 ['[Ov2-:1]', '[Ov2+0:1]'], # To handle O- bonded to two atoms. Should not be Negative. | 259 "[Ov2+0:1]-[H]", | 
| 221 ['[#7v3+1:1]', '[#7v3+0:1]'], # To handle N+ bonded to three atoms. Should not be positive. | 260 ], # To handle O- bonded to only one atom (add hydrogen). | 
| 222 ['[#7v2-1:1]', '[#7+0:1]-[H]'], # To handle N- Bonded to two atoms. Add hydrogen. | 261 [ | 
| 262 "[#7v4+1:1]-[H]", | |
| 263 "[#7v3+0:1]", | |
| 264 ], # To handle N+ bonded to a hydrogen (remove hydrogen). | |
| 265 [ | |
| 266 "[Ov2-:1]", | |
| 267 "[Ov2+0:1]", | |
| 268 ], # To handle O- bonded to two atoms. Should not be Negative. | |
| 269 [ | |
| 270 "[#7v3+1:1]", | |
| 271 "[#7v3+0:1]", | |
| 272 ], # To handle N+ bonded to three atoms. Should not be positive. | |
| 273 [ | |
| 274 "[#7v2-1:1]", | |
| 275 "[#7+0:1]-[H]", | |
| 276 ], # To handle N- Bonded to two atoms. Add hydrogen. | |
| 223 # ['[N:1]=[N+0:2]=[N:3]-[H]', '[N:1]=[N+1:2]=[N+0:3]-[H]'], # To | 277 # ['[N:1]=[N+0:2]=[N:3]-[H]', '[N:1]=[N+1:2]=[N+0:3]-[H]'], # To | 
| 224 # handle bad azide. Must be protonated. (Now handled elsewhere, before | 278 # handle bad azide. Must be protonated. (Now handled elsewhere, before | 
| 225 # SMILES converted to Mol object.) | 279 # SMILES converted to Mol object.) | 
| 226 ['[H]-[N:1]-[N:2]#[N:3]', '[N:1]=[N+1:2]=[N:3]-[H]'] # To handle bad azide. R-N-N#N should be R-N=[N+]=N | 280 [ | 
| 281 "[H]-[N:1]-[N:2]#[N:3]", | |
| 282 "[N:1]=[N+1:2]=[N:3]-[H]", | |
| 283 ], # To handle bad azide. R-N-N#N should be R-N=[N+]=N | |
| 227 ] | 284 ] | 
| 228 | 285 | 
| 229 # Add substructures and reactions (initially none) | 286 # Add substructures and reactions (initially none) | 
| 230 for i, rxn_datum in enumerate(rxn_data): | 287 for i, rxn_datum in enumerate(rxn_data): | 
| 231 rxn_data[i].append(Chem.MolFromSmarts(rxn_datum[0])) | 288 rxn_data[i].append(Chem.MolFromSmarts(rxn_datum[0])) | 
| 239 while True: # Keep going until all these issues have been resolved. | 296 while True: # Keep going until all these issues have been resolved. | 
| 240 current_rxn = None # The reaction to perform. | 297 current_rxn = None # The reaction to perform. | 
| 241 current_rxn_str = None | 298 current_rxn_str = None | 
| 242 | 299 | 
| 243 for i, rxn_datum in enumerate(rxn_data): | 300 for i, rxn_datum in enumerate(rxn_data): | 
| 244 reactant_smarts, product_smarts, substruct_match_mol, rxn_placeholder = rxn_datum | 301 ( | 
| 302 reactant_smarts, | |
| 303 product_smarts, | |
| 304 substruct_match_mol, | |
| 305 rxn_placeholder, | |
| 306 ) = rxn_datum | |
| 245 if mol.HasSubstructMatch(substruct_match_mol): | 307 if mol.HasSubstructMatch(substruct_match_mol): | 
| 246 if rxn_placeholder is None: | 308 if rxn_placeholder is None: | 
| 247 current_rxn_str = reactant_smarts + '>>' + product_smarts | 309 current_rxn_str = reactant_smarts + ">>" + product_smarts | 
| 248 current_rxn = AllChem.ReactionFromSmarts(current_rxn_str) | 310 current_rxn = AllChem.ReactionFromSmarts(current_rxn_str) | 
| 249 rxn_data[i][3] = current_rxn # Update the placeholder. | 311 rxn_data[i][3] = current_rxn # Update the placeholder. | 
| 250 else: | 312 else: | 
| 251 current_rxn = rxn_data[i][3] | 313 current_rxn = rxn_data[i][3] | 
| 252 break | 314 break | 
| 260 | 322 | 
| 261 # The mols have been altered from the reactions described above, we need | 323 # The mols have been altered from the reactions described above, we need | 
| 262 # to resanitize them. Make sure aromatic rings are shown as such This | 324 # to resanitize them. Make sure aromatic rings are shown as such This | 
| 263 # catches all RDKit Errors. without the catchError and sanitizeOps the | 325 # catches all RDKit Errors. without the catchError and sanitizeOps the | 
| 264 # Chem.SanitizeMol can crash the program. | 326 # Chem.SanitizeMol can crash the program. | 
| 265 sanitize_string = Chem.SanitizeMol( | 327 sanitize_string = Chem.SanitizeMol( | 
| 266 mol, | 328 mol, | 
| 267 sanitizeOps=rdkit.Chem.rdmolops.SanitizeFlags.SANITIZE_ALL, | 329 sanitizeOps=rdkit.Chem.rdmolops.SanitizeFlags.SANITIZE_ALL, | 
| 268 catchErrors = True | 330 catchErrors=True, | 
| 269 ) | 331 ) | 
| 270 | 332 | 
| 271 return mol if sanitize_string.name == "SANITIZE_NONE" else None | 333 return mol if sanitize_string.name == "SANITIZE_NONE" else None | 
| 272 | 334 | 
| 273 @staticmethod | 335 @staticmethod | 
| 319 """Error messages should be printed to STDERR. See | 381 """Error messages should be printed to STDERR. See | 
| 320 https://stackoverflow.com/questions/5574702/how-to-print-to-stderr-in-python""" | 382 https://stackoverflow.com/questions/5574702/how-to-print-to-stderr-in-python""" | 
| 321 | 383 | 
| 322 print(*args, file=sys.stderr, **kwargs) | 384 print(*args, file=sys.stderr, **kwargs) | 
| 323 | 385 | 
| 386 | |
| 324 class LoadSMIFile(object): | 387 class LoadSMIFile(object): | 
| 325 """A generator class for loading in the SMILES strings from a file, one at | 388 """A generator class for loading in the SMILES strings from a file, one at | 
| 326 a time.""" | 389 a time.""" | 
| 327 | 390 | 
| 328 def __init__(self, filename): | 391 def __init__(self, filename): | 
| 386 # Convert from SMILES string to RDKIT Mol. This series of tests is | 449 # Convert from SMILES string to RDKIT Mol. This series of tests is | 
| 387 # to make sure the SMILES string is properly formed and to get it | 450 # to make sure the SMILES string is properly formed and to get it | 
| 388 # into a canonical form. Filter if failed. | 451 # into a canonical form. Filter if failed. | 
| 389 mol = UtilFuncs.convert_smiles_str_to_mol(smiles_str) | 452 mol = UtilFuncs.convert_smiles_str_to_mol(smiles_str) | 
| 390 if mol is None: | 453 if mol is None: | 
| 391 UtilFuncs.eprint("WARNING: Skipping poorly formed SMILES string: " + line) | 454 UtilFuncs.eprint( | 
| 455 "WARNING: Skipping poorly formed SMILES string: " + line | |
| 456 ) | |
| 392 return self.next() | 457 return self.next() | 
| 393 | 458 | 
| 394 # Handle nuetralizing the molecules. Filter if failed. | 459 # Handle nuetralizing the molecules. Filter if failed. | 
| 395 mol = UtilFuncs.neutralize_mol(mol) | 460 mol = UtilFuncs.neutralize_mol(mol) | 
| 396 if mol is None: | 461 if mol is None: | 
| 397 UtilFuncs.eprint("WARNING: Skipping poorly formed SMILES string: " + line) | 462 UtilFuncs.eprint( | 
| 463 "WARNING: Skipping poorly formed SMILES string: " + line | |
| 464 ) | |
| 398 return self.next() | 465 return self.next() | 
| 399 | 466 | 
| 400 # Remove the hydrogens. | 467 # Remove the hydrogens. | 
| 401 try: | 468 try: | 
| 402 mol = Chem.RemoveHs(mol) | 469 mol = Chem.RemoveHs(mol) | 
| 403 except: | 470 except Exception: | 
| 404 UtilFuncs.eprint("WARNING: Skipping poorly formed SMILES string: " + line) | 471 UtilFuncs.eprint( | 
| 472 "WARNING: Skipping poorly formed SMILES string: " + line | |
| 473 ) | |
| 405 return self.next() | 474 return self.next() | 
| 406 | 475 | 
| 407 if mol is None: | 476 if mol is None: | 
| 408 UtilFuncs.eprint("WARNING: Skipping poorly formed SMILES string: " + line) | 477 UtilFuncs.eprint( | 
| 478 "WARNING: Skipping poorly formed SMILES string: " + line | |
| 479 ) | |
| 409 return self.next() | 480 return self.next() | 
| 410 | 481 | 
| 411 # Regenerate the smiles string (to standardize). | 482 # Regenerate the smiles string (to standardize). | 
| 412 new_mol_string = Chem.MolToSmiles(mol, isomericSmiles=True) | 483 new_mol_string = Chem.MolToSmiles(mol, isomericSmiles=True) | 
| 413 | 484 | 
| 414 return { | 485 return {"smiles": new_mol_string, "data": splits[1:]} | 
| 415 "smiles": new_mol_string, | |
| 416 "data": splits[1:] | |
| 417 } | |
| 418 else: | 486 else: | 
| 419 # Blank line? Go to next one. | 487 # Blank line? Go to next one. | 
| 420 return self.next() | 488 return self.next() | 
| 489 | |
| 421 | 490 | 
| 422 class Protonate(object): | 491 class Protonate(object): | 
| 423 """A generator class for protonating SMILES strings, one at a time.""" | 492 """A generator class for protonating SMILES strings, one at a time.""" | 
| 424 | 493 | 
| 425 def __init__(self, args): | 494 def __init__(self, args): | 
| 489 # There are no more input smiles strings... | 558 # There are no more input smiles strings... | 
| 490 raise StopIteration() | 559 raise StopIteration() | 
| 491 | 560 | 
| 492 smi = smile_and_datum["smiles"] | 561 smi = smile_and_datum["smiles"] | 
| 493 data = smile_and_datum["data"] # Everything on SMILES line but the | 562 data = smile_and_datum["data"] # Everything on SMILES line but the | 
| 494 # SMILES string itself (e.g., the | 563 # SMILES string itself (e.g., the | 
| 495 # molecule name). | 564 # molecule name). | 
| 496 | 565 | 
| 497 # Collect the data associated with this smiles (e.g., the molecule | 566 # Collect the data associated with this smiles (e.g., the molecule | 
| 498 # name). | 567 # name). | 
| 499 tag = " ".join(data) | 568 tag = " ".join(data) | 
| 500 | 569 | 
| 514 new_smis = ProtSubstructFuncs.protonate_site(new_smis, site) | 583 new_smis = ProtSubstructFuncs.protonate_site(new_smis, site) | 
| 515 # print(site, new_smis) # Good for debugging. | 584 # print(site, new_smis) # Good for debugging. | 
| 516 | 585 | 
| 517 # Only add new smiles if not already in the list. | 586 # Only add new smiles if not already in the list. | 
| 518 # for s in new_smis_to_perhaps_add: | 587 # for s in new_smis_to_perhaps_add: | 
| 519 # if not s in new_smis: | 588 # if not s in new_smis: | 
| 520 # new_smis.append(s) | 589 # new_smis.append(s) | 
| 521 | 590 | 
| 522 # In some cases, the script might generate redundant molecules. | 591 # In some cases, the script might generate redundant molecules. | 
| 523 # Phosphonates, when the pH is between the two pKa values and the | 592 # Phosphonates, when the pH is between the two pKa values and the | 
| 524 # stdev value is big enough, for example, will generate two identical | 593 # stdev value is big enough, for example, will generate two identical | 
| 525 # BOTH states. Let's remove this redundancy. | 594 # BOTH states. Let's remove this redundancy. | 
| 530 new_smis = [s.replace("[nH-]", "[n-]") for s in new_smis] | 599 new_smis = [s.replace("[nH-]", "[n-]") for s in new_smis] | 
| 531 | 600 | 
| 532 # Sometimes Dimorphite-DL generates molecules that aren't actually | 601 # Sometimes Dimorphite-DL generates molecules that aren't actually | 
| 533 # possible. Simply convert these to mol objects to eliminate the bad | 602 # possible. Simply convert these to mol objects to eliminate the bad | 
| 534 # ones (that are None). | 603 # ones (that are None). | 
| 535 new_smis = [s for s in new_smis if UtilFuncs.convert_smiles_str_to_mol(s) is not None] | 604 new_smis = [ | 
| 605 s for s in new_smis if UtilFuncs.convert_smiles_str_to_mol(s) is not None | |
| 606 ] | |
| 536 | 607 | 
| 537 # If there are no smi left, return the input one at the very least. | 608 # If there are no smi left, return the input one at the very least. | 
| 538 # All generated forms have apparently been judged | 609 # All generated forms have apparently been judged | 
| 539 # inappropriate/mal-formed. | 610 # inappropriate/mal-formed. | 
| 540 if len(new_smis) == 0: | 611 if len(new_smis) == 0: | 
| 541 new_smis = [smi] | 612 new_smis = [smi] | 
| 542 | 613 | 
| 543 # If the user wants to see the target states, add those | 614 # If the user wants to see the target states, add those | 
| 544 # to the ends of each line. | 615 # to the ends of each line. | 
| 545 if self.args["label_states"]: | 616 if self.args["label_states"]: | 
| 546 states = '\t'.join([x[1] for x in sites]) | 617 states = "\t".join([x[1] for x in sites]) | 
| 547 new_lines = [x + "\t" + tag + "\t" + states for x in new_smis] | 618 new_lines = [x + "\t" + tag + "\t" + states for x in new_smis] | 
| 548 else: | 619 else: | 
| 549 new_lines = [x + "\t" + tag for x in new_smis] | 620 new_lines = [x + "\t" + tag for x in new_smis] | 
| 550 | 621 | 
| 551 self.cur_prot_SMI = new_lines | 622 self.cur_prot_SMI = new_lines | 
| 552 | 623 | 
| 553 return self.next() | 624 return self.next() | 
| 554 | 625 | 
| 626 | |
| 555 class ProtSubstructFuncs: | 627 class ProtSubstructFuncs: | 
| 556 """A namespace to store functions for loading the substructures that can | 628 """A namespace to store functions for loading the substructures that can | 
| 557 be protonated. To keep things organized.""" | 629 be protonated. To keep things organized.""" | 
| 558 | 630 | 
| 559 @staticmethod | 631 @staticmethod | 
| 560 def load_protonation_substructs_calc_state_for_ph(min_ph=6.4, max_ph=8.4, pka_std_range=1): | 632 def load_protonation_substructs_calc_state_for_ph( | 
| 633 min_ph=6.4, max_ph=8.4, pka_std_range=1 | |
| 634 ): | |
| 561 """A pre-calculated list of R-groups with protonation sites, with their | 635 """A pre-calculated list of R-groups with protonation sites, with their | 
| 562 likely pKa bins. | 636 likely pKa bins. | 
| 563 | 637 | 
| 564 :param float min_ph: The lower bound on the pH range, defaults to 6.4. | 638 :param float min_ph: The lower bound on the pH range, defaults to 6.4. | 
| 565 :param float max_ph: The upper bound on the pH range, defaults to 8.4. | 639 :param float max_ph: The upper bound on the pH range, defaults to 8.4. | 
| 571 | 645 | 
| 572 subs = [] | 646 subs = [] | 
| 573 pwd = os.path.dirname(os.path.realpath(__file__)) | 647 pwd = os.path.dirname(os.path.realpath(__file__)) | 
| 574 | 648 | 
| 575 site_structures_file = "{}/{}".format(pwd, "site_substructures.smarts") | 649 site_structures_file = "{}/{}".format(pwd, "site_substructures.smarts") | 
| 576 with open(site_structures_file, 'r') as substruct: | 650 with open(site_structures_file, "r") as substruct: | 
| 577 for line in substruct: | 651 for line in substruct: | 
| 578 line = line.strip() | 652 line = line.strip() | 
| 579 sub = {} | 653 sub = {} | 
| 580 if line is not "": | 654 if line is not "": | 
| 581 splits = line.split() | 655 splits = line.split() | 
| 582 sub["name"] = splits[0] | 656 sub["name"] = splits[0] | 
| 583 sub["smart"] = splits[1] | 657 sub["smart"] = splits[1] | 
| 584 sub["mol"] = Chem.MolFromSmarts(sub["smart"]) | 658 sub["mol"] = Chem.MolFromSmarts(sub["smart"]) | 
| 585 | 659 | 
| 586 # NEED TO DIVIDE THIS BY 3s | 660 # NEED TO DIVIDE THIS BY 3s | 
| 587 pka_ranges = [splits[i:i+3] for i in range(2, len(splits)-1, 3)] | 661 pka_ranges = [ | 
| 662 splits[i : i + 3] for i in range(2, len(splits) - 1, 3) | |
| 663 ] | |
| 588 | 664 | 
| 589 prot = [] | 665 prot = [] | 
| 590 for pka_range in pka_ranges: | 666 for pka_range in pka_ranges: | 
| 591 site = pka_range[0] | 667 site = pka_range[0] | 
| 592 std = float(pka_range[2]) * pka_std_range | 668 std = float(pka_range[2]) * pka_std_range | 
| 618 max_pka = mean + std | 694 max_pka = mean + std | 
| 619 | 695 | 
| 620 # This needs to be reassigned, and 'ERROR' should never make it past the | 696 # This needs to be reassigned, and 'ERROR' should never make it past the | 
| 621 # next set of checks. | 697 # next set of checks. | 
| 622 if min_pka <= max_ph and min_ph <= max_pka: | 698 if min_pka <= max_ph and min_ph <= max_pka: | 
| 623 protonation_state = 'BOTH' | 699 protonation_state = "BOTH" | 
| 624 elif mean > max_ph: | 700 elif mean > max_ph: | 
| 625 protonation_state = 'PROTONATED' | 701 protonation_state = "PROTONATED" | 
| 626 else: | 702 else: | 
| 627 protonation_state = 'DEPROTONATED' | 703 protonation_state = "DEPROTONATED" | 
| 628 | 704 | 
| 629 return protonation_state | 705 return protonation_state | 
| 630 | 706 | 
| 631 @staticmethod | 707 @staticmethod | 
| 632 def get_prot_sites_and_target_states(smi, subs): | 708 def get_prot_sites_and_target_states(smi, subs): | 
| 648 UtilFuncs.eprint("ERROR: ", smi) | 724 UtilFuncs.eprint("ERROR: ", smi) | 
| 649 return [] | 725 return [] | 
| 650 | 726 | 
| 651 # Try to Add hydrogens. if failed return [] | 727 # Try to Add hydrogens. if failed return [] | 
| 652 try: | 728 try: | 
| 653 mol = Chem.AddHs(mol) | 729 mol = Chem.AddHs(mol) | 
| 654 except: | 730 except Exception: | 
| 655 UtilFuncs.eprint("ERROR: ", smi) | 731 UtilFuncs.eprint("ERROR: ", smi) | 
| 656 return [] | 732 return [] | 
| 657 | 733 | 
| 658 # Check adding Hs worked | 734 # Check adding Hs worked | 
| 659 if mol is None: | 735 if mol is None: | 
| 699 idx, target_prot_state, prot_site_name = site | 775 idx, target_prot_state, prot_site_name = site | 
| 700 | 776 | 
| 701 # Initialize the output list | 777 # Initialize the output list | 
| 702 output_smis = [] | 778 output_smis = [] | 
| 703 | 779 | 
| 704 state_to_charge = {"DEPROTONATED": [-1], | 780 state_to_charge = {"DEPROTONATED": [-1], "PROTONATED": [0], "BOTH": [-1, 0]} | 
| 705 "PROTONATED": [0], | |
| 706 "BOTH": [-1, 0]} | |
| 707 | 781 | 
| 708 charges = state_to_charge[target_prot_state] | 782 charges = state_to_charge[target_prot_state] | 
| 709 | 783 | 
| 710 # Now make the actual smiles match the target protonation state. | 784 # Now make the actual smiles match the target protonation state. | 
| 711 output_smis = ProtSubstructFuncs.set_protonation_charge(smis, idx, charges, prot_site_name) | 785 output_smis = ProtSubstructFuncs.set_protonation_charge( | 
| 786 smis, idx, charges, prot_site_name | |
| 787 ) | |
| 712 | 788 | 
| 713 return output_smis | 789 return output_smis | 
| 714 | 790 | 
| 715 @staticmethod | 791 @staticmethod | 
| 716 def set_protonation_charge(smis, idx, charges, prot_site_name): | 792 def set_protonation_charge(smis, idx, charges, prot_site_name): | 
| 757 atom.SetFormalCharge(nitro_charge) | 833 atom.SetFormalCharge(nitro_charge) | 
| 758 else: | 834 else: | 
| 759 atom.SetFormalCharge(charge) | 835 atom.SetFormalCharge(charge) | 
| 760 | 836 | 
| 761 # Convert back to SMILE and add to output | 837 # Convert back to SMILE and add to output | 
| 762 out_smile = Chem.MolToSmiles(mol, isomericSmiles=True,canonical=True) | 838 out_smile = Chem.MolToSmiles(mol, isomericSmiles=True, canonical=True) | 
| 763 output.append(out_smile) | 839 output.append(out_smile) | 
| 764 | 840 | 
| 765 return output | 841 return output | 
| 842 | |
| 766 | 843 | 
| 767 class ProtectUnprotectFuncs: | 844 class ProtectUnprotectFuncs: | 
| 768 """A namespace for storing functions that are useful for protecting and | 845 """A namespace for storing functions that are useful for protecting and | 
| 769 unprotecting molecules. To keep things organized. We need to identify and | 846 unprotecting molecules. To keep things organized. We need to identify and | 
| 770 mark groups that have been matched with a substructure.""" | 847 mark groups that have been matched with a substructure.""" | 
| 777 :param rdkit.Chem.rdchem.Mol mol: The rdkit Mol object. | 854 :param rdkit.Chem.rdchem.Mol mol: The rdkit Mol object. | 
| 778 :type mol: The rdkit Mol object with atoms unprotected. | 855 :type mol: The rdkit Mol object with atoms unprotected. | 
| 779 """ | 856 """ | 
| 780 | 857 | 
| 781 for atom in mol.GetAtoms(): | 858 for atom in mol.GetAtoms(): | 
| 782 atom.SetProp('_protected', '0') | 859 atom.SetProp("_protected", "0") | 
| 783 | 860 | 
| 784 @staticmethod | 861 @staticmethod | 
| 785 def protect_molecule(mol, match): | 862 def protect_molecule(mol, match): | 
| 786 """Given a 'match', a list of molecules idx's, we set the protected status | 863 """Given a 'match', a list of molecules idx's, we set the protected status | 
| 787 of each atom to 1. This will prevent any matches using that atom in the | 864 of each atom to 1. This will prevent any matches using that atom in the | 
| 791 :param list match: A list of molecule idx's. | 868 :param list match: A list of molecule idx's. | 
| 792 """ | 869 """ | 
| 793 | 870 | 
| 794 for idx in match: | 871 for idx in match: | 
| 795 atom = mol.GetAtomWithIdx(idx) | 872 atom = mol.GetAtomWithIdx(idx) | 
| 796 atom.SetProp('_protected', '1') | 873 atom.SetProp("_protected", "1") | 
| 797 | 874 | 
| 798 @staticmethod | 875 @staticmethod | 
| 799 def get_unprotected_matches(mol, substruct): | 876 def get_unprotected_matches(mol, substruct): | 
| 800 """Finds substructure matches with atoms that have not been protected. | 877 """Finds substructure matches with atoms that have not been protected. | 
| 801 Returns list of matches, each match a list of atom idxs. | 878 Returns list of matches, each match a list of atom idxs. | 
| 827 protected = atom.GetProp("_protected") | 904 protected = atom.GetProp("_protected") | 
| 828 if protected == "1": | 905 if protected == "1": | 
| 829 return False | 906 return False | 
| 830 return True | 907 return True | 
| 831 | 908 | 
| 909 | |
| 832 class TestFuncs: | 910 class TestFuncs: | 
| 833 """A namespace for storing functions that perform tests on the code. To | 911 """A namespace for storing functions that perform tests on the code. To | 
| 834 keep things organized.""" | 912 keep things organized.""" | 
| 835 | 913 | 
| 836 @staticmethod | 914 @staticmethod | 
| 837 def test(): | 915 def test(): | 
| 838 """Tests all the 38 groups.""" | 916 """Tests all the 38 groups.""" | 
| 839 | 917 | 
| 840 smis = [ | 918 smis = [ | 
| 841 # [input smiles, pka, protonated, deprotonated, category] | 919 # [input smiles, pka, protonated, deprotonated, category] | 
| 842 ["C#CCO", "C#CCO", "C#CC[O-]", "Alcohol"], | 920 ["C#CCO", "C#CCO", "C#CC[O-]", "Alcohol"], | 
| 843 ["C(=O)N", "NC=O", "[NH-]C=O", "Amide"], | 921 ["C(=O)N", "NC=O", "[NH-]C=O", "Amide"], | 
| 844 ["CC(=O)NOC(C)=O", "CC(=O)NOC(C)=O", "CC(=O)[N-]OC(C)=O", "Amide_electronegative"], | 922 [ | 
| 845 ["COC(=N)N", "COC(N)=[NH2+]", "COC(=N)N", "AmidineGuanidine2"], | 923 "CC(=O)NOC(C)=O", | 
| 846 ["Brc1ccc(C2NCCS2)cc1", "Brc1ccc(C2[NH2+]CCS2)cc1", "Brc1ccc(C2NCCS2)cc1", "Amines_primary_secondary_tertiary"], | 924 "CC(=O)NOC(C)=O", | 
| 847 ["CC(=O)[n+]1ccc(N)cc1", "CC(=O)[n+]1ccc([NH3+])cc1", "CC(=O)[n+]1ccc(N)cc1", "Anilines_primary"], | 925 "CC(=O)[N-]OC(C)=O", | 
| 848 ["CCNc1ccccc1", "CC[NH2+]c1ccccc1", "CCNc1ccccc1", "Anilines_secondary"], | 926 "Amide_electronegative", | 
| 849 ["Cc1ccccc1N(C)C", "Cc1ccccc1[NH+](C)C", "Cc1ccccc1N(C)C", "Anilines_tertiary"], | 927 ], | 
| 850 ["BrC1=CC2=C(C=C1)NC=C2", "Brc1ccc2[nH]ccc2c1", "Brc1ccc2[n-]ccc2c1", "Indole_pyrrole"], | 928 ["COC(=N)N", "COC(N)=[NH2+]", "COC(=N)N", "AmidineGuanidine2"], | 
| 851 ["O=c1cc[nH]cc1", "O=c1cc[nH]cc1", "O=c1cc[n-]cc1", "Aromatic_nitrogen_protonated"], | 929 [ | 
| 852 ["C-N=[N+]=[N@H]", "CN=[N+]=N", "CN=[N+]=[N-]", "Azide"], | 930 "Brc1ccc(C2NCCS2)cc1", | 
| 853 ["BrC(C(O)=O)CBr", "O=C(O)C(Br)CBr", "O=C([O-])C(Br)CBr", "Carboxyl"], | 931 "Brc1ccc(C2[NH2+]CCS2)cc1", | 
| 854 ["NC(NN=O)=N", "NC(=[NH2+])NN=O", "N=C(N)NN=O", "AmidineGuanidine1"], | 932 "Brc1ccc(C2NCCS2)cc1", | 
| 855 ["C(F)(F)(F)C(=O)NC(=O)C", "CC(=O)NC(=O)C(F)(F)F", "CC(=O)[N-]C(=O)C(F)(F)F", "Imide"], | 933 "Amines_primary_secondary_tertiary", | 
| 856 ["O=C(C)NC(C)=O", "CC(=O)NC(C)=O", "CC(=O)[N-]C(C)=O", "Imide2"], | 934 ], | 
| 857 ["CC(C)(C)C(N(C)O)=O", "CN(O)C(=O)C(C)(C)C", "CN([O-])C(=O)C(C)(C)C", "N-hydroxyamide"], | 935 [ | 
| 858 ["C[N+](O)=O", "C[N+](=O)O", "C[N+](=O)[O-]", "Nitro"], | 936 "CC(=O)[n+]1ccc(N)cc1", | 
| 859 ["O=C1C=C(O)CC1", "O=C1C=C(O)CC1", "O=C1C=C([O-])CC1", "O=C-C=C-OH"], | 937 "CC(=O)[n+]1ccc([NH3+])cc1", | 
| 860 ["C1CC1OO", "OOC1CC1", "[O-]OC1CC1", "Peroxide2"], | 938 "CC(=O)[n+]1ccc(N)cc1", | 
| 861 ["C(=O)OO", "O=COO", "O=CO[O-]", "Peroxide1"], | 939 "Anilines_primary", | 
| 862 ["Brc1cc(O)cc(Br)c1", "Oc1cc(Br)cc(Br)c1", "[O-]c1cc(Br)cc(Br)c1", "Phenol"], | 940 ], | 
| 863 ["CC(=O)c1ccc(S)cc1", "CC(=O)c1ccc(S)cc1", "CC(=O)c1ccc([S-])cc1", "Phenyl_Thiol"], | 941 ["CCNc1ccccc1", "CC[NH2+]c1ccccc1", "CCNc1ccccc1", "Anilines_secondary"], | 
| 864 ["C=CCOc1ccc(C(=O)O)cc1", "C=CCOc1ccc(C(=O)O)cc1", "C=CCOc1ccc(C(=O)[O-])cc1", "Phenyl_carboxyl"], | 942 [ | 
| 865 ["COP(=O)(O)OC", "COP(=O)(O)OC", "COP(=O)([O-])OC", "Phosphate_diester"], | 943 "Cc1ccccc1N(C)C", | 
| 866 ["CP(C)(=O)O", "CP(C)(=O)O", "CP(C)(=O)[O-]", "Phosphinic_acid"], | 944 "Cc1ccccc1[NH+](C)C", | 
| 867 ["CC(C)OP(C)(=O)O", "CC(C)OP(C)(=O)O", "CC(C)OP(C)(=O)[O-]", "Phosphonate_ester"], | 945 "Cc1ccccc1N(C)C", | 
| 868 ["CC1(C)OC(=O)NC1=O", "CC1(C)OC(=O)NC1=O", "CC1(C)OC(=O)[N-]C1=O", "Ringed_imide1"], | 946 "Anilines_tertiary", | 
| 869 ["O=C(N1)C=CC1=O", "O=C1C=CC(=O)N1", "O=C1C=CC(=O)[N-]1", "Ringed_imide2"], | 947 ], | 
| 870 ["O=S(OC)(O)=O", "COS(=O)(=O)O", "COS(=O)(=O)[O-]", "Sulfate"], | 948 [ | 
| 871 ["COc1ccc(S(=O)O)cc1", "COc1ccc(S(=O)O)cc1", "COc1ccc(S(=O)[O-])cc1", "Sulfinic_acid"], | 949 "BrC1=CC2=C(C=C1)NC=C2", | 
| 872 ["CS(N)(=O)=O", "CS(N)(=O)=O", "CS([NH-])(=O)=O", "Sulfonamide"], | 950 "Brc1ccc2[nH]ccc2c1", | 
| 873 ["CC(=O)CSCCS(O)(=O)=O", "CC(=O)CSCCS(=O)(=O)O", "CC(=O)CSCCS(=O)(=O)[O-]", "Sulfonate"], | 951 "Brc1ccc2[n-]ccc2c1", | 
| 874 ["CC(=O)S", "CC(=O)S", "CC(=O)[S-]", "Thioic_acid"], | 952 "Indole_pyrrole", | 
| 875 ["C(C)(C)(C)(S)", "CC(C)(C)S", "CC(C)(C)[S-]", "Thiol"], | 953 ], | 
| 876 ["Brc1cc[nH+]cc1", "Brc1cc[nH+]cc1", "Brc1ccncc1", "Aromatic_nitrogen_unprotonated"], | 954 [ | 
| 877 ["C=C(O)c1c(C)cc(C)cc1C", "C=C(O)c1c(C)cc(C)cc1C", "C=C([O-])c1c(C)cc(C)cc1C", "Vinyl_alcohol"], | 955 "O=c1cc[nH]cc1", | 
| 878 ["CC(=O)ON", "CC(=O)O[NH3+]", "CC(=O)ON", "Primary_hydroxyl_amine"] | 956 "O=c1cc[nH]cc1", | 
| 957 "O=c1cc[n-]cc1", | |
| 958 "Aromatic_nitrogen_protonated", | |
| 959 ], | |
| 960 ["C-N=[N+]=[N@H]", "CN=[N+]=N", "CN=[N+]=[N-]", "Azide"], | |
| 961 ["BrC(C(O)=O)CBr", "O=C(O)C(Br)CBr", "O=C([O-])C(Br)CBr", "Carboxyl"], | |
| 962 ["NC(NN=O)=N", "NC(=[NH2+])NN=O", "N=C(N)NN=O", "AmidineGuanidine1"], | |
| 963 [ | |
| 964 "C(F)(F)(F)C(=O)NC(=O)C", | |
| 965 "CC(=O)NC(=O)C(F)(F)F", | |
| 966 "CC(=O)[N-]C(=O)C(F)(F)F", | |
| 967 "Imide", | |
| 968 ], | |
| 969 ["O=C(C)NC(C)=O", "CC(=O)NC(C)=O", "CC(=O)[N-]C(C)=O", "Imide2"], | |
| 970 [ | |
| 971 "CC(C)(C)C(N(C)O)=O", | |
| 972 "CN(O)C(=O)C(C)(C)C", | |
| 973 "CN([O-])C(=O)C(C)(C)C", | |
| 974 "N-hydroxyamide", | |
| 975 ], | |
| 976 ["C[N+](O)=O", "C[N+](=O)O", "C[N+](=O)[O-]", "Nitro"], | |
| 977 ["O=C1C=C(O)CC1", "O=C1C=C(O)CC1", "O=C1C=C([O-])CC1", "O=C-C=C-OH"], | |
| 978 ["C1CC1OO", "OOC1CC1", "[O-]OC1CC1", "Peroxide2"], | |
| 979 ["C(=O)OO", "O=COO", "O=CO[O-]", "Peroxide1"], | |
| 980 [ | |
| 981 "Brc1cc(O)cc(Br)c1", | |
| 982 "Oc1cc(Br)cc(Br)c1", | |
| 983 "[O-]c1cc(Br)cc(Br)c1", | |
| 984 "Phenol", | |
| 985 ], | |
| 986 [ | |
| 987 "CC(=O)c1ccc(S)cc1", | |
| 988 "CC(=O)c1ccc(S)cc1", | |
| 989 "CC(=O)c1ccc([S-])cc1", | |
| 990 "Phenyl_Thiol", | |
| 991 ], | |
| 992 [ | |
| 993 "C=CCOc1ccc(C(=O)O)cc1", | |
| 994 "C=CCOc1ccc(C(=O)O)cc1", | |
| 995 "C=CCOc1ccc(C(=O)[O-])cc1", | |
| 996 "Phenyl_carboxyl", | |
| 997 ], | |
| 998 ["COP(=O)(O)OC", "COP(=O)(O)OC", "COP(=O)([O-])OC", "Phosphate_diester"], | |
| 999 ["CP(C)(=O)O", "CP(C)(=O)O", "CP(C)(=O)[O-]", "Phosphinic_acid"], | |
| 1000 [ | |
| 1001 "CC(C)OP(C)(=O)O", | |
| 1002 "CC(C)OP(C)(=O)O", | |
| 1003 "CC(C)OP(C)(=O)[O-]", | |
| 1004 "Phosphonate_ester", | |
| 1005 ], | |
| 1006 [ | |
| 1007 "CC1(C)OC(=O)NC1=O", | |
| 1008 "CC1(C)OC(=O)NC1=O", | |
| 1009 "CC1(C)OC(=O)[N-]C1=O", | |
| 1010 "Ringed_imide1", | |
| 1011 ], | |
| 1012 ["O=C(N1)C=CC1=O", "O=C1C=CC(=O)N1", "O=C1C=CC(=O)[N-]1", "Ringed_imide2"], | |
| 1013 ["O=S(OC)(O)=O", "COS(=O)(=O)O", "COS(=O)(=O)[O-]", "Sulfate"], | |
| 1014 [ | |
| 1015 "COc1ccc(S(=O)O)cc1", | |
| 1016 "COc1ccc(S(=O)O)cc1", | |
| 1017 "COc1ccc(S(=O)[O-])cc1", | |
| 1018 "Sulfinic_acid", | |
| 1019 ], | |
| 1020 ["CS(N)(=O)=O", "CS(N)(=O)=O", "CS([NH-])(=O)=O", "Sulfonamide"], | |
| 1021 [ | |
| 1022 "CC(=O)CSCCS(O)(=O)=O", | |
| 1023 "CC(=O)CSCCS(=O)(=O)O", | |
| 1024 "CC(=O)CSCCS(=O)(=O)[O-]", | |
| 1025 "Sulfonate", | |
| 1026 ], | |
| 1027 ["CC(=O)S", "CC(=O)S", "CC(=O)[S-]", "Thioic_acid"], | |
| 1028 ["C(C)(C)(C)(S)", "CC(C)(C)S", "CC(C)(C)[S-]", "Thiol"], | |
| 1029 [ | |
| 1030 "Brc1cc[nH+]cc1", | |
| 1031 "Brc1cc[nH+]cc1", | |
| 1032 "Brc1ccncc1", | |
| 1033 "Aromatic_nitrogen_unprotonated", | |
| 1034 ], | |
| 1035 [ | |
| 1036 "C=C(O)c1c(C)cc(C)cc1C", | |
| 1037 "C=C(O)c1c(C)cc(C)cc1C", | |
| 1038 "C=C([O-])c1c(C)cc(C)cc1C", | |
| 1039 "Vinyl_alcohol", | |
| 1040 ], | |
| 1041 ["CC(=O)ON", "CC(=O)O[NH3+]", "CC(=O)ON", "Primary_hydroxyl_amine"], | |
| 879 ] | 1042 ] | 
| 880 | 1043 | 
| 881 smis_phos = [ | 1044 smis_phos = [ | 
| 882 ["O=P(O)(O)OCCCC", "CCCCOP(=O)(O)O", "CCCCOP(=O)([O-])O", "CCCCOP(=O)([O-])[O-]", "Phosphate"], | 1045 [ | 
| 883 ["CC(P(O)(O)=O)C", "CC(C)P(=O)(O)O", "CC(C)P(=O)([O-])O", "CC(C)P(=O)([O-])[O-]", "Phosphonate"] | 1046 "O=P(O)(O)OCCCC", | 
| 1047 "CCCCOP(=O)(O)O", | |
| 1048 "CCCCOP(=O)([O-])O", | |
| 1049 "CCCCOP(=O)([O-])[O-]", | |
| 1050 "Phosphate", | |
| 1051 ], | |
| 1052 [ | |
| 1053 "CC(P(O)(O)=O)C", | |
| 1054 "CC(C)P(=O)(O)O", | |
| 1055 "CC(C)P(=O)([O-])O", | |
| 1056 "CC(C)P(=O)([O-])[O-]", | |
| 1057 "Phosphonate", | |
| 1058 ], | |
| 884 ] | 1059 ] | 
| 885 | 1060 | 
| 886 # Load the average pKa values. | 1061 # Load the average pKa values. | 
| 887 average_pkas = {l.split()[0].replace("*", ""):float(l.split()[3]) for l in open("site_substructures.smarts") if l.split()[0] not in ["Phosphate", "Phosphonate"]} | 1062 average_pkas = { | 
| 888 average_pkas_phos = {l.split()[0].replace("*", ""):[float(l.split()[3]), float(l.split()[6])] for l in open("site_substructures.smarts") if l.split()[0] in ["Phosphate", "Phosphonate"]} | 1063 l.split()[0].replace("*", ""): float(l.split()[3]) | 
| 1064 for l in open("site_substructures.smarts") | |
| 1065 if l.split()[0] not in ["Phosphate", "Phosphonate"] | |
| 1066 } | |
| 1067 average_pkas_phos = { | |
| 1068 l.split()[0].replace("*", ""): [float(l.split()[3]), float(l.split()[6])] | |
| 1069 for l in open("site_substructures.smarts") | |
| 1070 if l.split()[0] in ["Phosphate", "Phosphonate"] | |
| 1071 } | |
| 889 | 1072 | 
| 890 print("Running Tests") | 1073 print("Running Tests") | 
| 891 print("=============") | 1074 print("=============") | 
| 892 print("") | 1075 print("") | 
| 893 | 1076 | 
| 898 args = { | 1081 args = { | 
| 899 "min_ph": -10000000, | 1082 "min_ph": -10000000, | 
| 900 "max_ph": -10000000, | 1083 "max_ph": -10000000, | 
| 901 "pka_precision": 0.5, | 1084 "pka_precision": 0.5, | 
| 902 "smiles": "", | 1085 "smiles": "", | 
| 903 "label_states": True | 1086 "label_states": True, | 
| 904 } | 1087 } | 
| 905 | 1088 | 
| 906 for smi, protonated, deprotonated, category in smis: | 1089 for smi, protonated, deprotonated, category in smis: | 
| 907 args["smiles"] = smi | 1090 args["smiles"] = smi | 
| 908 TestFuncs.test_check(args, [protonated], ["PROTONATED"]) | 1091 TestFuncs.test_check(args, [protonated], ["PROTONATED"]) | 
| 952 | 1135 | 
| 953 avg_pka = average_pkas_phos[category][1] | 1136 avg_pka = average_pkas_phos[category][1] | 
| 954 args["min_ph"] = avg_pka | 1137 args["min_ph"] = avg_pka | 
| 955 args["max_ph"] = avg_pka | 1138 args["max_ph"] = avg_pka | 
| 956 | 1139 | 
| 957 TestFuncs.test_check(args, [mix, deprotonated], ["DEPROTONATED", "DEPROTONATED"]) | 1140 TestFuncs.test_check( | 
| 958 | 1141 args, [mix, deprotonated], ["DEPROTONATED", "DEPROTONATED"] | 
| 959 avg_pka = 0.5 * (average_pkas_phos[category][0] + average_pkas_phos[category][1]) | 1142 ) | 
| 1143 | |
| 1144 avg_pka = 0.5 * ( | |
| 1145 average_pkas_phos[category][0] + average_pkas_phos[category][1] | |
| 1146 ) | |
| 960 args["min_ph"] = avg_pka | 1147 args["min_ph"] = avg_pka | 
| 961 args["max_ph"] = avg_pka | 1148 args["max_ph"] = avg_pka | 
| 962 args["pka_precision"] = 5 # Should give all three | 1149 args["pka_precision"] = 5 # Should give all three | 
| 963 | 1150 | 
| 964 TestFuncs.test_check(args, [mix, deprotonated, protonated], ["BOTH", "BOTH"]) | 1151 TestFuncs.test_check( | 
| 1152 args, [mix, deprotonated, protonated], ["BOTH", "BOTH"] | |
| 1153 ) | |
| 965 | 1154 | 
| 966 @staticmethod | 1155 @staticmethod | 
| 967 def test_check(args, expected_output, labels): | 1156 def test_check(args, expected_output, labels): | 
| 968 """Tests most ionizable groups. The ones that can only loose or gain a single proton. | 1157 """Tests most ionizable groups. The ones that can only loose or gain a single proton. | 
| 969 | 1158 | 
| 979 output = list(Protonate(args)) | 1168 output = list(Protonate(args)) | 
| 980 output = [o.split() for o in output] | 1169 output = [o.split() for o in output] | 
| 981 | 1170 | 
| 982 num_states = len(expected_output) | 1171 num_states = len(expected_output) | 
| 983 | 1172 | 
| 984 if (len(output) != num_states): | 1173 if len(output) != num_states: | 
| 985 msg = args["smiles"] + " should have " + str(num_states) + \ | 1174 msg = ( | 
| 986 " states at at pH " + str(args["min_ph"]) + ": " + str(output) | 1175 args["smiles"] | 
| 1176 + " should have " | |
| 1177 + str(num_states) | |
| 1178 + " states at at pH " | |
| 1179 + str(args["min_ph"]) | |
| 1180 + ": " | |
| 1181 + str(output) | |
| 1182 ) | |
| 987 print(msg) | 1183 print(msg) | 
| 988 raise Exception(msg) | 1184 raise Exception(msg) | 
| 989 | 1185 | 
| 990 if (len(set([l[0] for l in output]) - set(expected_output)) != 0): | 1186 if len(set([l[0] for l in output]) - set(expected_output)) != 0: | 
| 991 msg = args["smiles"] + " is not " + " AND ".join(expected_output) + \ | 1187 msg = ( | 
| 992 " at pH " + str(args["min_ph"]) + " - " + str(args["max_ph"]) + \ | 1188 args["smiles"] | 
| 993 "; it is " + " AND ".join([l[0] for l in output]) | 1189 + " is not " | 
| 1190 + " AND ".join(expected_output) | |
| 1191 + " at pH " | |
| 1192 + str(args["min_ph"]) | |
| 1193 + " - " | |
| 1194 + str(args["max_ph"]) | |
| 1195 + "; it is " | |
| 1196 + " AND ".join([l[0] for l in output]) | |
| 1197 ) | |
| 994 print(msg) | 1198 print(msg) | 
| 995 raise Exception(msg) | 1199 raise Exception(msg) | 
| 996 | 1200 | 
| 997 if (len(set([l[1] for l in output]) - set(labels)) != 0): | 1201 if len(set([l[1] for l in output]) - set(labels)) != 0: | 
| 998 msg = args["smiles"] + " not labeled as " + " AND ".join(labels) + \ | 1202 msg = ( | 
| 999 "; it is " + " AND ".join([l[1] for l in output]) | 1203 args["smiles"] | 
| 1204 + " not labeled as " | |
| 1205 + " AND ".join(labels) | |
| 1206 + "; it is " | |
| 1207 + " AND ".join([l[1] for l in output]) | |
| 1208 ) | |
| 1000 print(msg) | 1209 print(msg) | 
| 1001 raise Exception(msg) | 1210 raise Exception(msg) | 
| 1002 | 1211 | 
| 1003 ph_range = sorted(list(set([args["min_ph"], args["max_ph"]]))) | 1212 ph_range = sorted(list(set([args["min_ph"], args["max_ph"]]))) | 
| 1004 ph_range_str = "(" + " - ".join("{0:.2f}".format(n) for n in ph_range) + ")" | 1213 ph_range_str = "(" + " - ".join("{0:.2f}".format(n) for n in ph_range) + ")" | 
| 1005 print("(CORRECT) " + ph_range_str.ljust(10) + " " + args["smiles"] + " => " + " AND ".join([l[0] for l in output])) | 1214 print( | 
| 1215 "(CORRECT) " | |
| 1216 + ph_range_str.ljust(10) | |
| 1217 + " " | |
| 1218 + args["smiles"] | |
| 1219 + " => " | |
| 1220 + " AND ".join([l[0] for l in output]) | |
| 1221 ) | |
| 1222 | |
| 1006 | 1223 | 
| 1007 def run(**kwargs): | 1224 def run(**kwargs): | 
| 1008 """A helpful, importable function for those who want to call Dimorphite-DL | 1225 """A helpful, importable function for those who want to call Dimorphite-DL | 
| 1009 from another Python script rather than the command line. Note that this | 1226 from another Python script rather than the command line. Note that this | 
| 1010 function accepts keyword arguments that match the command-line parameters | 1227 function accepts keyword arguments that match the command-line parameters | 
| 1016 :type kwargs: dict | 1233 :type kwargs: dict | 
| 1017 """ | 1234 """ | 
| 1018 | 1235 | 
| 1019 # Run the main function with the specified arguments. | 1236 # Run the main function with the specified arguments. | 
| 1020 main(kwargs) | 1237 main(kwargs) | 
| 1238 | |
| 1021 | 1239 | 
| 1022 def run_with_mol_list(mol_lst, **kwargs): | 1240 def run_with_mol_list(mol_lst, **kwargs): | 
| 1023 """A helpful, importable function for those who want to call Dimorphite-DL | 1241 """A helpful, importable function for those who want to call Dimorphite-DL | 
| 1024 from another Python script rather than the command line. Note that this | 1242 from another Python script rather than the command line. Note that this | 
| 1025 function is for passing Dimorphite-DL a list of RDKit Mol objects, together | 1243 function is for passing Dimorphite-DL a list of RDKit Mol objects, together | 
| 1035 """ | 1253 """ | 
| 1036 | 1254 | 
| 1037 # Do a quick check to make sure the user input makes sense. | 1255 # Do a quick check to make sure the user input makes sense. | 
| 1038 for bad_arg in ["smiles", "smiles_file", "output_file", "test"]: | 1256 for bad_arg in ["smiles", "smiles_file", "output_file", "test"]: | 
| 1039 if bad_arg in kwargs: | 1257 if bad_arg in kwargs: | 
| 1040 msg = "You're using Dimorphite-DL's run_with_mol_list(mol_lst, " + \ | 1258 msg = ( | 
| 1041 "**kwargs) function, but you also passed the \"" + \ | 1259 "You're using Dimorphite-DL's run_with_mol_list(mol_lst, " | 
| 1042 bad_arg + "\" argument. Did you mean to use the " + \ | 1260 + '**kwargs) function, but you also passed the "' | 
| 1043 "run(**kwargs) function instead?" | 1261 + bad_arg | 
| 1262 + '" argument. Did you mean to use the ' | |
| 1263 + "run(**kwargs) function instead?" | |
| 1264 ) | |
| 1044 print(msg) | 1265 print(msg) | 
| 1045 raise Exception(msg) | 1266 raise Exception(msg) | 
| 1046 | 1267 | 
| 1047 # Set the return_as_list flag so main() will return the protonated smiles | 1268 # Set the return_as_list flag so main() will return the protonated smiles | 
| 1048 # as a list. | 1269 # as a list. | 
| 1074 m.SetBoolProp(prop, val) | 1295 m.SetBoolProp(prop, val) | 
| 1075 else: | 1296 else: | 
| 1076 m.SetProp(prop, str(val)) | 1297 m.SetProp(prop, str(val)) | 
| 1077 mols.append(m) | 1298 mols.append(m) | 
| 1078 else: | 1299 else: | 
| 1079 UtilFuncs.eprint("WARNING: Could not process molecule with SMILES string " + s + " and properties " + str(props)) | 1300 UtilFuncs.eprint( | 
| 1301 "WARNING: Could not process molecule with SMILES string " | |
| 1302 + s | |
| 1303 + " and properties " | |
| 1304 + str(props) | |
| 1305 ) | |
| 1080 | 1306 | 
| 1081 return mols | 1307 return mols | 
| 1308 | |
| 1082 | 1309 | 
| 1083 if __name__ == "__main__": | 1310 if __name__ == "__main__": | 
| 1084 main() | 1311 main() | 
