diff dimorphite_dl.py @ 6:4beb3e026bbb draft default tip

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
author bgruening
date Sat, 04 Dec 2021 16:39:05 +0000
parents 351fbd750a6d
children
line wrap: on
line diff
--- a/dimorphite_dl.py	Wed Feb 17 13:00:12 2021 +0000
+++ b/dimorphite_dl.py	Sat Dec 04 16:39:05 2021 +0000
@@ -19,8 +19,9 @@
 """
 
 from __future__ import print_function
+
+import argparse
 import os
-import argparse
 import sys
 
 try:
@@ -43,11 +44,12 @@
     import rdkit
     from rdkit import Chem
     from rdkit.Chem import AllChem
-except:
+except Exception:
     msg = "Dimorphite-DL requires RDKit. See https://www.rdkit.org/"
     print(msg)
     raise Exception(msg)
 
+
 def main(params=None):
     """The main definition run when you call the script from the commandline.
 
@@ -84,13 +86,14 @@
             with open(args["output_file"], "w") as file:
                 for protonated_smi in Protonate(args):
                     file.write(protonated_smi + "\n")
-        elif "return_as_list" in args and args["return_as_list"] == True:
+        elif "return_as_list" in args and args["return_as_list"]:
             return list(Protonate(args))
         else:
             # No output file specified. Just print it to the screen.
             for protonated_smi in Protonate(args):
                 print(protonated_smi)
 
+
 class MyParser(argparse.ArgumentParser):
     """Overwrite default parse so it displays help file on error. See
     https://stackoverflow.com/questions/4042452/display-help-message-with-python-argparse-when-script-is-called-without-any-argu"""
@@ -117,15 +120,18 @@
         if file is None:
             file = sys.stdout
         self._print_message(self.format_help(), file)
-        print("""
+        print(
+            """
 examples:
   python dimorphite_dl.py --smiles_file sample_molecules.smi
   python dimorphite_dl.py --smiles "CCC(=O)O" --min_ph -3.0 --max_ph -2.0
   python dimorphite_dl.py --smiles "CCCN" --min_ph -3.0 --max_ph -2.0 --output_file output.smi
   python dimorphite_dl.py --smiles_file sample_molecules.smi --pka_precision 2.0 --label_states
-  python dimorphite_dl.py --test""")
+  python dimorphite_dl.py --test"""
+        )
         print("")
 
+
 class ArgParseFuncs:
     """A namespace for storing functions that are useful for processing
     command-line arguments. To keep things organized."""
@@ -137,27 +143,57 @@
         :return: A parser object.
         """
 
-        parser = MyParser(description="Dimorphite 1.2: Creates models of " +
-                                    "appropriately protonated small moleucles. " +
-                                    "Apache 2.0 License. Copyright 2018 Jacob D. " +
-                                    "Durrant.")
-        parser.add_argument('--min_ph', metavar='MIN', type=float, default=6.4,
-                            help='minimum pH to consider (default: 6.4)')
-        parser.add_argument('--max_ph', metavar='MAX', type=float, default=8.4,
-                            help='maximum pH to consider (default: 8.4)')
-        parser.add_argument('--pka_precision', metavar='PRE', type=float, default=1.0,
-                            help='pKa precision factor (number of standard devations, default: 1.0)')
-        parser.add_argument('--smiles', metavar='SMI', type=str,
-                            help='SMILES string to protonate')
-        parser.add_argument('--smiles_file', metavar="FILE", type=str,
-                            help='file that contains SMILES strings to protonate')
-        parser.add_argument('--output_file', metavar="FILE", type=str,
-                            help='output file to write protonated SMILES (optional)')
-        parser.add_argument('--label_states', action="store_true",
-                            help='label protonated SMILES with target state ' + \
-                                '(i.e., "DEPROTONATED", "PROTONATED", or "BOTH").')
-        parser.add_argument('--test', action="store_true",
-                            help='run unit tests (for debugging)')
+        parser = MyParser(
+            description="Dimorphite 1.2: Creates models of "
+            + "appropriately protonated small moleucles. "
+            + "Apache 2.0 License. Copyright 2018 Jacob D. "
+            + "Durrant."
+        )
+        parser.add_argument(
+            "--min_ph",
+            metavar="MIN",
+            type=float,
+            default=6.4,
+            help="minimum pH to consider (default: 6.4)",
+        )
+        parser.add_argument(
+            "--max_ph",
+            metavar="MAX",
+            type=float,
+            default=8.4,
+            help="maximum pH to consider (default: 8.4)",
+        )
+        parser.add_argument(
+            "--pka_precision",
+            metavar="PRE",
+            type=float,
+            default=1.0,
+            help="pKa precision factor (number of standard devations, default: 1.0)",
+        )
+        parser.add_argument(
+            "--smiles", metavar="SMI", type=str, help="SMILES string to protonate"
+        )
+        parser.add_argument(
+            "--smiles_file",
+            metavar="FILE",
+            type=str,
+            help="file that contains SMILES strings to protonate",
+        )
+        parser.add_argument(
+            "--output_file",
+            metavar="FILE",
+            type=str,
+            help="output file to write protonated SMILES (optional)",
+        )
+        parser.add_argument(
+            "--label_states",
+            action="store_true",
+            help="label protonated SMILES with target state "
+            + '(i.e., "DEPROTONATED", "PROTONATED", or "BOTH").',
+        )
+        parser.add_argument(
+            "--test", action="store_true", help="run unit tests (for debugging)"
+        )
 
         return parser
 
@@ -170,11 +206,13 @@
         :raises Exception: No SMILES in params.
         """
 
-        defaults = {'min_ph' : 6.4,
-                    'max_ph' : 8.4,
-                    'pka_precision' : 1.0,
-                    'label_states' : False,
-                    'test' : False}
+        defaults = {
+            "min_ph": 6.4,
+            "max_ph": 8.4,
+            "pka_precision": 1.0,
+            "label_states": False,
+            "test": False,
+        }
 
         for key in defaults:
             if key not in args:
@@ -194,12 +232,13 @@
         # object.
         if "smiles" in args:
             if isinstance(args["smiles"], str):
-                args["smiles_file"]  = StringIO(args["smiles"])
+                args["smiles_file"] = StringIO(args["smiles"])
 
         args["smiles_and_data"] = LoadSMIFile(args["smiles_file"])
 
         return args
 
+
 class UtilFuncs:
     """A namespace to store functions for manipulating mol objects. To keep
     things organized."""
@@ -215,15 +254,33 @@
 
         # Get the reaction data
         rxn_data = [
-            ['[Ov1-1:1]', '[Ov2+0:1]-[H]'],  # To handle O- bonded to only one atom (add hydrogen).
-            ['[#7v4+1:1]-[H]', '[#7v3+0:1]'],  # To handle N+ bonded to a hydrogen (remove hydrogen).
-            ['[Ov2-:1]', '[Ov2+0:1]'],  # To handle O- bonded to two atoms. Should not be Negative.
-            ['[#7v3+1:1]', '[#7v3+0:1]'],  # To handle N+ bonded to three atoms. Should not be positive.
-            ['[#7v2-1:1]', '[#7+0:1]-[H]'],  # To handle N- Bonded to two atoms. Add hydrogen.
+            [
+                "[Ov1-1:1]",
+                "[Ov2+0:1]-[H]",
+            ],  # To handle O- bonded to only one atom (add hydrogen).
+            [
+                "[#7v4+1:1]-[H]",
+                "[#7v3+0:1]",
+            ],  # To handle N+ bonded to a hydrogen (remove hydrogen).
+            [
+                "[Ov2-:1]",
+                "[Ov2+0:1]",
+            ],  # To handle O- bonded to two atoms. Should not be Negative.
+            [
+                "[#7v3+1:1]",
+                "[#7v3+0:1]",
+            ],  # To handle N+ bonded to three atoms. Should not be positive.
+            [
+                "[#7v2-1:1]",
+                "[#7+0:1]-[H]",
+            ],  # To handle N- Bonded to two atoms. Add hydrogen.
             # ['[N:1]=[N+0:2]=[N:3]-[H]', '[N:1]=[N+1:2]=[N+0:3]-[H]'],  # To
             # handle bad azide. Must be protonated. (Now handled elsewhere, before
             # SMILES converted to Mol object.)
-            ['[H]-[N:1]-[N:2]#[N:3]', '[N:1]=[N+1:2]=[N:3]-[H]']  # To handle bad azide. R-N-N#N should be R-N=[N+]=N
+            [
+                "[H]-[N:1]-[N:2]#[N:3]",
+                "[N:1]=[N+1:2]=[N:3]-[H]",
+            ],  # To handle bad azide. R-N-N#N should be R-N=[N+]=N
         ]
 
         # Add substructures and reactions (initially none)
@@ -241,10 +298,15 @@
             current_rxn_str = None
 
             for i, rxn_datum in enumerate(rxn_data):
-                reactant_smarts, product_smarts, substruct_match_mol, rxn_placeholder = rxn_datum
+                (
+                    reactant_smarts,
+                    product_smarts,
+                    substruct_match_mol,
+                    rxn_placeholder,
+                ) = rxn_datum
                 if mol.HasSubstructMatch(substruct_match_mol):
                     if rxn_placeholder is None:
-                        current_rxn_str = reactant_smarts + '>>' + product_smarts
+                        current_rxn_str = reactant_smarts + ">>" + product_smarts
                         current_rxn = AllChem.ReactionFromSmarts(current_rxn_str)
                         rxn_data[i][3] = current_rxn  # Update the placeholder.
                     else:
@@ -262,10 +324,10 @@
         # to resanitize them. Make sure aromatic rings are shown as such This
         # catches all RDKit Errors. without the catchError and sanitizeOps the
         # Chem.SanitizeMol can crash the program.
-        sanitize_string =  Chem.SanitizeMol(
+        sanitize_string = Chem.SanitizeMol(
             mol,
             sanitizeOps=rdkit.Chem.rdmolops.SanitizeFlags.SANITIZE_ALL,
-            catchErrors = True
+            catchErrors=True,
         )
 
         return mol if sanitize_string.name == "SANITIZE_NONE" else None
@@ -321,6 +383,7 @@
 
         print(*args, file=sys.stderr, **kwargs)
 
+
 class LoadSMIFile(object):
     """A generator class for loading in the SMILES strings from a file, one at
     a time."""
@@ -388,37 +451,43 @@
             # into a canonical form. Filter if failed.
             mol = UtilFuncs.convert_smiles_str_to_mol(smiles_str)
             if mol is None:
-                UtilFuncs.eprint("WARNING: Skipping poorly formed SMILES string: " + line)
+                UtilFuncs.eprint(
+                    "WARNING: Skipping poorly formed SMILES string: " + line
+                )
                 return self.next()
 
             # Handle nuetralizing the molecules. Filter if failed.
             mol = UtilFuncs.neutralize_mol(mol)
             if mol is None:
-                UtilFuncs.eprint("WARNING: Skipping poorly formed SMILES string: " + line)
+                UtilFuncs.eprint(
+                    "WARNING: Skipping poorly formed SMILES string: " + line
+                )
                 return self.next()
 
             # Remove the hydrogens.
             try:
                 mol = Chem.RemoveHs(mol)
-            except:
-                UtilFuncs.eprint("WARNING: Skipping poorly formed SMILES string: " + line)
+            except Exception:
+                UtilFuncs.eprint(
+                    "WARNING: Skipping poorly formed SMILES string: " + line
+                )
                 return self.next()
 
             if mol is None:
-                UtilFuncs.eprint("WARNING: Skipping poorly formed SMILES string: " + line)
+                UtilFuncs.eprint(
+                    "WARNING: Skipping poorly formed SMILES string: " + line
+                )
                 return self.next()
 
             # Regenerate the smiles string (to standardize).
             new_mol_string = Chem.MolToSmiles(mol, isomericSmiles=True)
 
-            return {
-                "smiles": new_mol_string,
-                "data": splits[1:]
-            }
+            return {"smiles": new_mol_string, "data": splits[1:]}
         else:
             # Blank line? Go to next one.
             return self.next()
 
+
 class Protonate(object):
     """A generator class for protonating SMILES strings, one at a time."""
 
@@ -491,8 +560,8 @@
 
         smi = smile_and_datum["smiles"]
         data = smile_and_datum["data"]  # Everything on SMILES line but the
-                                        # SMILES string itself (e.g., the
-                                        # molecule name).
+        # SMILES string itself (e.g., the
+        # molecule name).
 
         # Collect the data associated with this smiles (e.g., the molecule
         # name).
@@ -516,8 +585,8 @@
 
             # Only add new smiles if not already in the list.
             # for s in new_smis_to_perhaps_add:
-                # if not s in new_smis:
-                    # new_smis.append(s)
+            # if not s in new_smis:
+            # new_smis.append(s)
 
         # In some cases, the script might generate redundant molecules.
         # Phosphonates, when the pH is between the two pKa values and the
@@ -532,7 +601,9 @@
         # Sometimes Dimorphite-DL generates molecules that aren't actually
         # possible. Simply convert these to mol objects to eliminate the bad
         # ones (that are None).
-        new_smis = [s for s in new_smis if UtilFuncs.convert_smiles_str_to_mol(s) is not None]
+        new_smis = [
+            s for s in new_smis if UtilFuncs.convert_smiles_str_to_mol(s) is not None
+        ]
 
         # If there are no smi left, return the input one at the very least.
         # All generated forms have apparently been judged
@@ -543,7 +614,7 @@
         # If the user wants to see the target states, add those
         # to the ends of each line.
         if self.args["label_states"]:
-            states = '\t'.join([x[1] for x in sites])
+            states = "\t".join([x[1] for x in sites])
             new_lines = [x + "\t" + tag + "\t" + states for x in new_smis]
         else:
             new_lines = [x + "\t" + tag for x in new_smis]
@@ -552,12 +623,15 @@
 
         return self.next()
 
+
 class ProtSubstructFuncs:
     """A namespace to store functions for loading the substructures that can
     be protonated. To keep things organized."""
 
     @staticmethod
-    def load_protonation_substructs_calc_state_for_ph(min_ph=6.4, max_ph=8.4, pka_std_range=1):
+    def load_protonation_substructs_calc_state_for_ph(
+        min_ph=6.4, max_ph=8.4, pka_std_range=1
+    ):
         """A pre-calculated list of R-groups with protonation sites, with their
         likely pKa bins.
 
@@ -573,7 +647,7 @@
         pwd = os.path.dirname(os.path.realpath(__file__))
 
         site_structures_file = "{}/{}".format(pwd, "site_substructures.smarts")
-        with open(site_structures_file, 'r') as substruct:
+        with open(site_structures_file, "r") as substruct:
             for line in substruct:
                 line = line.strip()
                 sub = {}
@@ -584,7 +658,9 @@
                     sub["mol"] = Chem.MolFromSmarts(sub["smart"])
 
                     # NEED TO DIVIDE THIS BY 3s
-                    pka_ranges = [splits[i:i+3] for i in range(2, len(splits)-1, 3)]
+                    pka_ranges = [
+                        splits[i : i + 3] for i in range(2, len(splits) - 1, 3)
+                    ]
 
                     prot = []
                     for pka_range in pka_ranges:
@@ -620,11 +696,11 @@
         # This needs to be reassigned, and 'ERROR' should never make it past the
         # next set of checks.
         if min_pka <= max_ph and min_ph <= max_pka:
-            protonation_state = 'BOTH'
+            protonation_state = "BOTH"
         elif mean > max_ph:
-            protonation_state = 'PROTONATED'
+            protonation_state = "PROTONATED"
         else:
-            protonation_state = 'DEPROTONATED'
+            protonation_state = "DEPROTONATED"
 
         return protonation_state
 
@@ -650,8 +726,8 @@
 
         # Try to Add hydrogens. if failed return []
         try:
-            mol =  Chem.AddHs(mol)
-        except:
+            mol = Chem.AddHs(mol)
+        except Exception:
             UtilFuncs.eprint("ERROR:   ", smi)
             return []
 
@@ -701,14 +777,14 @@
         # Initialize the output list
         output_smis = []
 
-        state_to_charge = {"DEPROTONATED": [-1],
-                        "PROTONATED": [0],
-                        "BOTH": [-1, 0]}
+        state_to_charge = {"DEPROTONATED": [-1], "PROTONATED": [0], "BOTH": [-1, 0]}
 
         charges = state_to_charge[target_prot_state]
 
         # Now make the actual smiles match the target protonation state.
-        output_smis = ProtSubstructFuncs.set_protonation_charge(smis, idx, charges, prot_site_name)
+        output_smis = ProtSubstructFuncs.set_protonation_charge(
+            smis, idx, charges, prot_site_name
+        )
 
         return output_smis
 
@@ -759,11 +835,12 @@
                     atom.SetFormalCharge(charge)
 
                 # Convert back to SMILE and add to output
-                out_smile = Chem.MolToSmiles(mol, isomericSmiles=True,canonical=True)
+                out_smile = Chem.MolToSmiles(mol, isomericSmiles=True, canonical=True)
                 output.append(out_smile)
 
         return output
 
+
 class ProtectUnprotectFuncs:
     """A namespace for storing functions that are useful for protecting and
     unprotecting molecules. To keep things organized. We need to identify and
@@ -779,7 +856,7 @@
         """
 
         for atom in mol.GetAtoms():
-            atom.SetProp('_protected', '0')
+            atom.SetProp("_protected", "0")
 
     @staticmethod
     def protect_molecule(mol, match):
@@ -793,7 +870,7 @@
 
         for idx in match:
             atom = mol.GetAtomWithIdx(idx)
-            atom.SetProp('_protected', '1')
+            atom.SetProp("_protected", "1")
 
     @staticmethod
     def get_unprotected_matches(mol, substruct):
@@ -829,6 +906,7 @@
                 return False
         return True
 
+
 class TestFuncs:
     """A namespace for storing functions that perform tests on the code. To
     keep things organized."""
@@ -839,53 +917,158 @@
 
         smis = [
             # [input smiles, pka, protonated, deprotonated, category]
-            ["C#CCO",                  "C#CCO",                     "C#CC[O-]",                 "Alcohol"],
-            ["C(=O)N",                 "NC=O",                      "[NH-]C=O",                 "Amide"],
-            ["CC(=O)NOC(C)=O",         "CC(=O)NOC(C)=O",            "CC(=O)[N-]OC(C)=O",        "Amide_electronegative"],
-            ["COC(=N)N",               "COC(N)=[NH2+]",             "COC(=N)N",                 "AmidineGuanidine2"],
-            ["Brc1ccc(C2NCCS2)cc1",    "Brc1ccc(C2[NH2+]CCS2)cc1",  "Brc1ccc(C2NCCS2)cc1",      "Amines_primary_secondary_tertiary"],
-            ["CC(=O)[n+]1ccc(N)cc1",   "CC(=O)[n+]1ccc([NH3+])cc1", "CC(=O)[n+]1ccc(N)cc1",     "Anilines_primary"],
-            ["CCNc1ccccc1",            "CC[NH2+]c1ccccc1",          "CCNc1ccccc1",              "Anilines_secondary"],
-            ["Cc1ccccc1N(C)C",         "Cc1ccccc1[NH+](C)C",        "Cc1ccccc1N(C)C",           "Anilines_tertiary"],
-            ["BrC1=CC2=C(C=C1)NC=C2",  "Brc1ccc2[nH]ccc2c1",        "Brc1ccc2[n-]ccc2c1",       "Indole_pyrrole"],
-            ["O=c1cc[nH]cc1",          "O=c1cc[nH]cc1",             "O=c1cc[n-]cc1",            "Aromatic_nitrogen_protonated"],
-            ["C-N=[N+]=[N@H]",         "CN=[N+]=N",                 "CN=[N+]=[N-]",             "Azide"],
-            ["BrC(C(O)=O)CBr",         "O=C(O)C(Br)CBr",            "O=C([O-])C(Br)CBr",        "Carboxyl"],
-            ["NC(NN=O)=N",             "NC(=[NH2+])NN=O",           "N=C(N)NN=O",               "AmidineGuanidine1"],
-            ["C(F)(F)(F)C(=O)NC(=O)C", "CC(=O)NC(=O)C(F)(F)F",      "CC(=O)[N-]C(=O)C(F)(F)F",  "Imide"],
-            ["O=C(C)NC(C)=O",          "CC(=O)NC(C)=O",             "CC(=O)[N-]C(C)=O",         "Imide2"],
-            ["CC(C)(C)C(N(C)O)=O",     "CN(O)C(=O)C(C)(C)C",        "CN([O-])C(=O)C(C)(C)C",    "N-hydroxyamide"],
-            ["C[N+](O)=O",             "C[N+](=O)O",                "C[N+](=O)[O-]",            "Nitro"],
-            ["O=C1C=C(O)CC1",          "O=C1C=C(O)CC1",             "O=C1C=C([O-])CC1",         "O=C-C=C-OH"],
-            ["C1CC1OO",                "OOC1CC1",                   "[O-]OC1CC1",               "Peroxide2"],
-            ["C(=O)OO",                "O=COO",                     "O=CO[O-]",                 "Peroxide1"],
-            ["Brc1cc(O)cc(Br)c1",      "Oc1cc(Br)cc(Br)c1",         "[O-]c1cc(Br)cc(Br)c1",     "Phenol"],
-            ["CC(=O)c1ccc(S)cc1",      "CC(=O)c1ccc(S)cc1",         "CC(=O)c1ccc([S-])cc1",     "Phenyl_Thiol"],
-            ["C=CCOc1ccc(C(=O)O)cc1",  "C=CCOc1ccc(C(=O)O)cc1",     "C=CCOc1ccc(C(=O)[O-])cc1", "Phenyl_carboxyl"],
-            ["COP(=O)(O)OC",           "COP(=O)(O)OC",              "COP(=O)([O-])OC",          "Phosphate_diester"],
-            ["CP(C)(=O)O",             "CP(C)(=O)O",                "CP(C)(=O)[O-]",            "Phosphinic_acid"],
-            ["CC(C)OP(C)(=O)O",        "CC(C)OP(C)(=O)O",           "CC(C)OP(C)(=O)[O-]",       "Phosphonate_ester"],
-            ["CC1(C)OC(=O)NC1=O",      "CC1(C)OC(=O)NC1=O",         "CC1(C)OC(=O)[N-]C1=O",     "Ringed_imide1"],
-            ["O=C(N1)C=CC1=O",         "O=C1C=CC(=O)N1",            "O=C1C=CC(=O)[N-]1",        "Ringed_imide2"],
-            ["O=S(OC)(O)=O",           "COS(=O)(=O)O",              "COS(=O)(=O)[O-]",          "Sulfate"],
-            ["COc1ccc(S(=O)O)cc1",     "COc1ccc(S(=O)O)cc1",        "COc1ccc(S(=O)[O-])cc1",    "Sulfinic_acid"],
-            ["CS(N)(=O)=O",            "CS(N)(=O)=O",               "CS([NH-])(=O)=O",          "Sulfonamide"],
-            ["CC(=O)CSCCS(O)(=O)=O",   "CC(=O)CSCCS(=O)(=O)O",      "CC(=O)CSCCS(=O)(=O)[O-]",  "Sulfonate"],
-            ["CC(=O)S",                "CC(=O)S",                   "CC(=O)[S-]",               "Thioic_acid"],
-            ["C(C)(C)(C)(S)",          "CC(C)(C)S",                 "CC(C)(C)[S-]",             "Thiol"],
-            ["Brc1cc[nH+]cc1",         "Brc1cc[nH+]cc1",            "Brc1ccncc1",               "Aromatic_nitrogen_unprotonated"],
-            ["C=C(O)c1c(C)cc(C)cc1C",  "C=C(O)c1c(C)cc(C)cc1C",     "C=C([O-])c1c(C)cc(C)cc1C", "Vinyl_alcohol"],
-            ["CC(=O)ON",               "CC(=O)O[NH3+]",             "CC(=O)ON",                 "Primary_hydroxyl_amine"]
+            ["C#CCO", "C#CCO", "C#CC[O-]", "Alcohol"],
+            ["C(=O)N", "NC=O", "[NH-]C=O", "Amide"],
+            [
+                "CC(=O)NOC(C)=O",
+                "CC(=O)NOC(C)=O",
+                "CC(=O)[N-]OC(C)=O",
+                "Amide_electronegative",
+            ],
+            ["COC(=N)N", "COC(N)=[NH2+]", "COC(=N)N", "AmidineGuanidine2"],
+            [
+                "Brc1ccc(C2NCCS2)cc1",
+                "Brc1ccc(C2[NH2+]CCS2)cc1",
+                "Brc1ccc(C2NCCS2)cc1",
+                "Amines_primary_secondary_tertiary",
+            ],
+            [
+                "CC(=O)[n+]1ccc(N)cc1",
+                "CC(=O)[n+]1ccc([NH3+])cc1",
+                "CC(=O)[n+]1ccc(N)cc1",
+                "Anilines_primary",
+            ],
+            ["CCNc1ccccc1", "CC[NH2+]c1ccccc1", "CCNc1ccccc1", "Anilines_secondary"],
+            [
+                "Cc1ccccc1N(C)C",
+                "Cc1ccccc1[NH+](C)C",
+                "Cc1ccccc1N(C)C",
+                "Anilines_tertiary",
+            ],
+            [
+                "BrC1=CC2=C(C=C1)NC=C2",
+                "Brc1ccc2[nH]ccc2c1",
+                "Brc1ccc2[n-]ccc2c1",
+                "Indole_pyrrole",
+            ],
+            [
+                "O=c1cc[nH]cc1",
+                "O=c1cc[nH]cc1",
+                "O=c1cc[n-]cc1",
+                "Aromatic_nitrogen_protonated",
+            ],
+            ["C-N=[N+]=[N@H]", "CN=[N+]=N", "CN=[N+]=[N-]", "Azide"],
+            ["BrC(C(O)=O)CBr", "O=C(O)C(Br)CBr", "O=C([O-])C(Br)CBr", "Carboxyl"],
+            ["NC(NN=O)=N", "NC(=[NH2+])NN=O", "N=C(N)NN=O", "AmidineGuanidine1"],
+            [
+                "C(F)(F)(F)C(=O)NC(=O)C",
+                "CC(=O)NC(=O)C(F)(F)F",
+                "CC(=O)[N-]C(=O)C(F)(F)F",
+                "Imide",
+            ],
+            ["O=C(C)NC(C)=O", "CC(=O)NC(C)=O", "CC(=O)[N-]C(C)=O", "Imide2"],
+            [
+                "CC(C)(C)C(N(C)O)=O",
+                "CN(O)C(=O)C(C)(C)C",
+                "CN([O-])C(=O)C(C)(C)C",
+                "N-hydroxyamide",
+            ],
+            ["C[N+](O)=O", "C[N+](=O)O", "C[N+](=O)[O-]", "Nitro"],
+            ["O=C1C=C(O)CC1", "O=C1C=C(O)CC1", "O=C1C=C([O-])CC1", "O=C-C=C-OH"],
+            ["C1CC1OO", "OOC1CC1", "[O-]OC1CC1", "Peroxide2"],
+            ["C(=O)OO", "O=COO", "O=CO[O-]", "Peroxide1"],
+            [
+                "Brc1cc(O)cc(Br)c1",
+                "Oc1cc(Br)cc(Br)c1",
+                "[O-]c1cc(Br)cc(Br)c1",
+                "Phenol",
+            ],
+            [
+                "CC(=O)c1ccc(S)cc1",
+                "CC(=O)c1ccc(S)cc1",
+                "CC(=O)c1ccc([S-])cc1",
+                "Phenyl_Thiol",
+            ],
+            [
+                "C=CCOc1ccc(C(=O)O)cc1",
+                "C=CCOc1ccc(C(=O)O)cc1",
+                "C=CCOc1ccc(C(=O)[O-])cc1",
+                "Phenyl_carboxyl",
+            ],
+            ["COP(=O)(O)OC", "COP(=O)(O)OC", "COP(=O)([O-])OC", "Phosphate_diester"],
+            ["CP(C)(=O)O", "CP(C)(=O)O", "CP(C)(=O)[O-]", "Phosphinic_acid"],
+            [
+                "CC(C)OP(C)(=O)O",
+                "CC(C)OP(C)(=O)O",
+                "CC(C)OP(C)(=O)[O-]",
+                "Phosphonate_ester",
+            ],
+            [
+                "CC1(C)OC(=O)NC1=O",
+                "CC1(C)OC(=O)NC1=O",
+                "CC1(C)OC(=O)[N-]C1=O",
+                "Ringed_imide1",
+            ],
+            ["O=C(N1)C=CC1=O", "O=C1C=CC(=O)N1", "O=C1C=CC(=O)[N-]1", "Ringed_imide2"],
+            ["O=S(OC)(O)=O", "COS(=O)(=O)O", "COS(=O)(=O)[O-]", "Sulfate"],
+            [
+                "COc1ccc(S(=O)O)cc1",
+                "COc1ccc(S(=O)O)cc1",
+                "COc1ccc(S(=O)[O-])cc1",
+                "Sulfinic_acid",
+            ],
+            ["CS(N)(=O)=O", "CS(N)(=O)=O", "CS([NH-])(=O)=O", "Sulfonamide"],
+            [
+                "CC(=O)CSCCS(O)(=O)=O",
+                "CC(=O)CSCCS(=O)(=O)O",
+                "CC(=O)CSCCS(=O)(=O)[O-]",
+                "Sulfonate",
+            ],
+            ["CC(=O)S", "CC(=O)S", "CC(=O)[S-]", "Thioic_acid"],
+            ["C(C)(C)(C)(S)", "CC(C)(C)S", "CC(C)(C)[S-]", "Thiol"],
+            [
+                "Brc1cc[nH+]cc1",
+                "Brc1cc[nH+]cc1",
+                "Brc1ccncc1",
+                "Aromatic_nitrogen_unprotonated",
+            ],
+            [
+                "C=C(O)c1c(C)cc(C)cc1C",
+                "C=C(O)c1c(C)cc(C)cc1C",
+                "C=C([O-])c1c(C)cc(C)cc1C",
+                "Vinyl_alcohol",
+            ],
+            ["CC(=O)ON", "CC(=O)O[NH3+]", "CC(=O)ON", "Primary_hydroxyl_amine"],
         ]
 
         smis_phos = [
-            ["O=P(O)(O)OCCCC", "CCCCOP(=O)(O)O", "CCCCOP(=O)([O-])O", "CCCCOP(=O)([O-])[O-]", "Phosphate"],
-            ["CC(P(O)(O)=O)C", "CC(C)P(=O)(O)O", "CC(C)P(=O)([O-])O", "CC(C)P(=O)([O-])[O-]", "Phosphonate"]
+            [
+                "O=P(O)(O)OCCCC",
+                "CCCCOP(=O)(O)O",
+                "CCCCOP(=O)([O-])O",
+                "CCCCOP(=O)([O-])[O-]",
+                "Phosphate",
+            ],
+            [
+                "CC(P(O)(O)=O)C",
+                "CC(C)P(=O)(O)O",
+                "CC(C)P(=O)([O-])O",
+                "CC(C)P(=O)([O-])[O-]",
+                "Phosphonate",
+            ],
         ]
 
         # Load the average pKa values.
-        average_pkas = {l.split()[0].replace("*", ""):float(l.split()[3]) for l in open("site_substructures.smarts") if l.split()[0] not in ["Phosphate", "Phosphonate"]}
-        average_pkas_phos = {l.split()[0].replace("*", ""):[float(l.split()[3]), float(l.split()[6])] for l in open("site_substructures.smarts") if l.split()[0] in ["Phosphate", "Phosphonate"]}
+        average_pkas = {
+            l.split()[0].replace("*", ""): float(l.split()[3])
+            for l in open("site_substructures.smarts")
+            if l.split()[0] not in ["Phosphate", "Phosphonate"]
+        }
+        average_pkas_phos = {
+            l.split()[0].replace("*", ""): [float(l.split()[3]), float(l.split()[6])]
+            for l in open("site_substructures.smarts")
+            if l.split()[0] in ["Phosphate", "Phosphonate"]
+        }
 
         print("Running Tests")
         print("=============")
@@ -900,7 +1083,7 @@
             "max_ph": -10000000,
             "pka_precision": 0.5,
             "smiles": "",
-            "label_states": True
+            "label_states": True,
         }
 
         for smi, protonated, deprotonated, category in smis:
@@ -954,14 +1137,20 @@
             args["min_ph"] = avg_pka
             args["max_ph"] = avg_pka
 
-            TestFuncs.test_check(args, [mix, deprotonated], ["DEPROTONATED", "DEPROTONATED"])
+            TestFuncs.test_check(
+                args, [mix, deprotonated], ["DEPROTONATED", "DEPROTONATED"]
+            )
 
-            avg_pka = 0.5 * (average_pkas_phos[category][0] + average_pkas_phos[category][1])
+            avg_pka = 0.5 * (
+                average_pkas_phos[category][0] + average_pkas_phos[category][1]
+            )
             args["min_ph"] = avg_pka
             args["max_ph"] = avg_pka
             args["pka_precision"] = 5  # Should give all three
 
-            TestFuncs.test_check(args, [mix, deprotonated, protonated], ["BOTH", "BOTH"])
+            TestFuncs.test_check(
+                args, [mix, deprotonated, protonated], ["BOTH", "BOTH"]
+            )
 
     @staticmethod
     def test_check(args, expected_output, labels):
@@ -981,28 +1170,56 @@
 
         num_states = len(expected_output)
 
-        if (len(output) != num_states):
-            msg = args["smiles"] + " should have " + str(num_states) + \
-                " states at at pH " + str(args["min_ph"]) + ": " + str(output)
+        if len(output) != num_states:
+            msg = (
+                args["smiles"]
+                + " should have "
+                + str(num_states)
+                + " states at at pH "
+                + str(args["min_ph"])
+                + ": "
+                + str(output)
+            )
             print(msg)
             raise Exception(msg)
 
-        if (len(set([l[0] for l in output]) - set(expected_output)) != 0):
-            msg = args["smiles"] + " is not " + " AND ".join(expected_output) + \
-                " at pH " + str(args["min_ph"]) + " - " + str(args["max_ph"]) + \
-                "; it is " + " AND ".join([l[0] for l in output])
+        if len(set([l[0] for l in output]) - set(expected_output)) != 0:
+            msg = (
+                args["smiles"]
+                + " is not "
+                + " AND ".join(expected_output)
+                + " at pH "
+                + str(args["min_ph"])
+                + " - "
+                + str(args["max_ph"])
+                + "; it is "
+                + " AND ".join([l[0] for l in output])
+            )
             print(msg)
             raise Exception(msg)
 
-        if (len(set([l[1] for l in output]) - set(labels)) != 0):
-            msg = args["smiles"] + " not labeled as " + " AND ".join(labels) + \
-                "; it is " + " AND ".join([l[1] for l in output])
+        if len(set([l[1] for l in output]) - set(labels)) != 0:
+            msg = (
+                args["smiles"]
+                + " not labeled as "
+                + " AND ".join(labels)
+                + "; it is "
+                + " AND ".join([l[1] for l in output])
+            )
             print(msg)
             raise Exception(msg)
 
         ph_range = sorted(list(set([args["min_ph"], args["max_ph"]])))
         ph_range_str = "(" + " - ".join("{0:.2f}".format(n) for n in ph_range) + ")"
-        print("(CORRECT) " + ph_range_str.ljust(10) + " " + args["smiles"] + " => " + " AND ".join([l[0] for l in output]))
+        print(
+            "(CORRECT) "
+            + ph_range_str.ljust(10)
+            + " "
+            + args["smiles"]
+            + " => "
+            + " AND ".join([l[0] for l in output])
+        )
+
 
 def run(**kwargs):
     """A helpful, importable function for those who want to call Dimorphite-DL
@@ -1019,6 +1236,7 @@
     # Run the main function with the specified arguments.
     main(kwargs)
 
+
 def run_with_mol_list(mol_lst, **kwargs):
     """A helpful, importable function for those who want to call Dimorphite-DL
     from another Python script rather than the command line. Note that this
@@ -1037,10 +1255,13 @@
     # Do a quick check to make sure the user input makes sense.
     for bad_arg in ["smiles", "smiles_file", "output_file", "test"]:
         if bad_arg in kwargs:
-            msg = "You're using Dimorphite-DL's run_with_mol_list(mol_lst, " + \
-                   "**kwargs) function, but you also passed the \"" + \
-                   bad_arg + "\" argument. Did you mean to use the " + \
-                   "run(**kwargs) function instead?"
+            msg = (
+                "You're using Dimorphite-DL's run_with_mol_list(mol_lst, "
+                + '**kwargs) function, but you also passed the "'
+                + bad_arg
+                + '" argument. Did you mean to use the '
+                + "run(**kwargs) function instead?"
+            )
             print(msg)
             raise Exception(msg)
 
@@ -1076,9 +1297,15 @@
                     m.SetProp(prop, str(val))
             mols.append(m)
         else:
-            UtilFuncs.eprint("WARNING: Could not process molecule with SMILES string " + s + " and properties " + str(props))
+            UtilFuncs.eprint(
+                "WARNING: Could not process molecule with SMILES string "
+                + s
+                + " and properties "
+                + str(props)
+            )
 
     return mols
 
+
 if __name__ == "__main__":
     main()