changeset 15:4242b4d68e9c draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/openbabel commit d9c51279c061a1da948a2582d5b502ca7573adbf
author bgruening
date Thu, 15 Aug 2024 11:06:27 +0000 (5 months ago)
parents e2c36f62e22f
children
files change_title_to_metadata_value.py cheminfolib.py distance_finder.py multi_obgrep.py ob_addh.py ob_convert.xml ob_filter.py ob_genProp.py ob_remIons.py ob_spectrophore_search.py remove_protonation_state.py subsearch.py test-data/2_mol.sdf
diffstat 13 files changed, 618 insertions(+), 294 deletions(-) [+]
line wrap: on
line diff
--- a/change_title_to_metadata_value.py	Tue Nov 10 20:33:21 2020 +0000
+++ b/change_title_to_metadata_value.py	Thu Aug 15 11:06:27 2024 +0000
@@ -11,6 +11,7 @@
 import string
 
 from openbabel import openbabel, pybel
+
 openbabel.obErrorLog.StopLogging()
 
 
@@ -19,14 +20,19 @@
         description="Change the title from a molecule file to metadata \
                      value of a given-id of the same molecule file.",
     )
-    parser.add_argument('--infile', '-i', required=True,
-                        help="path to the input file")
-    parser.add_argument('--outfile', '-o', required=True,
-                        help="path to the output file")
-    parser.add_argument('--key', '-k', required=True,
-                        help="the metadata key from the sdf file which should inlcude the new title")
-    parser.add_argument('--random', '-r', action="store_true",
-                        help="Add random suffix to the title.")
+    parser.add_argument("--infile", "-i", required=True, help="path to the input file")
+    parser.add_argument(
+        "--outfile", "-o", required=True, help="path to the output file"
+    )
+    parser.add_argument(
+        "--key",
+        "-k",
+        required=True,
+        help="the metadata key from the sdf file which should inlcude the new title",
+    )
+    parser.add_argument(
+        "--random", "-r", action="store_true", help="Add random suffix to the title."
+    )
 
     args = parser.parse_args()
 
@@ -35,8 +41,11 @@
         if args.key in mol.data:
             mol.title = mol.data[args.key]
             if args.random:
-                suffix = ''.join(random.choice(string.ascii_lowercase + string.digits) for _ in range(13))
-                mol.title += '__%s' % suffix
+                suffix = "".join(
+                    random.choice(string.ascii_lowercase + string.digits)
+                    for _ in range(13)
+                )
+                mol.title += "__%s" % suffix
         output.write(mol)
 
     output.close()
--- a/cheminfolib.py	Tue Nov 10 20:33:21 2020 +0000
+++ b/cheminfolib.py	Thu Aug 15 11:06:27 2024 +0000
@@ -11,28 +11,32 @@
 import tempfile
 from multiprocessing import Pool
 
-
 try:
     from galaxy import eggs
-    eggs.require('psycopg2')
+
+    eggs.require("psycopg2")
 except ImportError:
     psycopg2 = None
-    print('psycopg2 is not available. It is currently used in the pgchem wrappers, that are not shipped with default CTB')
+    print(
+        "psycopg2 is not available. It is currently used in the pgchem wrappers, that are not shipped with default CTB"
+    )
 
 try:
     from openbabel import openbabel, pybel
+
     openbabel.obErrorLog.StopLogging()
 except ImportError:
     openbabel, pybel = None, None
-    print('OpenBabel could not be found. A few functions are not available without OpenBabel.')
+    print(
+        "OpenBabel could not be found. A few functions are not available without OpenBabel."
+    )
 
 
 def CountLines(path):
-    out = subprocess.Popen(['wc', '-l', path],
-                           stdout=subprocess.PIPE,
-                           stderr=subprocess.STDOUT
-                           ).communicate()[0]
-    return int(out.partition(b' ')[0])
+    out = subprocess.Popen(
+        ["wc", "-l", path], stdout=subprocess.PIPE, stderr=subprocess.STDOUT
+    ).communicate()[0]
+    return int(out.partition(b" ")[0])
 
 
 def grep(pattern, file_obj):
@@ -49,15 +53,15 @@
     for line_counter, line in enumerate(open(filepath)):
         if line_counter > 10000:
             break
-        if line.find('$$$$') != -1:
-            return 'sdf'
-        elif line.find('@<TRIPOS>MOLECULE') != -1:
-            return 'mol2'
-        elif line.find('ligand id') != -1:
-            return 'drf'
-        elif possible_inchi and re.findall('^InChI=', line):
-            return 'inchi'
-        elif re.findall(r'^M\s+END', line):
+        if line.find("$$$$") != -1:
+            return "sdf"
+        elif line.find("@<TRIPOS>MOLECULE") != -1:
+            return "mol2"
+        elif line.find("ligand id") != -1:
+            return "drf"
+        elif possible_inchi and re.findall("^InChI=", line):
+            return "inchi"
+        elif re.findall(r"^M\s+END", line):
             mol = True
         # first line is not an InChI, so it can't be an InChI file
         possible_inchi = False
@@ -65,99 +69,128 @@
     if mol:
         # END can occures before $$$$, so and SDF file will
         # be recognised as mol, if you not using this hack'
-        return 'mol'
-    return 'smi'
+        return "mol"
+    return "smi"
 
 
 def db_connect(args):
     try:
-        db_conn = psycopg2.connect("dbname=%s user=%s host=%s password=%s" % (args.dbname, args.dbuser, args.dbhost, args.dbpasswd))
+        db_conn = psycopg2.connect(
+            "dbname=%s user=%s host=%s password=%s"
+            % (args.dbname, args.dbuser, args.dbhost, args.dbpasswd)
+        )
         return db_conn
     except psycopg2.Error:
-        sys.exit('Unable to connect to the db')
+        sys.exit("Unable to connect to the db")
 
 
 ColumnNames = {
-    'can_smiles': 'Canonical SMILES',
-    'can': 'Canonical SMILES',
-    'inchi': 'InChI',
-    'inchi_key': 'InChI key',
-    'inchi_key_first': 'InChI key first',
-    'inchi_key_last': 'InChI key last',
-    'molwt': 'Molecular weight',
-    'hbd': 'Hydrogen-bond donors',
-    'donors': 'Hydrogen-bond donors',
-    'hba': 'Hydrogen-bond acceptors',
-    'acceptors': 'Hydrogen-bond acceptors',
-    'rotbonds': 'Rotatable bonds',
-    'logp': 'logP',
-    'psa': 'Polar surface area',
-    'mr': 'Molecular refractivity',
-    'atoms': 'Number of heavy atoms',
-    'rings': 'Number of rings',
-    'set_bits': 'FP2 bits',
-    'id': 'Internal identifier',
-    'tani': 'Tanimoto coefficient',
-    'spectrophore': 'Spectrophores(TM)',
-    'dist_spectrophore': 'Spectrophores(TM) distance to target',
-    'synonym': 'Entry id',
+    "can_smiles": "Canonical SMILES",
+    "can": "Canonical SMILES",
+    "inchi": "InChI",
+    "inchi_key": "InChI key",
+    "inchi_key_first": "InChI key first",
+    "inchi_key_last": "InChI key last",
+    "molwt": "Molecular weight",
+    "hbd": "Hydrogen-bond donors",
+    "donors": "Hydrogen-bond donors",
+    "hba": "Hydrogen-bond acceptors",
+    "acceptors": "Hydrogen-bond acceptors",
+    "rotbonds": "Rotatable bonds",
+    "logp": "logP",
+    "psa": "Polar surface area",
+    "mr": "Molecular refractivity",
+    "atoms": "Number of heavy atoms",
+    "rings": "Number of rings",
+    "set_bits": "FP2 bits",
+    "id": "Internal identifier",
+    "tani": "Tanimoto coefficient",
+    "spectrophore": "Spectrophores(TM)",
+    "dist_spectrophore": "Spectrophores(TM) distance to target",
+    "synonym": "Entry id",
 }
 
 OBDescriptor = {
-    'atoms': ["atoms", "Number of atoms"],
-    'hatoms': ["hatoms", "Number of heavy atoms"],  # self defined tag hatoms in plugindefines.txt
-    'can_smiles': ["cansmi", "Canonical SMILES"],
-    'can_smilesNS': ["cansmiNS", "Canonical SMILES without isotopes or stereo"],
+    "atoms": ["atoms", "Number of atoms"],
+    "hatoms": [
+        "hatoms",
+        "Number of heavy atoms",
+    ],  # self defined tag hatoms in plugindefines.txt
+    "can_smiles": ["cansmi", "Canonical SMILES"],
+    "can_smilesNS": ["cansmiNS", "Canonical SMILES without isotopes or stereo"],
     # ["abonds", "Number of aromatic bonds"],
     # ["bonds", "Number of bonds"],
     # ["dbonds", "Number of double bonds"],
     # ["formula", "Chemical formula"],
-    'hba': ["HBA1", "Number of Hydrogen Bond Acceptors 1 (JoelLib)"],
-    'hba2': ["HBA2", "Number of Hydrogen Bond Acceptors 2 (JoelLib)"],
-    'hbd': ["HBD", "Number of Hydrogen Bond Donors (JoelLib)"],
-    'inchi': ["InChI", "IUPAC InChI identifier"],
-    'inchi_key': ["InChIKey", "InChIKey"],
+    "hba": ["HBA1", "Number of Hydrogen Bond Acceptors 1 (JoelLib)"],
+    "hba2": ["HBA2", "Number of Hydrogen Bond Acceptors 2 (JoelLib)"],
+    "hbd": ["HBD", "Number of Hydrogen Bond Donors (JoelLib)"],
+    "inchi": ["InChI", "IUPAC InChI identifier"],
+    "inchi_key": ["InChIKey", "InChIKey"],
     # ["L5", "Lipinski Rule of Five"],
-    'logp': ["logP", "octanol/water partition coefficient"],
-    'mr': ["MR", "molar refractivity"],
-    'molwt': ["MW", "Molecular Weight filter"],
+    "logp": ["logP", "octanol/water partition coefficient"],
+    "mr": ["MR", "molar refractivity"],
+    "molwt": ["MW", "Molecular Weight filter"],
     # ["nF", "Number of Fluorine Atoms"],
     # ["s", "SMARTS filter"],
     # ["sbonds", "Number of single bonds"],
     # ["smarts", "SMARTS filter"],
     # ["tbonds", "Number of triple bonds"],
     # ["title", "For comparing a molecule's title"],
-    'psa': ["TPSA", "topological polar surface area"],
-    'rotbonds': ['ROTATABLE_BOND', 'rotatable bonds'],
+    "psa": ["TPSA", "topological polar surface area"],
+    "rotbonds": ["ROTATABLE_BOND", "rotatable bonds"],
 }
 
 
 def print_output(args, rows):
-    if args.oformat == 'table':
-        outfile = open(args.output, 'w')
-        requested_fields = (filter(lambda x: x not in ["[", "]", "'"], args.fetch)).split(', ')
+    if args.oformat == "table":
+        outfile = open(args.output, "w")
+        requested_fields = (
+            filter(lambda x: x not in ["[", "]", "'"], args.fetch)
+        ).split(", ")
         if args.header:
-            outfile.write('Identifier\t' + '\t'.join([ColumnNames[key] for key in requested_fields]) + '\n')
+            outfile.write(
+                "Identifier\t"
+                + "\t".join([ColumnNames[key] for key in requested_fields])
+                + "\n"
+            )
         for row in rows:
-            outfile.write(row['synonym'] + '\t' + '\t'.join([str(row[key]) for key in requested_fields]) + '\n')
+            outfile.write(
+                row["synonym"]
+                + "\t"
+                + "\t".join([str(row[key]) for key in requested_fields])
+                + "\n"
+            )
 
-    elif args.oformat in ['sdf', 'mol2']:
+    elif args.oformat in ["sdf", "mol2"]:
         outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True)
         for row in rows:
             try:
-                mol = pybel.readstring('sdf', row['mol'])
-                if args.oformat == 'sdf':
-                    keys = filter(lambda x: x not in ["[", "]", "'"], args.fetch).split(', ')
-                    mol.data.update({ColumnNames['synonym']: row['synonym']})
-                    if 'inchi_key' in keys:
-                        keys = (', '.join(keys).replace("inchi_key", "inchi_key_first, inchi_key_last")).split(', ')
-                    [mol.data.update({ColumnNames[key]: row[key]}) for key in keys if key]
+                mol = pybel.readstring("sdf", row["mol"])
+                if args.oformat == "sdf":
+                    keys = filter(lambda x: x not in ["[", "]", "'"], args.fetch).split(
+                        ", "
+                    )
+                    mol.data.update({ColumnNames["synonym"]: row["synonym"]})
+                    if "inchi_key" in keys:
+                        keys = (
+                            ", ".join(keys).replace(
+                                "inchi_key", "inchi_key_first, inchi_key_last"
+                            )
+                        ).split(", ")
+                    [
+                        mol.data.update({ColumnNames[key]: row[key]})
+                        for key in keys
+                        if key
+                    ]
                 outfile.write(mol)
             except OSError:
                 pass
     else:
-        outfile = open(args.output, 'w')
-        outfile.write('\n'.join(['%s\t%s' % (row[args.oformat], row['synonym']) for row in rows]))
+        outfile = open(args.output, "w")
+        outfile.write(
+            "\n".join(["%s\t%s" % (row[args.oformat], row["synonym"]) for row in rows])
+        )
     outfile.close()
 
 
@@ -167,31 +200,37 @@
 
 def get_properties_ext(mol):
     HBD = pybel.Smarts("[!#6;!H0]")
-    HBA = pybel.Smarts(("[$([$([#8,#16]);!$(*=N~O);"
-                        "!$(*~N=O);X1,X2]),$([#7;v3;"
-                        "!$([nH]);!$(*(-a)-a)])]"
-                        ))
+    HBA = pybel.Smarts(
+        (
+            "[$([$([#8,#16]);!$(*=N~O);"
+            "!$(*~N=O);X1,X2]),$([#7;v3;"
+            "!$([nH]);!$(*(-a)-a)])]"
+        )
+    )
     calc_desc_dict = mol.calcdesc()
 
     try:
-        logp = calc_desc_dict['logP']
+        logp = calc_desc_dict["logP"]
     except KeyError:
-        logp = calc_desc_dict['LogP']
+        logp = calc_desc_dict["LogP"]
 
-    return {"molwt": mol.molwt,
-            "logp": logp,
-            "donors": len(HBD.findall(mol)),
-            "acceptors": len(HBA.findall(mol)),
-            "psa": calc_desc_dict['TPSA'],
-            "mr": calc_desc_dict['MR'],
-            "rotbonds": mol.OBMol.NumRotors(),
-            "can": mol.write("can").split()[0].strip(),  # tthis one works fine for both zinc and chembl (no ZINC code added after can descriptor string)
-            "inchi": mol.write("inchi").strip(),
-            "inchi_key": get_inchikey(mol).strip(),
-            "rings": len(mol.sssr),
-            "atoms": mol.OBMol.NumHvyAtoms(),
-            "spectrophore": OBspectrophore(mol),
-            }
+    return {
+        "molwt": mol.molwt,
+        "logp": logp,
+        "donors": len(HBD.findall(mol)),
+        "acceptors": len(HBA.findall(mol)),
+        "psa": calc_desc_dict["TPSA"],
+        "mr": calc_desc_dict["MR"],
+        "rotbonds": mol.OBMol.NumRotors(),
+        "can": mol.write("can")
+        .split()[0]
+        .strip(),  # this one works fine for both zinc and chembl (no ZINC code added after can descriptor string)
+        "inchi": mol.write("inchi").strip(),
+        "inchi_key": get_inchikey(mol).strip(),
+        "rings": len(mol.sssr),
+        "atoms": mol.OBMol.NumHvyAtoms(),
+        "spectrophore": OBspectrophore(mol),
+    }
 
 
 def get_inchikey(mol):
@@ -206,10 +245,12 @@
     spectrophore = pybel.ob.OBSpectrophore()
     # Parameters: rotation angle = 20, normalization for mean and sd, accuracy = 3.0 A and non-stereospecific cages.
     spectrophore.SetNormalization(spectrophore.NormalizationTowardsZeroMeanAndUnitStd)
-    return ', '.join(["%.3f" % value for value in spectrophore.GetSpectrophore(mol.OBMol)])
+    return ", ".join(
+        ["%.3f" % value for value in spectrophore.GetSpectrophore(mol.OBMol)]
+    )
 
 
-def split_library(lib_path, lib_format='sdf', package_size=None):
+def split_library(lib_path, lib_format="sdf", package_size=None):
     """
     Split a library of compounds. Usage: split_library(lib_path, lib_format, package_size)
     IT currently ONLY WORKS FOR SD-Files
@@ -217,18 +258,39 @@
     pack = 1
     mol_counter = 0
 
-    outfile = open('/%s/%s_pack_%i.%s' % ('/'.join(lib_path.split('/')[:-1]), lib_path.split('/')[-1].split('.')[0], pack, 'sdf'), 'w')
+    outfile = open(
+        "/%s/%s_pack_%i.%s"
+        % (
+            "/".join(lib_path.split("/")[:-1]),
+            lib_path.split("/")[-1].split(".")[0],
+            pack,
+            "sdf",
+        ),
+        "w",
+    )
 
-    for line in open(lib_path, 'r'):
+    for line in open(lib_path, "r"):
         outfile.write(line)
-        if line.strip() == '$$$$':
+        if line.strip() == "$$$$":
             mol_counter += 1
             if mol_counter % package_size == 0:
                 outfile.close()
                 pack += 1
-                outfile = open('/%s/%s_pack_%i.%s' % ('/'.join(lib_path.split('/')[:-1]), lib_path.split('/')[-1].split('.')[0], pack, 'sdf'), 'w')
+                outfile = open(
+                    "/%s/%s_pack_%i.%s"
+                    % (
+                        "/".join(lib_path.split("/")[:-1]),
+                        lib_path.split("/")[-1].split(".")[0],
+                        pack,
+                        "sdf",
+                    ),
+                    "w",
+                )
                 if mol_counter * 10 % package_size == 0:
-                    print('%i molecules parsed, starting pack nr. %i' % (mol_counter, pack - 1))
+                    print(
+                        "%i molecules parsed, starting pack nr. %i"
+                        % (mol_counter, pack - 1)
+                    )
     outfile.close()
 
     return True
@@ -242,7 +304,7 @@
     output_files = []
     tfile = tempfile.NamedTemporaryFile(delete=False)
 
-    smiles_handle = open(smiles_file, 'r')
+    smiles_handle = open(smiles_file, "r")
     for count, line in enumerate(smiles_handle):
         if count % structures_in_one_file == 0 and count != 0:
             tfile.close()
@@ -257,16 +319,19 @@
 
 def mp_run(input_path, regex, PROCESSES, function_to_call):
     paths = []
-    [paths.append(compound_file) for compound_file in glob.glob(str(input_path) + str(regex))]
+    [
+        paths.append(compound_file)
+        for compound_file in glob.glob(str(input_path) + str(regex))
+    ]
     paths.sort()
 
     pool = Pool(processes=PROCESSES)
-    print('Process initialized with', PROCESSES, 'processors')
+    print("Process initialized with", PROCESSES, "processors")
     result = pool.map_async(function_to_call, paths)
     result.get()
 
     return paths
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     print(check_filetype(sys.argv[1]))
--- a/distance_finder.py	Tue Nov 10 20:33:21 2020 +0000
+++ b/distance_finder.py	Thu Aug 15 11:06:27 2024 +0000
@@ -19,9 +19,8 @@
 
 
 def log(*args, **kwargs):
-    """Log output to STDERR
-    """
-    print(*args, file=sys.stderr, ** kwargs)
+    """Log output to STDERR"""
+    print(*args, file=sys.stderr, **kwargs)
 
 
 def execute(ligands_sdf, points_file, outfile):
@@ -35,7 +34,7 @@
     points = []
 
     # read the points
-    with open(points_file, 'r') as f:
+    with open(points_file, "r") as f:
         for line in f.readlines():
             line.strip()
             if line:
@@ -45,7 +44,7 @@
                     log("Read points", p)
                     continue
             log("Failed to read line:", line)
-    log('Found', len(points), 'atom points')
+    log("Found", len(points), "atom points")
 
     sdf_writer = pybel.Outputfile("sdf", outfile, overwrite=True)
 
@@ -53,7 +52,7 @@
     for mol in pybel.readfile("sdf", ligands_sdf):
         count += 1
         if count % 50000 == 0:
-            log('Processed', count)
+            log("Processed", count)
 
         try:
             # print("Processing mol", mol.title)
@@ -70,32 +69,42 @@
                 distances = []
                 for i in coords:
                     # calculates distance based on cartesian coordinates
-                    distance = math.sqrt((point[0] - i[0])**2 + (point[1] - i[1])**2 + (point[2] - i[2])**2)
+                    distance = math.sqrt(
+                        (point[0] - i[0]) ** 2
+                        + (point[1] - i[1]) ** 2
+                        + (point[2] - i[2]) ** 2
+                    )
                     distances.append(distance)
                     # log("distance:", distance)
                 min_distance = min(distances)
                 # log('Min:', min_distance)
                 # log(count, p, min_distance)
 
-                mol.data['distance' + str(p)] = min_distance
+                mol.data["distance" + str(p)] = min_distance
 
             sdf_writer.write(mol)
 
         except Exception as e:
-            log('Failed to handle molecule: ' + str(e))
+            log("Failed to handle molecule: " + str(e))
             continue
 
     sdf_writer.close()
-    log('Wrote', count, 'molecules')
+    log("Wrote", count, "molecules")
 
 
 def main():
     global work_dir
 
-    parser = argparse.ArgumentParser(description='XChem distances - measure distances to particular points')
-    parser.add_argument('-i', '--input', help="SDF containing the 3D molecules to score)")
-    parser.add_argument('-p', '--points', help="PDB format file with atoms")
-    parser.add_argument('-o', '--outfile', default='output.sdf', help="File name for results")
+    parser = argparse.ArgumentParser(
+        description="XChem distances - measure distances to particular points"
+    )
+    parser.add_argument(
+        "-i", "--input", help="SDF containing the 3D molecules to score)"
+    )
+    parser.add_argument("-p", "--points", help="PDB format file with atoms")
+    parser.add_argument(
+        "-o", "--outfile", default="output.sdf", help="File name for results"
+    )
 
     args = parser.parse_args()
     log("XChem distances args: ", args)
--- a/multi_obgrep.py	Tue Nov 10 20:33:21 2020 +0000
+++ b/multi_obgrep.py	Thu Aug 15 11:06:27 2024 +0000
@@ -15,21 +15,55 @@
 
 def parse_command_line():
     parser = argparse.ArgumentParser()
-    parser.add_argument('-i', '--infile', required=True, help='Molecule file.')
-    parser.add_argument('-q', '--query', required=True, help='Query file, containing different SMARTS in each line.')
-    parser.add_argument('-o', '--outfile', required=True, help='Path to the output file.')
+    parser.add_argument("-i", "--infile", required=True, help="Molecule file.")
+    parser.add_argument(
+        "-q",
+        "--query",
+        required=True,
+        help="Query file, containing different SMARTS in each line.",
+    )
+    parser.add_argument(
+        "-o", "--outfile", required=True, help="Path to the output file."
+    )
     parser.add_argument("--iformat", help="Input format, like smi, sdf, inchi")
-    parser.add_argument("--n-times", dest="n_times", type=int,
-                        default=0, help="Print a molecule only if the pattern occurs # times inside the molecule.")
-    parser.add_argument('-p', '--processors', type=int, default=multiprocessing.cpu_count())
-    parser.add_argument("--invert-matches", dest="invert_matches", action="store_true",
-                        default=False, help="Invert the matching, print non-matching molecules.")
-    parser.add_argument("--only-name", dest="only_name", action="store_true",
-                        default=False, help="Only print the name of the molecules.")
-    parser.add_argument("--full-match", dest="full_match", action="store_true",
-                        default=False, help="Full match, print matching-molecules only when the number of heavy atoms is also equal to the number of atoms in the SMARTS pattern.")
-    parser.add_argument("--number-of-matches", dest="number_of_matches", action="store_true",
-                        default=False, help="Print the number of matches.")
+    parser.add_argument(
+        "--n-times",
+        dest="n_times",
+        type=int,
+        default=0,
+        help="Print a molecule only if the pattern occurs # times inside the molecule.",
+    )
+    parser.add_argument(
+        "-p", "--processors", type=int, default=multiprocessing.cpu_count()
+    )
+    parser.add_argument(
+        "--invert-matches",
+        dest="invert_matches",
+        action="store_true",
+        default=False,
+        help="Invert the matching, print non-matching molecules.",
+    )
+    parser.add_argument(
+        "--only-name",
+        dest="only_name",
+        action="store_true",
+        default=False,
+        help="Only print the name of the molecules.",
+    )
+    parser.add_argument(
+        "--full-match",
+        dest="full_match",
+        action="store_true",
+        default=False,
+        help="Full match, print matching-molecules only when the number of heavy atoms is also equal to the number of atoms in the SMARTS pattern.",
+    )
+    parser.add_argument(
+        "--number-of-matches",
+        dest="number_of_matches",
+        action="store_true",
+        default=False,
+        help="Print the number of matches.",
+    )
     return parser.parse_args()
 
 
@@ -42,25 +76,27 @@
 
 def mp_helper(query, args):
     """
-        Helper function for multiprocessing.
-        That function is a wrapper around obgrep.
+    Helper function for multiprocessing.
+    That function is a wrapper around obgrep.
     """
 
     cmd_list = []
     if args.invert_matches:
-        cmd_list.append('-v')
+        cmd_list.append("-v")
     if args.only_name:
-        cmd_list.append('-n')
+        cmd_list.append("-n")
     if args.full_match:
-        cmd_list.append('-f')
+        cmd_list.append("-f")
     if args.number_of_matches:
-        cmd_list.append('-c')
+        cmd_list.append("-c")
     if args.n_times:
-        cmd_list.append('-t %s' % str(args.n_times))
+        cmd_list.append("-t %s" % str(args.n_times))
 
     tmp = tempfile.NamedTemporaryFile(delete=False)
-    cmd = 'obgrep %s "%s" %s' % (' '.join(cmd_list), query, args.infile)
-    child = subprocess.Popen(shlex.split(cmd), stdout=open(tmp.name, 'w+'), stderr=subprocess.PIPE)
+    cmd = 'obgrep %s "%s" %s' % (" ".join(cmd_list), query, args.infile)
+    child = subprocess.Popen(
+        shlex.split(cmd), stdout=open(tmp.name, "w+"), stderr=subprocess.PIPE
+    )
 
     stdout, stderr = child.communicate()
     return (tmp.name, query)
@@ -80,9 +116,9 @@
     pool.close()
     pool.join()
 
-    out_handle = open(args.outfile, 'wb')
+    out_handle = open(args.outfile, "wb")
     for result_file, query in results:
-        res_handle = open(result_file, 'rb')
+        res_handle = open(result_file, "rb")
         shutil.copyfileobj(res_handle, out_handle)
         res_handle.close()
         os.remove(result_file)
@@ -93,7 +129,7 @@
 
 def __main__():
     """
-        Multiprocessing obgrep search.
+    Multiprocessing obgrep search.
     """
     args = parse_command_line()
     obgrep(args)
--- a/ob_addh.py	Tue Nov 10 20:33:21 2020 +0000
+++ b/ob_addh.py	Thu Aug 15 11:06:27 2024 +0000
@@ -7,16 +7,28 @@
 import sys
 
 from openbabel import openbabel, pybel
+
 openbabel.obErrorLog.StopLogging()
 
 
 def parse_command_line(argv):
     parser = argparse.ArgumentParser()
-    parser.add_argument('--iformat', type=str, default='sdf', help='input file format')
-    parser.add_argument('-i', '--input', type=str, required=True, help='input file name')
-    parser.add_argument('-o', '--output', type=str, required=True, help='output file name')
-    parser.add_argument('--polar', action="store_true", default=False, help='Add hydrogen atoms only to polar atoms')
-    parser.add_argument('--pH', type=float, default="7.4", help='Specify target pH value')
+    parser.add_argument("--iformat", type=str, default="sdf", help="input file format")
+    parser.add_argument(
+        "-i", "--input", type=str, required=True, help="input file name"
+    )
+    parser.add_argument(
+        "-o", "--output", type=str, required=True, help="output file name"
+    )
+    parser.add_argument(
+        "--polar",
+        action="store_true",
+        default=False,
+        help="Add hydrogen atoms only to polar atoms",
+    )
+    parser.add_argument(
+        "--pH", type=float, default="7.4", help="Specify target pH value"
+    )
     return parser.parse_args()
 
 
@@ -32,7 +44,7 @@
 
 def __main__():
     """
-        Add hydrogen atoms at a certain pH value
+    Add hydrogen atoms at a certain pH value
     """
     args = parse_command_line(sys.argv)
     addh(args)
--- a/ob_convert.xml	Tue Nov 10 20:33:21 2020 +0000
+++ b/ob_convert.xml	Thu Aug 15 11:06:27 2024 +0000
@@ -6,7 +6,7 @@
     -->
     <macros>
         <import>macros.xml</import>
-        <token name="@GALAXY_VERSION@">0</token>
+        <token name="@GALAXY_VERSION@">1</token>
     </macros>
     <expand macro="requirements"/>
     <command detect_errors="aggressive">
@@ -132,7 +132,11 @@
         #if float($ph) >= 0:
             -p $ph
         #end if
-
+        
+        #if $appendproperties:
+            #set $props = str($appendproperties).replace(',', ' ')
+            --append '$props'
+        #end if
 ]]>
     </command>
     <inputs>
@@ -155,7 +159,7 @@
                 <option value="cif">Crystallographic Information File</option>
                 <option value="cml">Chemical Markup Language (CML)</option>
                 <option value="cmlr">CML Reaction format</option>
-                <option value="com">Gaussian 98/03 Cartesian Input</option>
+                <option value="com">Gaussian 98/03 Cartesian Input(com)</option>
                 <option value="copy">Copies raw text</option>
                 <option value="crk2d">Chemical Resource Kit 2D diagram format</option>
                 <option value="crk3d">Chemical Resource Kit 3D format</option>
@@ -163,8 +167,6 @@
                 <option value="cssr">CSD CSSR format</option>
                 <option value="ct">ChemDraw Connection Table format</option>
                 <option value="dmol">DMol3 coordinates format</option>
-                <!--<option value="ent">Protein Data Bank format</option>
-                <option value="fa">FASTA format</option>-->
                 <option value="fasta">FASTA format</option>
                 <option value="feat">Feature format</option>
                 <option value="fh">Fenske-Hall Z-Matrix format</option>
@@ -172,27 +174,22 @@
                 <option value="fpt">Fingerprint format (fpt)</option>
                 <option value="fract">Free Form Fractional format</option>
                 <option value="fs">Open Babel FastSearching database (fs)</option>
-                <!--<option value="fsa">FASTA format</option>-->
-                <option value="gamin">GAMESS Input</option>
-                <option value="gau">Gaussian 98/03 Cartesian Input</option>
-                <!--<option value="gjc">Gaussian 98/03 Cartesian Input</option>
-                <option value="gjf">Gaussian 98/03 Cartesian Input</option>-->
+                <option value="gamin">GAMESS Input (gamin)</option>
+                <option value="inp">GAMESS Input (inp)</option>
+                <option value="gau">Gaussian 98/03 Cartesian Input(gau)</option>
                 <option value="gpr">Ghemical format</option>
                 <option value="gr96">GROMOS96 format</option>
                 <option value="hin">HyperChem HIN format</option>
                 <option value="inchi">IUPAC InChI</option>
-                <option value="inp">GAMESS Input</option>
                 <option value="jin">Jaguar input format</option>
-                <!--<option value="mdl">MDL MOL format (mol)</option>-->
-                <option value="mmd">MacroModel format</option>
-                <option value="mmod">MacroModel format</option>
-                <!--<option value="mol">MDL MOL format (mol)</option> use SDF-->
+                <option value="mmd">MacroModel format (mmd)</option>
+                <option value="mmod">MacroModel format (mmod)</option>
                 <option value="mol2">Sybyl Mol2 format (mol2)</option>
                 <option value="molreport">Open Babel molecule report</option>
-                <option value="mop">MOPAC Cartesian format</option>
-                <option value="mopcrt">MOPAC Cartesian format</option>
+                <option value="mop">MOPAC Cartesian format (mop)</option>
+                <option value="mopcrt">MOPAC Cartesian format (mopcrt)</option>
                 <option value="mopin">MOPAC Internal</option>
-                <option value="mpc">MOPAC Cartesian format</option>
+                <option value="mpc">MOPAC Cartesian format (mpc)</option>
                 <option value="mpd">Sybyl descriptor format</option>
                 <option value="mpqcin">MPQC simplified input format</option>
                 <option value="nw">NWChem input format</option>
@@ -204,13 +201,11 @@
                 <option value="qcin">Q-Chem input format</option>
                 <option value="report">Open Babel report format</option>
                 <option value="rxn">MDL RXN format</option>
-                <!--<option value="sd">MDL MOL format</option>-->
                 <option value="sdf">MDL MOL format (sdf, mol)</option>
                 <option value="smi">SMILES format (smi)</option>
-                <!--<option value="sy2">Sybyl Mol2 format</option>-->
-                <option value="tdd">Thermo format</option>
+                <option value="tdd">Thermo format (tdd)</option>
                 <option value="test">Test format</option>
-                <option value="therm">Thermo format</option>
+                <option value="therm">Thermo format (therm)</option>
                 <option value="tmol">TurboMole Coordinate format</option>
                 <option value="txyz">Tinker MM2 format</option>
                 <option value="unixyz">UniChem XYZ format</option>
@@ -398,6 +393,27 @@
         <param name="dative_bonds" type="boolean" truevalue="-b" falsevalue=""
             label="Convert dative bonds" help="e.g. [N+]([O-])=O to N(=O)=O (-b)" />
         <param name="appendtotitle" type="text" value="" label="Append the specified text after each molecule title"/>
+        <param name="appendproperties" type="select" multiple="true" label="Molecular properties to append to the title." optional="true">
+            <option value="abonds">Number of aromatic bonds</option>
+            <option value="atoms">Number of atoms</option>
+            <option value="bonds">Number of bonds</option>
+            <option value="cansmi">Canonical SMILES</option>
+            <option value="cansmiNS">Canonical SMILES without isotopes or stereo</option>
+            <option value="dbonds">Number of double bonds</option>
+            <option value="sbonds">Number of single bonds</option>
+            <option value="tbonds">Number of triple bonds</option>
+            <option value="formula">Chemical formula</option>
+            <option value="HBA1">Number of Hydrogen Bond Acceptors 1 (JoelLib)</option>
+            <option value="HBA2">Number of Hydrogen Bond Acceptors 2 (JoelLib)</option>
+            <option value="HBD">Number of Hydrogen Bond Donors (JoelLib)</option>
+            <option value="InChI">IUPAC InChI identifier</option>
+            <option value="InChIKey">InChIKey</option>
+            <option value="L5">Lipinski Rule of Five</option>
+            <option value="logP">octanol/water partition coefficient</option>
+            <option value="MR">molar refractivity</option>
+            <option value="MW">Molecular Weight</option>
+            <option value="TPSA">topological polar surface area</option>
+        </param>
 
         <!-- Uniqueness -->
         <conditional name="unique">
@@ -449,40 +465,40 @@
         </collection>
     </outputs>
     <tests>
-        <test>
+        <test expect_num_outputs="1">
             <param name="infile" ftype="sdf" value="CID_2244.sdf"/>
             <param name="oformat_opts_selector" value="cml" />
             <output name="outfile" ftype="cml" file="ob_convert_on_CID2244.cml" />
         </test>
-        <test>
+        <test expect_num_outputs="1">
             <param name="infile" ftype="sdf" value="CID_2244.sdf"/>
             <param name="oformat_opts_selector" value="fs" />
-            <output name="outfile" compare="contains" file="ob_convert_on_CID2244_obfs.txt" ftype="obfs">
+            <output name="outfile" compare="contains" file="ob_convert_on_CID2244_obfs.txt" ftype="obfs" >
                 <extra_files type="file" value="molecule.sdf" name="molecule.sdf" />
                 <extra_files type="file" value="molecule.fs" name="molecule.fs" compare="sim_size" />
             </output>
         </test>
-        <test>
+        <test expect_num_outputs="1">
             <param name="infile" ftype="sdf" value="CID_2244.sdf"/>
             <param name="oformat_opts_selector" value="inchi" />
             <output name="outfile" ftype="inchi" file="ob_convert_on_CID2244.inchi" />
         </test>
-        <test>
+        <test expect_num_outputs="1">
             <param name="infile" ftype="sdf" value="CID_2244.sdf"/>
             <param name="oformat_opts_selector" value="pdb" />
             <output name="outfile" ftype="pdb" file="ob_convert_on_CID2244.pdb" lines_diff="4" />
         </test>
-        <test>
+        <test expect_num_outputs="1">
             <param name="infile" ftype="sdf" value="CID_2244.sdf"/>
             <param name="oformat_opts_selector" value="can" />
             <output name="outfile" ftype="smi" file="ob_convert_on_CID2244.smi" />
         </test>
-        <test>
+        <test expect_num_outputs="1">
             <param name="infile" ftype="sdf" value="CID_2244.sdf"/>
             <param name="oformat_opts_selector" value="sdf" />
             <output name="outfile" ftype="sdf" file="ob_convert_on_CID2244.sdf" lines_diff="2"/>
         </test>
-        <test>
+        <test expect_num_outputs="1">
             <param name="infile" ftype="smi" value="2_mol.smi"/>
             <param name="oformat_opts_selector" value="pdbqt"/>
             <param name="split" value="true"/>
@@ -491,6 +507,12 @@
                 <element name="molecule2" file="split2.pdbqt" />
             </output_collection>
         </test>
+        <test expect_num_outputs="1">
+            <param name="infile" ftype="smi" value="2_mol.smi"/>
+            <param name="oformat_opts_selector" value="sdf" />
+            <param name="appendproperties" value="cansmi,InChI"/>
+            <output name="outfile" ftype="sdf" file="2_mol.sdf" lines_diff="4"/>
+        </test>
     </tests>
     <help>
 <![CDATA[
--- a/ob_filter.py	Tue Nov 10 20:33:21 2020 +0000
+++ b/ob_filter.py	Thu Aug 15 11:06:27 2024 +0000
@@ -14,33 +14,36 @@
 
 import cheminfolib
 from openbabel import pybel
+
 cheminfolib.pybel_stop_logging()
 
 
 def parse_command_line():
     parser = argparse.ArgumentParser()
-    parser.add_argument('-i', '--input', help='Input file name')
-    parser.add_argument('-iformat', help='Input file format')
-    parser.add_argument('-oformat', default='smi',
-                        help='Output file format')
-    parser.add_argument('-o', '--output', help='Output file name',
-                        required=True)
-    parser.add_argument('--filters', help="Specify the filters to apply",
-                        required=True)
-    parser.add_argument('--list_of_names', required=False,
-                        help="A file with list of molecule names to extract. Every name is in one line.")
+    parser.add_argument("-i", "--input", help="Input file name")
+    parser.add_argument("-iformat", help="Input file format")
+    parser.add_argument("-oformat", default="smi", help="Output file format")
+    parser.add_argument("-o", "--output", help="Output file name", required=True)
+    parser.add_argument("--filters", help="Specify the filters to apply", required=True)
+    parser.add_argument(
+        "--list_of_names",
+        required=False,
+        help="A file with list of molecule names to extract. Every name is in one line.",
+    )
     return parser.parse_args()
 
 
 def filter_precalculated_compounds(args, filters):
     outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True)
-    for mol in pybel.readfile('sdf', args.input):
+    for mol in pybel.readfile("sdf", args.input):
         for key, elem in filters.items():
             # map the short description to the larger metadata names stored in the sdf file
             property = cheminfolib.ColumnNames.get(key, key)
             min = elem[0]
             max = elem[1]
-            if float(mol.data[property]) >= float(min) and float(mol.data[property]) <= float(max):
+            if float(mol.data[property]) >= float(min) and float(
+                mol.data[property]
+            ) <= float(max):
                 pass
             else:
                 # leave the filter loop, because one filter constraint is not satisfied
@@ -56,16 +59,30 @@
     if args.iformat == args.oformat:
         # use the -ocopy option from openbabel to speed up the filtering, additionally no conversion is carried out
         # http://openbabel.org/docs/dev/FileFormats/Copy_raw_text.html#copy-raw-text
-        cmd = 'obabel -i%s %s -ocopy -O %s --filter' % (args.iformat, args.input, args.output)
+        cmd = "obabel -i%s %s -ocopy -O %s --filter" % (
+            args.iformat,
+            args.input,
+            args.output,
+        )
     else:
-        cmd = 'obabel -i%s %s -o%s -O %s --filter' % (args.iformat, args.input, args.oformat, args.output)
-    filter_cmd = ''
+        cmd = "obabel -i%s %s -o%s -O %s --filter" % (
+            args.iformat,
+            args.input,
+            args.oformat,
+            args.output,
+        )
+    filter_cmd = ""
     # OBDescriptor stores a mapping from our desc shortcut to the OB name [0] and a long description [1]
     for key, elem in filters.items():
         ob_descriptor_name = cheminfolib.OBDescriptor[key][0]
         min = elem[0]
         max = elem[1]
-        filter_cmd += ' %s>=%s %s<=%s ' % (ob_descriptor_name, min, ob_descriptor_name, max)
+        filter_cmd += " %s>=%s %s<=%s " % (
+            ob_descriptor_name,
+            min,
+            ob_descriptor_name,
+            max,
+        )
 
     args = shlex.split('%s "%s"' % (cmd, filter_cmd))
     # print '%s "%s"' % (cmd, filter_cmd)
@@ -76,18 +93,18 @@
     return_code = child.returncode
 
     if return_code:
-        sys.stdout.write(stdout.decode('utf-8'))
-        sys.stderr.write(stderr.decode('utf-8'))
+        sys.stdout.write(stdout.decode("utf-8"))
+        sys.stderr.write(stderr.decode("utf-8"))
         sys.stderr.write("Return error code %i from command:\n" % return_code)
         sys.stderr.write("%s\n" % cmd)
     else:
-        sys.stdout.write(stdout.decode('utf-8'))
-        sys.stdout.write(stderr.decode('utf-8'))
+        sys.stdout.write(stdout.decode("utf-8"))
+        sys.stdout.write(stderr.decode("utf-8"))
 
 
 def filter_by_name(args):
     outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True)
-    for mol in pybel.readfile('sdf', args.input):
+    for mol in pybel.readfile("sdf", args.input):
         for name in open(args.list_of_names):
             if mol.title.strip() == name.strip():
                 outfile.write(mol)
@@ -96,21 +113,21 @@
 
 def __main__():
     """
-        Select compounds with certain properties from a small library
+    Select compounds with certain properties from a small library
     """
     args = parse_command_line()
 
-    if args.filters == '__filter_by_name__':
+    if args.filters == "__filter_by_name__":
         filter_by_name(args)
         return
 
     # It's a small trick to get the parameters in an easy way from the XML file.
     # To keep it readable in the XML file, the string contains many white-spaces, which need to be removed.
     # Also, the last loop creates a ',}' that is not a valid JSON expression.
-    filters = json.loads((args.filters).replace(' ', '').replace(',}', '}'))
-    if args.iformat == 'sdf':
+    filters = json.loads((args.filters).replace(" ", "").replace(",}", "}"))
+    if args.iformat == "sdf":
         # Check if the sdf file contains all of the required metadata to invoke the precalculation filtering
-        mol = next(pybel.readfile('sdf', args.input))
+        mol = next(pybel.readfile("sdf", args.input))
         for key, elem in filters.items():
             property = cheminfolib.ColumnNames.get(key, key)
             if property not in mol.data:
--- a/ob_genProp.py	Tue Nov 10 20:33:21 2020 +0000
+++ b/ob_genProp.py	Thu Aug 15 11:06:27 2024 +0000
@@ -10,43 +10,57 @@
 import cheminfolib
 import openbabel
 from openbabel import pybel
+
 openbabel.obErrorLog.StopLogging()
 
 
 def parse_command_line(argv):
     parser = argparse.ArgumentParser()
-    parser.add_argument('--iformat', default='sdf', help='input file format')
-    parser.add_argument('-i', '--input', required=True, help='input file name')
-    parser.add_argument('--oformat', default='sdf', choices=['sdf', 'table'], help='output file format')
-    parser.add_argument('--header', type=bool, help='Include the header as the first line of the output table')
-    parser.add_argument('-o', '--output', required=True, help='output file name')
+    parser.add_argument("--iformat", default="sdf", help="input file format")
+    parser.add_argument("-i", "--input", required=True, help="input file name")
+    parser.add_argument(
+        "--oformat", default="sdf", choices=["sdf", "table"], help="output file format"
+    )
+    parser.add_argument(
+        "--header",
+        type=bool,
+        help="Include the header as the first line of the output table",
+    )
+    parser.add_argument("-o", "--output", required=True, help="output file name")
     return parser.parse_args()
 
 
 def compute_properties(args):
-    if args.oformat == 'sdf':
+    if args.oformat == "sdf":
         outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True)
     else:
-        outfile = open(args.output, 'w')
+        outfile = open(args.output, "w")
         if args.header:
             mol = next(pybel.readfile(args.iformat, args.input))
             metadata = cheminfolib.get_properties_ext(mol)
-            outfile.write('%s\n' % '\t'.join([cheminfolib.ColumnNames[key] for key in metadata]))
+            outfile.write(
+                "%s\n" % "\t".join([cheminfolib.ColumnNames[key] for key in metadata])
+            )
 
     for mol in pybel.readfile(args.iformat, args.input):
         if mol.OBMol.NumHvyAtoms() > 5:
             metadata = cheminfolib.get_properties_ext(mol)
-            if args.oformat == 'sdf':
-                [mol.data.update({cheminfolib.ColumnNames[key]: metadata[key]}) for key in metadata]
+            if args.oformat == "sdf":
+                [
+                    mol.data.update({cheminfolib.ColumnNames[key]: metadata[key]})
+                    for key in metadata
+                ]
                 outfile.write(mol)
             else:
-                outfile.write('%s\n' % ('\t'.join([str(metadata[key]) for key in metadata])))
+                outfile.write(
+                    "%s\n" % ("\t".join([str(metadata[key]) for key in metadata]))
+                )
     outfile.close()
 
 
 def __main__():
     """
-        Physico-chemical properties are computed and stored as metadata in the sdf output file
+    Physico-chemical properties are computed and stored as metadata in the sdf output file
     """
     args = parse_command_line(sys.argv)
     compute_properties(args)
--- a/ob_remIons.py	Tue Nov 10 20:33:21 2020 +0000
+++ b/ob_remIons.py	Thu Aug 15 11:06:27 2024 +0000
@@ -8,37 +8,43 @@
 import argparse
 
 from openbabel import openbabel, pybel
+
 openbabel.obErrorLog.StopLogging()
 
 
 def parse_command_line():
     parser = argparse.ArgumentParser()
-    parser.add_argument('-iformat', default='sdf', help='input file format')
-    parser.add_argument('-i', '--input', required=True, help='input file name')
-    parser.add_argument('-o', '--output', required=True, help='output file name')
-    parser.add_argument('-idx', default=False, action='store_true', help='should output be an indexed text table? works only for inchi/smiles, otherwise is ignored')
+    parser.add_argument("-iformat", default="sdf", help="input file format")
+    parser.add_argument("-i", "--input", required=True, help="input file name")
+    parser.add_argument("-o", "--output", required=True, help="output file name")
+    parser.add_argument(
+        "-idx",
+        default=False,
+        action="store_true",
+        help="should output be an indexed text table? works only for inchi/smiles, otherwise is ignored",
+    )
     return parser.parse_args()
 
 
 def remove_ions(args):
-    with open(args.output, 'w') as outfile:
+    with open(args.output, "w") as outfile:
         for index, mol in enumerate(pybel.readfile(args.iformat, args.input)):
             if mol.OBMol.NumHvyAtoms() > 5:
                 mol.OBMol.StripSalts(0)
-                if 'inchi' in mol.data:
-                    del mol.data['inchi']  # remove inchi cache so modified mol is saved
+                if "inchi" in mol.data:
+                    del mol.data["inchi"]  # remove inchi cache so modified mol is saved
 
-            mol = mol.write(args.iformat) if mol.OBMol.NumHvyAtoms() > 5 else '\n'
+            mol = mol.write(args.iformat) if mol.OBMol.NumHvyAtoms() > 5 else "\n"
 
-            if args.idx and args.iformat in ['inchi', 'smi']:
-                outfile.write(f'{index}\t{mol}')
-            elif mol != '\n':
-                outfile.write(f'{mol}')
+            if args.idx and args.iformat in ["inchi", "smi"]:
+                outfile.write(f"{index}\t{mol}")
+            elif mol != "\n":
+                outfile.write(f"{mol}")
 
 
 def __main__():
     """
-        Remove any counterion and delete any fragment but the largest one for each molecule.
+    Remove any counterion and delete any fragment but the largest one for each molecule.
     """
     args = parse_command_line()
     remove_ions(args)
--- a/ob_spectrophore_search.py	Tue Nov 10 20:33:21 2020 +0000
+++ b/ob_spectrophore_search.py	Thu Aug 15 11:06:27 2024 +0000
@@ -8,6 +8,7 @@
 
 import numpy as np
 from openbabel import openbabel, pybel
+
 openbabel.obErrorLog.StopLogging()
 # TODO get rid of eval()
 
@@ -17,49 +18,94 @@
 
 def parse_command_line():
     parser = argparse.ArgumentParser()
-    parser.add_argument('--target', required=True, help='target file name in sdf format with Spectrophores(TM) descriptors stored as meta-data')
-    parser.add_argument('--library', required=True, help='library of compounds with pre-computed physico-chemical properties, including Spectrophores(TM) in tabular format')
-    parser.add_argument('-c', '--column', required=True, type=int, help='#column containing the Spectrophores(TM) descriptors in the library file')
-    parser.add_argument('-o', '--output', required=True, help='output file name')
-    parser.add_argument('-n', '--normalization', default="ZeroMeanAndUnitStd", choices=['No', 'ZeroMean', 'UnitStd', 'ZeroMeanAndUnitStd'], help='Normalization method')
-    parser.add_argument('-a', '--accuracy', default="20", choices=['1', '2', '5', '10', '15', '20', '30', '36', '45', '60'], help='Accuracy expressed as angular stepsize')
-    parser.add_argument('-s', '--stereo', default="No", choices=['No', 'Unique', 'Mirror', 'All'], help='Stereospecificity of the cage')
-    parser.add_argument('-r', '--resolution', type=float, default="3.0", help='Resolution')
+    parser.add_argument(
+        "--target",
+        required=True,
+        help="target file name in sdf format with Spectrophores(TM) descriptors stored as meta-data",
+    )
+    parser.add_argument(
+        "--library",
+        required=True,
+        help="library of compounds with pre-computed physico-chemical properties, including Spectrophores(TM) in tabular format",
+    )
+    parser.add_argument(
+        "-c",
+        "--column",
+        required=True,
+        type=int,
+        help="#column containing the Spectrophores(TM) descriptors in the library file",
+    )
+    parser.add_argument("-o", "--output", required=True, help="output file name")
+    parser.add_argument(
+        "-n",
+        "--normalization",
+        default="ZeroMeanAndUnitStd",
+        choices=["No", "ZeroMean", "UnitStd", "ZeroMeanAndUnitStd"],
+        help="Normalization method",
+    )
+    parser.add_argument(
+        "-a",
+        "--accuracy",
+        default="20",
+        choices=["1", "2", "5", "10", "15", "20", "30", "36", "45", "60"],
+        help="Accuracy expressed as angular stepsize",
+    )
+    parser.add_argument(
+        "-s",
+        "--stereo",
+        default="No",
+        choices=["No", "Unique", "Mirror", "All"],
+        help="Stereospecificity of the cage",
+    )
+    parser.add_argument(
+        "-r", "--resolution", type=float, default="3.0", help="Resolution"
+    )
     return parser.parse_args()
 
 
 def set_parameters(args):
-    if args.normalization == 'No':
+    if args.normalization == "No":
         spectrophore.SetNormalization(spectrophore.NoNormalization)
     else:
-        spectrophore.SetNormalization(eval('spectrophore.NormalizationTowards' + args.normalization))
-    spectrophore.SetAccuracy(eval('spectrophore.AngStepSize' + args.accuracy))
-    spectrophore.SetStereo(eval('spectrophore.' + args.stereo + 'StereoSpecificProbes'))
+        spectrophore.SetNormalization(
+            eval("spectrophore.NormalizationTowards" + args.normalization)
+        )
+    spectrophore.SetAccuracy(eval("spectrophore.AngStepSize" + args.accuracy))
+    spectrophore.SetStereo(eval("spectrophore." + args.stereo + "StereoSpecificProbes"))
     spectrophore.SetResolution(args.resolution)
     return True
 
 
 def Compute_Spectrophores_distance(target_spectrophore, args):
-    outfile = open(args.output, 'w')
-    for mol in open(args.library, 'r'):
+    outfile = open(args.output, "w")
+    for mol in open(args.library, "r"):
         try:
-            distance = ((np.asarray(target_spectrophore, dtype=float) - np.asarray(mol.split('\t')[args.column - 1].strip().split(', '), dtype=float))**2).sum()
+            distance = (
+                (
+                    np.asarray(target_spectrophore, dtype=float)
+                    - np.asarray(
+                        mol.split("\t")[args.column - 1].strip().split(", "),
+                        dtype=float,
+                    )
+                )
+                ** 2
+            ).sum()
         except ValueError:
             distance = 0
-        outfile.write('%s\t%f\n' % (mol.strip(), distance))
+        outfile.write("%s\t%f\n" % (mol.strip(), distance))
     outfile.close()
 
 
 def __main__():
     """
-        Computation of Spectrophores(TM) distances to a target molecule.
+    Computation of Spectrophores(TM) distances to a target molecule.
     """
     args = parse_command_line()
     # This sets up the parameters for the Spectrophore generation. Parameters are set to fit those of our standard parsing tool
     set_parameters(args)
 
-    mol = next(pybel.readfile('sdf', args.target))
-    target_spectrophore = mol.data["Spectrophores(TM)"].strip().split(', ')
+    mol = next(pybel.readfile("sdf", args.target))
+    target_spectrophore = mol.data["Spectrophores(TM)"].strip().split(", ")
     # Compute the paired-distance between every molecule in the library and the target
     Compute_Spectrophores_distance(target_spectrophore, args)
 
--- a/remove_protonation_state.py	Tue Nov 10 20:33:21 2020 +0000
+++ b/remove_protonation_state.py	Thu Aug 15 11:06:27 2024 +0000
@@ -7,14 +7,15 @@
 import argparse
 
 from openbabel import openbabel, pybel
+
 openbabel.obErrorLog.StopLogging()
 
 
 def parse_command_line():
     parser = argparse.ArgumentParser()
-    parser.add_argument('--iformat', default='sdf', help='input file format')
-    parser.add_argument('-i', '--input', required=True, help='input file name')
-    parser.add_argument('-o', '--output', required=True, help='output file name')
+    parser.add_argument("--iformat", default="sdf", help="input file format")
+    parser.add_argument("-i", "--input", required=True, help="input file name")
+    parser.add_argument("-o", "--output", required=True, help="output file name")
     return parser.parse_args()
 
 
@@ -22,15 +23,15 @@
     outfile = pybel.Outputfile(args.iformat, args.output, overwrite=True)
     for mol in pybel.readfile(args.iformat, args.input):
         [atom.OBAtom.SetFormalCharge(0) for atom in mol.atoms]
-        if 'inchi' in mol.data:
-            del mol.data['inchi']  # remove inchi cache so modified mol is saved
+        if "inchi" in mol.data:
+            del mol.data["inchi"]  # remove inchi cache so modified mol is saved
         outfile.write(mol)
     outfile.close()
 
 
 def __main__():
     """
-        Remove any protonation state from each atom in each molecule.
+    Remove any protonation state from each atom in each molecule.
     """
     args = parse_command_line()
     remove_protonation(args)
--- a/subsearch.py	Tue Nov 10 20:33:21 2020 +0000
+++ b/subsearch.py	Thu Aug 15 11:06:27 2024 +0000
@@ -13,21 +13,34 @@
 import tempfile
 
 from openbabel import openbabel, pybel
+
 openbabel.obErrorLog.StopLogging()
 
 
 def parse_command_line():
     parser = argparse.ArgumentParser()
-    parser.add_argument('-i', '--infile', required=True, help='Molecule file.')
-    parser.add_argument('--iformat', help='Input format.')
-    parser.add_argument('--fastsearch-index', dest="fastsearch_index", required=True,
-                        help='Path to the openbabel fastsearch index.')
-    parser.add_argument('-o', '--outfile', required=True, help='Path to the output file.')
-    parser.add_argument('--oformat', default='smi', help='Output file format')
-    parser.add_argument("--max-candidates", dest="max_candidates", type=int, default=4000,
-                        help="The maximum number of candidates.")
-    parser.add_argument('-p', '--processors', type=int,
-                        default=multiprocessing.cpu_count())
+    parser.add_argument("-i", "--infile", required=True, help="Molecule file.")
+    parser.add_argument("--iformat", help="Input format.")
+    parser.add_argument(
+        "--fastsearch-index",
+        dest="fastsearch_index",
+        required=True,
+        help="Path to the openbabel fastsearch index.",
+    )
+    parser.add_argument(
+        "-o", "--outfile", required=True, help="Path to the output file."
+    )
+    parser.add_argument("--oformat", default="smi", help="Output file format")
+    parser.add_argument(
+        "--max-candidates",
+        dest="max_candidates",
+        type=int,
+        default=4000,
+        help="The maximum number of candidates.",
+    )
+    parser.add_argument(
+        "-p", "--processors", type=int, default=multiprocessing.cpu_count()
+    )
     return parser.parse_args()
 
 
@@ -40,20 +53,28 @@
 
 def mp_helper(query, args):
     """
-        Helper function for multiprocessing.
-        That function is a wrapper around the following command:
-        obabel file.fs -s"smarts" -Ooutfile.smi -al 999999999
+    Helper function for multiprocessing.
+    That function is a wrapper around the following command:
+    obabel file.fs -s"smarts" -Ooutfile.smi -al 999999999
     """
 
-    if args.oformat == 'names':
-        opts = '-osmi -xt'
+    if args.oformat == "names":
+        opts = "-osmi -xt"
     else:
-        opts = '-o%s' % args.oformat
+        opts = "-o%s" % args.oformat
 
     tmp = tempfile.NamedTemporaryFile(delete=False)
-    cmd = 'obabel -ifs %s -O %s %s -s%s -al %s' % (args.fastsearch_index, tmp.name, opts, query, args.max_candidates)
+    cmd = "obabel -ifs %s -O %s %s -s%s -al %s" % (
+        args.fastsearch_index,
+        tmp.name,
+        opts,
+        query,
+        args.max_candidates,
+    )
 
-    child = subprocess.Popen(cmd.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    child = subprocess.Popen(
+        cmd.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE
+    )
 
     stdout, stderr = child.communicate()
     return_code = child.returncode
@@ -73,14 +94,14 @@
     """
     Wrapper to retrieve a stripped SMILES or SMARTS string from different input formats.
     """
-    if args.iformat in ['smi', 'text', 'tabular']:
+    if args.iformat in ["smi", "text", "tabular"]:
         with open(args.infile) as text_file:
             for line in text_file:
-                yield line.split('\t')[0].strip()
+                yield line.split("\t")[0].strip()
     else:
         # inchi or sdf files
         for mol in pybel.readfile(args.iformat, args.infile):
-            yield mol.write('smiles').split('\t')[0]
+            yield mol.write("smiles").split("\t")[0]
 
 
 def substructure_search(args):
@@ -91,18 +112,18 @@
     pool.close()
     pool.join()
 
-    if args.oformat == 'names':
-        out_handle = open(args.outfile, 'w')
+    if args.oformat == "names":
+        out_handle = open(args.outfile, "w")
         for result_file, query in results:
             with open(result_file) as res_handle:
                 for line in res_handle:
-                    out_handle.write('%s\t%s\n' % (line.strip(), query))
+                    out_handle.write("%s\t%s\n" % (line.strip(), query))
             os.remove(result_file)
         out_handle.close()
     else:
-        out_handle = open(args.outfile, 'wb')
+        out_handle = open(args.outfile, "wb")
         for result_file, query in results:
-            res_handle = open(result_file, 'rb')
+            res_handle = open(result_file, "rb")
             shutil.copyfileobj(res_handle, out_handle)
             res_handle.close()
             os.remove(result_file)
@@ -111,7 +132,7 @@
 
 def __main__():
     """
-        Multiprocessing Open Babel Substructure Search.
+    Multiprocessing Open Babel Substructure Search.
     """
     args = parse_command_line()
     substructure_search(args)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/2_mol.sdf	Thu Aug 15 11:06:27 2024 +0000
@@ -0,0 +1,66 @@
+CC(=O)Oc1ccccc1C(=O)[O-] InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12)/p-1
+ OpenBabel08132415422D
+
+ 13 13  0  0  0  0  0  0  0  0999 V2000
+    0.0000    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    0.0000    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    0.0000    0.0000    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
+    0.0000    0.0000    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
+    0.0000    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    0.0000    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    0.0000    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    0.0000    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    0.0000    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    0.0000    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    0.0000    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    0.0000    0.0000    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
+    0.0000    0.0000    0.0000 O   0  5  0  0  0  0  0  0  0  0  0  0
+  1  2  1  0  0  0  0
+  2  3  2  0  0  0  0
+  2  4  1  0  0  0  0
+  4  5  1  0  0  0  0
+  5 10  1  0  0  0  0
+  5  6  2  0  0  0  0
+  6  7  1  0  0  0  0
+  7  8  2  0  0  0  0
+  8  9  1  0  0  0  0
+  9 10  2  0  0  0  0
+ 10 11  1  0  0  0  0
+ 11 12  2  0  0  0  0
+ 11 13  1  0  0  0  0
+M  CHG  1  13  -1
+M  END
+$$$$
+CC(=O)Oc1ccccc1C(=O)[O-] InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12)/p-1
+ OpenBabel08132415422D
+
+ 13 13  0  0  0  0  0  0  0  0999 V2000
+    0.0000    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    0.0000    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    0.0000    0.0000    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
+    0.0000    0.0000    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
+    0.0000    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    0.0000    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    0.0000    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    0.0000    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    0.0000    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    0.0000    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    0.0000    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    0.0000    0.0000    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
+    0.0000    0.0000    0.0000 O   0  5  0  0  0  0  0  0  0  0  0  0
+  1  2  1  0  0  0  0
+  2  3  2  0  0  0  0
+  2  4  1  0  0  0  0
+  4  5  1  0  0  0  0
+  5 10  1  0  0  0  0
+  5  6  2  0  0  0  0
+  6  7  1  0  0  0  0
+  7  8  2  0  0  0  0
+  8  9  1  0  0  0  0
+  9 10  2  0  0  0  0
+ 10 11  1  0  0  0  0
+ 11 12  2  0  0  0  0
+ 11 13  1  0  0  0  0
+M  CHG  1  13  -1
+M  END
+$$$$