Mercurial > repos > bgruening > openbabel_compound_convert
changeset 15:4242b4d68e9c draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/openbabel commit d9c51279c061a1da948a2582d5b502ca7573adbf
author | bgruening |
---|---|
date | Thu, 15 Aug 2024 11:06:27 +0000 (5 months ago) |
parents | e2c36f62e22f |
children | |
files | change_title_to_metadata_value.py cheminfolib.py distance_finder.py multi_obgrep.py ob_addh.py ob_convert.xml ob_filter.py ob_genProp.py ob_remIons.py ob_spectrophore_search.py remove_protonation_state.py subsearch.py test-data/2_mol.sdf |
diffstat | 13 files changed, 618 insertions(+), 294 deletions(-) [+] |
line wrap: on
line diff
--- a/change_title_to_metadata_value.py Tue Nov 10 20:33:21 2020 +0000 +++ b/change_title_to_metadata_value.py Thu Aug 15 11:06:27 2024 +0000 @@ -11,6 +11,7 @@ import string from openbabel import openbabel, pybel + openbabel.obErrorLog.StopLogging() @@ -19,14 +20,19 @@ description="Change the title from a molecule file to metadata \ value of a given-id of the same molecule file.", ) - parser.add_argument('--infile', '-i', required=True, - help="path to the input file") - parser.add_argument('--outfile', '-o', required=True, - help="path to the output file") - parser.add_argument('--key', '-k', required=True, - help="the metadata key from the sdf file which should inlcude the new title") - parser.add_argument('--random', '-r', action="store_true", - help="Add random suffix to the title.") + parser.add_argument("--infile", "-i", required=True, help="path to the input file") + parser.add_argument( + "--outfile", "-o", required=True, help="path to the output file" + ) + parser.add_argument( + "--key", + "-k", + required=True, + help="the metadata key from the sdf file which should inlcude the new title", + ) + parser.add_argument( + "--random", "-r", action="store_true", help="Add random suffix to the title." + ) args = parser.parse_args() @@ -35,8 +41,11 @@ if args.key in mol.data: mol.title = mol.data[args.key] if args.random: - suffix = ''.join(random.choice(string.ascii_lowercase + string.digits) for _ in range(13)) - mol.title += '__%s' % suffix + suffix = "".join( + random.choice(string.ascii_lowercase + string.digits) + for _ in range(13) + ) + mol.title += "__%s" % suffix output.write(mol) output.close()
--- a/cheminfolib.py Tue Nov 10 20:33:21 2020 +0000 +++ b/cheminfolib.py Thu Aug 15 11:06:27 2024 +0000 @@ -11,28 +11,32 @@ import tempfile from multiprocessing import Pool - try: from galaxy import eggs - eggs.require('psycopg2') + + eggs.require("psycopg2") except ImportError: psycopg2 = None - print('psycopg2 is not available. It is currently used in the pgchem wrappers, that are not shipped with default CTB') + print( + "psycopg2 is not available. It is currently used in the pgchem wrappers, that are not shipped with default CTB" + ) try: from openbabel import openbabel, pybel + openbabel.obErrorLog.StopLogging() except ImportError: openbabel, pybel = None, None - print('OpenBabel could not be found. A few functions are not available without OpenBabel.') + print( + "OpenBabel could not be found. A few functions are not available without OpenBabel." + ) def CountLines(path): - out = subprocess.Popen(['wc', '-l', path], - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT - ).communicate()[0] - return int(out.partition(b' ')[0]) + out = subprocess.Popen( + ["wc", "-l", path], stdout=subprocess.PIPE, stderr=subprocess.STDOUT + ).communicate()[0] + return int(out.partition(b" ")[0]) def grep(pattern, file_obj): @@ -49,15 +53,15 @@ for line_counter, line in enumerate(open(filepath)): if line_counter > 10000: break - if line.find('$$$$') != -1: - return 'sdf' - elif line.find('@<TRIPOS>MOLECULE') != -1: - return 'mol2' - elif line.find('ligand id') != -1: - return 'drf' - elif possible_inchi and re.findall('^InChI=', line): - return 'inchi' - elif re.findall(r'^M\s+END', line): + if line.find("$$$$") != -1: + return "sdf" + elif line.find("@<TRIPOS>MOLECULE") != -1: + return "mol2" + elif line.find("ligand id") != -1: + return "drf" + elif possible_inchi and re.findall("^InChI=", line): + return "inchi" + elif re.findall(r"^M\s+END", line): mol = True # first line is not an InChI, so it can't be an InChI file possible_inchi = False @@ -65,99 +69,128 @@ if mol: # END can occures before $$$$, so and SDF file will # be recognised as mol, if you not using this hack' - return 'mol' - return 'smi' + return "mol" + return "smi" def db_connect(args): try: - db_conn = psycopg2.connect("dbname=%s user=%s host=%s password=%s" % (args.dbname, args.dbuser, args.dbhost, args.dbpasswd)) + db_conn = psycopg2.connect( + "dbname=%s user=%s host=%s password=%s" + % (args.dbname, args.dbuser, args.dbhost, args.dbpasswd) + ) return db_conn except psycopg2.Error: - sys.exit('Unable to connect to the db') + sys.exit("Unable to connect to the db") ColumnNames = { - 'can_smiles': 'Canonical SMILES', - 'can': 'Canonical SMILES', - 'inchi': 'InChI', - 'inchi_key': 'InChI key', - 'inchi_key_first': 'InChI key first', - 'inchi_key_last': 'InChI key last', - 'molwt': 'Molecular weight', - 'hbd': 'Hydrogen-bond donors', - 'donors': 'Hydrogen-bond donors', - 'hba': 'Hydrogen-bond acceptors', - 'acceptors': 'Hydrogen-bond acceptors', - 'rotbonds': 'Rotatable bonds', - 'logp': 'logP', - 'psa': 'Polar surface area', - 'mr': 'Molecular refractivity', - 'atoms': 'Number of heavy atoms', - 'rings': 'Number of rings', - 'set_bits': 'FP2 bits', - 'id': 'Internal identifier', - 'tani': 'Tanimoto coefficient', - 'spectrophore': 'Spectrophores(TM)', - 'dist_spectrophore': 'Spectrophores(TM) distance to target', - 'synonym': 'Entry id', + "can_smiles": "Canonical SMILES", + "can": "Canonical SMILES", + "inchi": "InChI", + "inchi_key": "InChI key", + "inchi_key_first": "InChI key first", + "inchi_key_last": "InChI key last", + "molwt": "Molecular weight", + "hbd": "Hydrogen-bond donors", + "donors": "Hydrogen-bond donors", + "hba": "Hydrogen-bond acceptors", + "acceptors": "Hydrogen-bond acceptors", + "rotbonds": "Rotatable bonds", + "logp": "logP", + "psa": "Polar surface area", + "mr": "Molecular refractivity", + "atoms": "Number of heavy atoms", + "rings": "Number of rings", + "set_bits": "FP2 bits", + "id": "Internal identifier", + "tani": "Tanimoto coefficient", + "spectrophore": "Spectrophores(TM)", + "dist_spectrophore": "Spectrophores(TM) distance to target", + "synonym": "Entry id", } OBDescriptor = { - 'atoms': ["atoms", "Number of atoms"], - 'hatoms': ["hatoms", "Number of heavy atoms"], # self defined tag hatoms in plugindefines.txt - 'can_smiles': ["cansmi", "Canonical SMILES"], - 'can_smilesNS': ["cansmiNS", "Canonical SMILES without isotopes or stereo"], + "atoms": ["atoms", "Number of atoms"], + "hatoms": [ + "hatoms", + "Number of heavy atoms", + ], # self defined tag hatoms in plugindefines.txt + "can_smiles": ["cansmi", "Canonical SMILES"], + "can_smilesNS": ["cansmiNS", "Canonical SMILES without isotopes or stereo"], # ["abonds", "Number of aromatic bonds"], # ["bonds", "Number of bonds"], # ["dbonds", "Number of double bonds"], # ["formula", "Chemical formula"], - 'hba': ["HBA1", "Number of Hydrogen Bond Acceptors 1 (JoelLib)"], - 'hba2': ["HBA2", "Number of Hydrogen Bond Acceptors 2 (JoelLib)"], - 'hbd': ["HBD", "Number of Hydrogen Bond Donors (JoelLib)"], - 'inchi': ["InChI", "IUPAC InChI identifier"], - 'inchi_key': ["InChIKey", "InChIKey"], + "hba": ["HBA1", "Number of Hydrogen Bond Acceptors 1 (JoelLib)"], + "hba2": ["HBA2", "Number of Hydrogen Bond Acceptors 2 (JoelLib)"], + "hbd": ["HBD", "Number of Hydrogen Bond Donors (JoelLib)"], + "inchi": ["InChI", "IUPAC InChI identifier"], + "inchi_key": ["InChIKey", "InChIKey"], # ["L5", "Lipinski Rule of Five"], - 'logp': ["logP", "octanol/water partition coefficient"], - 'mr': ["MR", "molar refractivity"], - 'molwt': ["MW", "Molecular Weight filter"], + "logp": ["logP", "octanol/water partition coefficient"], + "mr": ["MR", "molar refractivity"], + "molwt": ["MW", "Molecular Weight filter"], # ["nF", "Number of Fluorine Atoms"], # ["s", "SMARTS filter"], # ["sbonds", "Number of single bonds"], # ["smarts", "SMARTS filter"], # ["tbonds", "Number of triple bonds"], # ["title", "For comparing a molecule's title"], - 'psa': ["TPSA", "topological polar surface area"], - 'rotbonds': ['ROTATABLE_BOND', 'rotatable bonds'], + "psa": ["TPSA", "topological polar surface area"], + "rotbonds": ["ROTATABLE_BOND", "rotatable bonds"], } def print_output(args, rows): - if args.oformat == 'table': - outfile = open(args.output, 'w') - requested_fields = (filter(lambda x: x not in ["[", "]", "'"], args.fetch)).split(', ') + if args.oformat == "table": + outfile = open(args.output, "w") + requested_fields = ( + filter(lambda x: x not in ["[", "]", "'"], args.fetch) + ).split(", ") if args.header: - outfile.write('Identifier\t' + '\t'.join([ColumnNames[key] for key in requested_fields]) + '\n') + outfile.write( + "Identifier\t" + + "\t".join([ColumnNames[key] for key in requested_fields]) + + "\n" + ) for row in rows: - outfile.write(row['synonym'] + '\t' + '\t'.join([str(row[key]) for key in requested_fields]) + '\n') + outfile.write( + row["synonym"] + + "\t" + + "\t".join([str(row[key]) for key in requested_fields]) + + "\n" + ) - elif args.oformat in ['sdf', 'mol2']: + elif args.oformat in ["sdf", "mol2"]: outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True) for row in rows: try: - mol = pybel.readstring('sdf', row['mol']) - if args.oformat == 'sdf': - keys = filter(lambda x: x not in ["[", "]", "'"], args.fetch).split(', ') - mol.data.update({ColumnNames['synonym']: row['synonym']}) - if 'inchi_key' in keys: - keys = (', '.join(keys).replace("inchi_key", "inchi_key_first, inchi_key_last")).split(', ') - [mol.data.update({ColumnNames[key]: row[key]}) for key in keys if key] + mol = pybel.readstring("sdf", row["mol"]) + if args.oformat == "sdf": + keys = filter(lambda x: x not in ["[", "]", "'"], args.fetch).split( + ", " + ) + mol.data.update({ColumnNames["synonym"]: row["synonym"]}) + if "inchi_key" in keys: + keys = ( + ", ".join(keys).replace( + "inchi_key", "inchi_key_first, inchi_key_last" + ) + ).split(", ") + [ + mol.data.update({ColumnNames[key]: row[key]}) + for key in keys + if key + ] outfile.write(mol) except OSError: pass else: - outfile = open(args.output, 'w') - outfile.write('\n'.join(['%s\t%s' % (row[args.oformat], row['synonym']) for row in rows])) + outfile = open(args.output, "w") + outfile.write( + "\n".join(["%s\t%s" % (row[args.oformat], row["synonym"]) for row in rows]) + ) outfile.close() @@ -167,31 +200,37 @@ def get_properties_ext(mol): HBD = pybel.Smarts("[!#6;!H0]") - HBA = pybel.Smarts(("[$([$([#8,#16]);!$(*=N~O);" - "!$(*~N=O);X1,X2]),$([#7;v3;" - "!$([nH]);!$(*(-a)-a)])]" - )) + HBA = pybel.Smarts( + ( + "[$([$([#8,#16]);!$(*=N~O);" + "!$(*~N=O);X1,X2]),$([#7;v3;" + "!$([nH]);!$(*(-a)-a)])]" + ) + ) calc_desc_dict = mol.calcdesc() try: - logp = calc_desc_dict['logP'] + logp = calc_desc_dict["logP"] except KeyError: - logp = calc_desc_dict['LogP'] + logp = calc_desc_dict["LogP"] - return {"molwt": mol.molwt, - "logp": logp, - "donors": len(HBD.findall(mol)), - "acceptors": len(HBA.findall(mol)), - "psa": calc_desc_dict['TPSA'], - "mr": calc_desc_dict['MR'], - "rotbonds": mol.OBMol.NumRotors(), - "can": mol.write("can").split()[0].strip(), # tthis one works fine for both zinc and chembl (no ZINC code added after can descriptor string) - "inchi": mol.write("inchi").strip(), - "inchi_key": get_inchikey(mol).strip(), - "rings": len(mol.sssr), - "atoms": mol.OBMol.NumHvyAtoms(), - "spectrophore": OBspectrophore(mol), - } + return { + "molwt": mol.molwt, + "logp": logp, + "donors": len(HBD.findall(mol)), + "acceptors": len(HBA.findall(mol)), + "psa": calc_desc_dict["TPSA"], + "mr": calc_desc_dict["MR"], + "rotbonds": mol.OBMol.NumRotors(), + "can": mol.write("can") + .split()[0] + .strip(), # tthis one works fine for both zinc and chembl (no ZINC code added after can descriptor string) + "inchi": mol.write("inchi").strip(), + "inchi_key": get_inchikey(mol).strip(), + "rings": len(mol.sssr), + "atoms": mol.OBMol.NumHvyAtoms(), + "spectrophore": OBspectrophore(mol), + } def get_inchikey(mol): @@ -206,10 +245,12 @@ spectrophore = pybel.ob.OBSpectrophore() # Parameters: rotation angle = 20, normalization for mean and sd, accuracy = 3.0 A and non-stereospecific cages. spectrophore.SetNormalization(spectrophore.NormalizationTowardsZeroMeanAndUnitStd) - return ', '.join(["%.3f" % value for value in spectrophore.GetSpectrophore(mol.OBMol)]) + return ", ".join( + ["%.3f" % value for value in spectrophore.GetSpectrophore(mol.OBMol)] + ) -def split_library(lib_path, lib_format='sdf', package_size=None): +def split_library(lib_path, lib_format="sdf", package_size=None): """ Split a library of compounds. Usage: split_library(lib_path, lib_format, package_size) IT currently ONLY WORKS FOR SD-Files @@ -217,18 +258,39 @@ pack = 1 mol_counter = 0 - outfile = open('/%s/%s_pack_%i.%s' % ('/'.join(lib_path.split('/')[:-1]), lib_path.split('/')[-1].split('.')[0], pack, 'sdf'), 'w') + outfile = open( + "/%s/%s_pack_%i.%s" + % ( + "/".join(lib_path.split("/")[:-1]), + lib_path.split("/")[-1].split(".")[0], + pack, + "sdf", + ), + "w", + ) - for line in open(lib_path, 'r'): + for line in open(lib_path, "r"): outfile.write(line) - if line.strip() == '$$$$': + if line.strip() == "$$$$": mol_counter += 1 if mol_counter % package_size == 0: outfile.close() pack += 1 - outfile = open('/%s/%s_pack_%i.%s' % ('/'.join(lib_path.split('/')[:-1]), lib_path.split('/')[-1].split('.')[0], pack, 'sdf'), 'w') + outfile = open( + "/%s/%s_pack_%i.%s" + % ( + "/".join(lib_path.split("/")[:-1]), + lib_path.split("/")[-1].split(".")[0], + pack, + "sdf", + ), + "w", + ) if mol_counter * 10 % package_size == 0: - print('%i molecules parsed, starting pack nr. %i' % (mol_counter, pack - 1)) + print( + "%i molecules parsed, starting pack nr. %i" + % (mol_counter, pack - 1) + ) outfile.close() return True @@ -242,7 +304,7 @@ output_files = [] tfile = tempfile.NamedTemporaryFile(delete=False) - smiles_handle = open(smiles_file, 'r') + smiles_handle = open(smiles_file, "r") for count, line in enumerate(smiles_handle): if count % structures_in_one_file == 0 and count != 0: tfile.close() @@ -257,16 +319,19 @@ def mp_run(input_path, regex, PROCESSES, function_to_call): paths = [] - [paths.append(compound_file) for compound_file in glob.glob(str(input_path) + str(regex))] + [ + paths.append(compound_file) + for compound_file in glob.glob(str(input_path) + str(regex)) + ] paths.sort() pool = Pool(processes=PROCESSES) - print('Process initialized with', PROCESSES, 'processors') + print("Process initialized with", PROCESSES, "processors") result = pool.map_async(function_to_call, paths) result.get() return paths -if __name__ == '__main__': +if __name__ == "__main__": print(check_filetype(sys.argv[1]))
--- a/distance_finder.py Tue Nov 10 20:33:21 2020 +0000 +++ b/distance_finder.py Thu Aug 15 11:06:27 2024 +0000 @@ -19,9 +19,8 @@ def log(*args, **kwargs): - """Log output to STDERR - """ - print(*args, file=sys.stderr, ** kwargs) + """Log output to STDERR""" + print(*args, file=sys.stderr, **kwargs) def execute(ligands_sdf, points_file, outfile): @@ -35,7 +34,7 @@ points = [] # read the points - with open(points_file, 'r') as f: + with open(points_file, "r") as f: for line in f.readlines(): line.strip() if line: @@ -45,7 +44,7 @@ log("Read points", p) continue log("Failed to read line:", line) - log('Found', len(points), 'atom points') + log("Found", len(points), "atom points") sdf_writer = pybel.Outputfile("sdf", outfile, overwrite=True) @@ -53,7 +52,7 @@ for mol in pybel.readfile("sdf", ligands_sdf): count += 1 if count % 50000 == 0: - log('Processed', count) + log("Processed", count) try: # print("Processing mol", mol.title) @@ -70,32 +69,42 @@ distances = [] for i in coords: # calculates distance based on cartesian coordinates - distance = math.sqrt((point[0] - i[0])**2 + (point[1] - i[1])**2 + (point[2] - i[2])**2) + distance = math.sqrt( + (point[0] - i[0]) ** 2 + + (point[1] - i[1]) ** 2 + + (point[2] - i[2]) ** 2 + ) distances.append(distance) # log("distance:", distance) min_distance = min(distances) # log('Min:', min_distance) # log(count, p, min_distance) - mol.data['distance' + str(p)] = min_distance + mol.data["distance" + str(p)] = min_distance sdf_writer.write(mol) except Exception as e: - log('Failed to handle molecule: ' + str(e)) + log("Failed to handle molecule: " + str(e)) continue sdf_writer.close() - log('Wrote', count, 'molecules') + log("Wrote", count, "molecules") def main(): global work_dir - parser = argparse.ArgumentParser(description='XChem distances - measure distances to particular points') - parser.add_argument('-i', '--input', help="SDF containing the 3D molecules to score)") - parser.add_argument('-p', '--points', help="PDB format file with atoms") - parser.add_argument('-o', '--outfile', default='output.sdf', help="File name for results") + parser = argparse.ArgumentParser( + description="XChem distances - measure distances to particular points" + ) + parser.add_argument( + "-i", "--input", help="SDF containing the 3D molecules to score)" + ) + parser.add_argument("-p", "--points", help="PDB format file with atoms") + parser.add_argument( + "-o", "--outfile", default="output.sdf", help="File name for results" + ) args = parser.parse_args() log("XChem distances args: ", args)
--- a/multi_obgrep.py Tue Nov 10 20:33:21 2020 +0000 +++ b/multi_obgrep.py Thu Aug 15 11:06:27 2024 +0000 @@ -15,21 +15,55 @@ def parse_command_line(): parser = argparse.ArgumentParser() - parser.add_argument('-i', '--infile', required=True, help='Molecule file.') - parser.add_argument('-q', '--query', required=True, help='Query file, containing different SMARTS in each line.') - parser.add_argument('-o', '--outfile', required=True, help='Path to the output file.') + parser.add_argument("-i", "--infile", required=True, help="Molecule file.") + parser.add_argument( + "-q", + "--query", + required=True, + help="Query file, containing different SMARTS in each line.", + ) + parser.add_argument( + "-o", "--outfile", required=True, help="Path to the output file." + ) parser.add_argument("--iformat", help="Input format, like smi, sdf, inchi") - parser.add_argument("--n-times", dest="n_times", type=int, - default=0, help="Print a molecule only if the pattern occurs # times inside the molecule.") - parser.add_argument('-p', '--processors', type=int, default=multiprocessing.cpu_count()) - parser.add_argument("--invert-matches", dest="invert_matches", action="store_true", - default=False, help="Invert the matching, print non-matching molecules.") - parser.add_argument("--only-name", dest="only_name", action="store_true", - default=False, help="Only print the name of the molecules.") - parser.add_argument("--full-match", dest="full_match", action="store_true", - default=False, help="Full match, print matching-molecules only when the number of heavy atoms is also equal to the number of atoms in the SMARTS pattern.") - parser.add_argument("--number-of-matches", dest="number_of_matches", action="store_true", - default=False, help="Print the number of matches.") + parser.add_argument( + "--n-times", + dest="n_times", + type=int, + default=0, + help="Print a molecule only if the pattern occurs # times inside the molecule.", + ) + parser.add_argument( + "-p", "--processors", type=int, default=multiprocessing.cpu_count() + ) + parser.add_argument( + "--invert-matches", + dest="invert_matches", + action="store_true", + default=False, + help="Invert the matching, print non-matching molecules.", + ) + parser.add_argument( + "--only-name", + dest="only_name", + action="store_true", + default=False, + help="Only print the name of the molecules.", + ) + parser.add_argument( + "--full-match", + dest="full_match", + action="store_true", + default=False, + help="Full match, print matching-molecules only when the number of heavy atoms is also equal to the number of atoms in the SMARTS pattern.", + ) + parser.add_argument( + "--number-of-matches", + dest="number_of_matches", + action="store_true", + default=False, + help="Print the number of matches.", + ) return parser.parse_args() @@ -42,25 +76,27 @@ def mp_helper(query, args): """ - Helper function for multiprocessing. - That function is a wrapper around obgrep. + Helper function for multiprocessing. + That function is a wrapper around obgrep. """ cmd_list = [] if args.invert_matches: - cmd_list.append('-v') + cmd_list.append("-v") if args.only_name: - cmd_list.append('-n') + cmd_list.append("-n") if args.full_match: - cmd_list.append('-f') + cmd_list.append("-f") if args.number_of_matches: - cmd_list.append('-c') + cmd_list.append("-c") if args.n_times: - cmd_list.append('-t %s' % str(args.n_times)) + cmd_list.append("-t %s" % str(args.n_times)) tmp = tempfile.NamedTemporaryFile(delete=False) - cmd = 'obgrep %s "%s" %s' % (' '.join(cmd_list), query, args.infile) - child = subprocess.Popen(shlex.split(cmd), stdout=open(tmp.name, 'w+'), stderr=subprocess.PIPE) + cmd = 'obgrep %s "%s" %s' % (" ".join(cmd_list), query, args.infile) + child = subprocess.Popen( + shlex.split(cmd), stdout=open(tmp.name, "w+"), stderr=subprocess.PIPE + ) stdout, stderr = child.communicate() return (tmp.name, query) @@ -80,9 +116,9 @@ pool.close() pool.join() - out_handle = open(args.outfile, 'wb') + out_handle = open(args.outfile, "wb") for result_file, query in results: - res_handle = open(result_file, 'rb') + res_handle = open(result_file, "rb") shutil.copyfileobj(res_handle, out_handle) res_handle.close() os.remove(result_file) @@ -93,7 +129,7 @@ def __main__(): """ - Multiprocessing obgrep search. + Multiprocessing obgrep search. """ args = parse_command_line() obgrep(args)
--- a/ob_addh.py Tue Nov 10 20:33:21 2020 +0000 +++ b/ob_addh.py Thu Aug 15 11:06:27 2024 +0000 @@ -7,16 +7,28 @@ import sys from openbabel import openbabel, pybel + openbabel.obErrorLog.StopLogging() def parse_command_line(argv): parser = argparse.ArgumentParser() - parser.add_argument('--iformat', type=str, default='sdf', help='input file format') - parser.add_argument('-i', '--input', type=str, required=True, help='input file name') - parser.add_argument('-o', '--output', type=str, required=True, help='output file name') - parser.add_argument('--polar', action="store_true", default=False, help='Add hydrogen atoms only to polar atoms') - parser.add_argument('--pH', type=float, default="7.4", help='Specify target pH value') + parser.add_argument("--iformat", type=str, default="sdf", help="input file format") + parser.add_argument( + "-i", "--input", type=str, required=True, help="input file name" + ) + parser.add_argument( + "-o", "--output", type=str, required=True, help="output file name" + ) + parser.add_argument( + "--polar", + action="store_true", + default=False, + help="Add hydrogen atoms only to polar atoms", + ) + parser.add_argument( + "--pH", type=float, default="7.4", help="Specify target pH value" + ) return parser.parse_args() @@ -32,7 +44,7 @@ def __main__(): """ - Add hydrogen atoms at a certain pH value + Add hydrogen atoms at a certain pH value """ args = parse_command_line(sys.argv) addh(args)
--- a/ob_convert.xml Tue Nov 10 20:33:21 2020 +0000 +++ b/ob_convert.xml Thu Aug 15 11:06:27 2024 +0000 @@ -6,7 +6,7 @@ --> <macros> <import>macros.xml</import> - <token name="@GALAXY_VERSION@">0</token> + <token name="@GALAXY_VERSION@">1</token> </macros> <expand macro="requirements"/> <command detect_errors="aggressive"> @@ -132,7 +132,11 @@ #if float($ph) >= 0: -p $ph #end if - + + #if $appendproperties: + #set $props = str($appendproperties).replace(',', ' ') + --append '$props' + #end if ]]> </command> <inputs> @@ -155,7 +159,7 @@ <option value="cif">Crystallographic Information File</option> <option value="cml">Chemical Markup Language (CML)</option> <option value="cmlr">CML Reaction format</option> - <option value="com">Gaussian 98/03 Cartesian Input</option> + <option value="com">Gaussian 98/03 Cartesian Input(com)</option> <option value="copy">Copies raw text</option> <option value="crk2d">Chemical Resource Kit 2D diagram format</option> <option value="crk3d">Chemical Resource Kit 3D format</option> @@ -163,8 +167,6 @@ <option value="cssr">CSD CSSR format</option> <option value="ct">ChemDraw Connection Table format</option> <option value="dmol">DMol3 coordinates format</option> - <!--<option value="ent">Protein Data Bank format</option> - <option value="fa">FASTA format</option>--> <option value="fasta">FASTA format</option> <option value="feat">Feature format</option> <option value="fh">Fenske-Hall Z-Matrix format</option> @@ -172,27 +174,22 @@ <option value="fpt">Fingerprint format (fpt)</option> <option value="fract">Free Form Fractional format</option> <option value="fs">Open Babel FastSearching database (fs)</option> - <!--<option value="fsa">FASTA format</option>--> - <option value="gamin">GAMESS Input</option> - <option value="gau">Gaussian 98/03 Cartesian Input</option> - <!--<option value="gjc">Gaussian 98/03 Cartesian Input</option> - <option value="gjf">Gaussian 98/03 Cartesian Input</option>--> + <option value="gamin">GAMESS Input (gamin)</option> + <option value="inp">GAMESS Input (inp)</option> + <option value="gau">Gaussian 98/03 Cartesian Input(gau)</option> <option value="gpr">Ghemical format</option> <option value="gr96">GROMOS96 format</option> <option value="hin">HyperChem HIN format</option> <option value="inchi">IUPAC InChI</option> - <option value="inp">GAMESS Input</option> <option value="jin">Jaguar input format</option> - <!--<option value="mdl">MDL MOL format (mol)</option>--> - <option value="mmd">MacroModel format</option> - <option value="mmod">MacroModel format</option> - <!--<option value="mol">MDL MOL format (mol)</option> use SDF--> + <option value="mmd">MacroModel format (mmd)</option> + <option value="mmod">MacroModel format (mmod)</option> <option value="mol2">Sybyl Mol2 format (mol2)</option> <option value="molreport">Open Babel molecule report</option> - <option value="mop">MOPAC Cartesian format</option> - <option value="mopcrt">MOPAC Cartesian format</option> + <option value="mop">MOPAC Cartesian format (mop)</option> + <option value="mopcrt">MOPAC Cartesian format (mopcrt)</option> <option value="mopin">MOPAC Internal</option> - <option value="mpc">MOPAC Cartesian format</option> + <option value="mpc">MOPAC Cartesian format (mpc)</option> <option value="mpd">Sybyl descriptor format</option> <option value="mpqcin">MPQC simplified input format</option> <option value="nw">NWChem input format</option> @@ -204,13 +201,11 @@ <option value="qcin">Q-Chem input format</option> <option value="report">Open Babel report format</option> <option value="rxn">MDL RXN format</option> - <!--<option value="sd">MDL MOL format</option>--> <option value="sdf">MDL MOL format (sdf, mol)</option> <option value="smi">SMILES format (smi)</option> - <!--<option value="sy2">Sybyl Mol2 format</option>--> - <option value="tdd">Thermo format</option> + <option value="tdd">Thermo format (tdd)</option> <option value="test">Test format</option> - <option value="therm">Thermo format</option> + <option value="therm">Thermo format (therm)</option> <option value="tmol">TurboMole Coordinate format</option> <option value="txyz">Tinker MM2 format</option> <option value="unixyz">UniChem XYZ format</option> @@ -398,6 +393,27 @@ <param name="dative_bonds" type="boolean" truevalue="-b" falsevalue="" label="Convert dative bonds" help="e.g. [N+]([O-])=O to N(=O)=O (-b)" /> <param name="appendtotitle" type="text" value="" label="Append the specified text after each molecule title"/> + <param name="appendproperties" type="select" multiple="true" label="Molecular properties to append to the title." optional="true"> + <option value="abonds">Number of aromatic bonds</option> + <option value="atoms">Number of atoms</option> + <option value="bonds">Number of bonds</option> + <option value="cansmi">Canonical SMILES</option> + <option value="cansmiNS">Canonical SMILES without isotopes or stereo</option> + <option value="dbonds">Number of double bonds</option> + <option value="sbonds">Number of single bonds</option> + <option value="tbonds">Number of triple bonds</option> + <option value="formula">Chemical formula</option> + <option value="HBA1">Number of Hydrogen Bond Acceptors 1 (JoelLib)</option> + <option value="HBA2">Number of Hydrogen Bond Acceptors 2 (JoelLib)</option> + <option value="HBD">Number of Hydrogen Bond Donors (JoelLib)</option> + <option value="InChI">IUPAC InChI identifier</option> + <option value="InChIKey">InChIKey</option> + <option value="L5">Lipinski Rule of Five</option> + <option value="logP">octanol/water partition coefficient</option> + <option value="MR">molar refractivity</option> + <option value="MW">Molecular Weight</option> + <option value="TPSA">topological polar surface area</option> + </param> <!-- Uniqueness --> <conditional name="unique"> @@ -449,40 +465,40 @@ </collection> </outputs> <tests> - <test> + <test expect_num_outputs="1"> <param name="infile" ftype="sdf" value="CID_2244.sdf"/> <param name="oformat_opts_selector" value="cml" /> <output name="outfile" ftype="cml" file="ob_convert_on_CID2244.cml" /> </test> - <test> + <test expect_num_outputs="1"> <param name="infile" ftype="sdf" value="CID_2244.sdf"/> <param name="oformat_opts_selector" value="fs" /> - <output name="outfile" compare="contains" file="ob_convert_on_CID2244_obfs.txt" ftype="obfs"> + <output name="outfile" compare="contains" file="ob_convert_on_CID2244_obfs.txt" ftype="obfs" > <extra_files type="file" value="molecule.sdf" name="molecule.sdf" /> <extra_files type="file" value="molecule.fs" name="molecule.fs" compare="sim_size" /> </output> </test> - <test> + <test expect_num_outputs="1"> <param name="infile" ftype="sdf" value="CID_2244.sdf"/> <param name="oformat_opts_selector" value="inchi" /> <output name="outfile" ftype="inchi" file="ob_convert_on_CID2244.inchi" /> </test> - <test> + <test expect_num_outputs="1"> <param name="infile" ftype="sdf" value="CID_2244.sdf"/> <param name="oformat_opts_selector" value="pdb" /> <output name="outfile" ftype="pdb" file="ob_convert_on_CID2244.pdb" lines_diff="4" /> </test> - <test> + <test expect_num_outputs="1"> <param name="infile" ftype="sdf" value="CID_2244.sdf"/> <param name="oformat_opts_selector" value="can" /> <output name="outfile" ftype="smi" file="ob_convert_on_CID2244.smi" /> </test> - <test> + <test expect_num_outputs="1"> <param name="infile" ftype="sdf" value="CID_2244.sdf"/> <param name="oformat_opts_selector" value="sdf" /> <output name="outfile" ftype="sdf" file="ob_convert_on_CID2244.sdf" lines_diff="2"/> </test> - <test> + <test expect_num_outputs="1"> <param name="infile" ftype="smi" value="2_mol.smi"/> <param name="oformat_opts_selector" value="pdbqt"/> <param name="split" value="true"/> @@ -491,6 +507,12 @@ <element name="molecule2" file="split2.pdbqt" /> </output_collection> </test> + <test expect_num_outputs="1"> + <param name="infile" ftype="smi" value="2_mol.smi"/> + <param name="oformat_opts_selector" value="sdf" /> + <param name="appendproperties" value="cansmi,InChI"/> + <output name="outfile" ftype="sdf" file="2_mol.sdf" lines_diff="4"/> + </test> </tests> <help> <![CDATA[
--- a/ob_filter.py Tue Nov 10 20:33:21 2020 +0000 +++ b/ob_filter.py Thu Aug 15 11:06:27 2024 +0000 @@ -14,33 +14,36 @@ import cheminfolib from openbabel import pybel + cheminfolib.pybel_stop_logging() def parse_command_line(): parser = argparse.ArgumentParser() - parser.add_argument('-i', '--input', help='Input file name') - parser.add_argument('-iformat', help='Input file format') - parser.add_argument('-oformat', default='smi', - help='Output file format') - parser.add_argument('-o', '--output', help='Output file name', - required=True) - parser.add_argument('--filters', help="Specify the filters to apply", - required=True) - parser.add_argument('--list_of_names', required=False, - help="A file with list of molecule names to extract. Every name is in one line.") + parser.add_argument("-i", "--input", help="Input file name") + parser.add_argument("-iformat", help="Input file format") + parser.add_argument("-oformat", default="smi", help="Output file format") + parser.add_argument("-o", "--output", help="Output file name", required=True) + parser.add_argument("--filters", help="Specify the filters to apply", required=True) + parser.add_argument( + "--list_of_names", + required=False, + help="A file with list of molecule names to extract. Every name is in one line.", + ) return parser.parse_args() def filter_precalculated_compounds(args, filters): outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True) - for mol in pybel.readfile('sdf', args.input): + for mol in pybel.readfile("sdf", args.input): for key, elem in filters.items(): # map the short description to the larger metadata names stored in the sdf file property = cheminfolib.ColumnNames.get(key, key) min = elem[0] max = elem[1] - if float(mol.data[property]) >= float(min) and float(mol.data[property]) <= float(max): + if float(mol.data[property]) >= float(min) and float( + mol.data[property] + ) <= float(max): pass else: # leave the filter loop, because one filter constrained are not satisfied @@ -56,16 +59,30 @@ if args.iformat == args.oformat: # use the -ocopy option from openbabel to speed up the filtering, additionally no conversion is carried out # http://openbabel.org/docs/dev/FileFormats/Copy_raw_text.html#copy-raw-text - cmd = 'obabel -i%s %s -ocopy -O %s --filter' % (args.iformat, args.input, args.output) + cmd = "obabel -i%s %s -ocopy -O %s --filter" % ( + args.iformat, + args.input, + args.output, + ) else: - cmd = 'obabel -i%s %s -o%s -O %s --filter' % (args.iformat, args.input, args.oformat, args.output) - filter_cmd = '' + cmd = "obabel -i%s %s -o%s -O %s --filter" % ( + args.iformat, + args.input, + args.oformat, + args.output, + ) + filter_cmd = "" # OBDescriptor stores a mapping from our desc shortcut to the OB name [0] and a long description [1] for key, elem in filters.items(): ob_descriptor_name = cheminfolib.OBDescriptor[key][0] min = elem[0] max = elem[1] - filter_cmd += ' %s>=%s %s<=%s ' % (ob_descriptor_name, min, ob_descriptor_name, max) + filter_cmd += " %s>=%s %s<=%s " % ( + ob_descriptor_name, + min, + ob_descriptor_name, + max, + ) args = shlex.split('%s "%s"' % (cmd, filter_cmd)) # print '%s "%s"' % (cmd, filter_cmd) @@ -76,18 +93,18 @@ return_code = child.returncode if return_code: - sys.stdout.write(stdout.decode('utf-8')) - sys.stderr.write(stderr.decode('utf-8')) + sys.stdout.write(stdout.decode("utf-8")) + sys.stderr.write(stderr.decode("utf-8")) sys.stderr.write("Return error code %i from command:\n" % return_code) sys.stderr.write("%s\n" % cmd) else: - sys.stdout.write(stdout.decode('utf-8')) - sys.stdout.write(stderr.decode('utf-8')) + sys.stdout.write(stdout.decode("utf-8")) + sys.stdout.write(stderr.decode("utf-8")) def filter_by_name(args): outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True) - for mol in pybel.readfile('sdf', args.input): + for mol in pybel.readfile("sdf", args.input): for name in open(args.list_of_names): if mol.title.strip() == name.strip(): outfile.write(mol) @@ -96,21 +113,21 @@ def __main__(): """ - Select compounds with certain properties from a small library + Select compounds with certain properties from a small library """ args = parse_command_line() - if args.filters == '__filter_by_name__': + if args.filters == "__filter_by_name__": filter_by_name(args) return # Its a small trick to get the parameters in an easy way from the xml file. # To keep it readable in the xml file, many white-spaces are included in that string it needs to be removed. # Also the last loop creates a ',{' that is not an valid jason expression. - filters = json.loads((args.filters).replace(' ', '').replace(',}', '}')) - if args.iformat == 'sdf': + filters = json.loads((args.filters).replace(" ", "").replace(",}", "}")) + if args.iformat == "sdf": # Check if the sdf file contains all of the required metadata to invoke the precalculation filtering - mol = next(pybel.readfile('sdf', args.input)) + mol = next(pybel.readfile("sdf", args.input)) for key, elem in filters.items(): property = cheminfolib.ColumnNames.get(key, key) if property not in mol.data:
--- a/ob_genProp.py Tue Nov 10 20:33:21 2020 +0000 +++ b/ob_genProp.py Thu Aug 15 11:06:27 2024 +0000 @@ -10,43 +10,57 @@ import cheminfolib import openbabel from openbabel import pybel + openbabel.obErrorLog.StopLogging() def parse_command_line(argv): parser = argparse.ArgumentParser() - parser.add_argument('--iformat', default='sdf', help='input file format') - parser.add_argument('-i', '--input', required=True, help='input file name') - parser.add_argument('--oformat', default='sdf', choices=['sdf', 'table'], help='output file format') - parser.add_argument('--header', type=bool, help='Include the header as the first line of the output table') - parser.add_argument('-o', '--output', required=True, help='output file name') + parser.add_argument("--iformat", default="sdf", help="input file format") + parser.add_argument("-i", "--input", required=True, help="input file name") + parser.add_argument( + "--oformat", default="sdf", choices=["sdf", "table"], help="output file format" + ) + parser.add_argument( + "--header", + type=bool, + help="Include the header as the first line of the output table", + ) + parser.add_argument("-o", "--output", required=True, help="output file name") return parser.parse_args() def compute_properties(args): - if args.oformat == 'sdf': + if args.oformat == "sdf": outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True) else: - outfile = open(args.output, 'w') + outfile = open(args.output, "w") if args.header: mol = next(pybel.readfile(args.iformat, args.input)) metadata = cheminfolib.get_properties_ext(mol) - outfile.write('%s\n' % '\t'.join([cheminfolib.ColumnNames[key] for key in metadata])) + outfile.write( + "%s\n" % "\t".join([cheminfolib.ColumnNames[key] for key in metadata]) + ) for mol in pybel.readfile(args.iformat, args.input): if mol.OBMol.NumHvyAtoms() > 5: metadata = cheminfolib.get_properties_ext(mol) - if args.oformat == 'sdf': - [mol.data.update({cheminfolib.ColumnNames[key]: metadata[key]}) for key in metadata] + if args.oformat == "sdf": + [ + mol.data.update({cheminfolib.ColumnNames[key]: metadata[key]}) + for key in metadata + ] outfile.write(mol) else: - outfile.write('%s\n' % ('\t'.join([str(metadata[key]) for key in metadata]))) + outfile.write( + "%s\n" % ("\t".join([str(metadata[key]) for key in metadata])) + ) outfile.close() def __main__(): """ - Physico-chemical properties are computed and stored as metadata in the sdf output file + Physico-chemical properties are computed and stored as metadata in the sdf output file """ args = parse_command_line(sys.argv) compute_properties(args)
--- a/ob_remIons.py Tue Nov 10 20:33:21 2020 +0000 +++ b/ob_remIons.py Thu Aug 15 11:06:27 2024 +0000 @@ -8,37 +8,43 @@ import argparse from openbabel import openbabel, pybel + openbabel.obErrorLog.StopLogging() def parse_command_line(): parser = argparse.ArgumentParser() - parser.add_argument('-iformat', default='sdf', help='input file format') - parser.add_argument('-i', '--input', required=True, help='input file name') - parser.add_argument('-o', '--output', required=True, help='output file name') - parser.add_argument('-idx', default=False, action='store_true', help='should output be an indexed text table? works only for inchi/smiles, otherwise is ignored') + parser.add_argument("-iformat", default="sdf", help="input file format") + parser.add_argument("-i", "--input", required=True, help="input file name") + parser.add_argument("-o", "--output", required=True, help="output file name") + parser.add_argument( + "-idx", + default=False, + action="store_true", + help="should output be an indexed text table? works only for inchi/smiles, otherwise is ignored", + ) return parser.parse_args() def remove_ions(args): - with open(args.output, 'w') as outfile: + with open(args.output, "w") as outfile: for index, mol in enumerate(pybel.readfile(args.iformat, args.input)): if mol.OBMol.NumHvyAtoms() > 5: mol.OBMol.StripSalts(0) - if 'inchi' in mol.data: - del mol.data['inchi'] # remove inchi cache so modified mol is saved + if "inchi" in mol.data: + del mol.data["inchi"] # remove inchi cache so modified mol is saved - mol = mol.write(args.iformat) if mol.OBMol.NumHvyAtoms() > 5 else '\n' + mol = mol.write(args.iformat) if mol.OBMol.NumHvyAtoms() > 5 else "\n" - if args.idx and args.iformat in ['inchi', 'smi']: - outfile.write(f'{index}\t{mol}') - elif mol != '\n': - outfile.write(f'{mol}') + if args.idx and args.iformat in ["inchi", "smi"]: + outfile.write(f"{index}\t{mol}") + elif mol != "\n": + outfile.write(f"{mol}") def __main__(): """ - Remove any counterion and delete any fragment but the largest one for each molecule. + Remove any counterion and delete any fragment but the largest one for each molecule. """ args = parse_command_line() remove_ions(args)
--- a/ob_spectrophore_search.py Tue Nov 10 20:33:21 2020 +0000 +++ b/ob_spectrophore_search.py Thu Aug 15 11:06:27 2024 +0000 @@ -8,6 +8,7 @@ import numpy as np from openbabel import openbabel, pybel + openbabel.obErrorLog.StopLogging() # TODO get rid of eval() @@ -17,49 +18,94 @@ def parse_command_line(): parser = argparse.ArgumentParser() - parser.add_argument('--target', required=True, help='target file name in sdf format with Spectrophores(TM) descriptors stored as meta-data') - parser.add_argument('--library', required=True, help='library of compounds with pre-computed physico-chemical properties, including Spectrophores(TM) in tabular format') - parser.add_argument('-c', '--column', required=True, type=int, help='#column containing the Spectrophores(TM) descriptors in the library file') - parser.add_argument('-o', '--output', required=True, help='output file name') - parser.add_argument('-n', '--normalization', default="ZeroMeanAndUnitStd", choices=['No', 'ZeroMean', 'UnitStd', 'ZeroMeanAndUnitStd'], help='Normalization method') - parser.add_argument('-a', '--accuracy', default="20", choices=['1', '2', '5', '10', '15', '20', '30', '36', '45', '60'], help='Accuracy expressed as angular stepsize') - parser.add_argument('-s', '--stereo', default="No", choices=['No', 'Unique', 'Mirror', 'All'], help='Stereospecificity of the cage') - parser.add_argument('-r', '--resolution', type=float, default="3.0", help='Resolution') + parser.add_argument( + "--target", + required=True, + help="target file name in sdf format with Spectrophores(TM) descriptors stored as meta-data", + ) + parser.add_argument( + "--library", + required=True, + help="library of compounds with pre-computed physico-chemical properties, including Spectrophores(TM) in tabular format", + ) + parser.add_argument( + "-c", + "--column", + required=True, + type=int, + help="#column containing the Spectrophores(TM) descriptors in the library file", + ) + parser.add_argument("-o", "--output", required=True, help="output file name") + parser.add_argument( + "-n", + "--normalization", + default="ZeroMeanAndUnitStd", + choices=["No", "ZeroMean", "UnitStd", "ZeroMeanAndUnitStd"], + help="Normalization method", + ) + parser.add_argument( + "-a", + "--accuracy", + default="20", + choices=["1", "2", "5", "10", "15", "20", "30", "36", "45", "60"], + help="Accuracy expressed as angular stepsize", + ) + parser.add_argument( + "-s", + "--stereo", + default="No", + choices=["No", "Unique", "Mirror", "All"], + help="Stereospecificity of the cage", + ) + parser.add_argument( + "-r", "--resolution", type=float, default="3.0", help="Resolution" + ) return parser.parse_args() def set_parameters(args): - if args.normalization == 'No': + if args.normalization == "No": spectrophore.SetNormalization(spectrophore.NoNormalization) else: - spectrophore.SetNormalization(eval('spectrophore.NormalizationTowards' + args.normalization)) - spectrophore.SetAccuracy(eval('spectrophore.AngStepSize' + args.accuracy)) - spectrophore.SetStereo(eval('spectrophore.' + args.stereo + 'StereoSpecificProbes')) + spectrophore.SetNormalization( + eval("spectrophore.NormalizationTowards" + args.normalization) + ) + spectrophore.SetAccuracy(eval("spectrophore.AngStepSize" + args.accuracy)) + spectrophore.SetStereo(eval("spectrophore." + args.stereo + "StereoSpecificProbes")) spectrophore.SetResolution(args.resolution) return True def Compute_Spectrophores_distance(target_spectrophore, args): - outfile = open(args.output, 'w') - for mol in open(args.library, 'r'): + outfile = open(args.output, "w") + for mol in open(args.library, "r"): try: - distance = ((np.asarray(target_spectrophore, dtype=float) - np.asarray(mol.split('\t')[args.column - 1].strip().split(', '), dtype=float))**2).sum() + distance = ( + ( + np.asarray(target_spectrophore, dtype=float) + - np.asarray( + mol.split("\t")[args.column - 1].strip().split(", "), + dtype=float, + ) + ) + ** 2 + ).sum() except ValueError: distance = 0 - outfile.write('%s\t%f\n' % (mol.strip(), distance)) + outfile.write("%s\t%f\n" % (mol.strip(), distance)) outfile.close() def __main__(): """ - Computation of Spectrophores(TM) distances to a target molecule. + Computation of Spectrophores(TM) distances to a target molecule. """ args = parse_command_line() # This sets up the parameters for the Spectrophore generation. Parameters are set to fit those of our standard parsing tool set_parameters(args) - mol = next(pybel.readfile('sdf', args.target)) - target_spectrophore = mol.data["Spectrophores(TM)"].strip().split(', ') + mol = next(pybel.readfile("sdf", args.target)) + target_spectrophore = mol.data["Spectrophores(TM)"].strip().split(", ") # Compute the paired-distance between every molecule in the library and the target Compute_Spectrophores_distance(target_spectrophore, args)
--- a/remove_protonation_state.py Tue Nov 10 20:33:21 2020 +0000 +++ b/remove_protonation_state.py Thu Aug 15 11:06:27 2024 +0000 @@ -7,14 +7,15 @@ import argparse from openbabel import openbabel, pybel + openbabel.obErrorLog.StopLogging() def parse_command_line(): parser = argparse.ArgumentParser() - parser.add_argument('--iformat', default='sdf', help='input file format') - parser.add_argument('-i', '--input', required=True, help='input file name') - parser.add_argument('-o', '--output', required=True, help='output file name') + parser.add_argument("--iformat", default="sdf", help="input file format") + parser.add_argument("-i", "--input", required=True, help="input file name") + parser.add_argument("-o", "--output", required=True, help="output file name") return parser.parse_args() @@ -22,15 +23,15 @@ outfile = pybel.Outputfile(args.iformat, args.output, overwrite=True) for mol in pybel.readfile(args.iformat, args.input): [atom.OBAtom.SetFormalCharge(0) for atom in mol.atoms] - if 'inchi' in mol.data: - del mol.data['inchi'] # remove inchi cache so modified mol is saved + if "inchi" in mol.data: + del mol.data["inchi"] # remove inchi cache so modified mol is saved outfile.write(mol) outfile.close() def __main__(): """ - Remove any protonation state from each atom in each molecule. + Remove any protonation state from each atom in each molecule. """ args = parse_command_line() remove_protonation(args)
--- a/subsearch.py Tue Nov 10 20:33:21 2020 +0000 +++ b/subsearch.py Thu Aug 15 11:06:27 2024 +0000 @@ -13,21 +13,34 @@ import tempfile from openbabel import openbabel, pybel + openbabel.obErrorLog.StopLogging() def parse_command_line(): parser = argparse.ArgumentParser() - parser.add_argument('-i', '--infile', required=True, help='Molecule file.') - parser.add_argument('--iformat', help='Input format.') - parser.add_argument('--fastsearch-index', dest="fastsearch_index", required=True, - help='Path to the openbabel fastsearch index.') - parser.add_argument('-o', '--outfile', required=True, help='Path to the output file.') - parser.add_argument('--oformat', default='smi', help='Output file format') - parser.add_argument("--max-candidates", dest="max_candidates", type=int, default=4000, - help="The maximum number of candidates.") - parser.add_argument('-p', '--processors', type=int, - default=multiprocessing.cpu_count()) + parser.add_argument("-i", "--infile", required=True, help="Molecule file.") + parser.add_argument("--iformat", help="Input format.") + parser.add_argument( + "--fastsearch-index", + dest="fastsearch_index", + required=True, + help="Path to the openbabel fastsearch index.", + ) + parser.add_argument( + "-o", "--outfile", required=True, help="Path to the output file." + ) + parser.add_argument("--oformat", default="smi", help="Output file format") + parser.add_argument( + "--max-candidates", + dest="max_candidates", + type=int, + default=4000, + help="The maximum number of candidates.", + ) + parser.add_argument( + "-p", "--processors", type=int, default=multiprocessing.cpu_count() + ) return parser.parse_args() @@ -40,20 +53,28 @@ def mp_helper(query, args): """ - Helper function for multiprocessing. - That function is a wrapper around the following command: - obabel file.fs -s"smarts" -Ooutfile.smi -al 999999999 + Helper function for multiprocessing. + That function is a wrapper around the following command: + obabel file.fs -s"smarts" -Ooutfile.smi -al 999999999 """ - if args.oformat == 'names': - opts = '-osmi -xt' + if args.oformat == "names": + opts = "-osmi -xt" else: - opts = '-o%s' % args.oformat + opts = "-o%s" % args.oformat tmp = tempfile.NamedTemporaryFile(delete=False) - cmd = 'obabel -ifs %s -O %s %s -s%s -al %s' % (args.fastsearch_index, tmp.name, opts, query, args.max_candidates) + cmd = "obabel -ifs %s -O %s %s -s%s -al %s" % ( + args.fastsearch_index, + tmp.name, + opts, + query, + args.max_candidates, + ) - child = subprocess.Popen(cmd.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE) + child = subprocess.Popen( + cmd.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE + ) stdout, stderr = child.communicate() return_code = child.returncode @@ -73,14 +94,14 @@ """ Wrapper to retrieve a striped SMILES or SMARTS string from different input formats. """ - if args.iformat in ['smi', 'text', 'tabular']: + if args.iformat in ["smi", "text", "tabular"]: with open(args.infile) as text_file: for line in text_file: - yield line.split('\t')[0].strip() + yield line.split("\t")[0].strip() else: # inchi or sdf files for mol in pybel.readfile(args.iformat, args.infile): - yield mol.write('smiles').split('\t')[0] + yield mol.write("smiles").split("\t")[0] def substructure_search(args): @@ -91,18 +112,18 @@ pool.close() pool.join() - if args.oformat == 'names': - out_handle = open(args.outfile, 'w') + if args.oformat == "names": + out_handle = open(args.outfile, "w") for result_file, query in results: with open(result_file) as res_handle: for line in res_handle: - out_handle.write('%s\t%s\n' % (line.strip(), query)) + out_handle.write("%s\t%s\n" % (line.strip(), query)) os.remove(result_file) out_handle.close() else: - out_handle = open(args.outfile, 'wb') + out_handle = open(args.outfile, "wb") for result_file, query in results: - res_handle = open(result_file, 'rb') + res_handle = open(result_file, "rb") shutil.copyfileobj(res_handle, out_handle) res_handle.close() os.remove(result_file) @@ -111,7 +132,7 @@ def __main__(): """ - Multiprocessing Open Babel Substructure Search. + Multiprocessing Open Babel Substructure Search. """ args = parse_command_line() substructure_search(args)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/2_mol.sdf Thu Aug 15 11:06:27 2024 +0000 @@ -0,0 +1,66 @@ +CC(=O)Oc1ccccc1C(=O)[O-] InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12)/p-1 + OpenBabel08132415422D + + 13 13 0 0 0 0 0 0 0 0999 V2000 + 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 0.0000 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 0.0000 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 0.0000 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 0.0000 0.0000 O 0 5 0 0 0 0 0 0 0 0 0 0 + 1 2 1 0 0 0 0 + 2 3 2 0 0 0 0 + 2 4 1 0 0 0 0 + 4 5 1 0 0 0 0 + 5 10 1 0 0 0 0 + 5 6 2 0 0 0 0 + 6 7 1 0 0 0 0 + 7 8 2 0 0 0 0 + 8 9 1 0 0 0 0 + 9 10 2 0 0 0 0 + 10 11 1 0 0 0 0 + 11 12 2 0 0 0 0 + 11 13 1 0 0 0 0 +M CHG 1 13 -1 +M END +$$$$ +CC(=O)Oc1ccccc1C(=O)[O-] InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12)/p-1 + OpenBabel08132415422D + + 13 13 0 0 0 0 0 0 0 0999 V2000 + 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 0.0000 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 0.0000 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 0.0000 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 0.0000 0.0000 O 0 5 0 0 0 0 0 0 0 0 0 0 + 1 2 1 0 0 0 0 + 2 3 2 0 0 0 0 + 2 4 1 0 0 0 0 + 4 5 1 0 0 0 0 + 5 10 1 0 0 0 0 + 5 6 2 0 0 0 0 + 6 7 1 0 0 0 0 + 7 8 2 0 0 0 0 + 8 9 1 0 0 0 0 + 9 10 2 0 0 0 0 + 10 11 1 0 0 0 0 + 11 12 2 0 0 0 0 + 11 13 1 0 0 0 0 +M CHG 1 13 -1 +M END +$$$$