comparison chembl.py @ 0:915e9be38994 draft

planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 2e3c3c2bd7ecdc9c2968a32f91e81136e0cb3835
author bgruening
date Mon, 05 Aug 2019 05:21:58 -0400
parents
children 6f8458d1cf46
comparison
equal deleted inserted replaced
-1:000000000000 0:915e9be38994
1 from chembl_webresource_client.new_client import new_client
2 import argparse
3
def open_file(filename):
    """
    Read the query SMILES from the first line of a file

    Only the first whitespace-delimited token of the first line is returned,
    so the trailing newline (and anything following the SMILES on that line,
    such as a name column) is not passed on to the search.

    :param filename: path of the text file holding the SMILES
    :return: the SMILES string, or '' if the file is empty or its first
             line is blank
    """
    with open(filename) as f:
        tokens = f.readline().split()
    # A blank first line yields no tokens; return '' so the caller's own
    # length validation can reject it instead of raising IndexError here.
    return tokens[0] if tokens else ''
7
def get_smiles(res):
    """
    Collect the unique canonical SMILES from search results

    Records that carry no structure information ('molecule_structures' is
    empty/None, or its 'canonical_smiles' is missing/None) are skipped
    instead of raising, so the returned set contains only strings.

    :param res: iterable of dict-like records, each having a
                'molecule_structures' entry
    :return: set of canonical SMILES strings
    """
    smiles = set()
    for record in res:
        structures = record['molecule_structures']
        if not structures:
            # No structure attached to this record; nothing to collect.
            continue
        canonical = structures.get('canonical_smiles')
        if canonical:
            smiles.add(canonical)
    return smiles
16
def sim_search(smiles, tanimoto):
    """
    Run a similarity search for the SMILES input, passing the Tanimoto
    value as the similarity setting; only the 'molecule_structures' field
    is requested for the hits
    """
    hits = new_client.similarity.filter(smiles=smiles, similarity=tanimoto)
    return hits.only(['molecule_structures'])
23
def substr_search(smiles):
    """
    Run a substructure search for the SMILES input; only the
    'molecule_structures' field is requested for the hits
    """
    hits = new_client.substructure.filter(smiles=smiles)
    return hits.only(['molecule_structures'])
30
def filter_drugs(mols):
    """
    Narrow a query result to approved drugs (max_phase equal to 4)
    """
    approved_only = {'max_phase': 4}
    return mols.filter(**approved_only)
36
def filter_biotherapeutic(mols):
    """
    Narrow a query result to molecules whose 'biotherapeutic' field is set
    """
    has_biotherapeutic = {'biotherapeutic__isnull': False}
    return mols.filter(**has_biotherapeutic)
42
def filter_nat_prod(mols):
    """
    Narrow a query result to natural products (natural_product equal to 1)
    """
    natural_only = {'natural_product': 1}
    return mols.filter(**natural_only)
48
def filter_ro5(mols):
    """
    Narrow a query result to compounds reporting zero RO5 violations
    """
    no_violations = {'molecule_properties__num_ro5_violations': 0}
    return mols.filter(**no_violations)
54
def main():
    """
    Command-line entry point: search with a SMILES query and write the
    resulting SMILES to the output file, one per line

    The query is taken from -i or, taking precedence, from the file given
    with -f. A substructure search is run when -s is set; otherwise a
    similarity search is run with the -t score. The -d, -b, -n and -r
    flags then narrow the results, in that order, before the SMILES are
    extracted and written in sorted order.

    Missing required arguments are reported through the parser (usage
    message and exit) before any search is attempted.

    :raises IOError: if the SMILES query is shorter than 5 characters
    """
    parser = argparse.ArgumentParser(description='Search ChEMBL database for compounds')
    parser.add_argument('-i', '--input', help='SMILES input')
    parser.add_argument('-f', '--file', help='SMILES input as file')
    # Required up front: otherwise a missing -o only fails in open(None)
    # after the whole search has already been performed.
    parser.add_argument('-o', '--output', required=True, help="SMILES output")
    parser.add_argument('-t', '--tanimoto', type=int, help='Tanimoto similarity score')
    parser.add_argument('-s', '--substructure', action='store_true', help='Substructure search using the SMILES input.')
    parser.add_argument('-d', '--drugs', action='store_true', help='Filter approved drugs')
    parser.add_argument('-b', '--biotherapeutic', action='store_true', help='Filter biotherapeutic molecules')
    parser.add_argument('-n', '--nat-prod', action='store_true', help='Filter natural products')
    parser.add_argument('-r', '--ro5', action='store_true', help='Filter compounds that pass Lipinski RO5')

    args = parser.parse_args()

    if args.file:  # get SMILES from file rather than -i option
        args.input = open_file(args.file)

    # Neither -i nor -f supplied: report a usage error rather than failing
    # below with a TypeError on len(None).
    if args.input is None:
        parser.error('a SMILES query is required: use -i/--input or -f/--file')

    # Surrounding whitespace (e.g. a newline read from a file) is not part
    # of the SMILES and must not count towards the minimum length.
    args.input = args.input.strip()

    if len(args.input) < 5:
        raise IOError('SMILES must be at least 5 characters long.')

    if args.substructure:  # specify search type: substructure or similarity
        mols = substr_search(args.input)
    else:
        # A similarity search cannot run without a score; do not silently
        # pass similarity=None to the search.
        if args.tanimoto is None:
            parser.error('-t/--tanimoto is required for a similarity search')
        mols = sim_search(args.input, args.tanimoto)

    # filter options:
    if args.drugs:
        mols = filter_drugs(mols)

    if args.biotherapeutic:
        mols = filter_biotherapeutic(mols)

    if args.nat_prod:
        mols = filter_nat_prod(mols)

    if args.ro5:
        mols = filter_ro5(mols)

    # get SMILES from search output
    mols = get_smiles(mols)

    # write to file; sorted so the output is reproducible between runs
    # (iteration order of a set of strings is not stable)
    with open(args.output, 'w') as f:
        f.write('\n'.join(sorted(mols)))
99
100
# Run the command-line interface only when executed as a script,
# not when the module is imported.
if __name__ == '__main__':
    main()