Mercurial > repos > bgruening > chembl_structure_pipeline
comparison chembl.py @ 0:2f59c6239f25 draft default tip
"planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e-dirty"
author | bgruening |
---|---|
date | Sat, 10 Oct 2020 09:43:40 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:2f59c6239f25 |
---|---|
1 import argparse | |
2 | |
3 from chembl_webresource_client.new_client import new_client | |
4 from chembl_webresource_client.settings import Settings | |
5 | |
6 Settings.Instance().CACHING = False | |
7 | |
8 | |
def open_file(filename):
    """
    Return the first whitespace-separated token on the first line of a file
    """
    with open(filename) as handle:
        first_line = handle.readline()
    # An empty or blank first line yields no tokens, so indexing raises IndexError
    tokens = first_line.split()
    return tokens[0]
12 | |
13 | |
def get_smiles(res):
    """
    Build a set of 'SMILES<TAB>ChEMBL ID' strings from search results

    Records whose lookup raises TypeError are skipped (presumably entries
    where 'molecule_structures' is None).
    """
    collected = set()
    for record in res:
        try:
            structures = record['molecule_structures']
            entry = '{}\t{}'.format(structures['canonical_smiles'], record['molecule_chembl_id'])
        except TypeError:
            continue
        collected.add(entry)
    return collected
25 | |
26 | |
def sim_search(smiles, tanimoto):
    """
    Run a similarity search for the SMILES input, passing `tanimoto` as the
    similarity value; only the structure and ChEMBL ID fields are requested
    """
    wanted_fields = ['molecule_structures', 'molecule_chembl_id']
    matches = new_client.similarity.filter(smiles=smiles, similarity=tanimoto)
    return matches.only(wanted_fields)
33 | |
34 | |
def substr_search(smiles):
    """
    Run a substructure search for the SMILES input; only the structure and
    ChEMBL ID fields are requested
    """
    wanted_fields = ['molecule_structures', 'molecule_chembl_id']
    matches = new_client.substructure.filter(smiles=smiles)
    return matches.only(wanted_fields)
41 | |
42 | |
def filter_drugs(mols):
    """
    Narrow a query down to approved drugs (max_phase of 4)
    """
    criteria = {'max_phase': 4}
    return mols.filter(**criteria)
48 | |
49 | |
def filter_biotherapeutic(mols):
    """
    Narrow a query down to molecules with a non-null biotherapeutic field
    """
    criteria = {'biotherapeutic__isnull': False}
    return mols.filter(**criteria)
55 | |
56 | |
def filter_nat_prod(mols):
    """
    Narrow a query down to natural products (natural_product of 1)
    """
    criteria = {'natural_product': 1}
    return mols.filter(**criteria)
62 | |
63 | |
def filter_ro5(mols):
    """
    Narrow a query down to compounds with zero RO5 violations
    """
    criteria = {'molecule_properties__num_ro5_violations': 0}
    return mols.filter(**criteria)
69 | |
70 | |
def main():
    """
    Command-line entry point: search ChEMBL for compounds and write the hits

    The query SMILES comes from -i/--input or, if given, from the first token
    of the file named by -f/--file. A substructure search is run when
    -s/--substructure is set; otherwise a similarity search is run using the
    -t/--tanimoto value. Optional filters are then applied in the order
    drugs, biotherapeutic, natural products, RO5. Each hit is written to the
    output file as 'SMILES<TAB>ChEMBL ID', one per line, in sorted order.

    Missing required options are reported through argparse (usage message,
    exit status 2). Raises IOError if the SMILES is shorter than 5 characters.
    """
    parser = argparse.ArgumentParser(description='Search ChEMBL database for compounds')
    parser.add_argument('-i', '--input', help='SMILES input')
    parser.add_argument('-f', '--file', help='SMILES input as file')
    parser.add_argument('-o', '--output', help="SMILES output")
    parser.add_argument('-t', '--tanimoto', type=int, help='Tanimoto similarity score')
    parser.add_argument('-s', '--substructure', action='store_true', help='Substructure search using the SMILES input.')
    parser.add_argument('-d', '--drugs', action='store_true', help='Filter approved drugs')
    parser.add_argument('-b', '--biotherapeutic', action='store_true', help='Filter biotherapeutic molecules')
    parser.add_argument('-n', '--nat-prod', action='store_true', help='Filter natural products')
    parser.add_argument('-r', '--ro5', action='store_true', help='Filter compounds that pass Lipinski RO5')

    args = parser.parse_args()

    if args.file:  # get SMILES from file rather than -i option
        args.input = open_file(args.file)

    # Validate up front so a missing option gives a usage error instead of a
    # TypeError from len(None) / open(None) after the remote query has run.
    if args.input is None:
        parser.error('a SMILES input is required: use -i/--input or -f/--file')
    if args.output is None:
        parser.error('an output file is required: use -o/--output')

    if len(args.input) < 5:
        raise IOError('SMILES must be at least 5 characters long.')

    if args.substructure:  # specify search type: substructure or similarity
        mols = substr_search(args.input)
    else:
        # The similarity search needs a threshold; do not pass None through.
        if args.tanimoto is None:
            parser.error('-t/--tanimoto is required for a similarity search')
        mols = sim_search(args.input, args.tanimoto)

    # filter options:
    if args.drugs:
        mols = filter_drugs(mols)

    if args.biotherapeutic:
        mols = filter_biotherapeutic(mols)

    if args.nat_prod:
        mols = filter_nat_prod(mols)

    if args.ro5:
        mols = filter_ro5(mols)

    # get SMILES from search output
    mols = get_smiles(mols)

    # write to file; get_smiles returns a set, so sort for reproducible output
    with open(args.output, 'w') as f:
        f.write('\n'.join(sorted(mols)))
115 | |
116 | |
117 if __name__ == "__main__": | |
118 main() |