Mercurial > repos > bgruening > chembl
comparison chembl.py @ 0:915e9be38994 draft
planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 2e3c3c2bd7ecdc9c2968a32f91e81136e0cb3835
author | bgruening |
---|---|
date | Mon, 05 Aug 2019 05:21:58 -0400 |
parents | |
children | 6f8458d1cf46 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:915e9be38994 |
---|---|
1 from chembl_webresource_client.new_client import new_client | |
2 import argparse | |
3 | |
def open_file(filename):
    """
    Read the query SMILES from the first line of a file.

    Only the first whitespace-delimited token of the first line is returned,
    so the trailing newline (and any name column following the SMILES) does
    not end up in the search query or inflate the length check.

    filename: path of the text file to read.
    Returns the SMILES string, or an empty string if the first line is blank
    or the file is empty.
    """
    with open(filename) as f:
        fields = f.readline().split()
    return fields[0] if fields else ''
7 | |
def get_smiles(res):
    """
    Get the set of distinct canonical SMILES from search results.

    res: iterable of records indexable as
         record['molecule_structures']['canonical_smiles'].
    Returns a set of SMILES strings. Records that carry no structure
    (missing key, ``molecule_structures`` is None, or an empty SMILES) are
    skipped instead of aborting the whole run.
    """
    smiles = set()
    for mol in res:
        try:
            smi = mol['molecule_structures']['canonical_smiles']
        except (KeyError, TypeError):
            # TypeError: 'molecule_structures' is None, so there is nothing
            # to subscript; KeyError: the field is absent altogether.
            continue
        if smi:
            smiles.add(smi)
    return smiles
16 | |
def sim_search(smiles, tanimoto):
    """
    Run a ChEMBL similarity search for the given SMILES.

    The query is built on ``new_client.similarity`` with the SMILES and the
    Tanimoto score passed as ``similarity``; the result is restricted to the
    ``molecule_structures`` field.
    """
    hits = new_client.similarity.filter(smiles=smiles, similarity=tanimoto)
    return hits.only(['molecule_structures'])
23 | |
def substr_search(smiles):
    """
    Run a ChEMBL substructure search for the given SMILES.

    The query is built on ``new_client.substructure`` and the result is
    restricted to the ``molecule_structures`` field.
    """
    hits = new_client.substructure.filter(smiles=smiles)
    return hits.only(['molecule_structures'])
30 | |
def filter_drugs(mols):
    """
    Narrow a query to approved drugs.

    Applies ``max_phase=4`` through the query's own ``filter`` method and
    returns whatever that call returns.
    """
    criteria = {'max_phase': 4}
    return mols.filter(**criteria)
36 | |
def filter_biotherapeutic(mols):
    """
    Narrow a query to biotherapeutic molecules.

    Applies ``biotherapeutic__isnull=False`` through the query's own
    ``filter`` method and returns whatever that call returns.
    """
    criteria = {'biotherapeutic__isnull': False}
    return mols.filter(**criteria)
42 | |
def filter_nat_prod(mols):
    """
    Narrow a query to natural products.

    Applies ``natural_product=1`` through the query's own ``filter`` method
    and returns whatever that call returns.
    """
    criteria = {'natural_product': 1}
    return mols.filter(**criteria)
48 | |
def filter_ro5(mols):
    """
    Narrow a query to compounds with zero rule-of-five violations.

    Applies ``molecule_properties__num_ro5_violations=0`` through the query's
    own ``filter`` method and returns whatever that call returns.
    """
    criteria = {'molecule_properties__num_ro5_violations': 0}
    return mols.filter(**criteria)
54 | |
def main():
    """
    Command-line entry point: search ChEMBL and write the matching SMILES.

    The query SMILES comes from -f/--file when given, otherwise from
    -i/--input. A substructure search is run with -s; otherwise a similarity
    search using the -t Tanimoto score. The optional -d/-b/-n/-r flags narrow
    the result, and the distinct SMILES are written one per line to -o.

    Exits with a usage error if no query, no output path, or (for a
    similarity search) no Tanimoto score was supplied.
    Raises IOError if the SMILES is shorter than 5 characters.
    """
    parser = argparse.ArgumentParser(description='Search ChEMBL database for compounds')
    parser.add_argument('-i', '--input', help='SMILES input')
    parser.add_argument('-f', '--file', help='SMILES input as file')
    parser.add_argument('-o', '--output', help="SMILES output")
    parser.add_argument('-t', '--tanimoto', type=int, help='Tanimoto similarity score')
    parser.add_argument('-s', '--substructure', action='store_true', help='Substructure search using the SMILES input.')
    parser.add_argument('-d', '--drugs', action='store_true', help='Filter approved drugs')
    parser.add_argument('-b', '--biotherapeutic', action='store_true', help='Filter biotherapeutic molecules')
    parser.add_argument('-n', '--nat-prod', action='store_true', help='Filter natural products')
    parser.add_argument('-r', '--ro5', action='store_true', help='Filter compounds that pass Lipinski RO5')

    args = parser.parse_args()

    if args.file:  # get SMILES from file rather than -i option
        args.input = open_file(args.file)

    # Validate everything up front so a bad invocation fails with a usage
    # message before any request is made, not with a TypeError afterwards.
    if args.input is None:
        parser.error('a SMILES query is required: use -i/--input or -f/--file')
    if args.output is None:
        parser.error('an output file is required: use -o/--output')
    if not args.substructure and args.tanimoto is None:
        parser.error('a similarity search requires -t/--tanimoto (or use -s/--substructure)')

    # A line read from a file may still carry its newline; it must neither be
    # sent as part of the query nor counted towards the minimum length.
    args.input = args.input.strip()

    if len(args.input) < 5:
        raise IOError('SMILES must be at least 5 characters long.')

    if args.substructure:  # specify search type: substructure or similarity
        mols = substr_search(args.input)
    else:
        mols = sim_search(args.input, args.tanimoto)

    # filter options:
    if args.drugs:
        mols = filter_drugs(mols)

    if args.biotherapeutic:
        mols = filter_biotherapeutic(mols)

    if args.nat_prod:
        mols = filter_nat_prod(mols)

    if args.ro5:
        mols = filter_ro5(mols)

    # get SMILES from search output
    mols = get_smiles(mols)

    # write to file; sorted so the output does not depend on set iteration
    # order, which varies between runs
    with open(args.output, 'w') as f:
        f.write('\n'.join(sorted(mols)))
99 | |
100 | |
# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()