comparison chembl.py @ 6:a57de37f12c2 draft

"planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e"
author bgruening
date Wed, 07 Oct 2020 09:31:40 +0000
parents 1ade252ebe08
children a43a00845834
comparison
equal deleted inserted replaced
5:1ade252ebe08 6:a57de37f12c2
1 import argparse
2
3 from chembl_webresource_client.new_client import new_client
1 from chembl_webresource_client.settings import Settings 4 from chembl_webresource_client.settings import Settings
5
2 Settings.Instance().CACHING = False 6 Settings.Instance().CACHING = False
3 from chembl_webresource_client.new_client import new_client 7
4 import argparse
5 8
def open_file(filename):
    """
    Return the first whitespace-separated token from the first line of a file.

    Only the first line of ``filename`` is read. An ``IndexError`` propagates
    if that line contains no tokens (empty file or blank first line).
    """
    with open(filename) as handle:
        first_line = handle.readline()
    tokens = first_line.split()
    return tokens[0]
9 12
13
def get_smiles(res):
    """
    Build the set of unique output lines from an iterable of result records.

    Each line is the record's canonical SMILES and its ChEMBL ID, separated
    by a tab. Records for which the lookup raises ``TypeError`` (for example
    when ``molecule_structures`` is ``None``) are skipped.
    """
    collected = set()
    for record in res:
        try:
            structures = record['molecule_structures']
            entry = '{}\t{}'.format(structures['canonical_smiles'], record['molecule_chembl_id'])
        except TypeError:
            # Non-subscriptable value encountered; ignore this record.
            continue
        collected.add(entry)
    return collected
25
21 26
def sim_search(smiles, tanimoto):
    """
    Run a similarity query for a SMILES string.

    Filters ``new_client.similarity`` by ``smiles`` with ``similarity`` set to
    ``tanimoto``, and restricts the returned fields to the molecule structures
    and the ChEMBL ID. The scale expected for ``tanimoto`` is defined by the
    ChEMBL client, not here (presumably a percentage threshold; confirm
    against the client documentation).
    """
    wanted_fields = ['molecule_structures', 'molecule_chembl_id']
    matches = new_client.similarity.filter(smiles=smiles, similarity=tanimoto)
    return matches.only(wanted_fields)
28 33
34
def substr_search(smiles):
    """
    Run a substructure query for a SMILES string.

    Filters ``new_client.substructure`` by ``smiles`` and restricts the
    returned fields to the molecule structures and the ChEMBL ID.
    """
    wanted_fields = ['molecule_structures', 'molecule_chembl_id']
    matches = new_client.substructure.filter(smiles=smiles)
    return matches.only(wanted_fields)
35 41
42
def filter_drugs(mols):
    """
    Narrow a query result to approved drugs.

    Calls ``mols.filter`` with ``max_phase=4`` and returns its result.
    """
    approved_only = {'max_phase': 4}
    return mols.filter(**approved_only)
48
41 49
def filter_biotherapeutic(mols):
    """
    Narrow a query result to biotherapeutic molecules.

    Calls ``mols.filter`` with ``biotherapeutic__isnull=False`` and returns
    its result.
    """
    has_biotherapeutic = {'biotherapeutic__isnull': False}
    return mols.filter(**has_biotherapeutic)
47 55
56
def filter_nat_prod(mols):
    """
    Narrow a query result to natural products.

    Calls ``mols.filter`` with ``natural_product=1`` and returns its result.
    """
    natural_only = {'natural_product': 1}
    return mols.filter(**natural_only)
53 62
63
def filter_ro5(mols):
    """
    Narrow a query result to compounds with no RO5 violations.

    Calls ``mols.filter`` with ``molecule_properties__num_ro5_violations=0``
    and returns its result.
    """
    no_violations = {'molecule_properties__num_ro5_violations': 0}
    return mols.filter(**no_violations)
69
59 70
60 def main(): 71 def main():
61 parser = argparse.ArgumentParser(description='Search ChEMBL database for compounds') 72 parser = argparse.ArgumentParser(description='Search ChEMBL database for compounds')
62 parser.add_argument('-i', '--input', help='SMILES input') 73 parser.add_argument('-i', '--input', help='SMILES input')
63 parser.add_argument('-f', '--file', help='SMILES input as file') 74 parser.add_argument('-f', '--file', help='SMILES input as file')
99 mols = get_smiles(mols) 110 mols = get_smiles(mols)
100 111
101 # write to file 112 # write to file
102 with open(args.output, 'w') as f: 113 with open(args.output, 'w') as f:
103 f.write('\n'.join(mols)) 114 f.write('\n'.join(mols))
104 115
105 116
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()