Mercurial > repos > bgruening > chembl
comparison chembl.py @ 6:a57de37f12c2 draft
"planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e"
| author | bgruening |
|---|---|
| date | Wed, 07 Oct 2020 09:31:40 +0000 |
| parents | 1ade252ebe08 |
| children | a43a00845834 |
Comparison legend: equal, deleted, inserted, replaced
| 5:1ade252ebe08 | 6:a57de37f12c2 |
|---|---|
| 1 import argparse | |
| 2 | |
| 3 from chembl_webresource_client.new_client import new_client | |
| 1 from chembl_webresource_client.settings import Settings | 4 from chembl_webresource_client.settings import Settings |
| 5 | |
| 2 Settings.Instance().CACHING = False | 6 Settings.Instance().CACHING = False |
| 3 from chembl_webresource_client.new_client import new_client | 7 |
| 4 import argparse | |
| 5 | 8 |
| 6 def open_file(filename): | 9 def open_file(filename): |
| 7 with open(filename) as f: | 10 with open(filename) as f: |
| 8 return f.readline().split()[0] | 11 return f.readline().split()[0] |
| 9 | 12 |
| 13 | |
| 10 def get_smiles(res): | 14 def get_smiles(res): |
| 11 """ | 15 """ |
| 12 Get a list of SMILES from function results | 16 Get a list of SMILES from function results |
| 13 """ | 17 """ |
| 14 smiles = set() | 18 smiles = set() |
| 15 for smi in res: | 19 for smi in res: |
| 16 try: | 20 try: |
| 17 smiles.add('{}\t{}'.format(smi['molecule_structures']['canonical_smiles'], smi['molecule_chembl_id'])) | 21 smiles.add('{}\t{}'.format(smi['molecule_structures']['canonical_smiles'], smi['molecule_chembl_id'])) |
| 18 except TypeError: | 22 except TypeError: |
| 19 continue | 23 continue |
| 20 return smiles | 24 return smiles |
| 25 | |
| 21 | 26 |
| 22 def sim_search(smiles, tanimoto): | 27 def sim_search(smiles, tanimoto): |
| 23 """ | 28 """ |
| 24 Return compounds which are within a Tanimoto range of the SMILES input | 29 Return compounds which are within a Tanimoto range of the SMILES input |
| 25 """ | 30 """ |
| 26 similarity = new_client.similarity | 31 similarity = new_client.similarity |
| 27 return similarity.filter(smiles=smiles, similarity=tanimoto).only(['molecule_structures', 'molecule_chembl_id']) | 32 return similarity.filter(smiles=smiles, similarity=tanimoto).only(['molecule_structures', 'molecule_chembl_id']) |
| 28 | 33 |
| 34 | |
| 29 def substr_search(smiles): | 35 def substr_search(smiles): |
| 30 """ | 36 """ |
| 31 Return compounds which contain the SMILES substructure input | 37 Return compounds which contain the SMILES substructure input |
| 32 """ | 38 """ |
| 33 substructure = new_client.substructure | 39 substructure = new_client.substructure |
| 34 return substructure.filter(smiles=smiles).only(['molecule_structures', 'molecule_chembl_id']) | 40 return substructure.filter(smiles=smiles).only(['molecule_structures', 'molecule_chembl_id']) |
| 35 | 41 |
| 42 | |
| 36 def filter_drugs(mols): | 43 def filter_drugs(mols): |
| 37 """ | 44 """ |
| 38 Return only compounds which are approved drugs | 45 Return only compounds which are approved drugs |
| 39 """ | 46 """ |
| 40 return mols.filter(max_phase=4) | 47 return mols.filter(max_phase=4) |
| 48 | |
| 41 | 49 |
| 42 def filter_biotherapeutic(mols): | 50 def filter_biotherapeutic(mols): |
| 43 """ | 51 """ |
| 44 Return only biotherapeutic molecules | 52 Return only biotherapeutic molecules |
| 45 """ | 53 """ |
| 46 return mols.filter(biotherapeutic__isnull=False) | 54 return mols.filter(biotherapeutic__isnull=False) |
| 47 | 55 |
| 56 | |
| 48 def filter_nat_prod(mols): | 57 def filter_nat_prod(mols): |
| 49 """ | 58 """ |
| 50 Return only natural products | 59 Return only natural products |
| 51 """ | 60 """ |
| 52 return mols.filter(natural_product=1) | 61 return mols.filter(natural_product=1) |
| 53 | 62 |
| 63 | |
| 54 def filter_ro5(mols): | 64 def filter_ro5(mols): |
| 55 """ | 65 """ |
| 56 Return only compounds with no RO5 violations | 66 Return only compounds with no RO5 violations |
| 57 """ | 67 """ |
| 58 return mols.filter(molecule_properties__num_ro5_violations=0) | 68 return mols.filter(molecule_properties__num_ro5_violations=0) |
| 69 | |
| 59 | 70 |
| 60 def main(): | 71 def main(): |
| 61 parser = argparse.ArgumentParser(description='Search ChEMBL database for compounds') | 72 parser = argparse.ArgumentParser(description='Search ChEMBL database for compounds') |
| 62 parser.add_argument('-i', '--input', help='SMILES input') | 73 parser.add_argument('-i', '--input', help='SMILES input') |
| 63 parser.add_argument('-f', '--file', help='SMILES input as file') | 74 parser.add_argument('-f', '--file', help='SMILES input as file') |
| 99 mols = get_smiles(mols) | 110 mols = get_smiles(mols) |
| 100 | 111 |
| 101 # write to file | 112 # write to file |
| 102 with open(args.output, 'w') as f: | 113 with open(args.output, 'w') as f: |
| 103 f.write('\n'.join(mols)) | 114 f.write('\n'.join(mols)) |
| 104 | 115 |
| 105 | 116 |
| 106 if __name__ == "__main__": | 117 if __name__ == "__main__": |
| 107 main() | 118 main() |
