annotate structure_pipeline.py @ 0:2f59c6239f25 draft default tip

"planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e-dirty"
author bgruening
date Sat, 10 Oct 2020 09:43:40 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
2f59c6239f25 "planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e-dirty"
bgruening
parents:
diff changeset
1 import argparse
2f59c6239f25 "planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e-dirty"
bgruening
parents:
diff changeset
2
2f59c6239f25 "planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e-dirty"
bgruening
parents:
diff changeset
3 from chembl_structure_pipeline import checker, standardizer
2f59c6239f25 "planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e-dirty"
bgruening
parents:
diff changeset
4
2f59c6239f25 "planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e-dirty"
bgruening
parents:
diff changeset
5
2f59c6239f25 "planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e-dirty"
bgruening
parents:
diff changeset
def load_mols(input_file):
    """
    Returns a list of strings, each one record of the SDF/MOL input.

    Records are delimited by lines consisting of `$$$$`. Every record is
    returned verbatim (title line, header and any data items included) and
    never contains the `$$$$` delimiter itself. Records that hold nothing
    but whitespace are dropped, so an empty file yields an empty list. A
    trailing record without a delimiter (e.g. a plain MOL file) is returned
    as well.
    """
    mols = []
    record_lines = []
    with open(input_file) as f:
        for line in f:
            if line.rstrip() == '$$$$':
                mols.append(''.join(record_lines))
                record_lines = []
            else:
                # Keep the line untouched: the first three lines of a molblock
                # are positional, so stripping or padding them corrupts it.
                record_lines.append(line)
    # Whatever follows the last delimiter is an unterminated final record.
    mols.append(''.join(record_lines))
    return [mol for mol in mols if mol.strip()]


def write_mols(mols, output_file):
    """
    Writes a list of molblocks to an SDF

    Each entry is written on its own lines (a newline is added when the
    entry does not already end with one) followed by a `$$$$` delimiter
    line, so the file always ends with a newline.
    """
    with open(output_file, 'w') as f:
        for mol in mols:
            f.write(mol if mol.endswith('\n') else mol + '\n')
            f.write('$$$$\n')
2f59c6239f25 "planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e-dirty"
bgruening
parents:
diff changeset
21
2f59c6239f25 "planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e-dirty"
bgruening
parents:
diff changeset
22
2f59c6239f25 "planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e-dirty"
bgruening
parents:
diff changeset
def standardize_molblock(mol):
    """
    Passes one molblock to `standardizer.standardize_molblock` and returns
    its result unchanged.
    """
    standardized = standardizer.standardize_molblock(mol)
    return standardized
2f59c6239f25 "planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e-dirty"
bgruening
parents:
diff changeset
25
2f59c6239f25 "planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e-dirty"
bgruening
parents:
diff changeset
26
2f59c6239f25 "planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e-dirty"
bgruening
parents:
diff changeset
def get_parent_molblock(mol):
    """
    Passes one molblock to `standardizer.get_parent_molblock` and returns
    the first element of its result.
    """
    result = standardizer.get_parent_molblock(mol)
    # NOTE(review): presumably element 0 is the parent molblock and the
    # remaining elements are extra flags - confirm against the library.
    return result[0]
2f59c6239f25 "planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e-dirty"
bgruening
parents:
diff changeset
29
2f59c6239f25 "planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e-dirty"
bgruening
parents:
diff changeset
30
2f59c6239f25 "planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e-dirty"
bgruening
parents:
diff changeset
def check_molblock(mol):
    """
    Runs `checker.check_molblock` on a molblock and returns the molblock
    with the outcome appended as two SDF data items:

    * `MaxPenaltyScore` - the largest first element among the reported
      issues, or `0` when no issues are reported
    * `IssueMessages` - the second elements of all issues joined by `; `
      (empty when no issues are reported)

    `mol` is expected to be a single record without a `$$$$` delimiter.
    The returned text ends with the blank line that closes the last data
    item.
    """
    issues = checker.check_molblock(mol)
    max_penalty_score = str(max(issue[0] for issue in issues)) if issues else '0'
    message = '; '.join(issue[1] for issue in issues)

    body = mol.rstrip()
    lines = [body]
    if not body.endswith('M  END'):
        # The record already carries data items; close the last one with a
        # blank line so the new header is not read as part of its value.
        lines.append('')
    # An SDF data item is a header line, its value and a terminating blank
    # line; without the blank line the next header is parsed as more value.
    lines.extend((
        '> <MaxPenaltyScore>', max_penalty_score, '',
        '> <IssueMessages>', message, '',
    ))
    return '\n'.join(lines) + '\n'
2f59c6239f25 "planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e-dirty"
bgruening
parents:
diff changeset
37
2f59c6239f25 "planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e-dirty"
bgruening
parents:
diff changeset
38
2f59c6239f25 "planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e-dirty"
bgruening
parents:
diff changeset
def main(argv=None):
    """
    Command-line entry point.

    Loads the molecules from the input file, applies the requested steps in
    a fixed order (standardize, then get parent, then check) and writes the
    result to the output file. With no step selected the molecules are
    written back as loaded.

    `argv` is the list of command-line arguments to parse; when it is None
    argparse falls back to `sys.argv[1:]`.
    """
    parser = argparse.ArgumentParser(
        description='Standardize, get the parent of and/or check molecules '
                    'with the ChEMBL structure pipeline')
    # Both paths are mandatory: without them the script would only fail
    # later with an unhelpful error when trying to open `None`.
    parser.add_argument('-i', '--input', required=True, help='SDF/MOL input')
    parser.add_argument('-o', '--output', required=True, help="Standardized output")
    parser.add_argument('--standardize', action='store_true', help="Standardize molblock")
    parser.add_argument('--get_parent', action='store_true', help="Get parent molblock.")
    parser.add_argument('--check', action='store_true', help="Check molblock")
    args = parser.parse_args(argv)

    mols = load_mols(args.input)
    if args.standardize:
        mols = [standardize_molblock(mol) for mol in mols]
    if args.get_parent:
        mols = [get_parent_molblock(mol) for mol in mols]
    if args.check:
        mols = [check_molblock(mol) for mol in mols]
    write_mols(mols, args.output)
2f59c6239f25 "planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e-dirty"
bgruening
parents:
diff changeset
56
2f59c6239f25 "planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e-dirty"
bgruening
parents:
diff changeset
57
2f59c6239f25 "planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e-dirty"
bgruening
parents:
diff changeset
# Run the command-line interface only when this file is executed as a script.
if __name__ == '__main__':
    main()