Mercurial > repos > bgruening > chembl_structure_pipeline
annotate structure_pipeline.py @ 0:2f59c6239f25 draft default tip
"planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e-dirty"
author | bgruening |
---|---|
date | Sat, 10 Oct 2020 09:43:40 +0000 |
parents | |
children |
rev | line source |
---|---|
0
2f59c6239f25
"planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e-dirty"
bgruening
parents:
diff
changeset
|
1 import argparse |
2f59c6239f25
"planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e-dirty"
bgruening
parents:
diff
changeset
|
2 |
2f59c6239f25
"planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e-dirty"
bgruening
parents:
diff
changeset
|
3 from chembl_structure_pipeline import checker, standardizer |
2f59c6239f25
"planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e-dirty"
bgruening
parents:
diff
changeset
|
4 |
2f59c6239f25
"planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e-dirty"
bgruening
parents:
diff
changeset
|
5 |
2f59c6239f25
"planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e-dirty"
bgruening
parents:
diff
changeset
|
_SDF_DELIMITER = '$$$$'


def _split_records(text):
    """Split SDF text into raw records, cutting at each ``$$$$`` line."""
    records = []
    current = []
    for line in text.split('\n'):
        if line.rstrip() == _SDF_DELIMITER:
            records.append('\n'.join(current))
            current = []
        else:
            current.append(line)
    # Whatever follows the last delimiter: a bare MOL file, an
    # unterminated final record, or just trailing blank lines.
    records.append('\n'.join(current))
    return records


def _format_record(mol):
    """Return ``mol`` as one SDF record ending in a ``$$$$`` line."""
    record = mol.rstrip()
    # Tolerate records that still carry their own delimiter.
    if record.endswith(_SDF_DELIMITER):
        record = record[:-len(_SDF_DELIMITER)].rstrip()
    # Text after the final "M  END" is SD data; the last data item has to
    # be closed by a blank line before the record delimiter.
    _, end_marker, trailer = record.rpartition('M  END')
    separator = '\n\n' if end_marker and trailer.strip() else '\n'
    return ''.join((record, separator, _SDF_DELIMITER, '\n'))


def load_mols(input_file):
    """
    Returns a list of strings, each a molblock

    The file is split on lines consisting of ``$$$$``. A file without any
    delimiter (a single MOL file) yields one record. Whitespace-only
    records are dropped, so an empty file yields an empty list.

    Only trailing whitespace is trimmed from each record. The leading
    lines are kept exactly as read, because the first three lines of a
    molblock are its header and adding or removing a line there shifts
    the counts line.
    """
    with open(input_file) as f:
        text = f.read()
    return [record.rstrip() for record in _split_records(text) if record.strip()]


def write_mols(mols, output_file):
    """
    Writes a list of molblocks to an SDF

    Every record, including the last one, is followed by its own ``$$$$``
    delimiter line. An empty list produces an empty file.
    """
    with open(output_file, 'w') as f:
        f.writelines(_format_record(mol) for mol in mols)
2f59c6239f25
"planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e-dirty"
bgruening
parents:
diff
changeset
|
21 |
2f59c6239f25
"planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e-dirty"
bgruening
parents:
diff
changeset
|
22 |
2f59c6239f25
"planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e-dirty"
bgruening
parents:
diff
changeset
|
def standardize_molblock(mol):
    """
    Passes ``mol`` to ``standardizer.standardize_molblock`` and returns
    whatever that call returns, unchanged.
    """
    standardized = standardizer.standardize_molblock(mol)
    return standardized
2f59c6239f25
"planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e-dirty"
bgruening
parents:
diff
changeset
|
25 |
2f59c6239f25
"planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e-dirty"
bgruening
parents:
diff
changeset
|
26 |
2f59c6239f25
"planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e-dirty"
bgruening
parents:
diff
changeset
|
def get_parent_molblock(mol):
    """
    Passes ``mol`` to ``standardizer.get_parent_molblock`` and returns the
    first element of its result.
    """
    # NOTE(review): presumably the result is a (parent molblock, flag)
    # pair and only the molblock is wanted here - confirm against the
    # chembl_structure_pipeline documentation.
    result = standardizer.get_parent_molblock(mol)
    return result[0]
2f59c6239f25
"planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e-dirty"
bgruening
parents:
diff
changeset
|
29 |
2f59c6239f25
"planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e-dirty"
bgruening
parents:
diff
changeset
|
30 |
2f59c6239f25
"planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e-dirty"
bgruening
parents:
diff
changeset
|
def check_molblock(mol):
    """
    Runs ``checker.check_molblock`` on ``mol`` and returns the record with
    two SD data items appended:

    * ``MaxPenaltyScore`` - the largest first element among the reported
      issues, or ``0`` when no issues are reported.
    * ``IssueMessages`` - the second elements of the issues joined by
      ``'; '`` (empty when no issues are reported).

    Each data item is closed by a blank line. If ``mol`` ends with a
    ``$$$$`` delimiter, the data items are inserted before it and the
    delimiter is put back at the end.
    """
    issues = checker.check_molblock(mol)
    max_penalty_score = str(max((issue[0] for issue in issues), default=0))
    message = '; '.join(issue[1] for issue in issues)

    record = mol.rstrip()
    # Data items placed after the delimiter would fall outside the record,
    # so detach the delimiter first.
    terminated = record.endswith('$$$$')
    if terminated:
        record = record[:-len('$$$$')].rstrip()

    # The empty strings produce the blank line that closes each data item.
    mol_with_issues = '\n'.join((
        record,
        '> <MaxPenaltyScore>', max_penalty_score, '',
        '> <IssueMessages>', message, '',
    ))
    if terminated:
        mol_with_issues += '\n$$$$'
    return mol_with_issues
2f59c6239f25
"planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e-dirty"
bgruening
parents:
diff
changeset
|
37 |
2f59c6239f25
"planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e-dirty"
bgruening
parents:
diff
changeset
|
38 |
2f59c6239f25
"planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e-dirty"
bgruening
parents:
diff
changeset
|
def main():
    """
    Command-line entry point.

    Loads the records from ``--input``, applies the selected steps in a
    fixed order (standardize, then get parent, then check) and writes the
    result to ``--output``. With no step selected the records are written
    back as loaded.
    """
    parser = argparse.ArgumentParser(
        description='Standardize, get the parent of, and check molecules '
                    'with the ChEMBL structure pipeline')
    # Both paths are mandatory: without them the script would only fail
    # later with a TypeError raised by open(None).
    parser.add_argument('-i', '--input', required=True, help='SDF/MOL input')
    parser.add_argument('-o', '--output', required=True, help="Standardized output")
    parser.add_argument('--standardize', action='store_true', help="Standardize molblock")
    parser.add_argument('--get_parent', action='store_true', help="Get parent molblock.")
    parser.add_argument('--check', action='store_true', help="Check molblock")
    args = parser.parse_args()

    mols = load_mols(args.input)
    if args.standardize:
        mols = [standardize_molblock(mol) for mol in mols]
    if args.get_parent:
        mols = [get_parent_molblock(mol) for mol in mols]
    if args.check:
        mols = [check_molblock(mol) for mol in mols]
    write_mols(mols, args.output)


if __name__ == "__main__":
    main()