Mercurial > repos > bgruening > chembl
annotate structure_pipeline.py @ 7:1d6fb7967ae9 draft
"planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e-dirty"
author | bgruening |
---|---|
date | Sat, 10 Oct 2020 09:43:14 +0000 |
parents | a57de37f12c2 |
children |
rev | line source |
---|---|
6
a57de37f12c2
"planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e"
bgruening
parents:
diff
changeset
|
1 import argparse |
a57de37f12c2
"planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e"
bgruening
parents:
diff
changeset
|
2 |
a57de37f12c2
"planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e"
bgruening
parents:
diff
changeset
|
3 from chembl_structure_pipeline import checker, standardizer |
a57de37f12c2
"planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e"
bgruening
parents:
diff
changeset
|
4 |
a57de37f12c2
"planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e"
bgruening
parents:
diff
changeset
|
5 |
a57de37f12c2
"planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e"
bgruening
parents:
diff
changeset
|
def load_mols(input_file):
    """
    Read an SDF/MOL file and return its records as a list of strings.

    The file content is split on ``$$$$`` followed by a newline. Every
    returned record has its surrounding whitespace removed and a single
    newline prepended; it never contains the ``$$$$`` delimiter itself.
    Records that are empty after stripping are skipped, so an empty file
    yields an empty list.

    :param input_file: path of the SDF/MOL file to read
    :return: list of record strings, each starting with a newline
    """
    delimiter = '$$$$'
    with open(input_file) as f:
        content = f.read().strip()

    mols = []
    for record in content.split(delimiter + '\n'):
        record = record.strip()
        # Stripping the whole file removes the newline after the final
        # delimiter, so the split leaves '$$$$' attached to the last record.
        # Remove it, otherwise anything appended to that record later ends
        # up after its terminator.
        if record == delimiter or record.endswith('\n' + delimiter):
            record = record[:-len(delimiter)].rstrip()
        if not record:
            continue
        # NOTE(review): the prepended newline makes the first line of every
        # record blank; a record whose own first line is not blank keeps it
        # as its second line - confirm inputs have blank title lines.
        mols.append('\n' + record)
    return mols
a57de37f12c2
"planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e"
bgruening
parents:
diff
changeset
|
13 |
a57de37f12c2
"planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e"
bgruening
parents:
diff
changeset
|
14 |
a57de37f12c2
"planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e"
bgruening
parents:
diff
changeset
|
def write_mols(mols, output_file):
    """
    Write a list of molblocks to an SDF file.

    Every record is followed by its own ``$$$$`` delimiter line, including
    the last one, and the delimiter is always followed by a newline so the
    next record starts on a fresh line with its first line intact. A record
    that already ends with a ``$$$$`` line is not terminated twice. An empty
    list produces an empty file.

    :param mols: iterable of molblock/record strings
    :param output_file: path of the SDF file to write
    """
    delimiter = '$$$$'
    with open(output_file, 'w') as f:
        for mol in mols:
            trimmed = mol.rstrip()
            if trimmed == delimiter or trimmed.endswith('\n' + delimiter):
                # The record carries its own terminator; drop it together
                # with the newline in front of it before writing ours.
                mol = trimmed[:-len(delimiter)]
                if mol.endswith('\n'):
                    mol = mol[:-1]
            f.write(mol)
            f.write('\n' + delimiter + '\n')
a57de37f12c2
"planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e"
bgruening
parents:
diff
changeset
|
21 |
a57de37f12c2
"planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e"
bgruening
parents:
diff
changeset
|
22 |
a57de37f12c2
"planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e"
bgruening
parents:
diff
changeset
|
def standardize_molblock(mol):
    """
    Pass a molblock through ``standardizer.standardize_molblock`` and
    return whatever that call returns.
    """
    standardized = standardizer.standardize_molblock(mol)
    return standardized
a57de37f12c2
"planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e"
bgruening
parents:
diff
changeset
|
25 |
a57de37f12c2
"planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e"
bgruening
parents:
diff
changeset
|
26 |
a57de37f12c2
"planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e"
bgruening
parents:
diff
changeset
|
def get_parent_molblock(mol):
    """
    Call ``standardizer.get_parent_molblock`` on a molblock and return the
    first element of its result (presumably the parent molblock - the
    remaining elements are discarded).
    """
    result = standardizer.get_parent_molblock(mol)
    parent = result[0]
    return parent
a57de37f12c2
"planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e"
bgruening
parents:
diff
changeset
|
29 |
a57de37f12c2
"planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e"
bgruening
parents:
diff
changeset
|
30 |
a57de37f12c2
"planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e"
bgruening
parents:
diff
changeset
|
def check_molblock(mol):
    """
    Run ``checker.check_molblock`` on a molblock and append the outcome to
    it as two SD data items.

    Each issue returned by the checker is indexed as ``issue[0]`` (penalty
    score) and ``issue[1]`` (message). ``MaxPenaltyScore`` holds the highest
    score, or ``0`` when there are no issues; ``IssueMessages`` holds all
    messages joined with ``'; '`` (empty when there are no issues).

    :param mol: molblock string to check
    :return: the molblock followed by the two data items
    """
    issues = checker.check_molblock(mol)
    max_penalty_score = str(max(issue[0] for issue in issues)) if issues else '0'
    message = '; '.join(issue[1] for issue in issues)
    # The empty strings produce the blank line that closes each data item;
    # without it a reader cannot tell where one item's value ends.
    mol_with_issues = '\n'.join((
        mol,
        '> <MaxPenaltyScore>', max_penalty_score, '',
        '> <IssueMessages>', message, '',
    ))
    return mol_with_issues
a57de37f12c2
"planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e"
bgruening
parents:
diff
changeset
|
37 |
a57de37f12c2
"planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e"
bgruening
parents:
diff
changeset
|
38 |
a57de37f12c2
"planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e"
bgruening
parents:
diff
changeset
|
def main():
    """
    Command-line entry point.

    Loads the records from the input file, then applies the selected steps
    in a fixed order - standardize, get parent, check - each step working
    on the output of the previous one, and writes the result to the output
    file. With no step selected the records are written back unchanged.
    """
    parser = argparse.ArgumentParser(
        description='Standardize, get the parent of and check molecules '
                    'using the ChEMBL structure pipeline')
    # Both paths are mandatory: without them open() would be called with None
    parser.add_argument('-i', '--input', required=True, help='SDF/MOL input')
    parser.add_argument('-o', '--output', required=True, help="Standardized output")
    parser.add_argument('--standardize', action='store_true', help="Standardize molblock")
    parser.add_argument('--get_parent', action='store_true', help="Get parent molblock.")
    parser.add_argument('--check', action='store_true', help="Check molblock")
    args = parser.parse_args()

    mols = load_mols(args.input)
    if args.standardize:
        mols = [standardize_molblock(mol) for mol in mols]
    if args.get_parent:
        mols = [get_parent_molblock(mol) for mol in mols]
    if args.check:
        mols = [check_molblock(mol) for mol in mols]
    write_mols(mols, args.output)
a57de37f12c2
"planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e"
bgruening
parents:
diff
changeset
|
56 |
a57de37f12c2
"planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e"
bgruening
parents:
diff
changeset
|
57 |
a57de37f12c2
"planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e"
bgruening
parents:
diff
changeset
|
# Run the command-line interface only when this file is executed as a script
if __name__ == "__main__":
    main()