comparison structure_pipeline.py @ 6:a57de37f12c2 draft

"planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e"
author bgruening
date Wed, 07 Oct 2020 09:31:40 +0000
parents
children
comparison
equal deleted inserted replaced
5:1ade252ebe08 6:a57de37f12c2
1 import argparse
2
3 from chembl_structure_pipeline import checker, standardizer
4
5
def load_mols(input_file):
    """
    Returns a list of strings, each one record of the SDF/MOL file
    ``input_file``.

    A record is everything between two ``$$$$`` delimiter lines: the three
    header lines, the connection table and any data items. Records are
    returned verbatim, so a molecule title on the first header line is kept
    and the header always has the line count it had in the file. The
    delimiter line itself and the line break in front of it are not part of
    the returned strings. Blank records are skipped, so an empty file gives
    an empty list.
    """
    with open(input_file) as f:
        lines = f.read().split('\n')
    # A file that ends with a line break produces one empty trailing element
    # which is not a real line of the last record.
    if lines and not lines[-1]:
        lines.pop()

    records = []
    current = []
    for line in lines:
        # Compare the whole line so that a delimiter which is not followed by
        # a line break (end of file) or carries trailing blanks is still
        # recognised and never leaks into a record.
        if line.rstrip() == '$$$$':
            records.append('\n'.join(current))
            current = []
        else:
            current.append(line)
    # Input without a final delimiter, e.g. a plain MOL file.
    records.append('\n'.join(current))

    return [record for record in records if record.strip()]


def write_mols(mols, output_file):
    """
    Writes a list of molblocks to an SDF

    Every record, including the last one, is followed by a line break and
    its own ``$$$$`` delimiter line, so the first header line of the next
    record always starts on a fresh line. An empty list produces an empty
    file.
    """
    with open(output_file, 'w') as f:
        for mol in mols:
            f.write(mol)
            f.write('\n$$$$\n')
21
22
def standardize_molblock(mol):
    """
    Returns whatever ``standardizer.standardize_molblock`` produces for the
    given molblock string.
    """
    standardized = standardizer.standardize_molblock(mol)
    return standardized
25
26
def get_parent_molblock(mol):
    """
    Returns the first element of the result of
    ``standardizer.get_parent_molblock`` for the given molblock string.

    NOTE(review): the remaining elements of that result are discarded here;
    presumably the first one is the parent molblock - confirm against the
    library documentation.
    """
    result = standardizer.get_parent_molblock(mol)
    return result[0]
29
30
def check_molblock(mol):
    """
    Returns the molblock with the checker results appended as two SDF data
    items, ``MaxPenaltyScore`` and ``IssueMessages``.

    ``checker.check_molblock`` is expected to give a sequence of issues whose
    first element is a penalty score and whose second element is a message.
    The highest score is reported ('0' when there are no issues) and the
    messages are joined with '; '.

    Each data item is written as a header line, a value line and a blank
    line, because SDF readers use the blank line to find the end of a value.
    The returned text ends with the line break after the last value; the
    blank line closing that item is completed by the line break a writer
    puts in front of the ``$$$$`` delimiter.
    """
    issues = checker.check_molblock(mol)
    max_penalty_score = str(max(issue[0] for issue in issues)) if issues else '0'
    message = '; '.join(issue[1] for issue in issues)

    record = mol.rstrip('\n')
    if not record.endswith('M  END'):
        # The record already carries data items (or other trailing text):
        # close the last one with a blank line before adding new headers.
        record += '\n'

    return '\n'.join((
        record,
        '> <MaxPenaltyScore>', max_penalty_score, '',
        '> <IssueMessages>', message, '',
    ))
37
38
def main():
    """
    Command-line entry point.

    Reads the molecules from ``--input``, applies the selected steps in a
    fixed order (standardize, then get parent, then check) and writes the
    result to ``--output``. Steps that are not selected are skipped, so with
    no step flag the molecules are only read and written back.

    ``--input`` and ``--output`` are mandatory: leaving one out ends the
    program with an argparse usage error instead of failing later when the
    missing path is opened.
    """
    parser = argparse.ArgumentParser(
        description='Standardize, get the parent of, or check molecules '
                    'with the ChEMBL structure pipeline')
    parser.add_argument('-i', '--input', required=True, help='SDF/MOL input')
    parser.add_argument('-o', '--output', required=True, help="Standardized output")
    parser.add_argument('--standardize', action='store_true', help="Standardize molblock")
    parser.add_argument('--get_parent', action='store_true', help="Get parent molblock.")
    parser.add_argument('--check', action='store_true', help="Check molblock")
    args = parser.parse_args()

    mols = load_mols(args.input)
    if args.standardize:
        mols = [standardize_molblock(mol) for mol in mols]
    if args.get_parent:
        mols = [get_parent_molblock(mol) for mol in mols]
    if args.check:
        mols = [check_molblock(mol) for mol in mols]
    write_mols(mols, args.output)
56
57
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()