Mercurial > repos > bgruening > mordred
annotate mordred_descriptors.py @ 3:cc0f89287ecf draft default tip
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/mordred commit 944ea4bb8a9cd4244152a4a4fecd0485fabc2ad0"
author | bgruening |
---|---|
date | Tue, 28 Jul 2020 08:31:30 -0400 |
parents | d074b0c2b54f |
children |
rev | line source |
---|---|
0
ea68b86303e2
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/mordred commit 4ce352d9c9f3f1808e2ab6c019c534fd3e805959
bgruening
parents:
diff
changeset
|
1 import argparse |
ea68b86303e2
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/mordred commit 4ce352d9c9f3f1808e2ab6c019c534fd3e805959
bgruening
parents:
diff
changeset
|
2 import numpy as np |
ea68b86303e2
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/mordred commit 4ce352d9c9f3f1808e2ab6c019c534fd3e805959
bgruening
parents:
diff
changeset
|
3 import pandas as pd |
ea68b86303e2
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/mordred commit 4ce352d9c9f3f1808e2ab6c019c534fd3e805959
bgruening
parents:
diff
changeset
|
4 from mordred import Calculator, descriptors |
ea68b86303e2
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/mordred commit 4ce352d9c9f3f1808e2ab6c019c534fd3e805959
bgruening
parents:
diff
changeset
|
5 from mordred.error import Missing, Error |
ea68b86303e2
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/mordred commit 4ce352d9c9f3f1808e2ab6c019c534fd3e805959
bgruening
parents:
diff
changeset
|
6 from rdkit import Chem |
ea68b86303e2
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/mordred commit 4ce352d9c9f3f1808e2ab6c019c534fd3e805959
bgruening
parents:
diff
changeset
|
7 from rdkit.Chem.rdmolfiles import SDMolSupplier, SmilesMolSupplier |
ea68b86303e2
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/mordred commit 4ce352d9c9f3f1808e2ab6c019c534fd3e805959
bgruening
parents:
diff
changeset
|
8 |
ea68b86303e2
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/mordred commit 4ce352d9c9f3f1808e2ab6c019c534fd3e805959
bgruening
parents:
diff
changeset
|
9 |
ea68b86303e2
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/mordred commit 4ce352d9c9f3f1808e2ab6c019c534fd3e805959
bgruening
parents:
diff
changeset
|
def convert_errors_to_nan(el):
    """
    Map a single cell of the Mordred dataframe to a plain number or None.

    Booleans (Python or NumPy) are returned as the integers 0/1, real
    numbers (Python or NumPy integers and floats) are returned unchanged,
    and every other object is replaced by None.
    """
    # bool has to be tested first because it is a subclass of int
    if isinstance(el, (bool, np.bool_)):
        return int(el)
    # np.integer / np.floating cover all NumPy scalar widths (int64, float32, ...)
    if isinstance(el, (int, float, np.integer, np.floating)):
        return el
    return None
ea68b86303e2
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/mordred commit 4ce352d9c9f3f1808e2ab6c019c534fd3e805959
bgruening
parents:
diff
changeset
|
20 |
ea68b86303e2
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/mordred commit 4ce352d9c9f3f1808e2ab6c019c534fd3e805959
bgruening
parents:
diff
changeset
|
21 |
ea68b86303e2
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/mordred commit 4ce352d9c9f3f1808e2ab6c019c534fd3e805959
bgruening
parents:
diff
changeset
|
def mol_supplier(filename, ext):
    """
    Based on the file extension, use the appropriate RDKit function to
    load a chemical data file (SDF, SMILES or InChI) containing multiple
    molecules and return a list of RDKit Mol objects.

    filename: path to the input file
    ext: input format, one of 'sdf', 'smi' or 'inchi'

    For 'smi' and 'inchi' the file is read one record per line and empty
    lines are skipped. Raises ValueError for any other value of ext.
    """
    if ext == 'sdf':
        return list(SDMolSupplier(filename))
    # fail early and explicitly instead of silently returning None
    if ext not in ('smi', 'inchi'):
        raise ValueError(
            "Unsupported input format %r: expected 'sdf', 'smi' or 'inchi'" % (ext,)
        )
    with open(filename) as f:
        records = [line for line in f.read().split('\n') if line != '']
    if ext == 'smi':
        return [Chem.MolFromSmiles(record, sanitize=True) for record in records]
    return [Chem.inchi.MolFromInchi(record, sanitize=True) for record in records]
0
ea68b86303e2
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/mordred commit 4ce352d9c9f3f1808e2ab6c019c534fd3e805959
bgruening
parents:
diff
changeset
|
36 |
ea68b86303e2
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/mordred commit 4ce352d9c9f3f1808e2ab6c019c534fd3e805959
bgruening
parents:
diff
changeset
|
37 |
2
d074b0c2b54f
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/mordred commit 7efc367809c29ff5939ea971bd00c69b7f9f5903"
bgruening
parents:
1
diff
changeset
|
def mordred_descriptors(mols, output, header, use_3d, smi_as_col):
    """
    Calculate Mordred descriptors and save as tabular

    mols: list of RDKit Mol objects; None entries are treated as invalid
        molecules and produce a row of empty descriptor values
    output: target handed to DataFrame.to_csv
    header: passed to DataFrame.to_csv as its header argument
    use_3d: when false, the calculator is created with ignore_3D=True
    smi_as_col: when true, a 'SMILES' column generated with
        Chem.MolToSmiles is appended
    """
    calc = Calculator(descriptors, ignore_3D=(not use_3d))
    # positions of invalid SMILES/SDMols, found by identity rather than ==
    invalid_rows = [idx for idx, mol in enumerate(mols) if mol is None]
    # replace invalid mols with an empty placeholder so row order is preserved
    mols = [Chem.MolFromSmiles('') if mol is None else mol for mol in mols]
    df = calc.pandas(mols, quiet=True)  # calculate descriptors
    if invalid_rows:
        df.iloc[invalid_rows] = np.nan  # blank out the placeholder rows
    # DataFrame.applymap is deprecated since pandas 2.1 in favour of DataFrame.map
    elementwise = df.map if hasattr(pd.DataFrame, 'map') else df.applymap
    df = elementwise(convert_errors_to_nan)  # remove descriptors which errored
    df = df.round(6)
    if smi_as_col:
        df['SMILES'] = [Chem.MolToSmiles(mol) for mol in mols]

    df.to_csv(output, na_rep='', sep='\t', index=False, header=header)  # write output
ea68b86303e2
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/mordred commit 4ce352d9c9f3f1808e2ab6c019c534fd3e805959
bgruening
parents:
diff
changeset
|
55 |
ea68b86303e2
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/mordred commit 4ce352d9c9f3f1808e2ab6c019c534fd3e805959
bgruening
parents:
diff
changeset
|
56 |
ea68b86303e2
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/mordred commit 4ce352d9c9f3f1808e2ab6c019c534fd3e805959
bgruening
parents:
diff
changeset
|
if __name__ == "__main__":
    # Command-line entry point: parse the options, load the molecules and
    # write the descriptor table.
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--infile', required=True, help='Path to the input file.')
    parser.add_argument("--iformat", help="Specify the input file format.")

    parser.add_argument('-o', '--outfile', type=argparse.FileType('w+'),
                        help="Path to the result file")

    parser.add_argument("--3d", dest="use_3d", action="store_true",
                        default=False,
                        help="Use 3d descriptors - only with SDF input.")

    parser.add_argument("--header", dest="header", action="store_true",
                        default=False,
                        help="Write header line.")

    parser.add_argument("--smiles", dest="smiles", action="store_true",
                        default=False,
                        help="Add a column with compound SMILES.")
    args = parser.parse_args()

    try:
        mols = mol_supplier(args.infile, args.iformat)
        mordred_descriptors(mols, args.outfile, args.header, args.use_3d, args.smiles)
    finally:
        # argparse.FileType opened the handle during parsing; close it
        # explicitly so the output is flushed even if the calculation fails
        if args.outfile is not None:
            args.outfile.close()