Mercurial > repos > bgruening > mordred
comparison mordred_descriptors.py @ 2:d074b0c2b54f draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/mordred commit 7efc367809c29ff5939ea971bd00c69b7f9f5903"
author | bgruening |
---|---|
date | Wed, 06 Nov 2019 13:59:30 -0500 |
parents | e2f40a02f31a |
children |
comparison
equal
deleted
inserted
replaced
1:e2f40a02f31a | 2:d074b0c2b54f |
---|---|
33 return [Chem.MolFromSmiles(mol, sanitize=True) for mol in mols if mol != ''] | 33 return [Chem.MolFromSmiles(mol, sanitize=True) for mol in mols if mol != ''] |
34 if ext == 'inchi': | 34 if ext == 'inchi': |
35 return [Chem.inchi.MolFromInchi(mol, sanitize=True) for mol in mols if mol != ''] | 35 return [Chem.inchi.MolFromInchi(mol, sanitize=True) for mol in mols if mol != ''] |
36 | 36 |
37 | 37 |
38 def mordred_descriptors(mols, output, header, use_3d): | 38 def mordred_descriptors(mols, output, header, use_3d, smi_as_col): |
39 """ | 39 """ |
40 Calculate Mordred descriptors and save as tabular | 40 Calculate Mordred descriptors and save as tabular |
41 """ | 41 """ |
42 calc = Calculator(descriptors, ignore_3D=(not use_3d)) | 42 calc = Calculator(descriptors, ignore_3D=(not use_3d)) |
43 invalid_mols = np.where(np.array(mols) == None)[0] # indices of invalid SMILES/SDMols | 43 invalid_mols = np.where(np.array(mols) == None)[0] # indices of invalid SMILES/SDMols |
45 df = calc.pandas(mols, quiet=True) # calculate descriptors | 45 df = calc.pandas(mols, quiet=True) # calculate descriptors |
46 for mol in invalid_mols: # remove placeholders | 46 for mol in invalid_mols: # remove placeholders |
47 df.iloc[mol] = np.nan | 47 df.iloc[mol] = np.nan |
48 df = df.applymap(convert_errors_to_nan) # remove descriptors which errored | 48 df = df.applymap(convert_errors_to_nan) # remove descriptors which errored |
49 df = df.round(6) | 49 df = df.round(6) |
| | 50 if smi_as_col: |
| | 51 smiles = [Chem.MolToSmiles(mol) for mol in mols] |
| | 52 df['SMILES'] = smiles |
| | 53 |
50 df.to_csv(output, na_rep='', sep='\t', index=False, header=header) # write output | 54 df.to_csv(output, na_rep='', sep='\t', index=False, header=header) # write output |
51 | 55 |
52 | 56 |
53 if __name__ == "__main__": | 57 if __name__ == "__main__": |
54 parser = argparse.ArgumentParser() | 58 parser = argparse.ArgumentParser() |
63 help="Use 3d descriptors - only with SDF input.") | 67 help="Use 3d descriptors - only with SDF input.") |
64 | 68 |
65 parser.add_argument("--header", dest="header", action="store_true", | 69 parser.add_argument("--header", dest="header", action="store_true", |
66 default=False, | 70 default=False, |
67 help="Write header line.") | 71 help="Write header line.") |
| | 72 |
| | 73 parser.add_argument("--smiles", dest="smiles", action="store_true", |
| | 74 default=False, |
| | 75 help="Add a column with compound SMILES.") |
68 args = parser.parse_args() | 76 args = parser.parse_args() |
69 | 77 |
70 mols = mol_supplier(args.infile, args.iformat) | 78 mols = mol_supplier(args.infile, args.iformat) |
71 mordred_descriptors(mols, args.outfile, args.header, args.use_3d) | 79 mordred_descriptors(mols, args.outfile, args.header, args.use_3d, args.smiles) |