# HG changeset patch # User bgruening # Date 1627649505 0 # Node ID 52a8d34dd08f0d2b8520f659a58ee428644c142a # Parent fc45bf8b6e01f19e500812bfca644ee195675ef9 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/silicos-it/qed commit 943ff93be2257426d69a8406ed55c838495ecf3f" diff -r fc45bf8b6e01 -r 52a8d34dd08f qed.py --- a/qed.py Tue Jul 28 08:47:05 2020 -0400 +++ b/qed.py Fri Jul 30 12:51:45 2021 +0000 @@ -1,20 +1,22 @@ #!/usr/bin/env python -__all__ = ['weights_max', 'weights_mean', 'weights_none', 'default'] +__all__ = ["weights_max", "weights_mean", "weights_none", "default"] -# RDKit -from rdkit.Chem import Descriptors -from rdkit import Chem - +import argparse +import os +import re +import sys # General from copy import deepcopy from math import exp, log -import sys, os, re -import argparse + +from rdkit import Chem +# RDKit +from rdkit.Chem import Descriptors class SilicosItError(Exception): """Base class for exceptions in Silicos-it code""" - pass + class WrongArgument(SilicosItError): """ @@ -24,172 +26,175 @@ function -- function in which error occurred msg -- explanation of the error """ + def __init__(self, function, msg): self.function = function self.msg = msg + def check_filetype(filepath): mol = False possible_inchi = True for line_counter, line in enumerate(open(filepath)): if line_counter > 10000: break - if line.find('$$$$') != -1: - return 'sdf' - elif line.find('@MOLECULE') != -1: - return 'mol2' - elif line.find('ligand id') != -1: - return 'drf' - elif possible_inchi and re.findall('^InChI=', line): - return 'inchi' - elif re.findall('^M\s+END', line): + if line.find("$$$$") != -1: + return "sdf" + elif line.find("@MOLECULE") != -1: + return "mol2" + elif line.find("ligand id") != -1: + return "drf" + elif possible_inchi and re.findall("^InChI=", line): + return "inchi" + elif re.findall("^M\s+END", line): # noqa W605 mol = True # first line is not an InChI, so it can't be an InChI file possible_inchi = False if mol: - # END can occures before $$$$, so and SDF file will + # END can occures before $$$$, so and SDF file will # be recognised as mol, if you not using this hack' - return 'mol' - return 'smi' + return "mol" + return "smi" -AliphaticRings = Chem.MolFromSmarts('[$([A;R][!a])]') + +AliphaticRings = Chem.MolFromSmarts("[$([A;R][!a])]") AcceptorSmarts = [ - '[oH0;X2]', - '[OH1;X2;v2]', - '[OH0;X2;v2]', - '[OH0;X1;v2]', - '[O-;X1]', - '[SH0;X2;v2]', - '[SH0;X1;v2]', - '[S-;X1]', - '[nH0;X2]', - '[NH0;X1;v3]', - '[$([N;+0;X3;v3]);!$(N[C,S]=O)]' - ] + "[oH0;X2]", + "[OH1;X2;v2]", + "[OH0;X2;v2]", + "[OH0;X1;v2]", + "[O-;X1]", + "[SH0;X2;v2]", + "[SH0;X1;v2]", + "[S-;X1]", + "[nH0;X2]", + "[NH0;X1;v3]", + "[$([N;+0;X3;v3]);!$(N[C,S]=O)]", +] Acceptors = [] for hba in AcceptorSmarts: Acceptors.append(Chem.MolFromSmarts(hba)) StructuralAlertSmarts = [ - '*1[O,S,N]*1', - '[S,C](=[O,S])[F,Br,Cl,I]', - '[CX4][Cl,Br,I]', - '[C,c]S(=O)(=O)O[C,c]', - '[$([CH]),$(CC)]#CC(=O)[C,c]', - '[$([CH]),$(CC)]#CC(=O)O[C,c]', - 'n[OH]', - '[$([CH]),$(CC)]#CS(=O)(=O)[C,c]', - 'C=C(C=O)C=O', - 'n1c([F,Cl,Br,I])cccc1', - '[CH1](=O)', - '[O,o][O,o]', - '[C;!R]=[N;!R]', - '[N!R]=[N!R]', - '[#6](=O)[#6](=O)', - '[S,s][S,s]', - '[N,n][NH2]', - 'C(=O)N[NH2]', - '[C,c]=S', - '[$([CH2]),$([CH][CX4]),$(C([CX4])[CX4])]=[$([CH2]),$([CH][CX4]),$(C([CX4])[CX4])]', - 'C1(=[O,N])C=CC(=[O,N])C=C1', - 'C1(=[O,N])C(=[O,N])C=CC=C1', - 'a21aa3a(aa1aaaa2)aaaa3', - 'a31a(a2a(aa1)aaaa2)aaaa3', - 'a1aa2a3a(a1)A=AA=A3=AA=A2', - 'c1cc([NH2])ccc1', - '[Hg,Fe,As,Sb,Zn,Se,se,Te,B,Si,Na,Ca,Ge,Ag,Mg,K,Ba,Sr,Be,Ti,Mo,Mn,Ru,Pd,Ni,Cu,Au,Cd,Al,Ga,Sn,Rh,Tl,Bi,Nb,Li,Pb,Hf,Ho]', - 'I', - 'OS(=O)(=O)[O-]', - '[N+](=O)[O-]', - 'C(=O)N[OH]', - 'C1NC(=O)NC(=O)1', - '[SH]', - '[S-]', - 'c1ccc([Cl,Br,I,F])c([Cl,Br,I,F])c1[Cl,Br,I,F]', - 'c1cc([Cl,Br,I,F])cc([Cl,Br,I,F])c1[Cl,Br,I,F]', - '[CR1]1[CR1][CR1][CR1][CR1][CR1][CR1]1', - '[CR1]1[CR1][CR1]cc[CR1][CR1]1', - '[CR2]1[CR2][CR2][CR2][CR2][CR2][CR2][CR2]1', - '[CR2]1[CR2][CR2]cc[CR2][CR2][CR2]1', - '[CH2R2]1N[CH2R2][CH2R2][CH2R2][CH2R2][CH2R2]1', - '[CH2R2]1N[CH2R2][CH2R2][CH2R2][CH2R2][CH2R2][CH2R2]1', - 'C#C', - '[OR2,NR2]@[CR2]@[CR2]@[OR2,NR2]@[CR2]@[CR2]@[OR2,NR2]', - '[$([N+R]),$([n+R]),$([N+]=C)][O-]', - '[C,c]=N[OH]', - '[C,c]=NOC=O', - '[C,c](=O)[CX4,CR0X3,O][C,c](=O)', - 'c1ccc2c(c1)ccc(=O)o2', - '[O+,o+,S+,s+]', - 'N=C=O', - '[NX3,NX4][F,Cl,Br,I]', - 'c1ccccc1OC(=O)[#6]', - '[CR0]=[CR0][CR0]=[CR0]', - '[C+,c+,C-,c-]', - 'N=[N+]=[N-]', - 'C12C(NC(N1)=O)CSC2', - 'c1c([OH])c([OH,NH2,NH])ccc1', - 'P', - '[N,O,S]C#N', - 'C=C=O', - '[Si][F,Cl,Br,I]', - '[SX2]O', - '[SiR0,CR0](c1ccccc1)(c2ccccc2)(c3ccccc3)', - 'O1CCCCC1OC2CCC3CCCCC3C2', - 'N=[CR0][N,n,O,S]', - '[cR2]1[cR2][cR2]([Nv3X3,Nv4X4])[cR2][cR2][cR2]1[cR2]2[cR2][cR2][cR2]([Nv3X3,Nv4X4])[cR2][cR2]2', - 'C=[C!r]C#N', - '[cR2]1[cR2]c([N+0X3R0,nX3R0])c([N+0X3R0,nX3R0])[cR2][cR2]1', - '[cR2]1[cR2]c([N+0X3R0,nX3R0])[cR2]c([N+0X3R0,nX3R0])[cR2]1', - '[cR2]1[cR2]c([N+0X3R0,nX3R0])[cR2][cR2]c1([N+0X3R0,nX3R0])', - '[OH]c1ccc([OH,NH2,NH])cc1', - 'c1ccccc1OC(=O)O', - '[SX2H0][N]', - 'c12ccccc1(SC(S)=N2)', - 'c12ccccc1(SC(=S)N2)', - 'c1nnnn1C=O', - 's1c(S)nnc1NC=O', - 'S1C=CSC1=S', - 'C(=O)Onnn', - 'OS(=O)(=O)C(F)(F)F', - 'N#CC[OH]', - 'N#CC(=O)', - 'S(=O)(=O)C#N', - 'N[CH2]C#N', - 'C1(=O)NCC1', - 'S(=O)(=O)[O-,OH]', - 'NC[F,Cl,Br,I]', - 'C=[C!r]O', - '[NX2+0]=[O+0]', - '[OR0,NR0][OR0,NR0]', - 'C(=O)O[C,H1].C(=O)O[C,H1].C(=O)O[C,H1]', - '[CX2R0][NX3R0]', - 'c1ccccc1[C;!R]=[C;!R]c2ccccc2', - '[NX3R0,NX4R0,OR0,SX2R0][CX4][NX3R0,NX4R0,OR0,SX2R0]', - '[s,S,c,C,n,N,o,O]~[n+,N+](~[s,S,c,C,n,N,o,O])(~[s,S,c,C,n,N,o,O])~[s,S,c,C,n,N,o,O]', - '[s,S,c,C,n,N,o,O]~[nX3+,NX3+](~[s,S,c,C,n,N])~[s,S,c,C,n,N]', - '[*]=[N+]=[*]', - '[SX3](=O)[O-,OH]', - 'N#N', - 'F.F.F.F', - '[R0;D2][R0;D2][R0;D2][R0;D2]', - '[cR,CR]~C(=O)NC(=O)~[cR,CR]', - 'C=!@CC=[O,S]', - '[#6,#8,#16][C,c](=O)O[C,c]', - 'c[C;R0](=[O,S])[C,c]', - 'c[SX2][C;!R]', - 'C=C=C', - 'c1nc([F,Cl,Br,I,S])ncc1', - 'c1ncnc([F,Cl,Br,I,S])c1', - 'c1nc(c2c(n1)nc(n2)[F,Cl,Br,I])', - '[C,c]S(=O)(=O)c1ccc(cc1)F', - '[15N]', - '[13C]', - '[18O]', - '[34S]' - ] + "*1[O,S,N]*1", + "[S,C](=[O,S])[F,Br,Cl,I]", + "[CX4][Cl,Br,I]", + "[C,c]S(=O)(=O)O[C,c]", + "[$([CH]),$(CC)]#CC(=O)[C,c]", + "[$([CH]),$(CC)]#CC(=O)O[C,c]", + "n[OH]", + "[$([CH]),$(CC)]#CS(=O)(=O)[C,c]", + "C=C(C=O)C=O", + "n1c([F,Cl,Br,I])cccc1", + "[CH1](=O)", + "[O,o][O,o]", + "[C;!R]=[N;!R]", + "[N!R]=[N!R]", + "[#6](=O)[#6](=O)", + "[S,s][S,s]", + "[N,n][NH2]", + "C(=O)N[NH2]", + "[C,c]=S", + "[$([CH2]),$([CH][CX4]),$(C([CX4])[CX4])]=[$([CH2]),$([CH][CX4]),$(C([CX4])[CX4])]", + "C1(=[O,N])C=CC(=[O,N])C=C1", + "C1(=[O,N])C(=[O,N])C=CC=C1", + "a21aa3a(aa1aaaa2)aaaa3", + "a31a(a2a(aa1)aaaa2)aaaa3", + "a1aa2a3a(a1)A=AA=A3=AA=A2", + "c1cc([NH2])ccc1", + "[Hg,Fe,As,Sb,Zn,Se,se,Te,B,Si,Na,Ca,Ge,Ag,Mg,K,Ba,Sr,Be,Ti,Mo,Mn,Ru,Pd,Ni,Cu,Au,Cd,Al,Ga,Sn,Rh,Tl,Bi,Nb,Li,Pb,Hf,Ho]", + "I", + "OS(=O)(=O)[O-]", + "[N+](=O)[O-]", + "C(=O)N[OH]", + "C1NC(=O)NC(=O)1", + "[SH]", + "[S-]", + "c1ccc([Cl,Br,I,F])c([Cl,Br,I,F])c1[Cl,Br,I,F]", + "c1cc([Cl,Br,I,F])cc([Cl,Br,I,F])c1[Cl,Br,I,F]", + "[CR1]1[CR1][CR1][CR1][CR1][CR1][CR1]1", + "[CR1]1[CR1][CR1]cc[CR1][CR1]1", + "[CR2]1[CR2][CR2][CR2][CR2][CR2][CR2][CR2]1", + "[CR2]1[CR2][CR2]cc[CR2][CR2][CR2]1", + "[CH2R2]1N[CH2R2][CH2R2][CH2R2][CH2R2][CH2R2]1", + "[CH2R2]1N[CH2R2][CH2R2][CH2R2][CH2R2][CH2R2][CH2R2]1", + "C#C", + "[OR2,NR2]@[CR2]@[CR2]@[OR2,NR2]@[CR2]@[CR2]@[OR2,NR2]", + "[$([N+R]),$([n+R]),$([N+]=C)][O-]", + "[C,c]=N[OH]", + "[C,c]=NOC=O", + "[C,c](=O)[CX4,CR0X3,O][C,c](=O)", + "c1ccc2c(c1)ccc(=O)o2", + "[O+,o+,S+,s+]", + "N=C=O", + "[NX3,NX4][F,Cl,Br,I]", + "c1ccccc1OC(=O)[#6]", + "[CR0]=[CR0][CR0]=[CR0]", + "[C+,c+,C-,c-]", + "N=[N+]=[N-]", + "C12C(NC(N1)=O)CSC2", + "c1c([OH])c([OH,NH2,NH])ccc1", + "P", + "[N,O,S]C#N", + "C=C=O", + "[Si][F,Cl,Br,I]", + "[SX2]O", + "[SiR0,CR0](c1ccccc1)(c2ccccc2)(c3ccccc3)", + "O1CCCCC1OC2CCC3CCCCC3C2", + "N=[CR0][N,n,O,S]", + "[cR2]1[cR2][cR2]([Nv3X3,Nv4X4])[cR2][cR2][cR2]1[cR2]2[cR2][cR2][cR2]([Nv3X3,Nv4X4])[cR2][cR2]2", + "C=[C!r]C#N", + "[cR2]1[cR2]c([N+0X3R0,nX3R0])c([N+0X3R0,nX3R0])[cR2][cR2]1", + "[cR2]1[cR2]c([N+0X3R0,nX3R0])[cR2]c([N+0X3R0,nX3R0])[cR2]1", + "[cR2]1[cR2]c([N+0X3R0,nX3R0])[cR2][cR2]c1([N+0X3R0,nX3R0])", + "[OH]c1ccc([OH,NH2,NH])cc1", + "c1ccccc1OC(=O)O", + "[SX2H0][N]", + "c12ccccc1(SC(S)=N2)", + "c12ccccc1(SC(=S)N2)", + "c1nnnn1C=O", + "s1c(S)nnc1NC=O", + "S1C=CSC1=S", + "C(=O)Onnn", + "OS(=O)(=O)C(F)(F)F", + "N#CC[OH]", + "N#CC(=O)", + "S(=O)(=O)C#N", + "N[CH2]C#N", + "C1(=O)NCC1", + "S(=O)(=O)[O-,OH]", + "NC[F,Cl,Br,I]", + "C=[C!r]O", + "[NX2+0]=[O+0]", + "[OR0,NR0][OR0,NR0]", + "C(=O)O[C,H1].C(=O)O[C,H1].C(=O)O[C,H1]", + "[CX2R0][NX3R0]", + "c1ccccc1[C;!R]=[C;!R]c2ccccc2", + "[NX3R0,NX4R0,OR0,SX2R0][CX4][NX3R0,NX4R0,OR0,SX2R0]", + "[s,S,c,C,n,N,o,O]~[n+,N+](~[s,S,c,C,n,N,o,O])(~[s,S,c,C,n,N,o,O])~[s,S,c,C,n,N,o,O]", + "[s,S,c,C,n,N,o,O]~[nX3+,NX3+](~[s,S,c,C,n,N])~[s,S,c,C,n,N]", + "[*]=[N+]=[*]", + "[SX3](=O)[O-,OH]", + "N#N", + "F.F.F.F", + "[R0;D2][R0;D2][R0;D2][R0;D2]", + "[cR,CR]~C(=O)NC(=O)~[cR,CR]", + "C=!@CC=[O,S]", + "[#6,#8,#16][C,c](=O)O[C,c]", + "c[C;R0](=[O,S])[C,c]", + "c[SX2][C;!R]", + "C=C=C", + "c1nc([F,Cl,Br,I,S])ncc1", + "c1ncnc([F,Cl,Br,I,S])c1", + "c1nc(c2c(n1)nc(n2)[F,Cl,Br,I])", + "[C,c]S(=O)(=O)c1ccc(cc1)F", + "[15N]", + "[13C]", + "[18O]", + "[34S]", +] StructuralAlerts = [] for smarts in StructuralAlertSmarts: @@ -200,26 +205,167 @@ # rows[8]: MW, ALOGP, HBA, HBD, PSA, ROTB, AROM, ALERTS # columns[7]: A, B, C, D, E, F, DMAX # ALOGP parameters from Gregory Gerebtzoff (2012, Roche) -pads1 = [ [2.817065973, 392.5754953, 290.7489764, 2.419764353, 49.22325677, 65.37051707, 104.9805561], - [0.486849448, 186.2293718, 2.066177165, 3.902720615, 1.027025453, 0.913012565, 145.4314800], - [2.948620388, 160.4605972, 3.615294657, 4.435986202, 0.290141953, 1.300669958, 148.7763046], - [1.618662227, 1010.051101, 0.985094388, 0.000000001, 0.713820843, 0.920922555, 258.1632616], - [1.876861559, 125.2232657, 62.90773554, 87.83366614, 12.01999824, 28.51324732, 104.5686167], - [0.010000000, 272.4121427, 2.558379970, 1.565547684, 1.271567166, 2.758063707, 105.4420403], - [3.217788970, 957.7374108, 2.274627939, 0.000000001, 1.317690384, 0.375760881, 312.3372610], - [0.010000000, 1199.094025, -0.09002883, 0.000000001, 0.185904477, 0.875193782, 417.7253140] ] +pads1 = [ + [ + 2.817065973, + 392.5754953, + 290.7489764, + 2.419764353, + 49.22325677, + 65.37051707, + 104.9805561, + ], + [ + 0.486849448, + 186.2293718, + 2.066177165, + 3.902720615, + 1.027025453, + 0.913012565, + 145.4314800, + ], + [ + 2.948620388, + 160.4605972, + 3.615294657, + 4.435986202, + 0.290141953, + 1.300669958, + 148.7763046, + ], + [ + 1.618662227, + 1010.051101, + 0.985094388, + 0.000000001, + 0.713820843, + 0.920922555, + 258.1632616, + ], + [ + 1.876861559, + 125.2232657, + 62.90773554, + 87.83366614, + 12.01999824, + 28.51324732, + 104.5686167, + ], + [ + 0.010000000, + 272.4121427, + 2.558379970, + 1.565547684, + 1.271567166, + 2.758063707, + 105.4420403, + ], + [ + 3.217788970, + 957.7374108, + 2.274627939, + 0.000000001, + 1.317690384, + 0.375760881, + 312.3372610, + ], + [ + 0.010000000, + 1199.094025, + -0.09002883, + 0.000000001, + 0.185904477, + 0.875193782, + 417.7253140, + ], +] # ALOGP parameters from the original publication -pads2 = [ [2.817065973, 392.5754953, 290.7489764, 2.419764353, 49.22325677, 65.37051707, 104.9805561], - [3.172690585, 137.8624751, 2.534937431, 4.581497897, 0.822739154, 0.576295591, 131.3186604], - [2.948620388, 160.4605972, 3.615294657, 4.435986202, 0.290141953, 1.300669958, 148.7763046], - [1.618662227, 1010.051101, 0.985094388, 0.000000001, 0.713820843, 0.920922555, 258.1632616], - [1.876861559, 125.2232657, 62.90773554, 87.83366614, 12.01999824, 28.51324732, 104.5686167], - [0.010000000, 272.4121427, 2.558379970, 1.565547684, 1.271567166, 2.758063707, 105.4420403], - [3.217788970, 957.7374108, 2.274627939, 0.000000001, 1.317690384, 0.375760881, 312.3372610], - [0.010000000, 1199.094025, -0.09002883, 0.000000001, 0.185904477, 0.875193782, 417.7253140] ] +pads2 = [ + [ + 2.817065973, + 392.5754953, + 290.7489764, + 2.419764353, + 49.22325677, + 65.37051707, + 104.9805561, + ], + [ + 3.172690585, + 137.8624751, + 2.534937431, + 4.581497897, + 0.822739154, + 0.576295591, + 131.3186604, + ], + [ + 2.948620388, + 160.4605972, + 3.615294657, + 4.435986202, + 0.290141953, + 1.300669958, + 148.7763046, + ], + [ + 1.618662227, + 1010.051101, + 0.985094388, + 0.000000001, + 0.713820843, + 0.920922555, + 258.1632616, + ], + [ + 1.876861559, + 125.2232657, + 62.90773554, + 87.83366614, + 12.01999824, + 28.51324732, + 104.5686167, + ], + [ + 0.010000000, + 272.4121427, + 2.558379970, + 1.565547684, + 1.271567166, + 2.758063707, + 105.4420403, + ], + [ + 3.217788970, + 957.7374108, + 2.274627939, + 0.000000001, + 1.317690384, + 0.375760881, + 312.3372610, + ], + [ + 0.010000000, + 1199.094025, + -0.09002883, + 0.000000001, + 0.185904477, + 0.875193782, + 417.7253140, + ], +] + def ads(x, a, b, c, d, e, f, dmax): - return ((a+(b/(1+exp(-1*(x-c+d/2)/e))*(1-1/(1+exp(-1*(x-c-d/2)/f))))) / dmax) + return ( + a + + ( + b + / (1 + exp(-1 * (x - c + d / 2) / e)) + * (1 - 1 / (1 + exp(-1 * (x - c - d / 2) / f))) + ) + ) / dmax + def properties(mol): """ @@ -227,25 +373,26 @@ """ matches = [] if mol is None: - raise WrongArgument("properties(mol)", "mol argument is \'None\'") + raise WrongArgument("properties(mol)", "mol argument is 'None'") x = [0] * 9 - x[0] = Descriptors.MolWt(mol) # MW - x[1] = Descriptors.MolLogP(mol) # ALOGP - for hba in Acceptors: # HBA + x[0] = Descriptors.MolWt(mol) # MW + x[1] = Descriptors.MolLogP(mol) # ALOGP + for hba in Acceptors: # HBA if mol.HasSubstructMatch(hba): matches = mol.GetSubstructMatches(hba) x[2] += len(matches) - x[3] = Descriptors.NumHDonors(mol) # HBD - x[4] = Descriptors.TPSA(mol) # PSA - x[5] = Descriptors.NumRotatableBonds(mol) # ROTB - x[6] = Chem.GetSSSR(Chem.DeleteSubstructs(deepcopy(mol), AliphaticRings)) # AROM - for alert in StructuralAlerts: # ALERTS - if (mol.HasSubstructMatch(alert)): x[7] += 1 + x[3] = Descriptors.NumHDonors(mol) # HBD + x[4] = Descriptors.TPSA(mol) # PSA + x[5] = Descriptors.NumRotatableBonds(mol) # ROTB + x[6] = Chem.GetSSSR(Chem.DeleteSubstructs(deepcopy(mol), AliphaticRings)) # AROM + for alert in StructuralAlerts: # ALERTS + if mol.HasSubstructMatch(alert): + x[7] += 1 ro5_failed = 0 if x[3] > 5: - ro5_failed += 1 #HBD + ro5_failed += 1 # HBD if x[2] > 10: - ro5_failed += 1 #HBA + ro5_failed += 1 # HBA if x[0] >= 500: ro5_failed += 1 if x[1] > 5: @@ -258,17 +405,35 @@ d = [0.00] * 8 if gerebtzoff: for i in range(0, 8): - d[i] = ads(p[i], pads1[i][0], pads1[i][1], pads1[i][2], pads1[i][3], pads1[i][4], pads1[i][5], pads1[i][6]) + d[i] = ads( + p[i], + pads1[i][0], + pads1[i][1], + pads1[i][2], + pads1[i][3], + pads1[i][4], + pads1[i][5], + pads1[i][6], + ) else: for i in range(0, 8): - d[i] = ads(p[i], pads2[i][0], pads2[i][1], pads2[i][2], pads2[i][3], pads2[i][4], pads2[i][5], pads2[i][6]) + d[i] = ads( + p[i], + pads2[i][0], + pads2[i][1], + pads2[i][2], + pads2[i][3], + pads2[i][4], + pads2[i][5], + pads2[i][6], + ) t = 0.0 for i in range(0, 8): t += w[i] * log(d[i]) - return (exp(t / sum(w))) + return exp(t / sum(w)) -def weights_max(mol, gerebtzoff = True, props = False): +def weights_max(mol, gerebtzoff=True, props=False): """ Calculates the QED descriptor using maximal descriptor weights. If props is specified we skip the calculation step and use the props-list of properties. @@ -278,7 +443,7 @@ return qed([0.50, 0.25, 0.00, 0.50, 0.00, 0.50, 0.25, 1.00], props, gerebtzoff) -def weights_mean(mol, gerebtzoff = True, props = False): +def weights_mean(mol, gerebtzoff=True, props=False): """ Calculates the QED descriptor using average descriptor weights. If props is specified we skip the calculation step and use the props-list of properties. @@ -288,7 +453,7 @@ return qed([0.66, 0.46, 0.05, 0.61, 0.06, 0.65, 0.48, 0.95], props, gerebtzoff) -def weights_none(mol, gerebtzoff = True, props = False): +def weights_none(mol, gerebtzoff=True, props=False): """ Calculates the QED descriptor using unit weights. If props is specified we skip the calculation step and use the props-list of properties. @@ -298,7 +463,7 @@ return qed([1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00], props, gerebtzoff) -def default(mol, gerebtzoff = True): +def default(mol, gerebtzoff=True): """ Calculates the QED descriptor using average descriptor weights and Gregory Gerebtzoff parameters. """ @@ -307,23 +472,34 @@ if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument('-i', '--input', - required=True, - help='path to the input file name') - parser.add_argument("-m", "--method", - dest="method", - choices=['max', 'mean', 'unweighted'], - default="mean", - help="Specify the method you want to use.") - parser.add_argument("--iformat", - help="Input format. It must be supported by openbabel.") - parser.add_argument('-o', '--outfile', type=argparse.FileType('w+'), - default=sys.stdout, - help="path to the result file, default it sdtout") - parser.add_argument("--header", dest="header", action="store_true", - default=False, - help="Write header line.") - + parser.add_argument( + "-i", "--input", required=True, help="path to the input file name" + ) + parser.add_argument( + "-m", + "--method", + dest="method", + choices=["max", "mean", "unweighted"], + default="mean", + help="Specify the method you want to use.", + ) + parser.add_argument( + "--iformat", help="Input format. It must be supported by openbabel." + ) + parser.add_argument( + "-o", + "--outfile", + type=argparse.FileType("w+"), + default=sys.stdout, + help="path to the result file, default it sdtout", + ) + parser.add_argument( + "--header", + dest="header", + action="store_true", + default=False, + help="Write header line.", + ) args = parser.parse_args() @@ -343,86 +519,99 @@ # try to guess the filetype filetype = check_filetype(ifile) else: - filetype = args.iformat # sdf or smi - + filetype = args.iformat # sdf or smi """ We want to store the original SMILES in the output. So in case of a SMILES file iterate over the file and convert each line separate. """ - if filetype == 'sdf': + if filetype == "sdf": supplier = Chem.SDMolSupplier(ifile) # Process file if args.header: - args.outfile.write("MW\tALOGP\tHBA\tHBD\tPSA\tROTB\tAROM\tALERTS\tLRo5\tQED\tNAME\n") + args.outfile.write( + "MW\tALOGP\tHBA\tHBD\tPSA\tROTB\tAROM\tALERTS\tLRo5\tQED\tNAME\n" + ) count = 0 for mol in supplier: count += 1 if mol is None: - print("Warning: skipping molecule ", count, " and continuing with next.") + print( + "Warning: skipping molecule ", count, " and continuing with next." + ) continue props = properties(mol) - if args.method == 'max': + if args.method == "max": calc_qed = weights_max(mol, True, props) - elif args.method == 'unweighted': + elif args.method == "unweighted": calc_qed = weights_none(mol, True, props) else: calc_qed = weights_mean(mol, True, props) - args.outfile.write( "%.2f\t%.3f\t%d\t%d\t%.2f\t%d\t%d\t%d\t%s\t%.3f\t%-s\n" % ( - props[0], - props[1], - props[2], - props[3], - props[4], - props[5], - props[6], - props[7], - props[8], - calc_qed, - mol.GetProp("_Name"), - )) - elif filetype == 'smi': - supplier = Chem.SmilesMolSupplier( ifile, " \t", 0, 1, False, True ) + args.outfile.write( + "%.2f\t%.3f\t%d\t%d\t%.2f\t%d\t%d\t%d\t%s\t%.3f\t%-s\n" + % ( + props[0], + props[1], + props[2], + props[3], + props[4], + props[5], + props[6], + props[7], + props[8], + calc_qed, + mol.GetProp("_Name"), + ) + ) + elif filetype == "smi": + supplier = Chem.SmilesMolSupplier(ifile, " \t", 0, 1, False, True) # Process file if args.header: - args.outfile.write("MW\tALOGP\tHBA\tHBD\tPSA\tROTB\tAROM\tALERTS\tLRo5\tQED\tNAME\tSMILES\n") + args.outfile.write( + "MW\tALOGP\tHBA\tHBD\tPSA\tROTB\tAROM\tALERTS\tLRo5\tQED\tNAME\tSMILES\n" + ) count = 0 for line in open(ifile): - tokens = line.strip().split('\t') + tokens = line.strip().split("\t") if len(tokens) > 1: smiles, title = tokens else: smiles = tokens[0] - title = '' + title = "" mol = Chem.MolFromSmiles(smiles) count += 1 if mol is None: - print("Warning: skipping molecule ", count, " and continuing with next.") + print( + "Warning: skipping molecule ", count, " and continuing with next." + ) continue props = properties(mol) - if args.method == 'max': + if args.method == "max": calc_qed = weights_max(mol, True, props) - elif args.method == 'unweighted': + elif args.method == "unweighted": calc_qed = weights_none(mol, True, props) else: calc_qed = weights_mean(mol, True, props) - args.outfile.write( "%.2f\t%.3f\t%d\t%d\t%.2f\t%d\t%d\t%d\t%s\t%.3f\t%-s\t%s\n" % ( - props[0], - props[1], - props[2], - props[3], - props[4], - props[5], - props[6], - props[7], - props[8], - calc_qed, - title, - smiles - )) + args.outfile.write( + "%.2f\t%.3f\t%d\t%d\t%.2f\t%d\t%d\t%d\t%s\t%.3f\t%-s\t%s\n" + % ( + props[0], + props[1], + props[2], + props[3], + props[4], + props[5], + props[6], + props[7], + props[8], + calc_qed, + title, + smiles, + ) + ) else: sys.exit("Error: unknown file-type: %s" % filetype) diff -r fc45bf8b6e01 -r 52a8d34dd08f silicos_qed.xml --- a/silicos_qed.xml Tue Jul 28 08:47:05 2020 -0400 +++ b/silicos_qed.xml Fri Jul 30 12:51:45 2021 +0000 @@ -1,6 +1,6 @@ - 2019.03.1 + 2021.03.4 0 quantitative estimation (QED) with RDKit