comparison qed.py @ 3:52a8d34dd08f draft default tip

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/silicos-it/qed commit 943ff93be2257426d69a8406ed55c838495ecf3f"
author bgruening
date Fri, 30 Jul 2021 12:51:45 +0000
parents ab73abead7fa
children
comparison
equal deleted inserted replaced
2:fc45bf8b6e01 3:52a8d34dd08f
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 __all__ = ['weights_max', 'weights_mean', 'weights_none', 'default'] 2 __all__ = ["weights_max", "weights_mean", "weights_none", "default"]
3 3
4 # RDKit 4 import argparse
5 from rdkit.Chem import Descriptors 5 import os
6 from rdkit import Chem 6 import re
7 7 import sys
8 # General 8 # General
9 from copy import deepcopy 9 from copy import deepcopy
10 from math import exp, log 10 from math import exp, log
11 import sys, os, re 11
12 import argparse 12 from rdkit import Chem
13 # RDKit
14 from rdkit.Chem import Descriptors
13 15
14 16
15 class SilicosItError(Exception): 17 class SilicosItError(Exception):
16 """Base class for exceptions in Silicos-it code""" 18 """Base class for exceptions in Silicos-it code"""
17 pass 19
18 20
19 class WrongArgument(SilicosItError): 21 class WrongArgument(SilicosItError):
20 """ 22 """
21 Exception raised when argument to function is not of correct type. 23 Exception raised when argument to function is not of correct type.
22 24
23 Attributes: 25 Attributes:
24 function -- function in which error occurred 26 function -- function in which error occurred
25 msg -- explanation of the error 27 msg -- explanation of the error
26 """ 28 """
29
27 def __init__(self, function, msg): 30 def __init__(self, function, msg):
28 self.function = function 31 self.function = function
29 self.msg = msg 32 self.msg = msg
33
30 34
31 def check_filetype(filepath): 35 def check_filetype(filepath):
32 mol = False 36 mol = False
33 possible_inchi = True 37 possible_inchi = True
34 for line_counter, line in enumerate(open(filepath)): 38 for line_counter, line in enumerate(open(filepath)):
35 if line_counter > 10000: 39 if line_counter > 10000:
36 break 40 break
37 if line.find('$$$$') != -1: 41 if line.find("$$$$") != -1:
38 return 'sdf' 42 return "sdf"
39 elif line.find('@<TRIPOS>MOLECULE') != -1: 43 elif line.find("@<TRIPOS>MOLECULE") != -1:
40 return 'mol2' 44 return "mol2"
41 elif line.find('ligand id') != -1: 45 elif line.find("ligand id") != -1:
42 return 'drf' 46 return "drf"
43 elif possible_inchi and re.findall('^InChI=', line): 47 elif possible_inchi and re.findall("^InChI=", line):
44 return 'inchi' 48 return "inchi"
45 elif re.findall('^M\s+END', line): 49 elif re.findall("^M\s+END", line): # noqa W605
46 mol = True 50 mol = True
47 # first line is not an InChI, so it can't be an InChI file 51 # first line is not an InChI, so it can't be an InChI file
48 possible_inchi = False 52 possible_inchi = False
49 53
50 if mol: 54 if mol:
51 # END can occures before $$$$, so and SDF file will 55 # END can occures before $$$$, so and SDF file will
52 # be recognised as mol, if you not using this hack' 56 # be recognised as mol, if you not using this hack'
53 return 'mol' 57 return "mol"
54 return 'smi' 58 return "smi"
55 59
56 AliphaticRings = Chem.MolFromSmarts('[$([A;R][!a])]') 60
61 AliphaticRings = Chem.MolFromSmarts("[$([A;R][!a])]")
57 62
58 AcceptorSmarts = [ 63 AcceptorSmarts = [
59 '[oH0;X2]', 64 "[oH0;X2]",
60 '[OH1;X2;v2]', 65 "[OH1;X2;v2]",
61 '[OH0;X2;v2]', 66 "[OH0;X2;v2]",
62 '[OH0;X1;v2]', 67 "[OH0;X1;v2]",
63 '[O-;X1]', 68 "[O-;X1]",
64 '[SH0;X2;v2]', 69 "[SH0;X2;v2]",
65 '[SH0;X1;v2]', 70 "[SH0;X1;v2]",
66 '[S-;X1]', 71 "[S-;X1]",
67 '[nH0;X2]', 72 "[nH0;X2]",
68 '[NH0;X1;v3]', 73 "[NH0;X1;v3]",
69 '[$([N;+0;X3;v3]);!$(N[C,S]=O)]' 74 "[$([N;+0;X3;v3]);!$(N[C,S]=O)]",
70 ] 75 ]
71 Acceptors = [] 76 Acceptors = []
72 for hba in AcceptorSmarts: 77 for hba in AcceptorSmarts:
73 Acceptors.append(Chem.MolFromSmarts(hba)) 78 Acceptors.append(Chem.MolFromSmarts(hba))
74 79
75 StructuralAlertSmarts = [ 80 StructuralAlertSmarts = [
76 '*1[O,S,N]*1', 81 "*1[O,S,N]*1",
77 '[S,C](=[O,S])[F,Br,Cl,I]', 82 "[S,C](=[O,S])[F,Br,Cl,I]",
78 '[CX4][Cl,Br,I]', 83 "[CX4][Cl,Br,I]",
79 '[C,c]S(=O)(=O)O[C,c]', 84 "[C,c]S(=O)(=O)O[C,c]",
80 '[$([CH]),$(CC)]#CC(=O)[C,c]', 85 "[$([CH]),$(CC)]#CC(=O)[C,c]",
81 '[$([CH]),$(CC)]#CC(=O)O[C,c]', 86 "[$([CH]),$(CC)]#CC(=O)O[C,c]",
82 'n[OH]', 87 "n[OH]",
83 '[$([CH]),$(CC)]#CS(=O)(=O)[C,c]', 88 "[$([CH]),$(CC)]#CS(=O)(=O)[C,c]",
84 'C=C(C=O)C=O', 89 "C=C(C=O)C=O",
85 'n1c([F,Cl,Br,I])cccc1', 90 "n1c([F,Cl,Br,I])cccc1",
86 '[CH1](=O)', 91 "[CH1](=O)",
87 '[O,o][O,o]', 92 "[O,o][O,o]",
88 '[C;!R]=[N;!R]', 93 "[C;!R]=[N;!R]",
89 '[N!R]=[N!R]', 94 "[N!R]=[N!R]",
90 '[#6](=O)[#6](=O)', 95 "[#6](=O)[#6](=O)",
91 '[S,s][S,s]', 96 "[S,s][S,s]",
92 '[N,n][NH2]', 97 "[N,n][NH2]",
93 'C(=O)N[NH2]', 98 "C(=O)N[NH2]",
94 '[C,c]=S', 99 "[C,c]=S",
95 '[$([CH2]),$([CH][CX4]),$(C([CX4])[CX4])]=[$([CH2]),$([CH][CX4]),$(C([CX4])[CX4])]', 100 "[$([CH2]),$([CH][CX4]),$(C([CX4])[CX4])]=[$([CH2]),$([CH][CX4]),$(C([CX4])[CX4])]",
96 'C1(=[O,N])C=CC(=[O,N])C=C1', 101 "C1(=[O,N])C=CC(=[O,N])C=C1",
97 'C1(=[O,N])C(=[O,N])C=CC=C1', 102 "C1(=[O,N])C(=[O,N])C=CC=C1",
98 'a21aa3a(aa1aaaa2)aaaa3', 103 "a21aa3a(aa1aaaa2)aaaa3",
99 'a31a(a2a(aa1)aaaa2)aaaa3', 104 "a31a(a2a(aa1)aaaa2)aaaa3",
100 'a1aa2a3a(a1)A=AA=A3=AA=A2', 105 "a1aa2a3a(a1)A=AA=A3=AA=A2",
101 'c1cc([NH2])ccc1', 106 "c1cc([NH2])ccc1",
102 '[Hg,Fe,As,Sb,Zn,Se,se,Te,B,Si,Na,Ca,Ge,Ag,Mg,K,Ba,Sr,Be,Ti,Mo,Mn,Ru,Pd,Ni,Cu,Au,Cd,Al,Ga,Sn,Rh,Tl,Bi,Nb,Li,Pb,Hf,Ho]', 107 "[Hg,Fe,As,Sb,Zn,Se,se,Te,B,Si,Na,Ca,Ge,Ag,Mg,K,Ba,Sr,Be,Ti,Mo,Mn,Ru,Pd,Ni,Cu,Au,Cd,Al,Ga,Sn,Rh,Tl,Bi,Nb,Li,Pb,Hf,Ho]",
103 'I', 108 "I",
104 'OS(=O)(=O)[O-]', 109 "OS(=O)(=O)[O-]",
105 '[N+](=O)[O-]', 110 "[N+](=O)[O-]",
106 'C(=O)N[OH]', 111 "C(=O)N[OH]",
107 'C1NC(=O)NC(=O)1', 112 "C1NC(=O)NC(=O)1",
108 '[SH]', 113 "[SH]",
109 '[S-]', 114 "[S-]",
110 'c1ccc([Cl,Br,I,F])c([Cl,Br,I,F])c1[Cl,Br,I,F]', 115 "c1ccc([Cl,Br,I,F])c([Cl,Br,I,F])c1[Cl,Br,I,F]",
111 'c1cc([Cl,Br,I,F])cc([Cl,Br,I,F])c1[Cl,Br,I,F]', 116 "c1cc([Cl,Br,I,F])cc([Cl,Br,I,F])c1[Cl,Br,I,F]",
112 '[CR1]1[CR1][CR1][CR1][CR1][CR1][CR1]1', 117 "[CR1]1[CR1][CR1][CR1][CR1][CR1][CR1]1",
113 '[CR1]1[CR1][CR1]cc[CR1][CR1]1', 118 "[CR1]1[CR1][CR1]cc[CR1][CR1]1",
114 '[CR2]1[CR2][CR2][CR2][CR2][CR2][CR2][CR2]1', 119 "[CR2]1[CR2][CR2][CR2][CR2][CR2][CR2][CR2]1",
115 '[CR2]1[CR2][CR2]cc[CR2][CR2][CR2]1', 120 "[CR2]1[CR2][CR2]cc[CR2][CR2][CR2]1",
116 '[CH2R2]1N[CH2R2][CH2R2][CH2R2][CH2R2][CH2R2]1', 121 "[CH2R2]1N[CH2R2][CH2R2][CH2R2][CH2R2][CH2R2]1",
117 '[CH2R2]1N[CH2R2][CH2R2][CH2R2][CH2R2][CH2R2][CH2R2]1', 122 "[CH2R2]1N[CH2R2][CH2R2][CH2R2][CH2R2][CH2R2][CH2R2]1",
118 'C#C', 123 "C#C",
119 '[OR2,NR2]@[CR2]@[CR2]@[OR2,NR2]@[CR2]@[CR2]@[OR2,NR2]', 124 "[OR2,NR2]@[CR2]@[CR2]@[OR2,NR2]@[CR2]@[CR2]@[OR2,NR2]",
120 '[$([N+R]),$([n+R]),$([N+]=C)][O-]', 125 "[$([N+R]),$([n+R]),$([N+]=C)][O-]",
121 '[C,c]=N[OH]', 126 "[C,c]=N[OH]",
122 '[C,c]=NOC=O', 127 "[C,c]=NOC=O",
123 '[C,c](=O)[CX4,CR0X3,O][C,c](=O)', 128 "[C,c](=O)[CX4,CR0X3,O][C,c](=O)",
124 'c1ccc2c(c1)ccc(=O)o2', 129 "c1ccc2c(c1)ccc(=O)o2",
125 '[O+,o+,S+,s+]', 130 "[O+,o+,S+,s+]",
126 'N=C=O', 131 "N=C=O",
127 '[NX3,NX4][F,Cl,Br,I]', 132 "[NX3,NX4][F,Cl,Br,I]",
128 'c1ccccc1OC(=O)[#6]', 133 "c1ccccc1OC(=O)[#6]",
129 '[CR0]=[CR0][CR0]=[CR0]', 134 "[CR0]=[CR0][CR0]=[CR0]",
130 '[C+,c+,C-,c-]', 135 "[C+,c+,C-,c-]",
131 'N=[N+]=[N-]', 136 "N=[N+]=[N-]",
132 'C12C(NC(N1)=O)CSC2', 137 "C12C(NC(N1)=O)CSC2",
133 'c1c([OH])c([OH,NH2,NH])ccc1', 138 "c1c([OH])c([OH,NH2,NH])ccc1",
134 'P', 139 "P",
135 '[N,O,S]C#N', 140 "[N,O,S]C#N",
136 'C=C=O', 141 "C=C=O",
137 '[Si][F,Cl,Br,I]', 142 "[Si][F,Cl,Br,I]",
138 '[SX2]O', 143 "[SX2]O",
139 '[SiR0,CR0](c1ccccc1)(c2ccccc2)(c3ccccc3)', 144 "[SiR0,CR0](c1ccccc1)(c2ccccc2)(c3ccccc3)",
140 'O1CCCCC1OC2CCC3CCCCC3C2', 145 "O1CCCCC1OC2CCC3CCCCC3C2",
141 'N=[CR0][N,n,O,S]', 146 "N=[CR0][N,n,O,S]",
142 '[cR2]1[cR2][cR2]([Nv3X3,Nv4X4])[cR2][cR2][cR2]1[cR2]2[cR2][cR2][cR2]([Nv3X3,Nv4X4])[cR2][cR2]2', 147 "[cR2]1[cR2][cR2]([Nv3X3,Nv4X4])[cR2][cR2][cR2]1[cR2]2[cR2][cR2][cR2]([Nv3X3,Nv4X4])[cR2][cR2]2",
143 'C=[C!r]C#N', 148 "C=[C!r]C#N",
144 '[cR2]1[cR2]c([N+0X3R0,nX3R0])c([N+0X3R0,nX3R0])[cR2][cR2]1', 149 "[cR2]1[cR2]c([N+0X3R0,nX3R0])c([N+0X3R0,nX3R0])[cR2][cR2]1",
145 '[cR2]1[cR2]c([N+0X3R0,nX3R0])[cR2]c([N+0X3R0,nX3R0])[cR2]1', 150 "[cR2]1[cR2]c([N+0X3R0,nX3R0])[cR2]c([N+0X3R0,nX3R0])[cR2]1",
146 '[cR2]1[cR2]c([N+0X3R0,nX3R0])[cR2][cR2]c1([N+0X3R0,nX3R0])', 151 "[cR2]1[cR2]c([N+0X3R0,nX3R0])[cR2][cR2]c1([N+0X3R0,nX3R0])",
147 '[OH]c1ccc([OH,NH2,NH])cc1', 152 "[OH]c1ccc([OH,NH2,NH])cc1",
148 'c1ccccc1OC(=O)O', 153 "c1ccccc1OC(=O)O",
149 '[SX2H0][N]', 154 "[SX2H0][N]",
150 'c12ccccc1(SC(S)=N2)', 155 "c12ccccc1(SC(S)=N2)",
151 'c12ccccc1(SC(=S)N2)', 156 "c12ccccc1(SC(=S)N2)",
152 'c1nnnn1C=O', 157 "c1nnnn1C=O",
153 's1c(S)nnc1NC=O', 158 "s1c(S)nnc1NC=O",
154 'S1C=CSC1=S', 159 "S1C=CSC1=S",
155 'C(=O)Onnn', 160 "C(=O)Onnn",
156 'OS(=O)(=O)C(F)(F)F', 161 "OS(=O)(=O)C(F)(F)F",
157 'N#CC[OH]', 162 "N#CC[OH]",
158 'N#CC(=O)', 163 "N#CC(=O)",
159 'S(=O)(=O)C#N', 164 "S(=O)(=O)C#N",
160 'N[CH2]C#N', 165 "N[CH2]C#N",
161 'C1(=O)NCC1', 166 "C1(=O)NCC1",
162 'S(=O)(=O)[O-,OH]', 167 "S(=O)(=O)[O-,OH]",
163 'NC[F,Cl,Br,I]', 168 "NC[F,Cl,Br,I]",
164 'C=[C!r]O', 169 "C=[C!r]O",
165 '[NX2+0]=[O+0]', 170 "[NX2+0]=[O+0]",
166 '[OR0,NR0][OR0,NR0]', 171 "[OR0,NR0][OR0,NR0]",
167 'C(=O)O[C,H1].C(=O)O[C,H1].C(=O)O[C,H1]', 172 "C(=O)O[C,H1].C(=O)O[C,H1].C(=O)O[C,H1]",
168 '[CX2R0][NX3R0]', 173 "[CX2R0][NX3R0]",
169 'c1ccccc1[C;!R]=[C;!R]c2ccccc2', 174 "c1ccccc1[C;!R]=[C;!R]c2ccccc2",
170 '[NX3R0,NX4R0,OR0,SX2R0][CX4][NX3R0,NX4R0,OR0,SX2R0]', 175 "[NX3R0,NX4R0,OR0,SX2R0][CX4][NX3R0,NX4R0,OR0,SX2R0]",
171 '[s,S,c,C,n,N,o,O]~[n+,N+](~[s,S,c,C,n,N,o,O])(~[s,S,c,C,n,N,o,O])~[s,S,c,C,n,N,o,O]', 176 "[s,S,c,C,n,N,o,O]~[n+,N+](~[s,S,c,C,n,N,o,O])(~[s,S,c,C,n,N,o,O])~[s,S,c,C,n,N,o,O]",
172 '[s,S,c,C,n,N,o,O]~[nX3+,NX3+](~[s,S,c,C,n,N])~[s,S,c,C,n,N]', 177 "[s,S,c,C,n,N,o,O]~[nX3+,NX3+](~[s,S,c,C,n,N])~[s,S,c,C,n,N]",
173 '[*]=[N+]=[*]', 178 "[*]=[N+]=[*]",
174 '[SX3](=O)[O-,OH]', 179 "[SX3](=O)[O-,OH]",
175 'N#N', 180 "N#N",
176 'F.F.F.F', 181 "F.F.F.F",
177 '[R0;D2][R0;D2][R0;D2][R0;D2]', 182 "[R0;D2][R0;D2][R0;D2][R0;D2]",
178 '[cR,CR]~C(=O)NC(=O)~[cR,CR]', 183 "[cR,CR]~C(=O)NC(=O)~[cR,CR]",
179 'C=!@CC=[O,S]', 184 "C=!@CC=[O,S]",
180 '[#6,#8,#16][C,c](=O)O[C,c]', 185 "[#6,#8,#16][C,c](=O)O[C,c]",
181 'c[C;R0](=[O,S])[C,c]', 186 "c[C;R0](=[O,S])[C,c]",
182 'c[SX2][C;!R]', 187 "c[SX2][C;!R]",
183 'C=C=C', 188 "C=C=C",
184 'c1nc([F,Cl,Br,I,S])ncc1', 189 "c1nc([F,Cl,Br,I,S])ncc1",
185 'c1ncnc([F,Cl,Br,I,S])c1', 190 "c1ncnc([F,Cl,Br,I,S])c1",
186 'c1nc(c2c(n1)nc(n2)[F,Cl,Br,I])', 191 "c1nc(c2c(n1)nc(n2)[F,Cl,Br,I])",
187 '[C,c]S(=O)(=O)c1ccc(cc1)F', 192 "[C,c]S(=O)(=O)c1ccc(cc1)F",
188 '[15N]', 193 "[15N]",
189 '[13C]', 194 "[13C]",
190 '[18O]', 195 "[18O]",
191 '[34S]' 196 "[34S]",
192 ] 197 ]
193 198
194 StructuralAlerts = [] 199 StructuralAlerts = []
195 for smarts in StructuralAlertSmarts: 200 for smarts in StructuralAlertSmarts:
196 StructuralAlerts.append(Chem.MolFromSmarts(smarts)) 201 StructuralAlerts.append(Chem.MolFromSmarts(smarts))
197 202
198 203
199 # ADS parameters for the 8 molecular properties: [row][column] 204 # ADS parameters for the 8 molecular properties: [row][column]
200 # rows[8]: MW, ALOGP, HBA, HBD, PSA, ROTB, AROM, ALERTS 205 # rows[8]: MW, ALOGP, HBA, HBD, PSA, ROTB, AROM, ALERTS
201 # columns[7]: A, B, C, D, E, F, DMAX 206 # columns[7]: A, B, C, D, E, F, DMAX
202 # ALOGP parameters from Gregory Gerebtzoff (2012, Roche) 207 # ALOGP parameters from Gregory Gerebtzoff (2012, Roche)
203 pads1 = [ [2.817065973, 392.5754953, 290.7489764, 2.419764353, 49.22325677, 65.37051707, 104.9805561], 208 pads1 = [
204 [0.486849448, 186.2293718, 2.066177165, 3.902720615, 1.027025453, 0.913012565, 145.4314800], 209 [
205 [2.948620388, 160.4605972, 3.615294657, 4.435986202, 0.290141953, 1.300669958, 148.7763046], 210 2.817065973,
206 [1.618662227, 1010.051101, 0.985094388, 0.000000001, 0.713820843, 0.920922555, 258.1632616], 211 392.5754953,
207 [1.876861559, 125.2232657, 62.90773554, 87.83366614, 12.01999824, 28.51324732, 104.5686167], 212 290.7489764,
208 [0.010000000, 272.4121427, 2.558379970, 1.565547684, 1.271567166, 2.758063707, 105.4420403], 213 2.419764353,
209 [3.217788970, 957.7374108, 2.274627939, 0.000000001, 1.317690384, 0.375760881, 312.3372610], 214 49.22325677,
210 [0.010000000, 1199.094025, -0.09002883, 0.000000001, 0.185904477, 0.875193782, 417.7253140] ] 215 65.37051707,
216 104.9805561,
217 ],
218 [
219 0.486849448,
220 186.2293718,
221 2.066177165,
222 3.902720615,
223 1.027025453,
224 0.913012565,
225 145.4314800,
226 ],
227 [
228 2.948620388,
229 160.4605972,
230 3.615294657,
231 4.435986202,
232 0.290141953,
233 1.300669958,
234 148.7763046,
235 ],
236 [
237 1.618662227,
238 1010.051101,
239 0.985094388,
240 0.000000001,
241 0.713820843,
242 0.920922555,
243 258.1632616,
244 ],
245 [
246 1.876861559,
247 125.2232657,
248 62.90773554,
249 87.83366614,
250 12.01999824,
251 28.51324732,
252 104.5686167,
253 ],
254 [
255 0.010000000,
256 272.4121427,
257 2.558379970,
258 1.565547684,
259 1.271567166,
260 2.758063707,
261 105.4420403,
262 ],
263 [
264 3.217788970,
265 957.7374108,
266 2.274627939,
267 0.000000001,
268 1.317690384,
269 0.375760881,
270 312.3372610,
271 ],
272 [
273 0.010000000,
274 1199.094025,
275 -0.09002883,
276 0.000000001,
277 0.185904477,
278 0.875193782,
279 417.7253140,
280 ],
281 ]
211 # ALOGP parameters from the original publication 282 # ALOGP parameters from the original publication
212 pads2 = [ [2.817065973, 392.5754953, 290.7489764, 2.419764353, 49.22325677, 65.37051707, 104.9805561], 283 pads2 = [
213 [3.172690585, 137.8624751, 2.534937431, 4.581497897, 0.822739154, 0.576295591, 131.3186604], 284 [
214 [2.948620388, 160.4605972, 3.615294657, 4.435986202, 0.290141953, 1.300669958, 148.7763046], 285 2.817065973,
215 [1.618662227, 1010.051101, 0.985094388, 0.000000001, 0.713820843, 0.920922555, 258.1632616], 286 392.5754953,
216 [1.876861559, 125.2232657, 62.90773554, 87.83366614, 12.01999824, 28.51324732, 104.5686167], 287 290.7489764,
217 [0.010000000, 272.4121427, 2.558379970, 1.565547684, 1.271567166, 2.758063707, 105.4420403], 288 2.419764353,
218 [3.217788970, 957.7374108, 2.274627939, 0.000000001, 1.317690384, 0.375760881, 312.3372610], 289 49.22325677,
219 [0.010000000, 1199.094025, -0.09002883, 0.000000001, 0.185904477, 0.875193782, 417.7253140] ] 290 65.37051707,
291 104.9805561,
292 ],
293 [
294 3.172690585,
295 137.8624751,
296 2.534937431,
297 4.581497897,
298 0.822739154,
299 0.576295591,
300 131.3186604,
301 ],
302 [
303 2.948620388,
304 160.4605972,
305 3.615294657,
306 4.435986202,
307 0.290141953,
308 1.300669958,
309 148.7763046,
310 ],
311 [
312 1.618662227,
313 1010.051101,
314 0.985094388,
315 0.000000001,
316 0.713820843,
317 0.920922555,
318 258.1632616,
319 ],
320 [
321 1.876861559,
322 125.2232657,
323 62.90773554,
324 87.83366614,
325 12.01999824,
326 28.51324732,
327 104.5686167,
328 ],
329 [
330 0.010000000,
331 272.4121427,
332 2.558379970,
333 1.565547684,
334 1.271567166,
335 2.758063707,
336 105.4420403,
337 ],
338 [
339 3.217788970,
340 957.7374108,
341 2.274627939,
342 0.000000001,
343 1.317690384,
344 0.375760881,
345 312.3372610,
346 ],
347 [
348 0.010000000,
349 1199.094025,
350 -0.09002883,
351 0.000000001,
352 0.185904477,
353 0.875193782,
354 417.7253140,
355 ],
356 ]
357
220 358
221 def ads(x, a, b, c, d, e, f, dmax): 359 def ads(x, a, b, c, d, e, f, dmax):
222 return ((a+(b/(1+exp(-1*(x-c+d/2)/e))*(1-1/(1+exp(-1*(x-c-d/2)/f))))) / dmax) 360 return (
361 a
362 + (
363 b
364 / (1 + exp(-1 * (x - c + d / 2) / e))
365 * (1 - 1 / (1 + exp(-1 * (x - c - d / 2) / f)))
366 )
367 ) / dmax
368
223 369
224 def properties(mol): 370 def properties(mol):
225 """ 371 """
226 Calculates the properties that are required to calculate the QED descriptor. 372 Calculates the properties that are required to calculate the QED descriptor.
227 """ 373 """
228 matches = [] 374 matches = []
229 if mol is None: 375 if mol is None:
230 raise WrongArgument("properties(mol)", "mol argument is \'None\'") 376 raise WrongArgument("properties(mol)", "mol argument is 'None'")
231 x = [0] * 9 377 x = [0] * 9
232 x[0] = Descriptors.MolWt(mol) # MW 378 x[0] = Descriptors.MolWt(mol) # MW
233 x[1] = Descriptors.MolLogP(mol) # ALOGP 379 x[1] = Descriptors.MolLogP(mol) # ALOGP
234 for hba in Acceptors: # HBA 380 for hba in Acceptors: # HBA
235 if mol.HasSubstructMatch(hba): 381 if mol.HasSubstructMatch(hba):
236 matches = mol.GetSubstructMatches(hba) 382 matches = mol.GetSubstructMatches(hba)
237 x[2] += len(matches) 383 x[2] += len(matches)
238 x[3] = Descriptors.NumHDonors(mol) # HBD 384 x[3] = Descriptors.NumHDonors(mol) # HBD
239 x[4] = Descriptors.TPSA(mol) # PSA 385 x[4] = Descriptors.TPSA(mol) # PSA
240 x[5] = Descriptors.NumRotatableBonds(mol) # ROTB 386 x[5] = Descriptors.NumRotatableBonds(mol) # ROTB
241 x[6] = Chem.GetSSSR(Chem.DeleteSubstructs(deepcopy(mol), AliphaticRings)) # AROM 387 x[6] = Chem.GetSSSR(Chem.DeleteSubstructs(deepcopy(mol), AliphaticRings)) # AROM
242 for alert in StructuralAlerts: # ALERTS 388 for alert in StructuralAlerts: # ALERTS
243 if (mol.HasSubstructMatch(alert)): x[7] += 1 389 if mol.HasSubstructMatch(alert):
390 x[7] += 1
244 ro5_failed = 0 391 ro5_failed = 0
245 if x[3] > 5: 392 if x[3] > 5:
246 ro5_failed += 1 #HBD 393 ro5_failed += 1 # HBD
247 if x[2] > 10: 394 if x[2] > 10:
248 ro5_failed += 1 #HBA 395 ro5_failed += 1 # HBA
249 if x[0] >= 500: 396 if x[0] >= 500:
250 ro5_failed += 1 397 ro5_failed += 1
251 if x[1] > 5: 398 if x[1] > 5:
252 ro5_failed += 1 399 ro5_failed += 1
253 x[8] = ro5_failed 400 x[8] = ro5_failed
256 403
257 def qed(w, p, gerebtzoff): 404 def qed(w, p, gerebtzoff):
258 d = [0.00] * 8 405 d = [0.00] * 8
259 if gerebtzoff: 406 if gerebtzoff:
260 for i in range(0, 8): 407 for i in range(0, 8):
261 d[i] = ads(p[i], pads1[i][0], pads1[i][1], pads1[i][2], pads1[i][3], pads1[i][4], pads1[i][5], pads1[i][6]) 408 d[i] = ads(
409 p[i],
410 pads1[i][0],
411 pads1[i][1],
412 pads1[i][2],
413 pads1[i][3],
414 pads1[i][4],
415 pads1[i][5],
416 pads1[i][6],
417 )
262 else: 418 else:
263 for i in range(0, 8): 419 for i in range(0, 8):
264 d[i] = ads(p[i], pads2[i][0], pads2[i][1], pads2[i][2], pads2[i][3], pads2[i][4], pads2[i][5], pads2[i][6]) 420 d[i] = ads(
421 p[i],
422 pads2[i][0],
423 pads2[i][1],
424 pads2[i][2],
425 pads2[i][3],
426 pads2[i][4],
427 pads2[i][5],
428 pads2[i][6],
429 )
265 t = 0.0 430 t = 0.0
266 for i in range(0, 8): 431 for i in range(0, 8):
267 t += w[i] * log(d[i]) 432 t += w[i] * log(d[i])
268 return (exp(t / sum(w))) 433 return exp(t / sum(w))
269 434
270 435
271 def weights_max(mol, gerebtzoff = True, props = False): 436 def weights_max(mol, gerebtzoff=True, props=False):
272 """ 437 """
273 Calculates the QED descriptor using maximal descriptor weights. 438 Calculates the QED descriptor using maximal descriptor weights.
274 If props is specified we skip the calculation step and use the props-list of properties. 439 If props is specified we skip the calculation step and use the props-list of properties.
275 """ 440 """
276 if not props: 441 if not props:
277 props = properties(mol) 442 props = properties(mol)
278 return qed([0.50, 0.25, 0.00, 0.50, 0.00, 0.50, 0.25, 1.00], props, gerebtzoff) 443 return qed([0.50, 0.25, 0.00, 0.50, 0.00, 0.50, 0.25, 1.00], props, gerebtzoff)
279 444
280 445
281 def weights_mean(mol, gerebtzoff = True, props = False): 446 def weights_mean(mol, gerebtzoff=True, props=False):
282 """ 447 """
283 Calculates the QED descriptor using average descriptor weights. 448 Calculates the QED descriptor using average descriptor weights.
284 If props is specified we skip the calculation step and use the props-list of properties. 449 If props is specified we skip the calculation step and use the props-list of properties.
285 """ 450 """
286 if not props: 451 if not props:
287 props = properties(mol) 452 props = properties(mol)
288 return qed([0.66, 0.46, 0.05, 0.61, 0.06, 0.65, 0.48, 0.95], props, gerebtzoff) 453 return qed([0.66, 0.46, 0.05, 0.61, 0.06, 0.65, 0.48, 0.95], props, gerebtzoff)
289 454
290 455
291 def weights_none(mol, gerebtzoff = True, props = False): 456 def weights_none(mol, gerebtzoff=True, props=False):
292 """ 457 """
293 Calculates the QED descriptor using unit weights. 458 Calculates the QED descriptor using unit weights.
294 If props is specified we skip the calculation step and use the props-list of properties. 459 If props is specified we skip the calculation step and use the props-list of properties.
295 """ 460 """
296 if not props: 461 if not props:
297 props = properties(mol) 462 props = properties(mol)
298 return qed([1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00], props, gerebtzoff) 463 return qed([1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00], props, gerebtzoff)
299 464
300 465
301 def default(mol, gerebtzoff = True): 466 def default(mol, gerebtzoff=True):
302 """ 467 """
303 Calculates the QED descriptor using average descriptor weights and Gregory Gerebtzoff parameters. 468 Calculates the QED descriptor using average descriptor weights and Gregory Gerebtzoff parameters.
304 """ 469 """
305 return weights_mean(mol, gerebtzoff) 470 return weights_mean(mol, gerebtzoff)
306 471
307 472
308 if __name__ == "__main__": 473 if __name__ == "__main__":
309 parser = argparse.ArgumentParser() 474 parser = argparse.ArgumentParser()
310 parser.add_argument('-i', '--input', 475 parser.add_argument(
311 required=True, 476 "-i", "--input", required=True, help="path to the input file name"
312 help='path to the input file name') 477 )
313 parser.add_argument("-m", "--method", 478 parser.add_argument(
314 dest="method", 479 "-m",
315 choices=['max', 'mean', 'unweighted'], 480 "--method",
316 default="mean", 481 dest="method",
317 help="Specify the method you want to use.") 482 choices=["max", "mean", "unweighted"],
318 parser.add_argument("--iformat", 483 default="mean",
319 help="Input format. It must be supported by openbabel.") 484 help="Specify the method you want to use.",
320 parser.add_argument('-o', '--outfile', type=argparse.FileType('w+'), 485 )
321 default=sys.stdout, 486 parser.add_argument(
322 help="path to the result file, default it sdtout") 487 "--iformat", help="Input format. It must be supported by openbabel."
323 parser.add_argument("--header", dest="header", action="store_true", 488 )
324 default=False, 489 parser.add_argument(
325 help="Write header line.") 490 "-o",
326 491 "--outfile",
492 type=argparse.FileType("w+"),
493 default=sys.stdout,
494 help="path to the result file, default it sdtout",
495 )
496 parser.add_argument(
497 "--header",
498 dest="header",
499 action="store_true",
500 default=False,
501 help="Write header line.",
502 )
327 503
328 args = parser.parse_args() 504 args = parser.parse_args()
329 505
330 # Elucidate filetype and open supplier 506 # Elucidate filetype and open supplier
331 ifile = os.path.abspath(args.input) 507 ifile = os.path.abspath(args.input)
341 517
342 if not args.iformat: 518 if not args.iformat:
343 # try to guess the filetype 519 # try to guess the filetype
344 filetype = check_filetype(ifile) 520 filetype = check_filetype(ifile)
345 else: 521 else:
346 filetype = args.iformat # sdf or smi 522 filetype = args.iformat # sdf or smi
347
348 523
349 """ 524 """
350 We want to store the original SMILES in the output. So in case of a SMILES file iterate over the file and convert each line separate. 525 We want to store the original SMILES in the output. So in case of a SMILES file iterate over the file and convert each line separate.
351 """ 526 """
352 if filetype == 'sdf': 527 if filetype == "sdf":
353 supplier = Chem.SDMolSupplier(ifile) 528 supplier = Chem.SDMolSupplier(ifile)
354 # Process file 529 # Process file
355 if args.header: 530 if args.header:
356 args.outfile.write("MW\tALOGP\tHBA\tHBD\tPSA\tROTB\tAROM\tALERTS\tLRo5\tQED\tNAME\n") 531 args.outfile.write(
532 "MW\tALOGP\tHBA\tHBD\tPSA\tROTB\tAROM\tALERTS\tLRo5\tQED\tNAME\n"
533 )
357 count = 0 534 count = 0
358 for mol in supplier: 535 for mol in supplier:
359 count += 1 536 count += 1
360 if mol is None: 537 if mol is None:
361 print("Warning: skipping molecule ", count, " and continuing with next.") 538 print(
539 "Warning: skipping molecule ", count, " and continuing with next."
540 )
362 continue 541 continue
363 props = properties(mol) 542 props = properties(mol)
364 543
365 if args.method == 'max': 544 if args.method == "max":
366 calc_qed = weights_max(mol, True, props) 545 calc_qed = weights_max(mol, True, props)
367 elif args.method == 'unweighted': 546 elif args.method == "unweighted":
368 calc_qed = weights_none(mol, True, props) 547 calc_qed = weights_none(mol, True, props)
369 else: 548 else:
370 calc_qed = weights_mean(mol, True, props) 549 calc_qed = weights_mean(mol, True, props)
371 550
372 args.outfile.write( "%.2f\t%.3f\t%d\t%d\t%.2f\t%d\t%d\t%d\t%s\t%.3f\t%-s\n" % ( 551 args.outfile.write(
373 props[0], 552 "%.2f\t%.3f\t%d\t%d\t%.2f\t%d\t%d\t%d\t%s\t%.3f\t%-s\n"
374 props[1], 553 % (
375 props[2], 554 props[0],
376 props[3], 555 props[1],
377 props[4], 556 props[2],
378 props[5], 557 props[3],
379 props[6], 558 props[4],
380 props[7], 559 props[5],
381 props[8], 560 props[6],
382 calc_qed, 561 props[7],
383 mol.GetProp("_Name"), 562 props[8],
384 )) 563 calc_qed,
385 elif filetype == 'smi': 564 mol.GetProp("_Name"),
386 supplier = Chem.SmilesMolSupplier( ifile, " \t", 0, 1, False, True ) 565 )
566 )
567 elif filetype == "smi":
568 supplier = Chem.SmilesMolSupplier(ifile, " \t", 0, 1, False, True)
387 569
388 # Process file 570 # Process file
389 if args.header: 571 if args.header:
390 args.outfile.write("MW\tALOGP\tHBA\tHBD\tPSA\tROTB\tAROM\tALERTS\tLRo5\tQED\tNAME\tSMILES\n") 572 args.outfile.write(
573 "MW\tALOGP\tHBA\tHBD\tPSA\tROTB\tAROM\tALERTS\tLRo5\tQED\tNAME\tSMILES\n"
574 )
391 count = 0 575 count = 0
392 for line in open(ifile): 576 for line in open(ifile):
393 tokens = line.strip().split('\t') 577 tokens = line.strip().split("\t")
394 if len(tokens) > 1: 578 if len(tokens) > 1:
395 smiles, title = tokens 579 smiles, title = tokens
396 else: 580 else:
397 smiles = tokens[0] 581 smiles = tokens[0]
398 title = '' 582 title = ""
399 mol = Chem.MolFromSmiles(smiles) 583 mol = Chem.MolFromSmiles(smiles)
400 count += 1 584 count += 1
401 if mol is None: 585 if mol is None:
402 print("Warning: skipping molecule ", count, " and continuing with next.") 586 print(
587 "Warning: skipping molecule ", count, " and continuing with next."
588 )
403 continue 589 continue
404 props = properties(mol) 590 props = properties(mol)
405 591
406 if args.method == 'max': 592 if args.method == "max":
407 calc_qed = weights_max(mol, True, props) 593 calc_qed = weights_max(mol, True, props)
408 elif args.method == 'unweighted': 594 elif args.method == "unweighted":
409 calc_qed = weights_none(mol, True, props) 595 calc_qed = weights_none(mol, True, props)
410 else: 596 else:
411 calc_qed = weights_mean(mol, True, props) 597 calc_qed = weights_mean(mol, True, props)
412 598
413 args.outfile.write( "%.2f\t%.3f\t%d\t%d\t%.2f\t%d\t%d\t%d\t%s\t%.3f\t%-s\t%s\n" % ( 599 args.outfile.write(
414 props[0], 600 "%.2f\t%.3f\t%d\t%d\t%.2f\t%d\t%d\t%d\t%s\t%.3f\t%-s\t%s\n"
415 props[1], 601 % (
416 props[2], 602 props[0],
417 props[3], 603 props[1],
418 props[4], 604 props[2],
419 props[5], 605 props[3],
420 props[6], 606 props[4],
421 props[7], 607 props[5],
422 props[8], 608 props[6],
423 calc_qed, 609 props[7],
424 title, 610 props[8],
425 smiles 611 calc_qed,
426 )) 612 title,
613 smiles,
614 )
615 )
427 else: 616 else:
428 sys.exit("Error: unknown file-type: %s" % filetype) 617 sys.exit("Error: unknown file-type: %s" % filetype)