Mercurial > repos > bgruening > qed
comparison qed.py @ 3:52a8d34dd08f draft default tip
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/silicos-it/qed commit 943ff93be2257426d69a8406ed55c838495ecf3f"
author | bgruening |
---|---|
date | Fri, 30 Jul 2021 12:51:45 +0000 |
parents | ab73abead7fa |
children |
comparison
equal
deleted
inserted
replaced
2:fc45bf8b6e01 | 3:52a8d34dd08f |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 __all__ = ['weights_max', 'weights_mean', 'weights_none', 'default'] | 2 __all__ = ["weights_max", "weights_mean", "weights_none", "default"] |
3 | 3 |
4 # RDKit | 4 import argparse |
5 from rdkit.Chem import Descriptors | 5 import os |
6 from rdkit import Chem | 6 import re |
7 | 7 import sys |
8 # General | 8 # General |
9 from copy import deepcopy | 9 from copy import deepcopy |
10 from math import exp, log | 10 from math import exp, log |
11 import sys, os, re | 11 |
12 import argparse | 12 from rdkit import Chem |
13 # RDKit | |
14 from rdkit.Chem import Descriptors | |
13 | 15 |
14 | 16 |
15 class SilicosItError(Exception): | 17 class SilicosItError(Exception): |
16 """Base class for exceptions in Silicos-it code""" | 18 """Base class for exceptions in Silicos-it code""" |
17 pass | 19 |
18 | 20 |
19 class WrongArgument(SilicosItError): | 21 class WrongArgument(SilicosItError): |
20 """ | 22 """ |
21 Exception raised when argument to function is not of correct type. | 23 Exception raised when argument to function is not of correct type. |
22 | 24 |
23 Attributes: | 25 Attributes: |
24 function -- function in which error occurred | 26 function -- function in which error occurred |
25 msg -- explanation of the error | 27 msg -- explanation of the error |
26 """ | 28 """ |
29 | |
27 def __init__(self, function, msg): | 30 def __init__(self, function, msg): |
28 self.function = function | 31 self.function = function |
29 self.msg = msg | 32 self.msg = msg |
33 | |
30 | 34 |
31 def check_filetype(filepath): | 35 def check_filetype(filepath): |
32 mol = False | 36 mol = False |
33 possible_inchi = True | 37 possible_inchi = True |
34 for line_counter, line in enumerate(open(filepath)): | 38 for line_counter, line in enumerate(open(filepath)): |
35 if line_counter > 10000: | 39 if line_counter > 10000: |
36 break | 40 break |
37 if line.find('$$$$') != -1: | 41 if line.find("$$$$") != -1: |
38 return 'sdf' | 42 return "sdf" |
39 elif line.find('@<TRIPOS>MOLECULE') != -1: | 43 elif line.find("@<TRIPOS>MOLECULE") != -1: |
40 return 'mol2' | 44 return "mol2" |
41 elif line.find('ligand id') != -1: | 45 elif line.find("ligand id") != -1: |
42 return 'drf' | 46 return "drf" |
43 elif possible_inchi and re.findall('^InChI=', line): | 47 elif possible_inchi and re.findall("^InChI=", line): |
44 return 'inchi' | 48 return "inchi" |
45 elif re.findall('^M\s+END', line): | 49 elif re.findall("^M\s+END", line): # noqa W605 |
46 mol = True | 50 mol = True |
47 # first line is not an InChI, so it can't be an InChI file | 51 # first line is not an InChI, so it can't be an InChI file |
48 possible_inchi = False | 52 possible_inchi = False |
49 | 53 |
50 if mol: | 54 if mol: |
51 # END can occures before $$$$, so and SDF file will | 55 # END can occures before $$$$, so and SDF file will |
52 # be recognised as mol, if you not using this hack' | 56 # be recognised as mol, if you not using this hack' |
53 return 'mol' | 57 return "mol" |
54 return 'smi' | 58 return "smi" |
55 | 59 |
56 AliphaticRings = Chem.MolFromSmarts('[$([A;R][!a])]') | 60 |
61 AliphaticRings = Chem.MolFromSmarts("[$([A;R][!a])]") | |
57 | 62 |
58 AcceptorSmarts = [ | 63 AcceptorSmarts = [ |
59 '[oH0;X2]', | 64 "[oH0;X2]", |
60 '[OH1;X2;v2]', | 65 "[OH1;X2;v2]", |
61 '[OH0;X2;v2]', | 66 "[OH0;X2;v2]", |
62 '[OH0;X1;v2]', | 67 "[OH0;X1;v2]", |
63 '[O-;X1]', | 68 "[O-;X1]", |
64 '[SH0;X2;v2]', | 69 "[SH0;X2;v2]", |
65 '[SH0;X1;v2]', | 70 "[SH0;X1;v2]", |
66 '[S-;X1]', | 71 "[S-;X1]", |
67 '[nH0;X2]', | 72 "[nH0;X2]", |
68 '[NH0;X1;v3]', | 73 "[NH0;X1;v3]", |
69 '[$([N;+0;X3;v3]);!$(N[C,S]=O)]' | 74 "[$([N;+0;X3;v3]);!$(N[C,S]=O)]", |
70 ] | 75 ] |
71 Acceptors = [] | 76 Acceptors = [] |
72 for hba in AcceptorSmarts: | 77 for hba in AcceptorSmarts: |
73 Acceptors.append(Chem.MolFromSmarts(hba)) | 78 Acceptors.append(Chem.MolFromSmarts(hba)) |
74 | 79 |
75 StructuralAlertSmarts = [ | 80 StructuralAlertSmarts = [ |
76 '*1[O,S,N]*1', | 81 "*1[O,S,N]*1", |
77 '[S,C](=[O,S])[F,Br,Cl,I]', | 82 "[S,C](=[O,S])[F,Br,Cl,I]", |
78 '[CX4][Cl,Br,I]', | 83 "[CX4][Cl,Br,I]", |
79 '[C,c]S(=O)(=O)O[C,c]', | 84 "[C,c]S(=O)(=O)O[C,c]", |
80 '[$([CH]),$(CC)]#CC(=O)[C,c]', | 85 "[$([CH]),$(CC)]#CC(=O)[C,c]", |
81 '[$([CH]),$(CC)]#CC(=O)O[C,c]', | 86 "[$([CH]),$(CC)]#CC(=O)O[C,c]", |
82 'n[OH]', | 87 "n[OH]", |
83 '[$([CH]),$(CC)]#CS(=O)(=O)[C,c]', | 88 "[$([CH]),$(CC)]#CS(=O)(=O)[C,c]", |
84 'C=C(C=O)C=O', | 89 "C=C(C=O)C=O", |
85 'n1c([F,Cl,Br,I])cccc1', | 90 "n1c([F,Cl,Br,I])cccc1", |
86 '[CH1](=O)', | 91 "[CH1](=O)", |
87 '[O,o][O,o]', | 92 "[O,o][O,o]", |
88 '[C;!R]=[N;!R]', | 93 "[C;!R]=[N;!R]", |
89 '[N!R]=[N!R]', | 94 "[N!R]=[N!R]", |
90 '[#6](=O)[#6](=O)', | 95 "[#6](=O)[#6](=O)", |
91 '[S,s][S,s]', | 96 "[S,s][S,s]", |
92 '[N,n][NH2]', | 97 "[N,n][NH2]", |
93 'C(=O)N[NH2]', | 98 "C(=O)N[NH2]", |
94 '[C,c]=S', | 99 "[C,c]=S", |
95 '[$([CH2]),$([CH][CX4]),$(C([CX4])[CX4])]=[$([CH2]),$([CH][CX4]),$(C([CX4])[CX4])]', | 100 "[$([CH2]),$([CH][CX4]),$(C([CX4])[CX4])]=[$([CH2]),$([CH][CX4]),$(C([CX4])[CX4])]", |
96 'C1(=[O,N])C=CC(=[O,N])C=C1', | 101 "C1(=[O,N])C=CC(=[O,N])C=C1", |
97 'C1(=[O,N])C(=[O,N])C=CC=C1', | 102 "C1(=[O,N])C(=[O,N])C=CC=C1", |
98 'a21aa3a(aa1aaaa2)aaaa3', | 103 "a21aa3a(aa1aaaa2)aaaa3", |
99 'a31a(a2a(aa1)aaaa2)aaaa3', | 104 "a31a(a2a(aa1)aaaa2)aaaa3", |
100 'a1aa2a3a(a1)A=AA=A3=AA=A2', | 105 "a1aa2a3a(a1)A=AA=A3=AA=A2", |
101 'c1cc([NH2])ccc1', | 106 "c1cc([NH2])ccc1", |
102 '[Hg,Fe,As,Sb,Zn,Se,se,Te,B,Si,Na,Ca,Ge,Ag,Mg,K,Ba,Sr,Be,Ti,Mo,Mn,Ru,Pd,Ni,Cu,Au,Cd,Al,Ga,Sn,Rh,Tl,Bi,Nb,Li,Pb,Hf,Ho]', | 107 "[Hg,Fe,As,Sb,Zn,Se,se,Te,B,Si,Na,Ca,Ge,Ag,Mg,K,Ba,Sr,Be,Ti,Mo,Mn,Ru,Pd,Ni,Cu,Au,Cd,Al,Ga,Sn,Rh,Tl,Bi,Nb,Li,Pb,Hf,Ho]", |
103 'I', | 108 "I", |
104 'OS(=O)(=O)[O-]', | 109 "OS(=O)(=O)[O-]", |
105 '[N+](=O)[O-]', | 110 "[N+](=O)[O-]", |
106 'C(=O)N[OH]', | 111 "C(=O)N[OH]", |
107 'C1NC(=O)NC(=O)1', | 112 "C1NC(=O)NC(=O)1", |
108 '[SH]', | 113 "[SH]", |
109 '[S-]', | 114 "[S-]", |
110 'c1ccc([Cl,Br,I,F])c([Cl,Br,I,F])c1[Cl,Br,I,F]', | 115 "c1ccc([Cl,Br,I,F])c([Cl,Br,I,F])c1[Cl,Br,I,F]", |
111 'c1cc([Cl,Br,I,F])cc([Cl,Br,I,F])c1[Cl,Br,I,F]', | 116 "c1cc([Cl,Br,I,F])cc([Cl,Br,I,F])c1[Cl,Br,I,F]", |
112 '[CR1]1[CR1][CR1][CR1][CR1][CR1][CR1]1', | 117 "[CR1]1[CR1][CR1][CR1][CR1][CR1][CR1]1", |
113 '[CR1]1[CR1][CR1]cc[CR1][CR1]1', | 118 "[CR1]1[CR1][CR1]cc[CR1][CR1]1", |
114 '[CR2]1[CR2][CR2][CR2][CR2][CR2][CR2][CR2]1', | 119 "[CR2]1[CR2][CR2][CR2][CR2][CR2][CR2][CR2]1", |
115 '[CR2]1[CR2][CR2]cc[CR2][CR2][CR2]1', | 120 "[CR2]1[CR2][CR2]cc[CR2][CR2][CR2]1", |
116 '[CH2R2]1N[CH2R2][CH2R2][CH2R2][CH2R2][CH2R2]1', | 121 "[CH2R2]1N[CH2R2][CH2R2][CH2R2][CH2R2][CH2R2]1", |
117 '[CH2R2]1N[CH2R2][CH2R2][CH2R2][CH2R2][CH2R2][CH2R2]1', | 122 "[CH2R2]1N[CH2R2][CH2R2][CH2R2][CH2R2][CH2R2][CH2R2]1", |
118 'C#C', | 123 "C#C", |
119 '[OR2,NR2]@[CR2]@[CR2]@[OR2,NR2]@[CR2]@[CR2]@[OR2,NR2]', | 124 "[OR2,NR2]@[CR2]@[CR2]@[OR2,NR2]@[CR2]@[CR2]@[OR2,NR2]", |
120 '[$([N+R]),$([n+R]),$([N+]=C)][O-]', | 125 "[$([N+R]),$([n+R]),$([N+]=C)][O-]", |
121 '[C,c]=N[OH]', | 126 "[C,c]=N[OH]", |
122 '[C,c]=NOC=O', | 127 "[C,c]=NOC=O", |
123 '[C,c](=O)[CX4,CR0X3,O][C,c](=O)', | 128 "[C,c](=O)[CX4,CR0X3,O][C,c](=O)", |
124 'c1ccc2c(c1)ccc(=O)o2', | 129 "c1ccc2c(c1)ccc(=O)o2", |
125 '[O+,o+,S+,s+]', | 130 "[O+,o+,S+,s+]", |
126 'N=C=O', | 131 "N=C=O", |
127 '[NX3,NX4][F,Cl,Br,I]', | 132 "[NX3,NX4][F,Cl,Br,I]", |
128 'c1ccccc1OC(=O)[#6]', | 133 "c1ccccc1OC(=O)[#6]", |
129 '[CR0]=[CR0][CR0]=[CR0]', | 134 "[CR0]=[CR0][CR0]=[CR0]", |
130 '[C+,c+,C-,c-]', | 135 "[C+,c+,C-,c-]", |
131 'N=[N+]=[N-]', | 136 "N=[N+]=[N-]", |
132 'C12C(NC(N1)=O)CSC2', | 137 "C12C(NC(N1)=O)CSC2", |
133 'c1c([OH])c([OH,NH2,NH])ccc1', | 138 "c1c([OH])c([OH,NH2,NH])ccc1", |
134 'P', | 139 "P", |
135 '[N,O,S]C#N', | 140 "[N,O,S]C#N", |
136 'C=C=O', | 141 "C=C=O", |
137 '[Si][F,Cl,Br,I]', | 142 "[Si][F,Cl,Br,I]", |
138 '[SX2]O', | 143 "[SX2]O", |
139 '[SiR0,CR0](c1ccccc1)(c2ccccc2)(c3ccccc3)', | 144 "[SiR0,CR0](c1ccccc1)(c2ccccc2)(c3ccccc3)", |
140 'O1CCCCC1OC2CCC3CCCCC3C2', | 145 "O1CCCCC1OC2CCC3CCCCC3C2", |
141 'N=[CR0][N,n,O,S]', | 146 "N=[CR0][N,n,O,S]", |
142 '[cR2]1[cR2][cR2]([Nv3X3,Nv4X4])[cR2][cR2][cR2]1[cR2]2[cR2][cR2][cR2]([Nv3X3,Nv4X4])[cR2][cR2]2', | 147 "[cR2]1[cR2][cR2]([Nv3X3,Nv4X4])[cR2][cR2][cR2]1[cR2]2[cR2][cR2][cR2]([Nv3X3,Nv4X4])[cR2][cR2]2", |
143 'C=[C!r]C#N', | 148 "C=[C!r]C#N", |
144 '[cR2]1[cR2]c([N+0X3R0,nX3R0])c([N+0X3R0,nX3R0])[cR2][cR2]1', | 149 "[cR2]1[cR2]c([N+0X3R0,nX3R0])c([N+0X3R0,nX3R0])[cR2][cR2]1", |
145 '[cR2]1[cR2]c([N+0X3R0,nX3R0])[cR2]c([N+0X3R0,nX3R0])[cR2]1', | 150 "[cR2]1[cR2]c([N+0X3R0,nX3R0])[cR2]c([N+0X3R0,nX3R0])[cR2]1", |
146 '[cR2]1[cR2]c([N+0X3R0,nX3R0])[cR2][cR2]c1([N+0X3R0,nX3R0])', | 151 "[cR2]1[cR2]c([N+0X3R0,nX3R0])[cR2][cR2]c1([N+0X3R0,nX3R0])", |
147 '[OH]c1ccc([OH,NH2,NH])cc1', | 152 "[OH]c1ccc([OH,NH2,NH])cc1", |
148 'c1ccccc1OC(=O)O', | 153 "c1ccccc1OC(=O)O", |
149 '[SX2H0][N]', | 154 "[SX2H0][N]", |
150 'c12ccccc1(SC(S)=N2)', | 155 "c12ccccc1(SC(S)=N2)", |
151 'c12ccccc1(SC(=S)N2)', | 156 "c12ccccc1(SC(=S)N2)", |
152 'c1nnnn1C=O', | 157 "c1nnnn1C=O", |
153 's1c(S)nnc1NC=O', | 158 "s1c(S)nnc1NC=O", |
154 'S1C=CSC1=S', | 159 "S1C=CSC1=S", |
155 'C(=O)Onnn', | 160 "C(=O)Onnn", |
156 'OS(=O)(=O)C(F)(F)F', | 161 "OS(=O)(=O)C(F)(F)F", |
157 'N#CC[OH]', | 162 "N#CC[OH]", |
158 'N#CC(=O)', | 163 "N#CC(=O)", |
159 'S(=O)(=O)C#N', | 164 "S(=O)(=O)C#N", |
160 'N[CH2]C#N', | 165 "N[CH2]C#N", |
161 'C1(=O)NCC1', | 166 "C1(=O)NCC1", |
162 'S(=O)(=O)[O-,OH]', | 167 "S(=O)(=O)[O-,OH]", |
163 'NC[F,Cl,Br,I]', | 168 "NC[F,Cl,Br,I]", |
164 'C=[C!r]O', | 169 "C=[C!r]O", |
165 '[NX2+0]=[O+0]', | 170 "[NX2+0]=[O+0]", |
166 '[OR0,NR0][OR0,NR0]', | 171 "[OR0,NR0][OR0,NR0]", |
167 'C(=O)O[C,H1].C(=O)O[C,H1].C(=O)O[C,H1]', | 172 "C(=O)O[C,H1].C(=O)O[C,H1].C(=O)O[C,H1]", |
168 '[CX2R0][NX3R0]', | 173 "[CX2R0][NX3R0]", |
169 'c1ccccc1[C;!R]=[C;!R]c2ccccc2', | 174 "c1ccccc1[C;!R]=[C;!R]c2ccccc2", |
170 '[NX3R0,NX4R0,OR0,SX2R0][CX4][NX3R0,NX4R0,OR0,SX2R0]', | 175 "[NX3R0,NX4R0,OR0,SX2R0][CX4][NX3R0,NX4R0,OR0,SX2R0]", |
171 '[s,S,c,C,n,N,o,O]~[n+,N+](~[s,S,c,C,n,N,o,O])(~[s,S,c,C,n,N,o,O])~[s,S,c,C,n,N,o,O]', | 176 "[s,S,c,C,n,N,o,O]~[n+,N+](~[s,S,c,C,n,N,o,O])(~[s,S,c,C,n,N,o,O])~[s,S,c,C,n,N,o,O]", |
172 '[s,S,c,C,n,N,o,O]~[nX3+,NX3+](~[s,S,c,C,n,N])~[s,S,c,C,n,N]', | 177 "[s,S,c,C,n,N,o,O]~[nX3+,NX3+](~[s,S,c,C,n,N])~[s,S,c,C,n,N]", |
173 '[*]=[N+]=[*]', | 178 "[*]=[N+]=[*]", |
174 '[SX3](=O)[O-,OH]', | 179 "[SX3](=O)[O-,OH]", |
175 'N#N', | 180 "N#N", |
176 'F.F.F.F', | 181 "F.F.F.F", |
177 '[R0;D2][R0;D2][R0;D2][R0;D2]', | 182 "[R0;D2][R0;D2][R0;D2][R0;D2]", |
178 '[cR,CR]~C(=O)NC(=O)~[cR,CR]', | 183 "[cR,CR]~C(=O)NC(=O)~[cR,CR]", |
179 'C=!@CC=[O,S]', | 184 "C=!@CC=[O,S]", |
180 '[#6,#8,#16][C,c](=O)O[C,c]', | 185 "[#6,#8,#16][C,c](=O)O[C,c]", |
181 'c[C;R0](=[O,S])[C,c]', | 186 "c[C;R0](=[O,S])[C,c]", |
182 'c[SX2][C;!R]', | 187 "c[SX2][C;!R]", |
183 'C=C=C', | 188 "C=C=C", |
184 'c1nc([F,Cl,Br,I,S])ncc1', | 189 "c1nc([F,Cl,Br,I,S])ncc1", |
185 'c1ncnc([F,Cl,Br,I,S])c1', | 190 "c1ncnc([F,Cl,Br,I,S])c1", |
186 'c1nc(c2c(n1)nc(n2)[F,Cl,Br,I])', | 191 "c1nc(c2c(n1)nc(n2)[F,Cl,Br,I])", |
187 '[C,c]S(=O)(=O)c1ccc(cc1)F', | 192 "[C,c]S(=O)(=O)c1ccc(cc1)F", |
188 '[15N]', | 193 "[15N]", |
189 '[13C]', | 194 "[13C]", |
190 '[18O]', | 195 "[18O]", |
191 '[34S]' | 196 "[34S]", |
192 ] | 197 ] |
193 | 198 |
194 StructuralAlerts = [] | 199 StructuralAlerts = [] |
195 for smarts in StructuralAlertSmarts: | 200 for smarts in StructuralAlertSmarts: |
196 StructuralAlerts.append(Chem.MolFromSmarts(smarts)) | 201 StructuralAlerts.append(Chem.MolFromSmarts(smarts)) |
197 | 202 |
198 | 203 |
199 # ADS parameters for the 8 molecular properties: [row][column] | 204 # ADS parameters for the 8 molecular properties: [row][column] |
200 # rows[8]: MW, ALOGP, HBA, HBD, PSA, ROTB, AROM, ALERTS | 205 # rows[8]: MW, ALOGP, HBA, HBD, PSA, ROTB, AROM, ALERTS |
201 # columns[7]: A, B, C, D, E, F, DMAX | 206 # columns[7]: A, B, C, D, E, F, DMAX |
202 # ALOGP parameters from Gregory Gerebtzoff (2012, Roche) | 207 # ALOGP parameters from Gregory Gerebtzoff (2012, Roche) |
203 pads1 = [ [2.817065973, 392.5754953, 290.7489764, 2.419764353, 49.22325677, 65.37051707, 104.9805561], | 208 pads1 = [ |
204 [0.486849448, 186.2293718, 2.066177165, 3.902720615, 1.027025453, 0.913012565, 145.4314800], | 209 [ |
205 [2.948620388, 160.4605972, 3.615294657, 4.435986202, 0.290141953, 1.300669958, 148.7763046], | 210 2.817065973, |
206 [1.618662227, 1010.051101, 0.985094388, 0.000000001, 0.713820843, 0.920922555, 258.1632616], | 211 392.5754953, |
207 [1.876861559, 125.2232657, 62.90773554, 87.83366614, 12.01999824, 28.51324732, 104.5686167], | 212 290.7489764, |
208 [0.010000000, 272.4121427, 2.558379970, 1.565547684, 1.271567166, 2.758063707, 105.4420403], | 213 2.419764353, |
209 [3.217788970, 957.7374108, 2.274627939, 0.000000001, 1.317690384, 0.375760881, 312.3372610], | 214 49.22325677, |
210 [0.010000000, 1199.094025, -0.09002883, 0.000000001, 0.185904477, 0.875193782, 417.7253140] ] | 215 65.37051707, |
216 104.9805561, | |
217 ], | |
218 [ | |
219 0.486849448, | |
220 186.2293718, | |
221 2.066177165, | |
222 3.902720615, | |
223 1.027025453, | |
224 0.913012565, | |
225 145.4314800, | |
226 ], | |
227 [ | |
228 2.948620388, | |
229 160.4605972, | |
230 3.615294657, | |
231 4.435986202, | |
232 0.290141953, | |
233 1.300669958, | |
234 148.7763046, | |
235 ], | |
236 [ | |
237 1.618662227, | |
238 1010.051101, | |
239 0.985094388, | |
240 0.000000001, | |
241 0.713820843, | |
242 0.920922555, | |
243 258.1632616, | |
244 ], | |
245 [ | |
246 1.876861559, | |
247 125.2232657, | |
248 62.90773554, | |
249 87.83366614, | |
250 12.01999824, | |
251 28.51324732, | |
252 104.5686167, | |
253 ], | |
254 [ | |
255 0.010000000, | |
256 272.4121427, | |
257 2.558379970, | |
258 1.565547684, | |
259 1.271567166, | |
260 2.758063707, | |
261 105.4420403, | |
262 ], | |
263 [ | |
264 3.217788970, | |
265 957.7374108, | |
266 2.274627939, | |
267 0.000000001, | |
268 1.317690384, | |
269 0.375760881, | |
270 312.3372610, | |
271 ], | |
272 [ | |
273 0.010000000, | |
274 1199.094025, | |
275 -0.09002883, | |
276 0.000000001, | |
277 0.185904477, | |
278 0.875193782, | |
279 417.7253140, | |
280 ], | |
281 ] | |
211 # ALOGP parameters from the original publication | 282 # ALOGP parameters from the original publication |
212 pads2 = [ [2.817065973, 392.5754953, 290.7489764, 2.419764353, 49.22325677, 65.37051707, 104.9805561], | 283 pads2 = [ |
213 [3.172690585, 137.8624751, 2.534937431, 4.581497897, 0.822739154, 0.576295591, 131.3186604], | 284 [ |
214 [2.948620388, 160.4605972, 3.615294657, 4.435986202, 0.290141953, 1.300669958, 148.7763046], | 285 2.817065973, |
215 [1.618662227, 1010.051101, 0.985094388, 0.000000001, 0.713820843, 0.920922555, 258.1632616], | 286 392.5754953, |
216 [1.876861559, 125.2232657, 62.90773554, 87.83366614, 12.01999824, 28.51324732, 104.5686167], | 287 290.7489764, |
217 [0.010000000, 272.4121427, 2.558379970, 1.565547684, 1.271567166, 2.758063707, 105.4420403], | 288 2.419764353, |
218 [3.217788970, 957.7374108, 2.274627939, 0.000000001, 1.317690384, 0.375760881, 312.3372610], | 289 49.22325677, |
219 [0.010000000, 1199.094025, -0.09002883, 0.000000001, 0.185904477, 0.875193782, 417.7253140] ] | 290 65.37051707, |
291 104.9805561, | |
292 ], | |
293 [ | |
294 3.172690585, | |
295 137.8624751, | |
296 2.534937431, | |
297 4.581497897, | |
298 0.822739154, | |
299 0.576295591, | |
300 131.3186604, | |
301 ], | |
302 [ | |
303 2.948620388, | |
304 160.4605972, | |
305 3.615294657, | |
306 4.435986202, | |
307 0.290141953, | |
308 1.300669958, | |
309 148.7763046, | |
310 ], | |
311 [ | |
312 1.618662227, | |
313 1010.051101, | |
314 0.985094388, | |
315 0.000000001, | |
316 0.713820843, | |
317 0.920922555, | |
318 258.1632616, | |
319 ], | |
320 [ | |
321 1.876861559, | |
322 125.2232657, | |
323 62.90773554, | |
324 87.83366614, | |
325 12.01999824, | |
326 28.51324732, | |
327 104.5686167, | |
328 ], | |
329 [ | |
330 0.010000000, | |
331 272.4121427, | |
332 2.558379970, | |
333 1.565547684, | |
334 1.271567166, | |
335 2.758063707, | |
336 105.4420403, | |
337 ], | |
338 [ | |
339 3.217788970, | |
340 957.7374108, | |
341 2.274627939, | |
342 0.000000001, | |
343 1.317690384, | |
344 0.375760881, | |
345 312.3372610, | |
346 ], | |
347 [ | |
348 0.010000000, | |
349 1199.094025, | |
350 -0.09002883, | |
351 0.000000001, | |
352 0.185904477, | |
353 0.875193782, | |
354 417.7253140, | |
355 ], | |
356 ] | |
357 | |
220 | 358 |
221 def ads(x, a, b, c, d, e, f, dmax): | 359 def ads(x, a, b, c, d, e, f, dmax): |
222 return ((a+(b/(1+exp(-1*(x-c+d/2)/e))*(1-1/(1+exp(-1*(x-c-d/2)/f))))) / dmax) | 360 return ( |
361 a | |
362 + ( | |
363 b | |
364 / (1 + exp(-1 * (x - c + d / 2) / e)) | |
365 * (1 - 1 / (1 + exp(-1 * (x - c - d / 2) / f))) | |
366 ) | |
367 ) / dmax | |
368 | |
223 | 369 |
224 def properties(mol): | 370 def properties(mol): |
225 """ | 371 """ |
226 Calculates the properties that are required to calculate the QED descriptor. | 372 Calculates the properties that are required to calculate the QED descriptor. |
227 """ | 373 """ |
228 matches = [] | 374 matches = [] |
229 if mol is None: | 375 if mol is None: |
230 raise WrongArgument("properties(mol)", "mol argument is \'None\'") | 376 raise WrongArgument("properties(mol)", "mol argument is 'None'") |
231 x = [0] * 9 | 377 x = [0] * 9 |
232 x[0] = Descriptors.MolWt(mol) # MW | 378 x[0] = Descriptors.MolWt(mol) # MW |
233 x[1] = Descriptors.MolLogP(mol) # ALOGP | 379 x[1] = Descriptors.MolLogP(mol) # ALOGP |
234 for hba in Acceptors: # HBA | 380 for hba in Acceptors: # HBA |
235 if mol.HasSubstructMatch(hba): | 381 if mol.HasSubstructMatch(hba): |
236 matches = mol.GetSubstructMatches(hba) | 382 matches = mol.GetSubstructMatches(hba) |
237 x[2] += len(matches) | 383 x[2] += len(matches) |
238 x[3] = Descriptors.NumHDonors(mol) # HBD | 384 x[3] = Descriptors.NumHDonors(mol) # HBD |
239 x[4] = Descriptors.TPSA(mol) # PSA | 385 x[4] = Descriptors.TPSA(mol) # PSA |
240 x[5] = Descriptors.NumRotatableBonds(mol) # ROTB | 386 x[5] = Descriptors.NumRotatableBonds(mol) # ROTB |
241 x[6] = Chem.GetSSSR(Chem.DeleteSubstructs(deepcopy(mol), AliphaticRings)) # AROM | 387 x[6] = Chem.GetSSSR(Chem.DeleteSubstructs(deepcopy(mol), AliphaticRings)) # AROM |
242 for alert in StructuralAlerts: # ALERTS | 388 for alert in StructuralAlerts: # ALERTS |
243 if (mol.HasSubstructMatch(alert)): x[7] += 1 | 389 if mol.HasSubstructMatch(alert): |
390 x[7] += 1 | |
244 ro5_failed = 0 | 391 ro5_failed = 0 |
245 if x[3] > 5: | 392 if x[3] > 5: |
246 ro5_failed += 1 #HBD | 393 ro5_failed += 1 # HBD |
247 if x[2] > 10: | 394 if x[2] > 10: |
248 ro5_failed += 1 #HBA | 395 ro5_failed += 1 # HBA |
249 if x[0] >= 500: | 396 if x[0] >= 500: |
250 ro5_failed += 1 | 397 ro5_failed += 1 |
251 if x[1] > 5: | 398 if x[1] > 5: |
252 ro5_failed += 1 | 399 ro5_failed += 1 |
253 x[8] = ro5_failed | 400 x[8] = ro5_failed |
256 | 403 |
257 def qed(w, p, gerebtzoff): | 404 def qed(w, p, gerebtzoff): |
258 d = [0.00] * 8 | 405 d = [0.00] * 8 |
259 if gerebtzoff: | 406 if gerebtzoff: |
260 for i in range(0, 8): | 407 for i in range(0, 8): |
261 d[i] = ads(p[i], pads1[i][0], pads1[i][1], pads1[i][2], pads1[i][3], pads1[i][4], pads1[i][5], pads1[i][6]) | 408 d[i] = ads( |
409 p[i], | |
410 pads1[i][0], | |
411 pads1[i][1], | |
412 pads1[i][2], | |
413 pads1[i][3], | |
414 pads1[i][4], | |
415 pads1[i][5], | |
416 pads1[i][6], | |
417 ) | |
262 else: | 418 else: |
263 for i in range(0, 8): | 419 for i in range(0, 8): |
264 d[i] = ads(p[i], pads2[i][0], pads2[i][1], pads2[i][2], pads2[i][3], pads2[i][4], pads2[i][5], pads2[i][6]) | 420 d[i] = ads( |
421 p[i], | |
422 pads2[i][0], | |
423 pads2[i][1], | |
424 pads2[i][2], | |
425 pads2[i][3], | |
426 pads2[i][4], | |
427 pads2[i][5], | |
428 pads2[i][6], | |
429 ) | |
265 t = 0.0 | 430 t = 0.0 |
266 for i in range(0, 8): | 431 for i in range(0, 8): |
267 t += w[i] * log(d[i]) | 432 t += w[i] * log(d[i]) |
268 return (exp(t / sum(w))) | 433 return exp(t / sum(w)) |
269 | 434 |
270 | 435 |
271 def weights_max(mol, gerebtzoff = True, props = False): | 436 def weights_max(mol, gerebtzoff=True, props=False): |
272 """ | 437 """ |
273 Calculates the QED descriptor using maximal descriptor weights. | 438 Calculates the QED descriptor using maximal descriptor weights. |
274 If props is specified we skip the calculation step and use the props-list of properties. | 439 If props is specified we skip the calculation step and use the props-list of properties. |
275 """ | 440 """ |
276 if not props: | 441 if not props: |
277 props = properties(mol) | 442 props = properties(mol) |
278 return qed([0.50, 0.25, 0.00, 0.50, 0.00, 0.50, 0.25, 1.00], props, gerebtzoff) | 443 return qed([0.50, 0.25, 0.00, 0.50, 0.00, 0.50, 0.25, 1.00], props, gerebtzoff) |
279 | 444 |
280 | 445 |
281 def weights_mean(mol, gerebtzoff = True, props = False): | 446 def weights_mean(mol, gerebtzoff=True, props=False): |
282 """ | 447 """ |
283 Calculates the QED descriptor using average descriptor weights. | 448 Calculates the QED descriptor using average descriptor weights. |
284 If props is specified we skip the calculation step and use the props-list of properties. | 449 If props is specified we skip the calculation step and use the props-list of properties. |
285 """ | 450 """ |
286 if not props: | 451 if not props: |
287 props = properties(mol) | 452 props = properties(mol) |
288 return qed([0.66, 0.46, 0.05, 0.61, 0.06, 0.65, 0.48, 0.95], props, gerebtzoff) | 453 return qed([0.66, 0.46, 0.05, 0.61, 0.06, 0.65, 0.48, 0.95], props, gerebtzoff) |
289 | 454 |
290 | 455 |
291 def weights_none(mol, gerebtzoff = True, props = False): | 456 def weights_none(mol, gerebtzoff=True, props=False): |
292 """ | 457 """ |
293 Calculates the QED descriptor using unit weights. | 458 Calculates the QED descriptor using unit weights. |
294 If props is specified we skip the calculation step and use the props-list of properties. | 459 If props is specified we skip the calculation step and use the props-list of properties. |
295 """ | 460 """ |
296 if not props: | 461 if not props: |
297 props = properties(mol) | 462 props = properties(mol) |
298 return qed([1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00], props, gerebtzoff) | 463 return qed([1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00], props, gerebtzoff) |
299 | 464 |
300 | 465 |
301 def default(mol, gerebtzoff = True): | 466 def default(mol, gerebtzoff=True): |
302 """ | 467 """ |
303 Calculates the QED descriptor using average descriptor weights and Gregory Gerebtzoff parameters. | 468 Calculates the QED descriptor using average descriptor weights and Gregory Gerebtzoff parameters. |
304 """ | 469 """ |
305 return weights_mean(mol, gerebtzoff) | 470 return weights_mean(mol, gerebtzoff) |
306 | 471 |
307 | 472 |
308 if __name__ == "__main__": | 473 if __name__ == "__main__": |
309 parser = argparse.ArgumentParser() | 474 parser = argparse.ArgumentParser() |
310 parser.add_argument('-i', '--input', | 475 parser.add_argument( |
311 required=True, | 476 "-i", "--input", required=True, help="path to the input file name" |
312 help='path to the input file name') | 477 ) |
313 parser.add_argument("-m", "--method", | 478 parser.add_argument( |
314 dest="method", | 479 "-m", |
315 choices=['max', 'mean', 'unweighted'], | 480 "--method", |
316 default="mean", | 481 dest="method", |
317 help="Specify the method you want to use.") | 482 choices=["max", "mean", "unweighted"], |
318 parser.add_argument("--iformat", | 483 default="mean", |
319 help="Input format. It must be supported by openbabel.") | 484 help="Specify the method you want to use.", |
320 parser.add_argument('-o', '--outfile', type=argparse.FileType('w+'), | 485 ) |
321 default=sys.stdout, | 486 parser.add_argument( |
322 help="path to the result file, default it sdtout") | 487 "--iformat", help="Input format. It must be supported by openbabel." |
323 parser.add_argument("--header", dest="header", action="store_true", | 488 ) |
324 default=False, | 489 parser.add_argument( |
325 help="Write header line.") | 490 "-o", |
326 | 491 "--outfile", |
492 type=argparse.FileType("w+"), | |
493 default=sys.stdout, | |
494 help="path to the result file, default it sdtout", | |
495 ) | |
496 parser.add_argument( | |
497 "--header", | |
498 dest="header", | |
499 action="store_true", | |
500 default=False, | |
501 help="Write header line.", | |
502 ) | |
327 | 503 |
328 args = parser.parse_args() | 504 args = parser.parse_args() |
329 | 505 |
330 # Elucidate filetype and open supplier | 506 # Elucidate filetype and open supplier |
331 ifile = os.path.abspath(args.input) | 507 ifile = os.path.abspath(args.input) |
341 | 517 |
342 if not args.iformat: | 518 if not args.iformat: |
343 # try to guess the filetype | 519 # try to guess the filetype |
344 filetype = check_filetype(ifile) | 520 filetype = check_filetype(ifile) |
345 else: | 521 else: |
346 filetype = args.iformat # sdf or smi | 522 filetype = args.iformat # sdf or smi |
347 | |
348 | 523 |
349 """ | 524 """ |
350 We want to store the original SMILES in the output. So in case of a SMILES file iterate over the file and convert each line separate. | 525 We want to store the original SMILES in the output. So in case of a SMILES file iterate over the file and convert each line separate. |
351 """ | 526 """ |
352 if filetype == 'sdf': | 527 if filetype == "sdf": |
353 supplier = Chem.SDMolSupplier(ifile) | 528 supplier = Chem.SDMolSupplier(ifile) |
354 # Process file | 529 # Process file |
355 if args.header: | 530 if args.header: |
356 args.outfile.write("MW\tALOGP\tHBA\tHBD\tPSA\tROTB\tAROM\tALERTS\tLRo5\tQED\tNAME\n") | 531 args.outfile.write( |
532 "MW\tALOGP\tHBA\tHBD\tPSA\tROTB\tAROM\tALERTS\tLRo5\tQED\tNAME\n" | |
533 ) | |
357 count = 0 | 534 count = 0 |
358 for mol in supplier: | 535 for mol in supplier: |
359 count += 1 | 536 count += 1 |
360 if mol is None: | 537 if mol is None: |
361 print("Warning: skipping molecule ", count, " and continuing with next.") | 538 print( |
539 "Warning: skipping molecule ", count, " and continuing with next." | |
540 ) | |
362 continue | 541 continue |
363 props = properties(mol) | 542 props = properties(mol) |
364 | 543 |
365 if args.method == 'max': | 544 if args.method == "max": |
366 calc_qed = weights_max(mol, True, props) | 545 calc_qed = weights_max(mol, True, props) |
367 elif args.method == 'unweighted': | 546 elif args.method == "unweighted": |
368 calc_qed = weights_none(mol, True, props) | 547 calc_qed = weights_none(mol, True, props) |
369 else: | 548 else: |
370 calc_qed = weights_mean(mol, True, props) | 549 calc_qed = weights_mean(mol, True, props) |
371 | 550 |
372 args.outfile.write( "%.2f\t%.3f\t%d\t%d\t%.2f\t%d\t%d\t%d\t%s\t%.3f\t%-s\n" % ( | 551 args.outfile.write( |
373 props[0], | 552 "%.2f\t%.3f\t%d\t%d\t%.2f\t%d\t%d\t%d\t%s\t%.3f\t%-s\n" |
374 props[1], | 553 % ( |
375 props[2], | 554 props[0], |
376 props[3], | 555 props[1], |
377 props[4], | 556 props[2], |
378 props[5], | 557 props[3], |
379 props[6], | 558 props[4], |
380 props[7], | 559 props[5], |
381 props[8], | 560 props[6], |
382 calc_qed, | 561 props[7], |
383 mol.GetProp("_Name"), | 562 props[8], |
384 )) | 563 calc_qed, |
385 elif filetype == 'smi': | 564 mol.GetProp("_Name"), |
386 supplier = Chem.SmilesMolSupplier( ifile, " \t", 0, 1, False, True ) | 565 ) |
566 ) | |
567 elif filetype == "smi": | |
568 supplier = Chem.SmilesMolSupplier(ifile, " \t", 0, 1, False, True) | |
387 | 569 |
388 # Process file | 570 # Process file |
389 if args.header: | 571 if args.header: |
390 args.outfile.write("MW\tALOGP\tHBA\tHBD\tPSA\tROTB\tAROM\tALERTS\tLRo5\tQED\tNAME\tSMILES\n") | 572 args.outfile.write( |
573 "MW\tALOGP\tHBA\tHBD\tPSA\tROTB\tAROM\tALERTS\tLRo5\tQED\tNAME\tSMILES\n" | |
574 ) | |
391 count = 0 | 575 count = 0 |
392 for line in open(ifile): | 576 for line in open(ifile): |
393 tokens = line.strip().split('\t') | 577 tokens = line.strip().split("\t") |
394 if len(tokens) > 1: | 578 if len(tokens) > 1: |
395 smiles, title = tokens | 579 smiles, title = tokens |
396 else: | 580 else: |
397 smiles = tokens[0] | 581 smiles = tokens[0] |
398 title = '' | 582 title = "" |
399 mol = Chem.MolFromSmiles(smiles) | 583 mol = Chem.MolFromSmiles(smiles) |
400 count += 1 | 584 count += 1 |
401 if mol is None: | 585 if mol is None: |
402 print("Warning: skipping molecule ", count, " and continuing with next.") | 586 print( |
587 "Warning: skipping molecule ", count, " and continuing with next." | |
588 ) | |
403 continue | 589 continue |
404 props = properties(mol) | 590 props = properties(mol) |
405 | 591 |
406 if args.method == 'max': | 592 if args.method == "max": |
407 calc_qed = weights_max(mol, True, props) | 593 calc_qed = weights_max(mol, True, props) |
408 elif args.method == 'unweighted': | 594 elif args.method == "unweighted": |
409 calc_qed = weights_none(mol, True, props) | 595 calc_qed = weights_none(mol, True, props) |
410 else: | 596 else: |
411 calc_qed = weights_mean(mol, True, props) | 597 calc_qed = weights_mean(mol, True, props) |
412 | 598 |
413 args.outfile.write( "%.2f\t%.3f\t%d\t%d\t%.2f\t%d\t%d\t%d\t%s\t%.3f\t%-s\t%s\n" % ( | 599 args.outfile.write( |
414 props[0], | 600 "%.2f\t%.3f\t%d\t%d\t%.2f\t%d\t%d\t%d\t%s\t%.3f\t%-s\t%s\n" |
415 props[1], | 601 % ( |
416 props[2], | 602 props[0], |
417 props[3], | 603 props[1], |
418 props[4], | 604 props[2], |
419 props[5], | 605 props[3], |
420 props[6], | 606 props[4], |
421 props[7], | 607 props[5], |
422 props[8], | 608 props[6], |
423 calc_qed, | 609 props[7], |
424 title, | 610 props[8], |
425 smiles | 611 calc_qed, |
426 )) | 612 title, |
613 smiles, | |
614 ) | |
615 ) | |
427 else: | 616 else: |
428 sys.exit("Error: unknown file-type: %s" % filetype) | 617 sys.exit("Error: unknown file-type: %s" % filetype) |