# HG changeset patch # User bgruening # Date 1495526234 14400 # Node ID 5ccd3a4327850bd6089cc46c2325bd24aad21efe planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/silicos-it/qed commit 4379e712f76f2bb12ee2cc270dd8a0e806df2cd6 diff -r 000000000000 -r 5ccd3a432785 errors.pyc Binary file errors.pyc has changed diff -r 000000000000 -r 5ccd3a432785 qed.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/qed.py Tue May 23 03:57:14 2017 -0400 @@ -0,0 +1,428 @@ +#!/usr/bin/env python +__all__ = ['weights_max', 'weights_mean', 'weights_none', 'default'] + +# RDKit +from rdkit.Chem import Descriptors +from rdkit import Chem + +# General +from copy import deepcopy +from math import exp, log +import sys, os, re +import argparse + + +class SilicosItError(Exception): + """Base class for exceptions in Silicos-it code""" + pass + +class WrongArgument(SilicosItError): + """ + Exception raised when argument to function is not of correct type. + + Attributes: + function -- function in which error occurred + msg -- explanation of the error + """ + def __init__(self, function, msg): + self.function = function + self.msg = msg + +def check_filetype(filepath): + mol = False + possible_inchi = True + for line_counter, line in enumerate(open(filepath)): + if line_counter > 10000: + break + if line.find('$$$$') != -1: + return 'sdf' + elif line.find('@MOLECULE') != -1: + return 'mol2' + elif line.find('ligand id') != -1: + return 'drf' + elif possible_inchi and re.findall('^InChI=', line): + return 'inchi' + elif re.findall('^M\s+END', line): + mol = True + # first line is not an InChI, so it can't be an InChI file + possible_inchi = False + + if mol: + # END can occures before $$$$, so and SDF file will + # be recognised as mol, if you not using this hack' + return 'mol' + return 'smi' + +AliphaticRings = Chem.MolFromSmarts('[$([A;R][!a])]') + +AcceptorSmarts = [ + '[oH0;X2]', + '[OH1;X2;v2]', + '[OH0;X2;v2]', + '[OH0;X1;v2]', + '[O-;X1]', + '[SH0;X2;v2]', + '[SH0;X1;v2]', + '[S-;X1]', + '[nH0;X2]', + '[NH0;X1;v3]', + '[$([N;+0;X3;v3]);!$(N[C,S]=O)]' + ] +Acceptors = [] +for hba in AcceptorSmarts: + Acceptors.append(Chem.MolFromSmarts(hba)) + +StructuralAlertSmarts = [ + '*1[O,S,N]*1', + '[S,C](=[O,S])[F,Br,Cl,I]', + '[CX4][Cl,Br,I]', + '[C,c]S(=O)(=O)O[C,c]', + '[$([CH]),$(CC)]#CC(=O)[C,c]', + '[$([CH]),$(CC)]#CC(=O)O[C,c]', + 'n[OH]', + '[$([CH]),$(CC)]#CS(=O)(=O)[C,c]', + 'C=C(C=O)C=O', + 'n1c([F,Cl,Br,I])cccc1', + '[CH1](=O)', + '[O,o][O,o]', + '[C;!R]=[N;!R]', + '[N!R]=[N!R]', + '[#6](=O)[#6](=O)', + '[S,s][S,s]', + '[N,n][NH2]', + 'C(=O)N[NH2]', + '[C,c]=S', + '[$([CH2]),$([CH][CX4]),$(C([CX4])[CX4])]=[$([CH2]),$([CH][CX4]),$(C([CX4])[CX4])]', + 'C1(=[O,N])C=CC(=[O,N])C=C1', + 'C1(=[O,N])C(=[O,N])C=CC=C1', + 'a21aa3a(aa1aaaa2)aaaa3', + 'a31a(a2a(aa1)aaaa2)aaaa3', + 'a1aa2a3a(a1)A=AA=A3=AA=A2', + 'c1cc([NH2])ccc1', + '[Hg,Fe,As,Sb,Zn,Se,se,Te,B,Si,Na,Ca,Ge,Ag,Mg,K,Ba,Sr,Be,Ti,Mo,Mn,Ru,Pd,Ni,Cu,Au,Cd,Al,Ga,Sn,Rh,Tl,Bi,Nb,Li,Pb,Hf,Ho]', + 'I', + 'OS(=O)(=O)[O-]', + '[N+](=O)[O-]', + 'C(=O)N[OH]', + 'C1NC(=O)NC(=O)1', + '[SH]', + '[S-]', + 'c1ccc([Cl,Br,I,F])c([Cl,Br,I,F])c1[Cl,Br,I,F]', + 'c1cc([Cl,Br,I,F])cc([Cl,Br,I,F])c1[Cl,Br,I,F]', + '[CR1]1[CR1][CR1][CR1][CR1][CR1][CR1]1', + '[CR1]1[CR1][CR1]cc[CR1][CR1]1', + '[CR2]1[CR2][CR2][CR2][CR2][CR2][CR2][CR2]1', + '[CR2]1[CR2][CR2]cc[CR2][CR2][CR2]1', + '[CH2R2]1N[CH2R2][CH2R2][CH2R2][CH2R2][CH2R2]1', + '[CH2R2]1N[CH2R2][CH2R2][CH2R2][CH2R2][CH2R2][CH2R2]1', + 'C#C', + '[OR2,NR2]@[CR2]@[CR2]@[OR2,NR2]@[CR2]@[CR2]@[OR2,NR2]', + '[$([N+R]),$([n+R]),$([N+]=C)][O-]', + '[C,c]=N[OH]', + '[C,c]=NOC=O', + '[C,c](=O)[CX4,CR0X3,O][C,c](=O)', + 'c1ccc2c(c1)ccc(=O)o2', + '[O+,o+,S+,s+]', + 'N=C=O', + '[NX3,NX4][F,Cl,Br,I]', + 'c1ccccc1OC(=O)[#6]', + '[CR0]=[CR0][CR0]=[CR0]', + '[C+,c+,C-,c-]', + 'N=[N+]=[N-]', + 'C12C(NC(N1)=O)CSC2', + 'c1c([OH])c([OH,NH2,NH])ccc1', + 'P', + '[N,O,S]C#N', + 'C=C=O', + '[Si][F,Cl,Br,I]', + '[SX2]O', + '[SiR0,CR0](c1ccccc1)(c2ccccc2)(c3ccccc3)', + 'O1CCCCC1OC2CCC3CCCCC3C2', + 'N=[CR0][N,n,O,S]', + '[cR2]1[cR2][cR2]([Nv3X3,Nv4X4])[cR2][cR2][cR2]1[cR2]2[cR2][cR2][cR2]([Nv3X3,Nv4X4])[cR2][cR2]2', + 'C=[C!r]C#N', + '[cR2]1[cR2]c([N+0X3R0,nX3R0])c([N+0X3R0,nX3R0])[cR2][cR2]1', + '[cR2]1[cR2]c([N+0X3R0,nX3R0])[cR2]c([N+0X3R0,nX3R0])[cR2]1', + '[cR2]1[cR2]c([N+0X3R0,nX3R0])[cR2][cR2]c1([N+0X3R0,nX3R0])', + '[OH]c1ccc([OH,NH2,NH])cc1', + 'c1ccccc1OC(=O)O', + '[SX2H0][N]', + 'c12ccccc1(SC(S)=N2)', + 'c12ccccc1(SC(=S)N2)', + 'c1nnnn1C=O', + 's1c(S)nnc1NC=O', + 'S1C=CSC1=S', + 'C(=O)Onnn', + 'OS(=O)(=O)C(F)(F)F', + 'N#CC[OH]', + 'N#CC(=O)', + 'S(=O)(=O)C#N', + 'N[CH2]C#N', + 'C1(=O)NCC1', + 'S(=O)(=O)[O-,OH]', + 'NC[F,Cl,Br,I]', + 'C=[C!r]O', + '[NX2+0]=[O+0]', + '[OR0,NR0][OR0,NR0]', + 'C(=O)O[C,H1].C(=O)O[C,H1].C(=O)O[C,H1]', + '[CX2R0][NX3R0]', + 'c1ccccc1[C;!R]=[C;!R]c2ccccc2', + '[NX3R0,NX4R0,OR0,SX2R0][CX4][NX3R0,NX4R0,OR0,SX2R0]', + '[s,S,c,C,n,N,o,O]~[n+,N+](~[s,S,c,C,n,N,o,O])(~[s,S,c,C,n,N,o,O])~[s,S,c,C,n,N,o,O]', + '[s,S,c,C,n,N,o,O]~[nX3+,NX3+](~[s,S,c,C,n,N])~[s,S,c,C,n,N]', + '[*]=[N+]=[*]', + '[SX3](=O)[O-,OH]', + 'N#N', + 'F.F.F.F', + '[R0;D2][R0;D2][R0;D2][R0;D2]', + '[cR,CR]~C(=O)NC(=O)~[cR,CR]', + 'C=!@CC=[O,S]', + '[#6,#8,#16][C,c](=O)O[C,c]', + 'c[C;R0](=[O,S])[C,c]', + 'c[SX2][C;!R]', + 'C=C=C', + 'c1nc([F,Cl,Br,I,S])ncc1', + 'c1ncnc([F,Cl,Br,I,S])c1', + 'c1nc(c2c(n1)nc(n2)[F,Cl,Br,I])', + '[C,c]S(=O)(=O)c1ccc(cc1)F', + '[15N]', + '[13C]', + '[18O]', + '[34S]' + ] + +StructuralAlerts = [] +for smarts in StructuralAlertSmarts: + StructuralAlerts.append(Chem.MolFromSmarts(smarts)) + + +# ADS parameters for the 8 molecular properties: [row][column] +# rows[8]: MW, ALOGP, HBA, HBD, PSA, ROTB, AROM, ALERTS +# columns[7]: A, B, C, D, E, F, DMAX +# ALOGP parameters from Gregory Gerebtzoff (2012, Roche) +pads1 = [ [2.817065973, 392.5754953, 290.7489764, 2.419764353, 49.22325677, 65.37051707, 104.9805561], + [0.486849448, 186.2293718, 2.066177165, 3.902720615, 1.027025453, 0.913012565, 145.4314800], + [2.948620388, 160.4605972, 3.615294657, 4.435986202, 0.290141953, 1.300669958, 148.7763046], + [1.618662227, 1010.051101, 0.985094388, 0.000000001, 0.713820843, 0.920922555, 258.1632616], + [1.876861559, 125.2232657, 62.90773554, 87.83366614, 12.01999824, 28.51324732, 104.5686167], + [0.010000000, 272.4121427, 2.558379970, 1.565547684, 1.271567166, 2.758063707, 105.4420403], + [3.217788970, 957.7374108, 2.274627939, 0.000000001, 1.317690384, 0.375760881, 312.3372610], + [0.010000000, 1199.094025, -0.09002883, 0.000000001, 0.185904477, 0.875193782, 417.7253140] ] +# ALOGP parameters from the original publication +pads2 = [ [2.817065973, 392.5754953, 290.7489764, 2.419764353, 49.22325677, 65.37051707, 104.9805561], + [3.172690585, 137.8624751, 2.534937431, 4.581497897, 0.822739154, 0.576295591, 131.3186604], + [2.948620388, 160.4605972, 3.615294657, 4.435986202, 0.290141953, 1.300669958, 148.7763046], + [1.618662227, 1010.051101, 0.985094388, 0.000000001, 0.713820843, 0.920922555, 258.1632616], + [1.876861559, 125.2232657, 62.90773554, 87.83366614, 12.01999824, 28.51324732, 104.5686167], + [0.010000000, 272.4121427, 2.558379970, 1.565547684, 1.271567166, 2.758063707, 105.4420403], + [3.217788970, 957.7374108, 2.274627939, 0.000000001, 1.317690384, 0.375760881, 312.3372610], + [0.010000000, 1199.094025, -0.09002883, 0.000000001, 0.185904477, 0.875193782, 417.7253140] ] + +def ads(x, a, b, c, d, e, f, dmax): + return ((a+(b/(1+exp(-1*(x-c+d/2)/e))*(1-1/(1+exp(-1*(x-c-d/2)/f))))) / dmax) + +def properties(mol): + """ + Calculates the properties that are required to calculate the QED descriptor. + """ + matches = [] + if mol is None: + raise WrongArgument("properties(mol)", "mol argument is \'None\'") + x = [0] * 9 + x[0] = Descriptors.MolWt(mol) # MW + x[1] = Descriptors.MolLogP(mol) # ALOGP + for hba in Acceptors: # HBA + if mol.HasSubstructMatch(hba): + matches = mol.GetSubstructMatches(hba) + x[2] += len(matches) + x[3] = Descriptors.NumHDonors(mol) # HBD + x[4] = Descriptors.TPSA(mol) # PSA + x[5] = Descriptors.NumRotatableBonds(mol) # ROTB + x[6] = Chem.GetSSSR(Chem.DeleteSubstructs(deepcopy(mol), AliphaticRings)) # AROM + for alert in StructuralAlerts: # ALERTS + if (mol.HasSubstructMatch(alert)): x[7] += 1 + ro5_failed = 0 + if x[3] > 5: + ro5_failed += 1 #HBD + if x[2] > 10: + ro5_failed += 1 #HBA + if x[0] >= 500: + ro5_failed += 1 + if x[1] > 5: + ro5_failed += 1 + x[8] = ro5_failed + return x + + +def qed(w, p, gerebtzoff): + d = [0.00] * 8 + if gerebtzoff: + for i in range(0, 8): + d[i] = ads(p[i], pads1[i][0], pads1[i][1], pads1[i][2], pads1[i][3], pads1[i][4], pads1[i][5], pads1[i][6]) + else: + for i in range(0, 8): + d[i] = ads(p[i], pads2[i][0], pads2[i][1], pads2[i][2], pads2[i][3], pads2[i][4], pads2[i][5], pads2[i][6]) + t = 0.0 + for i in range(0, 8): + t += w[i] * log(d[i]) + return (exp(t / sum(w))) + + +def weights_max(mol, gerebtzoff = True, props = False): + """ + Calculates the QED descriptor using maximal descriptor weights. + If props is specified we skip the calculation step and use the props-list of properties. + """ + if not props: + props = properties(mol) + return qed([0.50, 0.25, 0.00, 0.50, 0.00, 0.50, 0.25, 1.00], props, gerebtzoff) + + +def weights_mean(mol, gerebtzoff = True, props = False): + """ + Calculates the QED descriptor using average descriptor weights. + If props is specified we skip the calculation step and use the props-list of properties. + """ + if not props: + props = properties(mol) + return qed([0.66, 0.46, 0.05, 0.61, 0.06, 0.65, 0.48, 0.95], props, gerebtzoff) + + +def weights_none(mol, gerebtzoff = True, props = False): + """ + Calculates the QED descriptor using unit weights. + If props is specified we skip the calculation step and use the props-list of properties. + """ + if not props: + props = properties(mol) + return qed([1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00], props, gerebtzoff) + + +def default(mol, gerebtzoff = True): + """ + Calculates the QED descriptor using average descriptor weights and Gregory Gerebtzoff parameters. + """ + return weights_mean(mol, gerebtzoff) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument('-i', '--input', + required=True, + help='path to the input file name') + parser.add_argument("-m", "--method", + dest="method", + choices=['max', 'mean', 'unweighted'], + default="mean", + help="Specify the method you want to use.") + parser.add_argument("--iformat", + help="Input format. It must be supported by openbabel.") + parser.add_argument('-o', '--outfile', type=argparse.FileType('w+'), + default=sys.stdout, + help="path to the result file, default it sdtout") + parser.add_argument("--header", dest="header", action="store_true", + default=False, + help="Write header line.") + + + args = parser.parse_args() + + # Elucidate filetype and open supplier + ifile = os.path.abspath(args.input) + if not os.path.isfile(ifile): + print "Error: ", ifile, " is not a file or cannot be found." + sys.exit(1) + if not os.path.exists(ifile): + print "Error: ", ifile, " does not exist or cannot be found." + sys.exit(1) + if not os.access(ifile, os.R_OK): + print "Error: ", ifile, " is not readable." + sys.exit(1) + + if not args.iformat: + # try to guess the filetype + filetype = check_filetype( ifile ) + else: + filetype = args.iformat # sdf or smi + + + """ + We want to store the original SMILES in the output. So in case of a SMILES file iterate over the file and convert each line separate. + """ + if filetype == 'sdf': + supplier = Chem.SDMolSupplier( ifile ) + # Process file + if args.header: + args.outfile.write("MW\tALOGP\tHBA\tHBD\tPSA\tROTB\tAROM\tALERTS\tLRo5\tQED\tNAME\n") + count = 0 + for mol in supplier: + count += 1 + if mol is None: + print "Warning: skipping molecule ", count, " and continuing with next." + continue + props = properties(mol) + + if args.method == 'max': + calc_qed = weights_max(mol, True, props) + elif args.method == 'unweighted': + calc_qed = weights_none(mol, True, props) + else: + calc_qed = weights_mean(mol, True, props) + + args.outfile.write( "%.2f\t%.3f\t%d\t%d\t%.2f\t%d\t%d\t%d\t%s\t%.3f\t%-s\n" % ( + props[0], + props[1], + props[2], + props[3], + props[4], + props[5], + props[6], + props[7], + props[8], + calc_qed, + mol.GetProp("_Name"), + )) + elif filetype == 'smi': + supplier = Chem.SmilesMolSupplier( ifile, " \t", 0, 1, False, True ) + + # Process file + if args.header: + args.outfile.write("MW\tALOGP\tHBA\tHBD\tPSA\tROTB\tAROM\tALERTS\tLRo5\tQED\tNAME\tSMILES\n") + count = 0 + for line in open(ifile): + tokens = line.strip().split('\t') + if len(tokens) > 1: + smiles, title = tokens + else: + smiles = tokens[0] + title = '' + mol = Chem.MolFromSmiles(smiles) + count += 1 + if mol is None: + print "Warning: skipping molecule ", count, " and continuing with next." + continue + props = properties(mol) + + if args.method == 'max': + calc_qed = weights_max(mol, True, props) + elif args.method == 'unweighted': + calc_qed = weights_none(mol, True, props) + else: + calc_qed = weights_mean(mol, True, props) + + args.outfile.write( "%.2f\t%.3f\t%d\t%d\t%.2f\t%d\t%d\t%d\t%s\t%.3f\t%-s\t%s\n" % ( + props[0], + props[1], + props[2], + props[3], + props[4], + props[5], + props[6], + props[7], + props[8], + calc_qed, + title, + smiles + )) + else: + sys.exit("Error: unknown file-type: %s" % filetype) diff -r 000000000000 -r 5ccd3a432785 silicos_qed.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/silicos_qed.xml Tue May 23 03:57:14 2017 -0400 @@ -0,0 +1,123 @@ + + quantitative estimation (QED) + + + rdkit + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 10.1038/nchem.1243 + + diff -r 000000000000 -r 5ccd3a432785 test-data/qed_test.smi --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/qed_test.smi Tue May 23 03:57:14 2017 -0400 @@ -0,0 +1,25 @@ +Nc1nc(NC2CC2)c2ncn([C@@H]3C[C@H](CO)C=C3)c2n1 Abacavir +CC(=O)NCCCS(O)(=O)=O Acamprosate +CCCC(=O)Nc1ccc(OCC(O)CNC(C)C)c(c1)C(C)=O Acebutolol +CC(=O)Nc1ccc(O)cc1 Acetaminophen +CC(=O)Nc1nnc(s1)S(N)(=O)=O Acetazolamide +CC(=O)c1ccc(cc1)S(=O)(=O)NC(=O)NC1CCCCC1 Acetohexamide +CC(=O)c1ccc2Sc3ccccc3N(CCCN3CCN(CCO)CC3)c2c1 Acetophenazine +Fc4ccc(C1CCNCC1COc3ccc2OCOc2c3)cc4 Paroxetine +Cc1oncc1C(=O)Nc2ccc(C(F)(F)F)cc2 Leflunomide +CN1C4CCCC1CC(NC(=O)c2nn(C)c3ccccc23)C4 Granisetron +CCCN2CC(CSC)CC1c3cccc4[nH]cc(CC12)c34 Pergolide +CCc3c(C)[nH]c2CCC(CN1CCOCC1)C(=O)c23 Molindone +CCCCCCCCCCCCCCCC(=O)OCC(NC(=O)C(Cl)Cl)C(O)c1ccc([N+]([O-])=O)cc1 ChloramphenicalPalmitate +CCCCCCCCCCCCCCCOC(=O)C2C(O)C(O)C(C(NC(=O)C1CC(CCC)CN1C)C(C)Cl)OC2SC ClindamycinPalmitate +CCOc3nc2cccc(C(=O)OC(C)OC(=O)OC1CCCCC1)c2n3Cc6ccc(c4ccccc4c5nn[nH]n5)cc6 CandesartanCilexetil +CN(C)CCC=c2c1ccccc1sc3ccc(Cl)cc23 Chlorprothixene +O=c3c(O)c(C2CCC(c1ccc(Cl)cc1)CC2)c(=O)c4ccccc34 Atovaquone +CN(C)CCCN3c1ccccc1CCc2ccc(Cl)cc23 Clomipramine +CN4CCCC(CC3c1ccccc1Sc2ccccc23)C4 Methixene +CCN(CC)C(C)Cn3c1ccccc1sc2ccccc23 Ethopropazine +N=C(CCSCc1csc(N=C(N)N)n1)NS(N)(=O)=O Famotidine +CNC(=NCCSCc1nc[nH]c1C)NC#N Cimetidine +CCCCCNC(=N)NN=Cc1c[nH]c2ccc(CO)cc12 Tegaserod +C=CC3=C(C(=O)O)N2C(=O)C(NC(=O)C(=NO)c1csc(N)n1)C2SC3 Cefdinir +CC5(C)SC4C(NC(=O)C(C(=O)Oc2ccc1CCCc1c2)c3ccccc3)C(=O)N4C5C(=O)O CarbenicillinIndanyl diff -r 000000000000 -r 5ccd3a432785 test-data/qed_test_max.tab --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/qed_test_max.tab Tue May 23 03:57:14 2017 -0400 @@ -0,0 +1,26 @@ +MW ALOGP HBA HBD PSA ROTB AROM ALERTS LRo5 QED NAME SMILES +286.34 1.092 6 3 101.88 4 2 1 0 0.715 Abacavir Nc1nc(NC2CC2)c2ncn([C@@H]3C[C@H](CO)C=C3)c2n1 +181.21 -0.600 4 2 83.47 4 0 2 0 0.436 Acamprosate CC(=O)NCCCS(O)(=O)=O +336.43 2.365 5 3 87.66 10 1 1 0 0.550 Acebutolol CCCC(=O)Nc1ccc(OCC(O)CNC(C)C)c(c1)C(C)=O +151.16 1.351 2 2 49.33 1 1 1 0 0.583 Acetaminophen CC(=O)Nc1ccc(O)cc1 +222.25 -0.856 5 2 115.04 2 1 1 0 0.671 Acetazolamide CC(=O)Nc1nnc(s1)S(N)(=O)=O +324.40 2.210 4 2 92.34 4 1 1 0 0.808 Acetohexamide CC(=O)c1ccc(cc1)S(=O)(=O)NC(=O)NC1CCCCC1 +411.57 3.492 6 1 47.02 7 2 1 0 0.674 Acetophenazine CC(=O)c1ccc2Sc3ccccc3N(CCCN3CCN(CCO)CC3)c2c1 +329.37 3.327 4 1 39.72 4 2 0 0 0.913 Paroxetine Fc4ccc(C1CCNCC1COc3ccc2OCOc2c3)cc4 +270.21 3.254 3 1 55.13 2 2 0 0 0.889 Leflunomide Cc1oncc1C(=O)Nc2ccc(C(F)(F)F)cc2 +312.42 2.318 3 1 50.16 2 2 0 0 0.915 Granisetron CN1C4CCCC1CC(NC(=O)c2nn(C)c3ccccc23)C4 +314.50 4.271 2 1 19.03 4 2 0 0 0.886 Pergolide CCCN2CC(CSC)CC1c3cccc4[nH]cc(CC12)c34 +276.38 1.963 3 1 45.33 3 1 0 0 0.916 Molindone CCc3c(C)[nH]c2CCC(CN1CCOCC1)C(=O)c23 +561.55 6.941 6 2 118.77 21 1 5 2 0.041 ChloramphenicalPalmitate CCCCCCCCCCCCCCCC(=O)OCC(NC(=O)C(Cl)Cl)C(O)c1ccc([N+]([O-])=O)cc1 +663.41 6.279 8 3 108.33 22 0 3 2 0.064 ClindamycinPalmitate CCCCCCCCCCCCCCCOC(=O)C2C(O)C(O)C(C(NC(=O)C1CC(CCC)CN1C)C(C)Cl)OC2SC +610.67 6.319 10 1 143.34 10 5 2 2 0.169 CandesartanCilexetil CCOc3nc2cccc(C(=O)OC(C)OC(=O)OC1CCCCC1)c2n3Cc6ccc(c4ccccc4c5nn[nH]n5)cc6 +315.87 5.188 2 0 3.24 3 3 0 1 0.693 Chlorprothixene CN(C)CCC=c2c1ccccc1sc3ccc(Cl)cc23 +366.84 5.505 3 1 54.37 2 2 0 1 0.771 Atovaquone O=c3c(O)c(C2CCC(c1ccc(Cl)cc1)CC2)c(=O)c4ccccc34 +314.86 4.528 2 0 6.48 4 2 0 0 0.802 Clomipramine CN(C)CCCN3c1ccccc1CCc2ccc(Cl)cc23 +309.48 5.015 2 0 3.24 2 2 0 1 0.765 Methixene CN4CCCC(CC3c1ccccc1Sc2ccccc23)C4 +312.48 5.020 3 0 6.48 5 2 0 1 0.761 Ethopropazine CCN(CC)C(C)Cn3c1ccccc1sc2ccccc23 +337.46 -0.558 6 5 173.33 7 1 3 0 0.219 Famotidine N=C(CCSCc1csc(N=C(N)N)n1)NS(N)(=O)=O +252.35 0.597 5 3 88.89 5 1 5 0 0.157 Cimetidine CNC(=NCCSCc1nc[nH]c1C)NC#N +301.39 2.298 3 5 96.29 7 2 4 0 0.165 Tegaserod CCCCCNC(=N)NN=Cc1c[nH]c2ccc(CO)cc12 +395.42 -0.172 8 4 158.21 5 1 4 0 0.181 Cefdinir C=CC3=C(C(=O)O)N2C(=O)C(NC(=O)C(=NO)c1csc(N)n1)C2SC3 +494.57 2.496 7 2 113.01 6 2 4 0 0.202 CarbenicillinIndanyl CC5(C)SC4C(NC(=O)C(C(=O)Oc2ccc1CCCc1c2)c3ccccc3)C(=O)N4C5C(=O)O diff -r 000000000000 -r 5ccd3a432785 test-data/qed_test_mean.tab --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/qed_test_mean.tab Tue May 23 03:57:14 2017 -0400 @@ -0,0 +1,26 @@ +MW ALOGP HBA HBD PSA ROTB AROM ALERTS LRo5 QED NAME SMILES +286.34 1.092 6 3 101.88 4 2 1 0 0.737 Abacavir Nc1nc(NC2CC2)c2ncn([C@@H]3C[C@H](CO)C=C3)c2n1 +181.21 -0.600 4 2 83.47 4 0 2 0 0.467 Acamprosate CC(=O)NCCCS(O)(=O)=O +336.43 2.365 5 3 87.66 10 1 1 0 0.571 Acebutolol CCCC(=O)Nc1ccc(OCC(O)CNC(C)C)c(c1)C(C)=O +151.16 1.351 2 2 49.33 1 1 1 0 0.602 Acetaminophen CC(=O)Nc1ccc(O)cc1 +222.25 -0.856 5 2 115.04 2 1 1 0 0.662 Acetazolamide CC(=O)Nc1nnc(s1)S(N)(=O)=O +324.40 2.210 4 2 92.34 4 1 1 0 0.833 Acetohexamide CC(=O)c1ccc(cc1)S(=O)(=O)NC(=O)NC1CCCCC1 +411.57 3.492 6 1 47.02 7 2 1 0 0.688 Acetophenazine CC(=O)c1ccc2Sc3ccccc3N(CCCN3CCN(CCO)CC3)c2c1 +329.37 3.327 4 1 39.72 4 2 0 0 0.917 Paroxetine Fc4ccc(C1CCNCC1COc3ccc2OCOc2c3)cc4 +270.21 3.254 3 1 55.13 2 2 0 0 0.896 Leflunomide Cc1oncc1C(=O)Nc2ccc(C(F)(F)F)cc2 +312.42 2.318 3 1 50.16 2 2 0 0 0.927 Granisetron CN1C4CCCC1CC(NC(=O)c2nn(C)c3ccccc23)C4 +314.50 4.271 2 1 19.03 4 2 0 0 0.871 Pergolide CCCN2CC(CSC)CC1c3cccc4[nH]cc(CC12)c34 +276.38 1.963 3 1 45.33 3 1 0 0 0.923 Molindone CCc3c(C)[nH]c2CCC(CN1CCOCC1)C(=O)c23 +561.55 6.941 6 2 118.77 21 1 5 2 0.056 ChloramphenicalPalmitate CCCCCCCCCCCCCCCC(=O)OCC(NC(=O)C(Cl)Cl)C(O)c1ccc([N+]([O-])=O)cc1 +663.41 6.279 8 3 108.33 22 0 3 2 0.071 ClindamycinPalmitate CCCCCCCCCCCCCCCOC(=O)C2C(O)C(O)C(C(NC(=O)C1CC(CCC)CN1C)C(C)Cl)OC2SC +610.67 6.319 10 1 143.34 10 5 2 2 0.141 CandesartanCilexetil CCOc3nc2cccc(C(=O)OC(C)OC(=O)OC1CCCCC1)c2n3Cc6ccc(c4ccccc4c5nn[nH]n5)cc6 +315.87 5.188 2 0 3.24 3 3 0 1 0.629 Chlorprothixene CN(C)CCC=c2c1ccccc1sc3ccc(Cl)cc23 +366.84 5.505 3 1 54.37 2 2 0 1 0.741 Atovaquone O=c3c(O)c(C2CCC(c1ccc(Cl)cc1)CC2)c(=O)c4ccccc34 +314.86 4.528 2 0 6.48 4 2 0 0 0.782 Clomipramine CN(C)CCCN3c1ccccc1CCc2ccc(Cl)cc23 +309.48 5.015 2 0 3.24 2 2 0 1 0.735 Methixene CN4CCCC(CC3c1ccccc1Sc2ccccc23)C4 +312.48 5.020 3 0 6.48 5 2 0 1 0.734 Ethopropazine CCN(CC)C(C)Cn3c1ccccc1sc2ccccc23 +337.46 -0.558 6 5 173.33 7 1 3 0 0.263 Famotidine N=C(CCSCc1csc(N=C(N)N)n1)NS(N)(=O)=O +252.35 0.597 5 3 88.89 5 1 5 0 0.239 Cimetidine CNC(=NCCSCc1nc[nH]c1C)NC#N +301.39 2.298 3 5 96.29 7 2 4 0 0.235 Tegaserod CCCCCNC(=N)NN=Cc1c[nH]c2ccc(CO)cc12 +395.42 -0.172 8 4 158.21 5 1 4 0 0.239 Cefdinir C=CC3=C(C(=O)O)N2C(=O)C(NC(=O)C(=NO)c1csc(N)n1)C2SC3 +494.57 2.496 7 2 113.01 6 2 4 0 0.274 CarbenicillinIndanyl CC5(C)SC4C(NC(=O)C(C(=O)Oc2ccc1CCCc1c2)c3ccccc3)C(=O)N4C5C(=O)O diff -r 000000000000 -r 5ccd3a432785 test-data/qed_test_unweighted.tab --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/qed_test_unweighted.tab Tue May 23 03:57:14 2017 -0400 @@ -0,0 +1,26 @@ +MW ALOGP HBA HBD PSA ROTB AROM ALERTS LRo5 QED NAME SMILES +286.34 1.092 6 3 101.88 4 2 1 0 0.713 Abacavir Nc1nc(NC2CC2)c2ncn([C@@H]3C[C@H](CO)C=C3)c2n1 +181.21 -0.600 4 2 83.47 4 0 2 0 0.559 Acamprosate CC(=O)NCCCS(O)(=O)=O +336.43 2.365 5 3 87.66 10 1 1 0 0.624 Acebutolol CCCC(=O)Nc1ccc(OCC(O)CNC(C)C)c(c1)C(C)=O +151.16 1.351 2 2 49.33 1 1 1 0 0.688 Acetaminophen CC(=O)Nc1ccc(O)cc1 +222.25 -0.856 5 2 115.04 2 1 1 0 0.645 Acetazolamide CC(=O)Nc1nnc(s1)S(N)(=O)=O +324.40 2.210 4 2 92.34 4 1 1 0 0.848 Acetohexamide CC(=O)c1ccc(cc1)S(=O)(=O)NC(=O)NC1CCCCC1 +411.57 3.492 6 1 47.02 7 2 1 0 0.711 Acetophenazine CC(=O)c1ccc2Sc3ccccc3N(CCCN3CCN(CCO)CC3)c2c1 +329.37 3.327 4 1 39.72 4 2 0 0 0.919 Paroxetine Fc4ccc(C1CCNCC1COc3ccc2OCOc2c3)cc4 +270.21 3.254 3 1 55.13 2 2 0 0 0.921 Leflunomide Cc1oncc1C(=O)Nc2ccc(C(F)(F)F)cc2 +312.42 2.318 3 1 50.16 2 2 0 0 0.950 Granisetron CN1C4CCCC1CC(NC(=O)c2nn(C)c3ccccc23)C4 +314.50 4.271 2 1 19.03 4 2 0 0 0.830 Pergolide CCCN2CC(CSC)CC1c3cccc4[nH]cc(CC12)c34 +276.38 1.963 3 1 45.33 3 1 0 0 0.941 Molindone CCc3c(C)[nH]c2CCC(CN1CCOCC1)C(=O)c23 +561.55 6.941 6 2 118.77 21 1 5 2 0.114 ChloramphenicalPalmitate CCCCCCCCCCCCCCCC(=O)OCC(NC(=O)C(Cl)Cl)C(O)c1ccc([N+]([O-])=O)cc1 +663.41 6.279 8 3 108.33 22 0 3 2 0.110 ClindamycinPalmitate CCCCCCCCCCCCCCCOC(=O)C2C(O)C(O)C(C(NC(=O)C1CC(CCC)CN1C)C(C)Cl)OC2SC +610.67 6.319 10 1 143.34 10 5 2 2 0.124 CandesartanCilexetil CCOc3nc2cccc(C(=O)OC(C)OC(=O)OC1CCCCC1)c2n3Cc6ccc(c4ccccc4c5nn[nH]n5)cc6 +315.87 5.188 2 0 3.24 3 3 0 1 0.553 Chlorprothixene CN(C)CCC=c2c1ccccc1sc3ccc(Cl)cc23 +366.84 5.505 3 1 54.37 2 2 0 1 0.759 Atovaquone O=c3c(O)c(C2CCC(c1ccc(Cl)cc1)CC2)c(=O)c4ccccc34 +314.86 4.528 2 0 6.48 4 2 0 0 0.707 Clomipramine CN(C)CCCN3c1ccccc1CCc2ccc(Cl)cc23 +309.48 5.015 2 0 3.24 2 2 0 1 0.651 Methixene CN4CCCC(CC3c1ccccc1Sc2ccccc23)C4 +312.48 5.020 3 0 6.48 5 2 0 1 0.669 Ethopropazine CCN(CC)C(C)Cn3c1ccccc1sc2ccccc23 +337.46 -0.558 6 5 173.33 7 1 3 0 0.292 Famotidine N=C(CCSCc1csc(N=C(N)N)n1)NS(N)(=O)=O +252.35 0.597 5 3 88.89 5 1 5 0 0.411 Cimetidine CNC(=NCCSCc1nc[nH]c1C)NC#N +301.39 2.298 3 5 96.29 7 2 4 0 0.389 Tegaserod CCCCCNC(=N)NN=Cc1c[nH]c2ccc(CO)cc12 +395.42 -0.172 8 4 158.21 5 1 4 0 0.275 Cefdinir C=CC3=C(C(=O)O)N2C(=O)C(NC(=O)C(=NO)c1csc(N)n1)C2SC3 +494.57 2.496 7 2 113.01 6 2 4 0 0.382 CarbenicillinIndanyl CC5(C)SC4C(NC(=O)C(C(=O)Oc2ccc1CCCc1c2)c3ccccc3)C(=O)N4C5C(=O)O