Mercurial > repos > marcel > caddsuite_mac10_6
comparison CADDSuite-1.0.1/data/OpenBabel/MACCS.txt @ 5:e30a41af9d2b
Uploaded
author | marcel |
---|---|
date | Tue, 15 Nov 2011 10:53:16 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
4:af446ca2d5c6 | 5:e30a41af9d2b |
---|---|
1 #Comments after SMARTS | |
2 # Extracted from RDKit r1553 Nov 2010 rdkit/Chem/MACCSKeys.py | |
3 # | |
4 # Copyright (C) 2001-2008 greg Landrum and Rational Discovery LLC | |
5 # | |
6 # @@ All Rights Reserved @@ | |
7 # This file is part of the RDKit. | |
8 # The contents are covered by the terms of the BSD license | |
9 # which is included in the file license.txt, found at the root | |
10 # of the RDKit source tree. | |
11 # | |
12 # SMARTS definitions for the publicly available MACCS keys | |
13 | |
14 # I compared the MACCS fingerprints generated here with those from two | |
15 # other packages (not MDL, unfortunately). Of course there are | |
16 # disagreements between the various fingerprints still, but I think | |
17 # these definitions work pretty well. Some notes: | |
18 | |
19 # 1) most of the differences have to do with aromaticity | |
20 # 2) there's a discrepancy sometimes because the current RDKit | |
21 # definitions do not require multiple matches to be distinct. e.g. the | |
22 # SMILES C(=O)CC(=O) can match the (hypothetical) key O=CC twice in my | |
23 # definition. It's not clear to me what the correct behavior is. | |
24 # 3) Some keys are not fully defined in the MDL documentation | |
25 # 4) Two keys, 125 and 166, have to be done outside of SMARTS. | |
26 # 5) Key 1 (ISOTOPE) isn't defined | |
27 | |
28 # these are SMARTS patterns corresponding to the MDL MACCS keys | |
29 1:('?',0), # ISOTOPE | |
30 #2:('[#103,#104,#105,#106,#107,#108,#109,#110,#111,#112]',0), # ISOTOPE Not complete | |
31 2:('[#103,#104]',0), # ISOTOPE Not complete | |
32 3:('[Ge,As,Se,Sn,Sb,Te,Tl,Pb,Bi]',0), # Group IVa,Va,VIa Periods 4-6 (Ge...) *NOTE* spec wrong | |
33 4:('[Ac,Th,Pa,U,Np,Pu,Am,Cm,Bk,Cf,Es,Fm,Md,No,Lr]',0), # actinide | |
34 5:('[Sc,Ti,Y,Zr,Hf]',0), # Group IIIB,IVB (Sc...) *NOTE* spec wrong | |
35 6:('[La,Ce,Pr,Nd,Pm,Sm,Eu,Gd,Tb,Dy,Ho,Er,Tm,Yb,Lu]',0), # Lanthanide | |
36 7:('[V,Cr,Mn,Nb,Mo,Tc,Ta,W,Re]',0), # Group VB,VIB,VIIB (V...) *NOTE* spec wrong | |
37 8:('[!#6;!#1]1~*~*~*~1',0), # QAAA@1 | |
38 9:('[Fe,Co,Ni,Ru,Rh,Pd,Os,Ir,Pt]',0), # Group VIII (Fe...) | |
39 10:('[Be,Mg,Ca,Sr,Ba,Ra]',0), # Group IIa (Alkaline earth) | |
40 11:('*1~*~*~*~1',0), # 4M Ring | |
41 12:('[Cu,Zn,Ag,Cd,Au,Hg]',0), # Group IB,IIB (Cu..) | |
42 13:('[#8]~[#7](~[#6])~[#6]',0), # ON(C)C | |
43 14:('[#16]-[#16]',0), # S-S | |
44 15:('[#8]~[#6](~[#8])~[#8]',0), # OC(O)O | |
45 16:('[!#6;!#1]1~*~*~1',0), # QAA@1 | |
46 17:('[#6]#[#6]',0), #CTC | |
47 18:('[B,Al,Ga,In,Tl]',0), # Group IIIA (B...) *NOTE* spec wrong | |
48 19:('*1~*~*~*~*~*~*~1',0), # 7M Ring | |
49 20:('[Si]',0), #Si | |
50 21:('[#6]=[#6](~[!#6;!#1])~[!#6;!#1]',0), # C=C(Q)Q | |
51 22:('*1~*~*~1',0), # 3M Ring | |
52 23:('[#7]~[#6](~[#8])~[#8]',0), # NC(O)O | |
53 24:('[#7]-[#8]',0), # N-O | |
54 25:('[#7]~[#6](~[#7])~[#7]',0), # NC(N)N | |
55 26:('[#6]=;@[#6](@*)@*',0), # C$=C($A)$A | |
56 27:('[I]',0), # I | |
57 28:('[!#6;!#1]~[CH2]~[!#6;!#1]',0), # QCH2Q | |
58 29:('[#15]',0),# P | |
59 30:('[#6]~[!#6;!#1](~[#6])(~[#6])~*',0), # CQ(C)(C)A | |
60 31:('[!#6;!#1]~[F,Cl,Br,I]',0), # QX | |
61 32:('[#6]~[#16]~[#7]',0), # CSN | |
62 33:('[#7]~[#16]',0), # NS | |
63 34:('[CH2]=*',0), # CH2=A | |
64 35:('[Li,Na,K,Rb,Cs,Fr]',0), # Group IA (Alkali Metal) | |
65 36:('[#16R]',0), # S Heterocycle | |
66 37:('[#7]~[#6](~[#8])~[#7]',0), # NC(O)N | |
67 38:('[#7]~[#6](~[#6])~[#7]',0), # NC(C)N | |
68 39:('[#8]~[#16](~[#8])~[#8]',0), # OS(O)O | |
69 40:('[#16]-[#8]',0), # S-O | |
70 41:('[#6]#[#7]',0), # CTN | |
71 42:('F',0), # F | |
72 43:('[!C;!c;!#1;!H0]~*~[!C;!c;!#1;!H0]',0), # QHAQH | |
73 44:('?',0), # OTHER | |
74 45:('[#6]=[#6]~[#7]',0), # C=CN | |
75 46:('Br',0), # BR | |
76 47:('[#16]~*~[#7]',0), # SAN | |
77 48:('[#8]~[!#6;!#1](~[#8])(~[#8])',0), # OQ(O)O | |
78 49:('[!+0]',0), # CHARGE | |
79 50:('[#6]=[#6](~[#6])~[#6]',0), # C=C(C)C | |
80 51:('[#6]~[#16]~[#8]',0), # CSO | |
81 52:('[#7]~[#7]',0), # NN | |
82 53:('[!#6;!#1;!H0]~*~*~*~[!#6;!#1;!H0]',0), # QHAAAQH | |
83 54:('[!#6;!#1;!H0]~*~*~[!#6;!#1;!H0]',0), # QHAAQH | |
84 55:('[#8]~[#16]~[#8]',0), #OSO | |
85 56:('[#8]~[#7](~[#8])~[#6]',0), # ON(O)C | |
86 57:('[#8R]',0), # O Heterocycle | |
87 58:('[!#6;!#1]~[#16]~[!#6;!#1]',0), # QSQ | |
88 59:('[#16]!:*:*',0), # Snot%A%A | |
89 60:('[#16]=[#8]',0), # S=O | |
90 61:('*~[#16](~*)~*',0), # AS(A)A | |
91 62:('*@*!@*@*',0), # A$!A$A | |
92 63:('[#7]=[#8]',0), # N=O | |
93 64:('*@*!@[#16]',0), # A$A!S | |
94 65:('c:n',0), # C%N | |
95 66:('[#6]~[#6](~[#6])(~[#6])~*',0), # CC(C)(C)A | |
96 67:('[!#6;!#1]~[#16]',0), # QS | |
97 68:('[!#6;!#1;!H0]~[!#6;!#1;!H0]',0), # QHQH (&...) FIX: incomplete definition | |
98 69:('[!#6;!#1]~[!#6;!#1;!H0]',0), # QQH | |
99 70:('[!#6;!#1]~[#7]~[!#6;!#1]',0), # QNQ | |
100 71:('[#7]~[#8]',0), # NO | |
101 72:('[#8]~*~*~[#8]',0), # OAAO | |
102 73:('[#16]=*',0), # S=A | |
103 74:('[CH3]~*~[CH3]',0), # CH3ACH3 | |
104 75:('*!@[#7]@*',0), # A!N$A | |
105 76:('[#6]=[#6](~*)~*',0), # C=C(A)A | |
106 77:('[#7]~*~[#7]',0), # NAN | |
107 78:('[#6]=[#7]',0), # C=N | |
108 79:('[#7]~*~*~[#7]',0), # NAAN | |
109 80:('[#7]~*~*~*~[#7]',0), # NAAAN | |
110 81:('[#16]~*(~*)~*',0), # SA(A)A | |
111 82:('*~[CH2]~[!#6;!#1;!H0]',0), # ACH2QH | |
112 83:('[!#6;!#1]1~*~*~*~*~1',0), # QAAAA@1 | |
113 84:('[NH2]',0), #NH2 | |
114 85:('[#6]~[#7](~[#6])~[#6]',0), # CN(C)C | |
115 86:('[C;H2,H3][!#6;!#1][C;H2,H3]',0), # CH2QCH2 | |
116 87:('[F,Cl,Br,I]!@*@*',0), # X!A$A | |
117 88:('[#16]',0), # S | |
118 89:('[#8]~*~*~*~[#8]',0), # OAAAO | |
119 90:('[$([!#6;!#1;!H0]~*~*~[CH2]~*),$([!#6;!#1;!H0;R]1@[R]@[R]@[CH2;R]1),$([!#6;!#1;!H0]~[R]1@[R]@[CH2;R]1)]',0), # QHAACH2A | |
120 91:('[$([!#6;!#1;!H0]~*~*~*~[CH2]~*),$([!#6;!#1;!H0;R]1@[R]@[R]@[R]@[CH2;R]1),$([!#6;!#1;!H0]~[R]1@[R]@[R]@[CH2;R]1),$([!#6;!#1;!H0]~*~[R]1@[R]@[CH2;R]1)]',0), # QHAAACH2A | |
121 92:('[#8]~[#6](~[#7])~[#6]',0), # OC(N)C | |
122 93:('[!#6;!#1]~[CH3]',0), # QCH3 | |
123 94:('[!#6;!#1]~[#7]',0), # QN | |
124 95:('[#7]~*~*~[#8]',0), # NAAO | |
125 96:('*1~*~*~*~*~1',0), # 5 M ring | |
126 97:('[#7]~*~*~*~[#8]',0), # NAAAO | |
127 98:('[!#6;!#1]1~*~*~*~*~*~1',0), # QAAAAA@1 | |
128 99:('[#6]=[#6]',0), # C=C | |
129 100:('*~[CH2]~[#7]',0), # ACH2N | |
130 101:('[$([R]@1@[R]@[R]@[R]@[R]@[R]@[R]@[R]1),$([R]@1@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]1),$([R]@1@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]1),$([R]@1@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]1),$([R]@1@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]1),$([R]@1@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]1),$([R]@1@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]1)]',0), # 8M Ring or larger. This only handles up to ring sizes of 14 | |
131 102:('[!#6;!#1]~[#8]',0), # QO | |
132 103:('Cl',0), # CL | |
133 104:('[!#6;!#1;!H0]~*~[CH2]~*',0), # QHACH2A | |
134 105:('*@*(@*)@*',0), # A$A($A)$A | |
135 106:('[!#6;!#1]~*(~[!#6;!#1])~[!#6;!#1]',0), # QA(Q)Q | |
136 107:('[F,Cl,Br,I]~*(~*)~*',0), # XA(A)A | |
137 108:('[CH3]~*~*~*~[CH2]~*',0), # CH3AAACH2A | |
138 109:('*~[CH2]~[#8]',0), # ACH2O | |
139 110:('[#7]~[#6]~[#8]',0), # NCO | |
140 111:('[#7]~*~[CH2]~*',0), # NACH2A | |
141 112:('*~*(~*)(~*)~*',0), # AA(A)(A)A | |
142 113:('[#8]!:*:*',0), # Onot%A%A | |
143 114:('[CH3]~[CH2]~*',0), # CH3CH2A | |
144 115:('[CH3]~*~[CH2]~*',0), # CH3ACH2A | |
145 116:('[$([CH3]~*~*~[CH2]~*),$([CH3]~*1~*~[CH2]1)]',0), # CH3AACH2A | |
146 117:('[#7]~*~[#8]',0), # NAO | |
147 118:('[$(*~[CH2]~[CH2]~*),$(*1~[CH2]~[CH2]1)]',1), # ACH2CH2A > 1 | |
148 119:('[#7]=*',0), # N=A | |
149 120:('[!#6;R]',1), # Heterocyclic atom > 1 (&...) FIX: incomplete definition | |
150 121:('[#7;R]',0), # N Heterocycle | |
151 122:('*~[#7](~*)~*',0), # AN(A)A | |
152 123:('[#8]~[#6]~[#8]',0), # OCO | |
153 124:('[!#6;!#1]~[!#6;!#1]',0), # QQ | |
154 125:('?',0), # Aromatic Ring > 1 | |
155 126:('*!@[#8]!@*',0), # A!O!A | |
156 127:('*@*!@[#8]',1), # A$A!O > 1 (&...) FIX: incomplete definition | |
157 128:('[$(*~[CH2]~*~*~*~[CH2]~*),$([R]1@[CH2;R]@[R]@[R]@[R]@[CH2;R]1),$(*~[CH2]~[R]1@[R]@[R]@[CH2;R]1),$(*~[CH2]~*~[R]1@[R]@[CH2;R]1)]',0), # ACH2AAACH2A | |
158 129:('[$(*~[CH2]~*~*~[CH2]~*),$([R]1@[CH2]@[R]@[R]@[CH2;R]1),$(*~[CH2]~[R]1@[R]@[CH2;R]1)]',0), # ACH2AACH2A | |
159 130:('[!#6;!#1]~[!#6;!#1]',1), # QQ > 1 (&...) FIX: incomplete definition | |
160 131:('[!#6;!#1;!H0]',1), # QH > 1 | |
161 132:('[#8]~*~[CH2]~*',0), # OACH2A | |
162 133:('*@*!@[#7]',0), # A$A!N | |
163 134:('[F,Cl,Br,I]',0), # X (HALOGEN) | |
164 135:('[#7]!:*:*',0), # Nnot%A%A | |
165 136:('[#8]=*',1), # O=A>1 | |
166 137:('[!C;!c;R]',0), # Heterocycle | |
167 138:('[!#6;!#1]~[CH2]~*',1), # QCH2A>1 (&...) FIX: incomplete definition | |
168 139:('[O;!H0]',0), # OH | |
169 140:('[#8]',3), # O > 3 (&...) FIX: incomplete definition | |
170 141:('[CH3]',2), # CH3 > 2 (&...) FIX: incomplete definition | |
171 142:('[#7]',1), # N > 1 | |
172 143:('*@*!@[#8]',0), # A$A!O | |
173 144:('*!:*:*!:*',0), # Anot%A%Anot%A | |
174 145:('*1~*~*~*~*~*~1',1), # 6M ring > 1 | |
175 146:('[#8]',2), # O > 2 | |
176 147:('[$(*~[CH2]~[CH2]~*),$([R]1@[CH2;R]@[CH2;R]1)]',0), # ACH2CH2A | |
177 148:('*~[!#6;!#1](~*)~*',0), # AQ(A)A | |
178 149:('[C;H3,H4]',1), # CH3 > 1 | |
179 150:('*!@*@*!@*',0), # A!A$A!A | |
180 151:('[#7;!H0]',0), # NH | |
181 152:('[#8]~[#6](~[#6])~[#6]',0), # OC(C)C | |
182 153:('[!#6;!#1]~[CH2]~*',0), # QCH2A | |
183 154:('[#6]=[#8]',0), # C=O | |
184 155:('*!@[CH2]!@*',0), # A!CH2!A | |
185 156:('[#7]~*(~*)~*',0), # NA(A)A | |
186 157:('[#6]-[#8]',0), # C-O | |
187 158:('[#6]-[#7]',0), # C-N | |
188 159:('[#8]',1), # O>1 | |
189 160:('[C;H3,H4]',0), #CH3 | |
190 161:('[#7]',0), # N | |
191 162:('a',0), # Aromatic | |
192 163:('*1~*~*~*~*~*~1',0), # 6M Ring | |
193 164:('[#8]',0), # O | |
194 165:('[R]',0), # Ring | |
195 166:('?',0), # Fragments FIX: this can't be done in SMARTS | |
196 | |
197 # obabel -:"CNO" -oftp -xs | |
198 # 24: N-O 68: QHQH (&...) 69: QQH 71: NO 93: QCH3 94: QN 102: QO | |
199 # 124: QQ 131: QH > 1 *2 139: OH 151: NH 158: C-N 160: CH3 161: N 164: O |