comparison Kinatest-R_part1.R @ 10:de59605e960a draft

Uploaded
author jfb
date Thu, 08 Feb 2018 14:51:06 -0500
parents
children
comparison
equal deleted inserted replaced
9:f80306fc5d69 10:de59605e960a
1
# ---- Inputs ----
# Three CSV tables are read from the fixed names "input1", "input2" and
# "input3"; text columns stay character vectors (no factor conversion).
ImportedSubstrateList <- read.csv(file = "input1", stringsAsFactors = FALSE)
NegativeSubstrateList <- read.csv(file = "input2", stringsAsFactors = FALSE)
SubstrateBackgroundFrequency <- read.csv(file = "input3", stringsAsFactors = FALSE)

ScreenerFilename <- "screener"

# ---- Output names ----
# Presumably the names of the files written later in the script; their use is
# outside this section -- TODO confirm.
FILENAME <- "output1.csv"
FILENAME2 <- "output2.csv"
FILENAME3 <- "output3.csv"

OutputMatrix <- "KinaseMatrix.csv"
CharacterizationTable <- "CharacterizationTableForThisKinase.csv"
# NOTE(review): SDtable is reassigned to a numeric matrix when the SD table is
# built further down, so this string does not survive to the end of the script.
SDtable <- "SDtableforthisKinase"
SiteSelectivityTable <- "SiteSelectivityForThisKinase"
41
42
43
# Build `substrates`: a character matrix with one row per substrate and one
# column per motif position (15 positions).
# Columns 4:18 of ImportedSubstrateList supply the motif, one residue per
# column; motif position 8 is always overwritten with "Y".
# NOTE(review): row 1 of ImportedSubstrateList is skipped even though
# read.csv() has already consumed the header line -- confirm that the input
# really carries an extra non-data row.
#SeqsToBeScored<-"asdasd"
n_substrates <- max(nrow(ImportedSubstrateList) - 1L, 0L)
substrates <- matrix("A", nrow = n_substrates, ncol = 15)

# seq_len() gives an empty loop when there is no data row; a plain
# 2:nrow(...) would count backwards (2, 1) in that case and fail on the
# first assignment.
for (i in seq_len(n_substrates) + 1L) {
  j <- i - 1L
  substratemotif <- ImportedSubstrateList[i, 4:18]
  substratemotif[8] <- "Y"
  #substratemotif<-paste(substratemotif,sep = "",collapse = "")
  substratemotif <- unlist(substratemotif)
  substrates[j, 1:15] <- substratemotif
}
56
57 # SpacesToOs<-c(""="O",)
58 # substrates<-SpacesToOs[substrates]
59
# Column 2 of the last background row. The value is not assigned to anything;
# at top level R simply prints it.
SubstrateBackgroundFrequency[nrow(SubstrateBackgroundFrequency), 2]

# Mean and standard deviation of each background-frequency column.
# Columns 2..21 of SubstrateBackgroundFrequency are taken in the residue order
# A C D E F G H I K L M N P Q R S T V W Y. Entries that do not convert to a
# number become NA and are left out of the statistic.
bg_letters <- c("A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
                "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y")
bg_column_stat <- function(stat) {
  vapply(
    seq_along(bg_letters) + 1L,
    function(k) stat(as.numeric(SubstrateBackgroundFrequency[, k]), na.rm = TRUE),
    numeric(1)
  )
}

AllMeans <- bg_column_stat(mean)
AllSDs <- bg_column_stat(sd)
#this is subbackfreq SDs

# The rest of the script reads the per-residue scalars (Amean, Asd, ...,
# Ymean, Ysd), so they are created from the two vectors.
for (k in seq_along(bg_letters)) {
  assign(paste0(bg_letters[k], "mean"), AllMeans[k])
  assign(paste0(bg_letters[k], "sd"), AllSDs[k])
}

SBF_statisticalvalues <- cbind(AllMeans, AllSDs)
111
#create the percent table
# For each of the 15 motif positions (columns of `substrates`) and each of the
# 20 residues: occurrences of the residue divided by the number of non-blank
# entries at that position, finally scaled to percent.
# Row order of PercentTable (and of every per-residue table built from it):
#A C D E F G H I K L M N P Q R S T V W Y
percent_letters <- c("A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
                     "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y")

PercentTable <- matrix(0, nrow = length(percent_letters), ncol = 15)
for (p in 1:15) {
  column_p <- substrates[, p]
  blanks_p <- sum(column_p %in% "")
  filled_p <- length(column_p) - blanks_p
  counts_p <- tabulate(match(column_p, percent_letters),
                       nbins = length(percent_letters))

  # A position without any residue would give 0/0 = NaN, and the later
  # `if (SDtable[j,i]>2)` tests cannot evaluate NaN; report 0 instead.
  fractions_p <- if (filled_p > 0) {
    counts_p / filled_p
  } else {
    numeric(length(percent_letters))
  }
  PercentTable[, p] <- fractions_p

  # Keep the globals the unrolled form of this block defined, in case code
  # further down still reads them: Column1..15, spaces1..15, A1..Y15.
  assign(paste0("Column", p), column_p)
  assign(paste0("spaces", p), blanks_p)
  for (r in seq_along(percent_letters)) {
    assign(paste0(percent_letters[r], p), fractions_p[r])
  }
}
#this is substrate percents

# One 1 x 15 matrix of fractions per residue, named AllAs, CllCs, ..., YllYs,
# with columns named <residue><position> (A1..A15, C1..C15, ...).
for (r in seq_along(percent_letters)) {
  aa <- percent_letters[r]
  residue_row <- PercentTable[r, , drop = FALSE]
  colnames(residue_row) <- paste0(aa, 1:15)
  assign(paste0(aa, "ll", aa, "s"), residue_row)
}

# rbind() of the per-residue matrices took its column names from the first
# one (AllAs); keep those names on the combined table.
colnames(PercentTable) <- paste0("A", 1:15)
PercentTable <- PercentTable * 100
508
#create the SD table
# For every row, a percentage minus the background mean of the same residue,
# divided by the background SD of the same residue.
# AllMeans and AllSDs hold one value per residue in PercentTable's row order
# (20 values for 20 rows), so recycling them down each column pairs every row
# with its own mean and SD.
SDtable <- (PercentTable - AllMeans) / AllSDs

# The arithmetic result inherits PercentTable's column names; the SD table
# carries no names.
dimnames(SDtable) <- NULL
534
535
# Residue labels. Element 1 is the caption "Letter", so residue row r of the
# 20-row tables (A = row 1, ..., Y = row 20) is SetOfAAs[r + 1].
SetOfAAs <- c("Letter", "A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
              "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y")

# Observed count per motif position: how many substrates carry, at that
# position, a residue whose z-score in SDtable is above 2.
SumOfSigmaAAs <- numeric(15)

for (i in 1:15) {
  # which() skips NA/NaN z-scores; a bare if() on such a value stops the
  # script with "missing value where TRUE/FALSE needed".
  enriched_rows <- which(SDtable[1:20, i] > 2)

  # "+ 1" steps over the "Letter" caption. Indexing SetOfAAs with the bare
  # row number looks up the residue one place earlier in the list ("Letter"
  # for A, W for Y), so Y would never be counted.
  enriched_letters <- SetOfAAs[enriched_rows + 1]

  SumOfSigmaAAs[i] <- sum(substrates[, i] %in% enriched_letters)
}
552
553 # AAs1<-length(substrates[,1])-sum(substrates[,1]=="")
554 # AAs2<-length(substrates[,2])-sum(substrates[,2]=="")
555 # AAs3<-length(substrates[,3])-sum(substrates[,3]=="")
556 # AAs4<-length(substrates[,4])-sum(substrates[,4]=="")
557 # AAs5<-length(substrates[,5])-sum(substrates[,5]=="")
558 # AAs6<-length(substrates[,6])-sum(substrates[,6]=="")
559 # AAs7<-length(substrates[,7])-sum(substrates[,7]=="")
560 # AAs8<-length(substrates[,8])-sum(substrates[,8]=="")
561 # AAs9<-length(substrates[,9])-sum(substrates[,9]=="")
562 #
563 #
564 #
565 # #AAsAtPositions<-c(AAs1,AAs2,AAs3,AAs4,AAs5,AAs6,AAs7,AAs8,AAs9)
566 # AAsAtPositions<-c(length(substrates[,1]),length(substrates[,2]),length(substrates[,3]),length(substrates[,4]),
567 # length(substrates[,5]),length(substrates[,6]),length(substrates[,7]),length(substrates[,8]),
568 # length(substrates[,9]))
569
# Expected count per motif position: the background percentages (AllMeans) of
# all residues with a z-score above 2 at that position are summed and applied
# to the number of non-blank entries at the position.
SumOfExpectedSigmaAAs <- numeric(15)
for (i in 1:15) {
  # which() ignores NA/NaN z-scores (e.g. from a zero-SD background column)
  # instead of failing inside an if() condition.
  enriched_rows <- which(SDtable[1:20, i] > 2)
  ExpectedValue <- sum(AllMeans[enriched_rows])

  filled <- length(substrates[, i]) - sum(substrates[, i] %in% "")
  SumOfExpectedSigmaAAs[i] <- ExpectedValue * filled / 100
}
582
# Observed / expected ratio per motif position, stacked under its two inputs.
SelectivityRow <- SumOfSigmaAAs / SumOfExpectedSigmaAAs
SelectivitySheet <- rbind(SumOfSigmaAAs, SumOfExpectedSigmaAAs, SelectivityRow)

# Residue labels as a one-column matrix; used as the first column of the two
# data frames built below.
SetOfAAs <- matrix(data = SetOfAAs, ncol = 1)

# Put the position header (-7 .. 7) on top of the z-score table and attach
# the label column. SDtableu keeps the table without header and labels.
HeaderSD <- -7:7
SDtableu <- SDtable
SDtable <- rbind(HeaderSD, SDtableu)
row.names(SDtable) <- NULL
SDtable <- data.frame(SetOfAAs, SDtable)

# Same header and label column for the percent table.
PercentTable <- rbind(HeaderSD, PercentTable)
row.names(PercentTable) <- NULL
PercentTable <- data.frame(SetOfAAs, PercentTable)

# Sum of the Number.of.pY column divided by the sum of the Number.of.Y column
# of the background table; entries that do not convert to a number are
# dropped before summing.
numberofY <- as.numeric(SubstrateBackgroundFrequency$Number.of.Y)
numberofY <- numberofY[!is.na(numberofY)]

numberofPY <- as.numeric(SubstrateBackgroundFrequency$Number.of.pY)
numberofPY <- numberofPY[!is.na(numberofPY)]

NormalizationScore <- sum(numberofPY) / sum(numberofY)
604
605 # positions<-matrix(data = NA, nrow=20,ncol = 15)
606 #
607 # #column1
608 #
609 # for (q in 1:15) {
610 # sA<-sum(substrates[,i]=="A")
611 # positions[1,i]<-sA
612 # sC<-sum(substrates[,i]=="C")
613 # positions[2,i]<-sC
614 # sD<-sum(substrates[,i]=="D")
615 # positions[3,i]<-sD
616 # sE<-sum(substrates[,i]=="E")
617 # positions[4,i]<-sE
618 # sF<-sum(substrates[,i]=="F")
619 # sG<-sum(substrates[,i]=="G")
620 # sH<-sum(substrates[,i]=="H")
621 # sI<-sum(substrates[,i]=="I")
622 # sK<-sum(substrates[,i]=="K")
623 # sL<-sum(substrates[,i]=="L")
624 # sM<-sum(substrates[,i]=="M")
625 # sN<-sum(substrates[,i]=="N")
626 # sP<-sum(substrates[,i]=="P")
627 # sQ<-sum(substrates[,i]=="Q")
628 # sR<-sum(substrates[,i]=="R")
629 # sS<-sum(substrates[,i]=="S")
630 # sT<-sum(substrates[,i]=="T")
631 # sV<-sum(substrates[,i]=="V")
632 # sW<-sum(substrates[,i]=="W")
633 # sY<-sum(substrates[,i]=="Y")
634 # positions[5,i]<-sF
635 # positions[6,i]<-sG
636 # positions[7,i]<-sH
637 # positions[8,i]<-sI
638 # positions[9,i]<-sK
639 # positions[10,i]<-sL
640 # positions[11,i]<-sM
641 # positions[12,i]<-sN
642 # positions[13,i]<-sP
643 # positions[14,i]<-sQ
644 # positions[15,i]<-sR
645 # positions[16,i]<-sS
646 # positions[17,i]<-sT
647 # positions[18,i]<-sV
648 # positions[19,i]<-sW
649 # positions[20,i]<-sY
650 # }
651
# Position table and endogenous probability matrix (EPM).
#
# PositionTable answers "how many times did each amino acid show up at each
# motif position?": one row per residue (in the order of `residue_order`
# below) and one column per motif position (columns 1-15 of `substrates`).
# EPMtable then divides every count by the background expected for that
# residue at that position.

# Count how often each entry of `residues` occurs in every column of the
# character matrix `sequences`.
#   sequences: character matrix, one row per sequence, one column per position
#   residues:  character vector of the values to count
# Returns an integer matrix with length(residues) rows and ncol(sequences)
# columns. Matching uses %in%, so an NA cell never matches and never turns a
# count into NA.
count_residues_by_position <- function(sequences, residues) {
  n_positions <- ncol(sequences)
  per_residue <- vapply(
    residues,
    function(residue) {
      hits <- sequences %in% residue
      dim(hits) <- dim(sequences) # %in% drops the matrix shape; restore it
      as.integer(colSums(hits))
    },
    integer(n_positions),
    USE.NAMES = FALSE
  )
  # vapply() stacks one residue per column; flip to one residue per row.
  matrix(per_residue, nrow = length(residues), ncol = n_positions,
         byrow = TRUE)
}

# Turn a table of counts into the endogenous probability matrix.
#   position_table:   residue-by-position count matrix
#   filled:           number of non-blank entries at each position
#   background_means: one background value per residue row
# A cell with a positive count becomes
#   count / (filled * 0.01 * background_mean);
# a cell with a zero count uses 1 / filled in place of the count, so no cell
# of the result is exactly zero.
# Returns a matrix with the same dimensions and dimnames as `position_table`.
endogenous_probability <- function(position_table, filled, background_means) {
  epm <- position_table
  for (residue_row in seq_len(nrow(position_table))) {
    numerator <- position_table[residue_row, ]
    unseen <- numerator == 0
    numerator[unseen] <- (1 / filled)[unseen]
    epm[residue_row, ] <-
      numerator / (filled * 0.01 * background_means[residue_row])
  }
  epm
}

residue_order <- c("A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
                   "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y")
motif_positions <- seq_len(15)
motif_window <- substrates[, motif_positions, drop = FALSE]

PositionTable <- count_residues_by_position(motif_window, residue_order)
# Same column labels the table carried when it was assembled with cbind().
colnames(PositionTable) <- paste0("A", motif_positions)

# Echo the table dimensions (20 residues x 15 positions) as a sanity check.
dim(PositionTable)

# Number of non-blank entries at each position. Blanks are counted with %in%
# rather than ==, so a single NA cell can no longer turn a whole position
# (and with it a whole EPM column) into NA.
# NOTE(review): NA cells are therefore counted as non-blank; confirm that is
# the intended treatment of missing residues.
columns <- nrow(motif_window) -
  count_residues_by_position(motif_window, "")[1, ]

# AllMeans is defined earlier in the script; presumably the per-residue
# background frequency means in the same order as `residue_order` -- confirm.
EPMtable <- endogenous_probability(PositionTable, columns, AllMeans)
1077 #here I created the endogenous probability matrix
1078 #now all I need to do is make the program automatically determine which SDs are >2, and then make it perform screener and sorter on those SDs
1079
1080
1081
1082
1083
1084 # write.xlsx(SDtable,file=FILENAME, sheetName = "Standard Deviation Table",col.names = FALSE,row.names = FALSE,append = TRUE)
1085 # write.xlsx(PercentTable,file = FILENAME,sheetName = "Percent Table",col.names = FALSE,row.names = FALSE,append = TRUE)
1086 # write.xlsx(SelectivitySheet,file = FILENAME,sheetName = "Site Selectivity",col.names = FALSE,row.names = FALSE,append = TRUE)
1087 # write.xlsx(EPMtable,file=FILENAME,sheetName = "Endogenous Probability Matrix",col.names = FALSE,row.names = FALSE,append = TRUE)
1088 # write.xlsx(NormalizationScore,file = FILENAME,sheetName = "Normalization Score",col.names = FALSE,row.names = FALSE,append = TRUE)
1089
# Append `x` to the CSV file `file` as comma-separated rows with no header
# line. Row names are written only when `row.names` is TRUE.
append_csv <- function(x, file, row.names = FALSE) {
  invisible(
    write.table(x, file = file, append = TRUE, sep = ",",
                row.names = row.names, col.names = FALSE)
  )
}

# Put a label in front of the score so its rows are identifiable in the CSV.
NormalizationScore <- c("Normalization Score", NormalizationScore)

# FILENAME: the SD table and the percent table, each preceded by a title row.
append_csv(c("SD Table"), FILENAME)
append_csv(SDtable, FILENAME)
append_csv(c("Percent Table"), FILENAME)
append_csv(PercentTable, FILENAME)

# Printable copy of the EPM: a first row holding the positions -7..7, no row
# names, and SetOfAAs as the leading column.
HeaderSD <- c(-7:7)
EPMtableu <- rbind(HeaderSD, EPMtable)
row.names(EPMtableu) <- NULL
EPMtableu <- data.frame(SetOfAAs, EPMtableu)

# FILENAME2 opens with the site selectivity section title.
append_csv("Site Selectivity Matrix", FILENAME2)

# Two header rows for the selectivity sheet: a row of 16 single-space cells,
# then "Position" followed by the offsets -7..7.
head <- matrix(data = rep(" ", times = 16), nrow = 1)
SelectivityHeader <- rbind(head, matrix(data = c("Position", -7:7), nrow = 1))
append_csv(SelectivityHeader, FILENAME2)

# The selectivity sheet is the only table written together with its row names.
append_csv(SelectivitySheet, FILENAME2, row.names = TRUE)

# The EPM section and the labelled normalization score close FILENAME2.
append_csv(c("Endogenous Probability Matrix"), FILENAME2)
append_csv(EPMtableu, FILENAME2)
append_csv(NormalizationScore, FILENAME2)
1114