comparison all stuff/Commonality and Difference finderMADE 7 TO 7.R @ 0:23eea82f5192 draft

Uploaded
author jfb
date Wed, 16 Jan 2019 13:55:22 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:23eea82f5192
1 #I should make an SOP for this. Problems we encountered: no x in the xY motif, and the kilodemon
2 #the output files have both Y and xY, they shouldn't why is that happening? make it not happen
3 #make sure that accession numbers stay locked to each motif, somehow
4 #output should look just like the KALIP input
5
# ---- Run options: answer with "YES" or "NO", always in capitals ----

# "YES" keeps ONLY FULL MOTIFS.
FullMotifsOnly_questionmark <- "NO"
# "YES" keeps ONLY TRUNCATED MOTIFS.
TruncatedMotifsOnly_questionmark <- "NO"
# "YES" searches for the overlap between the sets, "NO" for the non-overlap.
Are_You_Looking_For_Commonality <- "YES"


# ---- Input files: one substrate table and one background-frequency table
# ---- per set. The background-frequency files are read without a header row.
FirstSubstrateSet <- read.csv(
  file = "Galaxy63-BTK_PLUS-R1_Substrates.csv",
  stringsAsFactors = FALSE
)
Firstsubbackfreq <- read.csv(
  file = "Galaxy64-BTK_PLUS-R1_SubstrateBackgroundFrequency.csv",
  header = FALSE,
  stringsAsFactors = FALSE
)

SecondSubstrateSet <- read.csv(
  file = "Galaxy65-BTK_PLUS_R2_Substrates.csv",
  stringsAsFactors = FALSE
)
Secondsubbackfreq <- read.csv(
  file = "Galaxy66-BTK_PLUS_R2_SubstrateBackgroundFrequency.csv",
  header = FALSE,
  stringsAsFactors = FALSE
)

ThirdSubstrateSet <- read.csv(
  file = "Galaxy69-BTK_PLUS_R3_Substrates.csv",
  stringsAsFactors = FALSE
)
Thirdsubbackfreq <- read.csv(
  file = "Galaxy70-BTK_PLUS_R3_SubstrateBackgroundFrequency.csv",
  header = FALSE,
  stringsAsFactors = FALSE
)

# ---- Output files ----

# Tables written when looking for commonality.
Shared_motifs_table <- "180719_GALAXY-BTK-plus-rep-OVLP-7to7-substrates.csv"
Shared_subbackfreq_table <- "180719_GALAXY-BTK-plus-rep-OVLP-7to7-SubBackFreq.csv"

# Earlier output names, kept for reference:
# Shared_motifs_table<-"Shared motifs 7-27-17.csv"
# Shared_subbackfreq_table<-"SubstrateBackgrounFrequency-for-shared-motifs 4 7-27-17.csv"

# Per-set tables for the unshared motifs.
First_unshared_motifs_table <- "R1 substrates.csv"
First_unshared_subbackfreq <- "R1 SBF.csv"

Second_unshared_motifs_table <- "R2 subs.csv"
Second_unshared_subbackfreq <- "R2 SBf.csv"

Third_unshared_motifs_table <- "R3 subs.csv"
Third_unshared_subbackfreq <- "R3 SBF.csv"
39
40 #final note, this code is going to be unworkable if you want to make a Venn diagram of more than 3 circles. I think I'll poke around
41 #other languages to see if any of them can do it.
42 ####################################################################################################################################
43
44
45
46
47
# Overwrite column 11 of every substrate table with the literal string "xY",
# one copy per row. The motif-building code further down locates the lowercase
# "x" inside the pasted row to work out how many letters sit on either side.
FirstxY <- rep_len("xY", nrow(FirstSubstrateSet))
FirstSubstrateSet[, 11] <- FirstxY

SecondxY <- rep_len("xY", nrow(SecondSubstrateSet))
SecondSubstrateSet[, 11] <- SecondxY

ThirdxY <- rep_len("xY", nrow(ThirdSubstrateSet))
ThirdSubstrateSet[, 11] <- ThirdxY
56
57
58
59
60
61
62
63
64
65
66
67 ####################################################################################################################################
68 ####################################################################################################################################
69 # better version of this code written in C: what happens when two kinases share a motif, but they found that motif in two
70 # separate proteins thus two separate accession numbers?
71 # It should actually output the shared motif and BOTH accession numbers. Right now it does not, it only maps out the second
72 # accession number. So that needs to be fixed BUT you need to keep the commonality between a motif and its accession number
73 ####################################################################################################################################
74 ####################################################################################################################################
75 ####################################################################################################################################
76 ####################################################################################################################################
77
# Create the motif sets (full only, truncated only, or both) for the three
# substrate tables and, because commonality was requested, write out the motifs
# that occur in all three sets together with the matching columns of the first
# background-frequency table.

# Build one motif string per row of a substrate table.
#
#   substrate_set  data frame; columns 4:18 are pasted together to form the
#                  motif (cells equal to "XXXXX" are dropped) and column 3 is
#                  stored as the accession number.
#   mode           "full"      keep rows with at least 7 letters on both sides,
#                  "truncated" keep rows with fewer than 7 letters on a side,
#                  "all"       keep both kinds.
#
# Returns list(motifs = , accessions = ), each an nrow(substrate_set) x 1
# matrix. Rows that were not kept stay NA, so row i of both matrices always
# refers to row i of substrate_set.
build_motif_table <- function(substrate_set, mode) {
  flank <- 7L
  n_rows <- nrow(substrate_set)
  motifs <- matrix(NA, nrow = n_rows, ncol = 1)
  accessions <- matrix(NA, nrow = n_rows, ncol = 1)

  for (i in seq_len(n_rows)) {
    cells <- substrate_set[i, 4:18]
    cells <- cells[cells != "XXXXX"]
    residues <- unlist(strsplit(paste(cells, collapse = ""), split = ""))

    # The position of the first lowercase "x" gives the number of letters to
    # its left. A row without the marker cannot be classified, so it is left
    # as NA instead of aborting the run on an NA condition.
    marker <- match("x", residues)
    if (is.na(marker)) {
      next
    }
    letters_left <- marker - 1L
    # One extra position is subtracted for the letter that follows the marker.
    letters_right <- length(residues) - marker - 1L

    is_full <- letters_left >= flank && letters_right >= flank
    if ((mode == "full" && !is_full) || (mode == "truncated" && is_full)) {
      next
    }

    if (!is_full) {
      # Pad the short side(s) with blanks up to 7 positions; max() keeps
      # rep() from receiving a negative count when one side is longer than 7.
      residues <- c(
        rep(" ", times = max(0L, flank - letters_left)),
        residues,
        rep(" ", times = max(0L, flank - letters_right))
      )
    }

    motifs[i, 1] <- paste(residues[!residues %in% "x"], collapse = "")
    # The accession number comes from the same table and row as the motif.
    accessions[i, 1] <- substrate_set[i, 3]
  }

  list(motifs = motifs, accessions = accessions)
}

# Remove repeated rows from a matrix while always returning a matrix.
drop_repeated_rows <- function(m) {
  if (nrow(m) < 2L) {
    return(m)
  }
  m[!duplicated(m), , drop = FALSE]
}

if (Are_You_Looking_For_Commonality == "YES") {
  # The original ran the "full" section first and the "truncated" section
  # second, so with both switches on "YES" the truncated tables won.
  if (TruncatedMotifsOnly_questionmark == "YES") {
    motif_mode <- "truncated"
  } else if (FullMotifsOnly_questionmark == "YES") {
    motif_mode <- "full"
  } else {
    motif_mode <- "all"
  }

  first_table <- build_motif_table(FirstSubstrateSet, motif_mode)
  FTLwtmotifs <- first_table$motifs
  FTLwtAccessionNumbers <- first_table$accessions

  second_table <- build_motif_table(SecondSubstrateSet, motif_mode)
  D835Ymotifs <- second_table$motifs
  D835YAccessionNumbers <- second_table$accessions

  third_table <- build_motif_table(ThirdSubstrateSet, motif_mode)
  ITDmotifs <- third_table$motifs
  ITDAccessionNumbers <- third_table$accessions

  # First line of the background-frequency output: rows 1:36 of the first
  # column of the third background-frequency table, written as a single row.
  columnalheader <- matrix(as.character(Thirdsubbackfreq[1:36, 1]), nrow = 1)
  write.table(
    x = columnalheader,
    file = Shared_subbackfreq_table,
    quote = FALSE,
    sep = ",",
    row.names = FALSE,
    col.names = FALSE,
    na = "",
    append = TRUE
  )

  # Motifs present in both the second and the third set, ordered by the third
  # set and repeated once per matching row of the second set.
  second_motifs <- D835Ymotifs[!is.na(D835Ymotifs[, 1]), 1]
  third_motifs <- ITDmotifs[!is.na(ITDmotifs[, 1]), 1]
  times_in_second <- vapply(
    third_motifs,
    function(m) sum(second_motifs == m),
    integer(1),
    USE.NAMES = FALSE
  )
  FirstOverlapmotifs <- rep(third_motifs, times = times_in_second)

  # Rows of the first set whose motif is in that overlap, i.e. motifs shared
  # by all three sets. which() ignores the NA rows of the first set.
  first_motifs <- FTLwtmotifs[, 1]
  matched_rows <- unlist(lapply(
    unique(FirstOverlapmotifs),
    function(m) which(first_motifs == m)
  ))

  # Substrate information (columns 1:20) for every matched row. Each row is
  # converted on its own, then all rows are bound in one step.
  if (length(matched_rows) > 0) {
    shared_substrates <- do.call(
      rbind,
      lapply(matched_rows, function(k) as.matrix(FirstSubstrateSet[k, 1:20]))
    )
  } else {
    message("No motif is shared by all three sets; only header lines are written.")
    shared_substrates <- as.matrix(FirstSubstrateSet[0, 1:20])
  }
  TrueFinalMotifsMatrix <- drop_repeated_rows(shared_substrates)

  # For every matched row, find the columns of the first background-frequency
  # table whose first-row entry occurs inside that row's accession number.
  # NA entries are skipped so the test never evaluates to NA.
  background_ids <- vapply(
    seq_len(ncol(Firstsubbackfreq)),
    function(m) as.character(Firstsubbackfreq[1, m]),
    character(1)
  )
  hit_columns <- unlist(lapply(matched_rows, function(k) {
    accession <- as.character(FirstSubstrateSet[k, 3])
    is_hit <- vapply(
      background_ids,
      function(id) !is.na(id) && grepl(id, accession, fixed = TRUE),
      logical(1),
      USE.NAMES = FALSE
    )
    which(is_hit)
  }))

  # Each hit column becomes one output row.
  frequency_values <- as.character(unlist(lapply(
    hit_columns,
    function(m) as.character(Firstsubbackfreq[, m])
  )))
  shared_frequencies <- matrix(
    frequency_values,
    ncol = nrow(Firstsubbackfreq),
    byrow = TRUE
  )
  TrueMatrix <- drop_repeated_rows(shared_frequencies)

  write.table(
    x = TrueFinalMotifsMatrix,
    file = Shared_motifs_table,
    quote = FALSE,
    sep = ",",
    row.names = FALSE,
    col.names = TRUE,
    na = "",
    append = TRUE
  )

  write.table(
    x = TrueMatrix,
    file = Shared_subbackfreq_table,
    quote = FALSE,
    sep = ",",
    row.names = FALSE,
    col.names = FALSE,
    na = "",
    append = TRUE
  )
}
549
550 if (Are_You_Looking_For_Commonality=="NO"){
551 if (FullMotifsOnly_questionmark=="YES"){
552 FTLwtmotifs=rep(NA,times=nrow(FirstSubstrateSet))
553 FTLwtAccessionNumbers=rep(NA,times=nrow(FirstSubstrateSet))
554 leftspaces<-c()
555 rightspaces<-c()
556 for (i in 1:nrow(FirstSubstrateSet)){
557 FTLwtletters<-FirstSubstrateSet[i,4:18]
558 FTLwtletters<-FTLwtletters[FTLwtletters !="XXXXX"]
559 FTLwtletters<-paste(FTLwtletters, sep="", collapse="")
560
561
562 YYYmotif <- unlist(strsplit(FTLwtletters, split = ""))
563 YYYposition <- match(x = "x", table = YYYmotif)
564 #position itself tells me how much is to the left of that X by what it's number is. x at position 4 tells me that there are
565 #just 3 letters to the left of x
566
567 YYYLettersToTheLeft <- YYYposition - 1
568 #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is
569 #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1
570 YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1
571 #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the
572 #variable the user puts in is
573
574 if (YYYLettersToTheLeft > 6 && YYYLettersToTheRight > 6) {
575 motif<-YYYmotif
576 #save that motif, which is the Y and +/- 4 amino acids, including truncation
577 motif<-motif[!motif %in% "x"]
578 motif<-paste(motif, sep="", collapse="")
579 FTLwtletters<-motif
580 FTLwtmotifs[i]<-FTLwtletters
581 FTLwtAccessionNumbers[i]<-FirstSubstrateSet[i,3]
582 }
583
584 }
585 # FTLwtmotifs <- FTLwtmotifs[!is.na(FTLwtmotifs)]
586 # FTLwtmotifs<-matrix(FTLwtmotifs,ncol = 1)
587 #
588
589 D835Ymotifs=rep(NA,times=nrow(FirstSubstrateSet))
590 D835YAccessionNumbers=rep(NA,times=nrow(FirstSubstrateSet))
591
592 for (i in 1:nrow(SecondSubstrateSet)){
593 D835letters<-SecondSubstrateSet[i,4:18]
594 D835letters<-D835letters[D835letters !="XXXXX"]
595 D835letters<-paste(D835letters, sep="", collapse="")
596
597
598 YYYmotif <- unlist(strsplit(D835letters, split = ""))
599 YYYposition <- match(x = "x", table = YYYmotif)
600 #position itself tells me how much is to the left of that X by what it's number is. x at position 4 tells me that there are
601 #just 3 letters to the left of x
602
603 YYYLettersToTheLeft <- YYYposition - 1
604 #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is
605 #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1
606 YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1
607 #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the
608 #variable the user puts in is
609
610 if (YYYLettersToTheLeft > 6 && YYYLettersToTheRight > 6) {
611 motif<-YYYmotif
612 #add blank spaces if the motif has less than 4 letters to the left/right
613 motif<-c(leftspaces,YYYmotif,rightspaces)
614 #save that motif, which is the Y and +/- 4 amino acids, including truncation
615 motif<-motif[!motif %in% "x"]
616 motif<-paste(motif, sep="", collapse="")
617 D835letters<-motif
618 D835Ymotifs[i]<-D835letters
619 D835YAccessionNumbers[i]<-SecondSubstrateSet[i,3]
620 }
621 }
622
623 ITDmotifs=rep(NA,times=nrow(FirstSubstrateSet))
624 ITDAccessionNumbers=rep(NA,times=nrow(FirstSubstrateSet))
625
626 for (i in 1:nrow(ThirdSubstrateSet)){
627 ITDletters<-ThirdSubstrateSet[i,4:18]
628 ITDletters<-ITDletters[ITDletters !="XXXXX"]
629 ITDletters<-paste(ITDletters, sep="", collapse="")
630 YYYmotif <- unlist(strsplit(ITDletters, split = ""))
631 YYYposition <- match(x = "x", table = YYYmotif)
632 #position itself tells me how much is to the left of that X by what it's number is. x at position 4 tells me that there are
633 #just 3 letters to the left of x
634
635 YYYLettersToTheLeft <- YYYposition - 1
636 #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is
637 #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1
638 YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1
639 #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the
640 #variable the user puts in is
641
642 if (YYYLettersToTheLeft > 6 && YYYLettersToTheRight > 6) {
643 motif<-YYYmotif
644 #add blank spaces if the motif has less than 4 letters to the left/right
645 motif<-c(leftspaces,YYYmotif,rightspaces)
646 #save that motif, which is the Y and +/- 4 amino acids, including truncation
647 motif<-motif[!motif %in% "x"]
648 motif<-paste(motif, sep="", collapse="")
649 ITDletters<-motif
650 ITDmotifs[i]<-ITDletters
651 ITDAccessionNumbers[i]<-ThirdSubstrateSet[i,3]
652
653 }
654 }
655 names(ITDmotifs)<-ITDAccessionNumbers
656 names(D835Ymotifs)<-D835YAccessionNumbers
657 names(FTLwtmotifs)<-FTLwtAccessionNumbers
658 }
659
660
661 ##############################################3
662 #Truncated only
663 if (TruncatedMotifsOnly_questionmark=="YES"){
664 FTLwtmotifs=matrix(,nrow = nrow(FirstSubstrateSet),ncol=1)
665 FTLwtAccessionNumbers=matrix(,nrow = nrow(FirstSubstrateSet),ncol=1)
666
667 for (i in 1:nrow(FirstSubstrateSet)){
668 FTLwtletters<-FirstSubstrateSet[i,4:18]
669 FTLwtletters<-FTLwtletters[FTLwtletters !="XXXXX"]
670 FTLwtletters<-paste(FTLwtletters, sep="", collapse="")
671
672
673 YYYmotif <- unlist(strsplit(FTLwtletters, split = ""))
674 YYYposition <- match(x = "x", table = YYYmotif)
675 #position itself tells me how much is to the left of that X by what it's number is. x at position 4 tells me that there are
676 #just 3 letters to the left of x
677
678 YYYLettersToTheLeft <- YYYposition - 1
679 #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is
680 #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1
681 YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1
682 #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the
683 #variable the user puts in is
684
685 if (YYYLettersToTheLeft < 7 | YYYLettersToTheRight < 7) {
686 leftspaces<-rep(" ",times=(7-YYYLettersToTheLeft))
687 rightspaces<-rep(" ",times=7-(YYYLettersToTheRight))
688 #add blank spaces if the motif has less than 4 letters to the left/right
689 motif<-c(leftspaces,YYYmotif,rightspaces)
690 #save that motif, which is the Y and +/- 4 amino acids, including truncation
691 motif<-motif[!motif %in% "x"]
692 motif<-paste(motif, sep="", collapse="")
693 FTLwtletters<-motif
694 FTLwtmotifs[i,1]<-FTLwtletters
695 FTLwtAccessionNumbers[i,1]<-FirstSubstrateSet[i,3]
696 }
697
698 }
699
# Truncated motifs of the second substrate set.
# For each row, the cells in columns 4:18 (minus "XXXXX" placeholders) are
# joined into one string and split into single characters. A lowercase "x"
# marks the position just before the central residue (the Y, per the original
# notes). Only rows with fewer than 7 letters on at least one side are stored:
# they are padded with spaces to 7 per side, every "x" is removed, and the
# motif is saved together with the accession number from column 3.
# Rows with 7 or more letters on both sides keep NA in both tables.
D835Ymotifs <- matrix(NA, nrow = nrow(SecondSubstrateSet), ncol = 1)
D835YAccessionNumbers <- matrix(NA, nrow = nrow(SecondSubstrateSet), ncol = 1)

# seq_len() runs zero times on an empty table; 1:nrow() would visit rows 1 and 0.
for (i in seq_len(nrow(SecondSubstrateSet))) {
  D835letters <- SecondSubstrateSet[i, 4:18]
  D835letters <- D835letters[D835letters != "XXXXX"]
  D835letters <- paste(D835letters, sep = "", collapse = "")

  YYYmotif <- unlist(strsplit(D835letters, split = ""))
  YYYposition <- match(x = "x", table = YYYmotif)

  # Without the marker the motif cannot be centred; fail with a message that
  # names the row instead of the opaque "missing value where TRUE/FALSE needed".
  if (is.na(YYYposition)) {
    stop("SecondSubstrateSet row ", i, " has no 'x' marker in motif '",
         D835letters, "'", call. = FALSE)
  }

  # "x" at position p has p - 1 letters to its left; the residue after "x"
  # sits at p + 1, so length - p - 1 letters lie to its right.
  YYYLettersToTheLeft <- YYYposition - 1
  YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1

  if (YYYLettersToTheLeft < 7 || YYYLettersToTheRight < 7) {
    # max(0, ...) keeps rep() valid when one side is longer than 7.
    leftspaces <- rep(" ", times = max(0, 7 - YYYLettersToTheLeft))
    rightspaces <- rep(" ", times = max(0, 7 - YYYLettersToTheRight))
    motif <- c(leftspaces, YYYmotif, rightspaces)
    motif <- motif[!motif %in% "x"]
    motif <- paste(motif, sep = "", collapse = "")
    D835letters <- motif
    D835YAccessionNumbers[i, 1] <- SecondSubstrateSet[i, 3]
    D835Ymotifs[i, 1] <- D835letters
  }
}
734
# Truncated motifs of the third substrate set.
# For each row, the cells in columns 4:18 (minus "XXXXX" placeholders) are
# joined into one string and split into single characters. A lowercase "x"
# marks the position just before the central residue (the Y, per the original
# notes). Only rows with fewer than 7 letters on at least one side are stored:
# they are padded with spaces to 7 per side, every "x" is removed, and the
# motif is saved together with the accession number from column 3.
# Rows with 7 or more letters on both sides keep NA in both tables.
ITDmotifs <- matrix(NA, nrow = nrow(ThirdSubstrateSet), ncol = 1)
ITDAccessionNumbers <- matrix(NA, nrow = nrow(ThirdSubstrateSet), ncol = 1)

# seq_len() runs zero times on an empty table; 1:nrow() would visit rows 1 and 0.
for (i in seq_len(nrow(ThirdSubstrateSet))) {
  ITDletters <- ThirdSubstrateSet[i, 4:18]
  ITDletters <- ITDletters[ITDletters != "XXXXX"]
  ITDletters <- paste(ITDletters, sep = "", collapse = "")

  YYYmotif <- unlist(strsplit(ITDletters, split = ""))
  YYYposition <- match(x = "x", table = YYYmotif)

  # Without the marker the motif cannot be centred; fail with a message that
  # names the row instead of the opaque "missing value where TRUE/FALSE needed".
  if (is.na(YYYposition)) {
    stop("ThirdSubstrateSet row ", i, " has no 'x' marker in motif '",
         ITDletters, "'", call. = FALSE)
  }

  # "x" at position p has p - 1 letters to its left; the residue after "x"
  # sits at p + 1, so length - p - 1 letters lie to its right.
  YYYLettersToTheLeft <- YYYposition - 1
  YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1

  if (YYYLettersToTheLeft < 7 || YYYLettersToTheRight < 7) {
    # max(0, ...) keeps rep() valid when one side is longer than 7.
    leftspaces <- rep(" ", times = max(0, 7 - YYYLettersToTheLeft))
    rightspaces <- rep(" ", times = max(0, 7 - YYYLettersToTheRight))
    motif <- c(leftspaces, YYYmotif, rightspaces)
    motif <- motif[!motif %in% "x"]
    motif <- paste(motif, sep = "", collapse = "")
    ITDletters <- motif
    ITDAccessionNumbers[i, 1] <- ThirdSubstrateSet[i, 3]
    ITDmotifs[i, 1] <- ITDletters
  }
}

# Label every motif with the accession number stored for the same row, so the
# label travels with the motif through the vector-style subsetting done later.
names(FTLwtmotifs) <- FTLwtAccessionNumbers
names(D835Ymotifs) <- D835YAccessionNumbers
names(ITDmotifs) <- ITDAccessionNumbers
770 }
771
772 ###############################################
773 #ALL motifs, full and truncated
774
if (FullMotifsOnly_questionmark != "YES" && TruncatedMotifsOnly_questionmark != "YES") {
  # Builds the motif and accession-number tables for one substrate set,
  # keeping every motif (full and truncated alike).
  #
  # substrate_set: data frame with the accession number in column 3 and the
  #   motif cells in columns 4:18 ("XXXXX" cells are placeholders and dropped).
  # set_label: name of the set; used only in error messages.
  #
  # Returns a list of two one-column matrices, `motifs` and `accessions`, with
  # one row per row of substrate_set. `motifs` also carries the accession
  # numbers as its names so they survive vector-style subsetting later on.
  build_all_motifs <- function(substrate_set, set_label) {
    n_rows <- nrow(substrate_set)
    motifs <- matrix(NA, nrow = n_rows, ncol = 1)
    accessions <- matrix(NA, nrow = n_rows, ncol = 1)

    # seq_len() runs zero times on an empty table; 1:nrow() would visit rows
    # 1 and 0.
    for (row_index in seq_len(n_rows)) {
      cells <- substrate_set[row_index, 4:18]
      cells <- cells[cells != "XXXXX"]
      joined <- paste(cells, sep = "", collapse = "")
      residues <- unlist(strsplit(joined, split = ""))

      # A lowercase "x" marks the position just before the central residue.
      marker <- match(x = "x", table = residues)
      if (is.na(marker)) {
        stop(set_label, " row ", row_index, " has no 'x' marker in motif '",
             joined, "'", call. = FALSE)
      }

      # "x" at position p has p - 1 letters to its left; the residue after it
      # sits at p + 1, so length - p - 1 letters lie to its right.
      letters_left <- marker - 1
      letters_right <- length(residues) - marker - 1

      # Pad each side with spaces up to 7 letters. A side that already has 7
      # or more letters gets no padding, which covers both the "truncated" and
      # the "full" case of the original code in a single path.
      padded <- c(
        rep(" ", times = max(0, 7 - letters_left)),
        residues,
        rep(" ", times = max(0, 7 - letters_right))
      )
      padded <- padded[!padded %in% "x"]

      motifs[row_index, 1] <- paste(padded, sep = "", collapse = "")
      # The accession number comes from the SAME table as the motif. The
      # previous code read FirstSubstrateSet for the second and third sets,
      # attaching the wrong (or NA) accession numbers to their motifs.
      accessions[row_index, 1] <- substrate_set[row_index, 3]
    }

    names(motifs) <- accessions
    list(motifs = motifs, accessions = accessions)
  }

  first_tables <- build_all_motifs(FirstSubstrateSet, "FirstSubstrateSet")
  FTLwtmotifs <- first_tables$motifs
  FTLwtAccessionNumbers <- first_tables$accessions

  second_tables <- build_all_motifs(SecondSubstrateSet, "SecondSubstrateSet")
  D835Ymotifs <- second_tables$motifs
  D835YAccessionNumbers <- second_tables$accessions

  third_tables <- build_all_motifs(ThirdSubstrateSet, "ThirdSubstrateSet")
  ITDmotifs <- third_tables$motifs
  ITDAccessionNumbers <- third_tables$accessions
}
925
926
# Motifs that occur in exactly one substrate set.
# Each set is reduced to the motifs found in neither of the other two sets and
# then de-duplicated. Logical subsetting and !duplicated() are used on purpose:
# both keep the accession-number names attached to the surviving motifs.

# First set: drop anything also present in the second or third set.
seenElsewhere <- FTLwtmotifs %in% D835Ymotifs | FTLwtmotifs %in% ITDmotifs
FTLwtmotifsFINAL <- FTLwtmotifs[!seenElsewhere]
FTLwtmotifsFINAL <- FTLwtmotifsFINAL[!duplicated(FTLwtmotifsFINAL)]

# Third set: drop anything also present in the second or first set.
seenElsewhere <- ITDmotifs %in% D835Ymotifs | ITDmotifs %in% FTLwtmotifs
ITDmotifsFINAL <- ITDmotifs[!seenElsewhere]
ITDmotifsFINAL <- ITDmotifsFINAL[!duplicated(ITDmotifsFINAL)]

# Second set: drop anything also present in the first or third set.
seenElsewhere <- D835Ymotifs %in% FTLwtmotifs | D835Ymotifs %in% ITDmotifs
D835YmotifsFINAL <- D835Ymotifs[!seenElsewhere]
D835YmotifsFINAL <- D835YmotifsFINAL[!duplicated(D835YmotifsFINAL)]
940
941
# Background-frequency rows for the motifs unique to the first substrate set.
# Row 1 of Firstsubbackfreq holds one accession number per column. Every column
# whose accession number occurs as a fixed substring of a unique motif's
# accession label is appended as one row. As before, the result starts with an
# all-NA row of width 35 and duplicate rows are removed at the end.
columnalheader <- rep(NA, 35)
FTLmatchedRows <- list(matrix(data = columnalheader, nrow = 1))

# seq_along()/seq_len() make the loops no-ops on empty input; 1:length() would
# visit indices 1 and 0 and fail inside the if().
for (k in seq_along(FTLwtmotifsFINAL)) {
  motifAccession <- names(FTLwtmotifsFINAL[k])
  for (m in seq_len(ncol(Firstsubbackfreq))) {
    AN <- as.character(Firstsubbackfreq[1, m])
    # isTRUE() treats an NA or zero-length grepl() result as "no match"
    # instead of aborting the if().
    if (isTRUE(grepl(pattern = AN, x = motifAccession, fixed = TRUE))) {
      FTLmatchedRows[[length(FTLmatchedRows) + 1]] <-
        matrix(as.character(Firstsubbackfreq[, m]), nrow = 1)
    }
  }
}

# Bind once instead of growing the matrix with rbind() on every match.
FTLFinalMatrix <- do.call(rbind, FTLmatchedRows)
# drop = FALSE keeps a matrix even when a single row is left; otherwise that
# row collapses to a vector and write.table() emits it as a column.
FTLFinalMatrix <- FTLFinalMatrix[!duplicated(FTLFinalMatrix), , drop = FALSE]
962
# Background-frequency rows for the motifs unique to the third substrate set.
# Row 1 of Thirdsubbackfreq holds one accession number per column. Every column
# whose accession number occurs as a fixed substring of a unique motif's
# accession label is appended as one row. As before, the result starts with an
# all-NA row of width 35 and duplicate rows are removed at the end.
columnalheader <- rep(NA, 35)
ITDmatchedRows <- list(matrix(data = columnalheader, nrow = 1))

# seq_along()/seq_len() make the loops no-ops on empty input; 1:length() would
# visit indices 1 and 0 and fail inside the if().
for (k in seq_along(ITDmotifsFINAL)) {
  motifAccession <- names(ITDmotifsFINAL[k])
  for (m in seq_len(ncol(Thirdsubbackfreq))) {
    AN <- as.character(Thirdsubbackfreq[1, m])
    # isTRUE() treats an NA or zero-length grepl() result as "no match"
    # instead of aborting the if().
    if (isTRUE(grepl(pattern = AN, x = motifAccession, fixed = TRUE))) {
      ITDmatchedRows[[length(ITDmatchedRows) + 1]] <-
        matrix(as.character(Thirdsubbackfreq[, m]), nrow = 1)
    }
  }
}

# Bind once instead of growing the matrix with rbind() on every match.
ITDFinalMatrix <- do.call(rbind, ITDmatchedRows)
# drop = FALSE keeps a matrix even when a single row is left; otherwise that
# row collapses to a vector and write.table() emits it as a column.
ITDFinalMatrix <- ITDFinalMatrix[!duplicated(ITDFinalMatrix), , drop = FALSE]
983
# Background-frequency rows for the motifs unique to the second substrate set.
# Row 1 of Secondsubbackfreq holds one accession number per column. Every
# column whose accession number occurs as a fixed substring of a unique motif's
# accession label is appended as one row. As before, the result starts with an
# all-NA row of width 35 and duplicate rows are removed at the end.
columnalheader <- rep(NA, 35)
D835YmatchedRows <- list(matrix(data = columnalheader, nrow = 1))

# seq_along()/seq_len() make the loops no-ops on empty input; 1:length() would
# visit indices 1 and 0 and fail inside the if().
for (k in seq_along(D835YmotifsFINAL)) {
  motifAccession <- names(D835YmotifsFINAL[k])
  for (m in seq_len(ncol(Secondsubbackfreq))) {
    AN <- as.character(Secondsubbackfreq[1, m])
    # isTRUE() treats an NA or zero-length grepl() result as "no match"
    # instead of aborting the if().
    if (isTRUE(grepl(pattern = AN, x = motifAccession, fixed = TRUE))) {
      D835YmatchedRows[[length(D835YmatchedRows) + 1]] <-
        matrix(as.character(Secondsubbackfreq[, m]), nrow = 1)
    }
  }
}

# Bind once instead of growing the matrix with rbind() on every match.
D835YFinalMatrix <- do.call(rbind, D835YmatchedRows)
# drop = FALSE keeps a matrix even when a single row is left; otherwise that
# row collapses to a vector and write.table() emits it as a column.
D835YFinalMatrix <- D835YFinalMatrix[!duplicated(D835YFinalMatrix), , drop = FALSE]
1003
# Output for the first substrate set.
# All three writes append to their target file as unquoted, comma-separated
# text without row or column names, with NA written as an empty field.

# Two-column table: unique motifs followed by their accession labels.
FTLoutputmatrix <- matrix(
  c(FTLwtmotifsFINAL, names(FTLwtmotifsFINAL)),
  ncol = 2
)
write.table(
  FTLoutputmatrix,
  file = First_unshared_motifs_table,
  append = TRUE, quote = FALSE, sep = ",", na = "",
  row.names = FALSE, col.names = FALSE
)

# Header line: "Accession Numbers" plus rows 1:35 of column 1 of
# Thirdsubbackfreq.
# NOTE(review): the labels are taken from Thirdsubbackfreq for this set too -
# confirm that all three background-frequency files share the same first column.
columnalheader <- matrix(
  c("Accession Numbers", as.character(Thirdsubbackfreq[1:35, 1])),
  nrow = 1
)
write.table(
  columnalheader,
  file = First_unshared_subbackfreq,
  append = TRUE, quote = FALSE, sep = ",", na = "",
  row.names = FALSE, col.names = FALSE
)

# Background-frequency rows collected for the first set's unique motifs.
write.table(
  FTLFinalMatrix,
  file = First_unshared_subbackfreq,
  append = TRUE, quote = FALSE, sep = ",", na = "",
  row.names = FALSE, col.names = FALSE
)
1023
1024 ############################################################################################################
1025
# Output for the second substrate set.
# All three writes append to their target file as unquoted, comma-separated
# text without row or column names, with NA written as an empty field.

# Two-column table: unique motifs followed by their accession labels.
D835Youtputmatrix <- matrix(
  c(D835YmotifsFINAL, names(D835YmotifsFINAL)),
  ncol = 2
)
write.table(
  D835Youtputmatrix,
  file = Second_unshared_motifs_table,
  append = TRUE, quote = FALSE, sep = ",", na = "",
  row.names = FALSE, col.names = FALSE
)

# Header line: "Accession Numbers" plus rows 1:35 of column 1 of
# Thirdsubbackfreq.
# NOTE(review): the labels are taken from Thirdsubbackfreq for this set too -
# confirm that all three background-frequency files share the same first column.
columnalheader <- matrix(
  c("Accession Numbers", as.character(Thirdsubbackfreq[1:35, 1])),
  nrow = 1
)
write.table(
  columnalheader,
  file = Second_unshared_subbackfreq,
  append = TRUE, quote = FALSE, sep = ",", na = "",
  row.names = FALSE, col.names = FALSE
)

# Background-frequency rows collected for the second set's unique motifs.
write.table(
  D835YFinalMatrix,
  file = Second_unshared_subbackfreq,
  append = TRUE, quote = FALSE, sep = ",", na = "",
  row.names = FALSE, col.names = FALSE
)
1044
1045 ############################################################################################################
1046
# Output for the third substrate set.
# All three writes append to their target file as unquoted, comma-separated
# text without row or column names, with NA written as an empty field.

# Two-column table: unique motifs followed by their accession labels.
ITDoutputmatrix <- matrix(
  c(ITDmotifsFINAL, names(ITDmotifsFINAL)),
  ncol = 2
)
write.table(
  ITDoutputmatrix,
  file = Third_unshared_motifs_table,
  append = TRUE, quote = FALSE, sep = ",", na = "",
  row.names = FALSE, col.names = FALSE
)

# Header line: "Accession Numbers" plus rows 1:35 of column 1 of
# Thirdsubbackfreq.
columnalheader <- matrix(
  c("Accession Numbers", as.character(Thirdsubbackfreq[1:35, 1])),
  nrow = 1
)
write.table(
  columnalheader,
  file = Third_unshared_subbackfreq,
  append = TRUE, quote = FALSE, sep = ",", na = "",
  row.names = FALSE, col.names = FALSE
)

# Background-frequency rows collected for the third set's unique motifs.
write.table(
  ITDFinalMatrix,
  file = Third_unshared_subbackfreq,
  append = TRUE, quote = FALSE, sep = ",", na = "",
  row.names = FALSE, col.names = FALSE
)
1065
1066 }
1067
1068 # if (Are_You_Looking_For_Commonality=="NO"){
1069 #
1070 #
1071 # FTLwtmotifsFULLMATRIX<-cbind(FTLwtmotifs,FTLwtAccessionNumbers)
1072 # ITDmotifsFULLMATRIX<-cbind(ITDmotifs,ITDAccessionNumbers)
1073 # D835YmotifsFULLMATRIX<-cbind(D835Ymotifs,D835YAccessionNumbers)
1074 # # ?duplicated
1075 #
1076 # FTLwtmotifsFINAL<-FTLwtmotifs[!FTLwtmotifs %in% D835Ymotifs]
1077 # FTLwtmotifsFINAL<-FTLwtmotifsFINAL[!FTLwtmotifsFINAL %in% ITDmotifs]
1078 # FTLwtmotifsFINAL<-matrix(data=FTLwtmotifsFINAL, ncol = 1)
1079 #
1080 # ITDmotifsFINAL<-ITDmotifs[!ITDmotifs %in% D835Ymotifs]
1081 # ITDmotifsFINAL<-ITDmotifsFINAL[!ITDmotifsFINAL %in% FTLwtmotifs]
1082 # ITDmotifsFINAL<-matrix(data=ITDmotifsFINAL, ncol = 1)
1083 #
1084 # D835YmotifsFINAL<-D835Ymotifs[!D835Ymotifs %in% FTLwtmotifs]
1085 # D835YmotifsFINAL<-D835YmotifsFINAL[!D835YmotifsFINAL %in% ITDmotifs]
1086 # D835YmotifsFINAL<-matrix(data=D835YmotifsFINAL, ncol = 1)
1087 #
1088 # FTLnondupeAccessionNumbers<-c()
1089 #
1090 # for (z in 1:nrow(FTLwtmotifsFINAL)) {
1091 # for (w in 1:nrow(FTLwtmotifsFULLMATRIX)) {
1092 # if (is.na(FTLwtmotifsFULLMATRIX[w,1])!=TRUE)
1093 # if (FTLwtmotifsFINAL[z]==FTLwtmotifsFULLMATRIX[w,1]){
1094 # FTLnondupeAccessionNumbers<-c(FTLnondupeAccessionNumbers,FTLwtmotifsFULLMATRIX[w,2])
1095 # }
1096 # }
1097 # }
1098 #
1099 #
1100 # #find accession numbers here, put a matrix of those things, amino acid %, but only after I've unduped them
1101 # FTLnondupeAccessionNumbers<-FTLnondupeAccessionNumbers[!duplicated(FTLnondupeAccessionNumbers)]
1102 #
1103 # columnalheader<-c(rep(NA,35))
1104 # FTLFinalMatrix<-matrix(data =columnalheader,nrow = 1)
1105 #
1106 # for (k in 1:length(FTLnondupeAccessionNumbers)) {
1107 # #I don't remember why, but I felt it necessary to destroy the accession number multiple times to ensure it is
1108 # #destroyed immediately after use
1109 # for (m in 1:ncol(Firstsubbackfreq)) {
1110 # AN <- as.character(Firstsubbackfreq[1, m])
1111 # if (grepl(pattern = AN,
1112 # x = FTLnondupeAccessionNumbers[k],
1113 # fixed = TRUE) == TRUE) {
1114 # outputmatrix <- as.character(Firstsubbackfreq[, m])
1115 # outputmatrix <- matrix(outputmatrix, nrow = 1)
1116 # #with that accession number, find a match in the subbackfreq file and save it here
1117 # FTLFinalMatrix<-rbind(FTLFinalMatrix,outputmatrix)
1118 # }
1119 # }
1120 # }
1121 # ITDnondupeAccessionNumbers<-c()
1122 #
1123 # for (z in 1:nrow(ITDmotifsFINAL)) {
1124 # for (w in 1:nrow(ITDmotifsFULLMATRIX)) {
1125 # if (is.na(ITDmotifsFULLMATRIX[w,1])!=TRUE)
1126 # if (ITDmotifsFINAL[z]==ITDmotifsFULLMATRIX[w,1]){
1127 # ITDnondupeAccessionNumbers<-c(ITDnondupeAccessionNumbers,ITDmotifsFULLMATRIX[w,2])
1128 # }
1129 # }
1130 # }
1131 #
1132 #
1133 # #find accession numbers here, put a matrix of those things, amino acid %, but only after I've unduped them
1134 # ITDnondupeAccessionNumbers<-ITDnondupeAccessionNumbers[!duplicated(ITDnondupeAccessionNumbers)]
1135 #
1136 # columnalheader<-c(rep(NA,35))
1137 # ITDFinalMatrix<-matrix(data =columnalheader,nrow = 1)
1138 #
1139 # for (k in 1:length(ITDnondupeAccessionNumbers)) {
1140 # #I don't remember why, but I felt it necessary to destroy the accession number multiple times to ensure it is
1141 # #destroyed immediately after use
1142 # for (m in 1:ncol(Thirdsubbackfreq)) {
1143 # AN <- as.character(Thirdsubbackfreq[1, m])
1144 # if (grepl(pattern = AN,
1145 # x = ITDnondupeAccessionNumbers[k],
1146 # fixed = TRUE) == TRUE) {
1147 # outputmatrix <- as.character(Thirdsubbackfreq[, m])
1148 # outputmatrix <- matrix(outputmatrix, nrow = 1)
1149 # #with that accession number, find a match in the subbackfreq file and save it here
1150 # ITDFinalMatrix<-rbind(ITDFinalMatrix,outputmatrix)
1151 # }
1152 # }
1153 # }
1154 #
1155 #
1156 # D835YnondupeAccessionNumbers<-c()
1157 #
1158 # for (z in 1:nrow(D835YmotifsFINAL)) {
1159 # for (w in 1:nrow(D835YmotifsFULLMATRIX)) {
1160 # if (is.na(D835YmotifsFULLMATRIX[w,1])!=TRUE)
1161 # if (D835YmotifsFINAL[z]==D835YmotifsFULLMATRIX[w,1]){
1162 # D835YnondupeAccessionNumbers<-c(D835YnondupeAccessionNumbers,D835YmotifsFULLMATRIX[w,2])
1163 # }
1164 # }
1165 # }
1166 #
1167 #
1168 # #find accession numbers here, put a matrix of those things, amino acid %, but only after I've unduped them
1169 # D835YnondupeAccessionNumbers<-D835YnondupeAccessionNumbers[!duplicated(D835YnondupeAccessionNumbers)]
1170 #
1171 # columnalheader<-c(rep(NA,35))
1172 # D835YFinalMatrix<-matrix(data =columnalheader,nrow = 1)
1173 #
1174 # for (k in 1:length(D835YnondupeAccessionNumbers)) {
1175 # #I don't remember why, but I felt it necessary to destroy the accession number multiple times to ensure it is
1176 # #destroyed immediately after use
1177 # for (m in 1:ncol(Secondsubbackfreq)) {
1178 # AN <- as.character(Secondsubbackfreq[1, m])
1179 # if (grepl(pattern = AN,
1180 # x = D835YnondupeAccessionNumbers[k],
1181 # fixed = TRUE) == TRUE) {
1182 # outputmatrix <- as.character(Secondsubbackfreq[, m])
1183 # outputmatrix <- matrix(outputmatrix, nrow = 1)
1184 # #with that accession number, find a match in the subbackfreq file and save it here
1185 # D835YFinalMatrix<-rbind(D835YFinalMatrix,outputmatrix)
1186 # }
1187 # }
1188 # }
1189 #
1190 #
1191 #
1192 # # FinalFTLmotifs<-FTLwtmotifsFINAL[!duplicated(FTLwtmotifsFINAL)]
1193 # # FinalFTLAccessionNumbers<-FTLnondupeAccessionNumbers[!duplicated(FTLnondupeAccessionNumbers)]
1194 # # necessaryNAs<-rep(NA,times=(length(FinalFTLmotifs)-length(FinalFTLAccessionNumbers)))
1195 # # FinalFTLAccessionNumbers<-c(FinalFTLAccessionNumbers,necessaryNAs)
1196 # # TRUEFTLoutputmatrix<-cbind(FinalFTLmotifs,FinalFTLAccessionNumbers)
1197 # # TRUEFTLoutputmatrix
1198 #
1199 # write.table(x=FTLwtmotifsFINAL,
1200 # file=First_unshared_motifs_table,
1201 # quote=FALSE, sep=",",
1202 # row.names=FALSE,col.names = FALSE, na="", append=TRUE)
1203 #
1204 # columnalheader<-c("Accession Numbers",as.character(Thirdsubbackfreq[1:35,1]))
1205 # columnalheader<-matrix(columnalheader,nrow = 1)
1206 # write.table(x=columnalheader,
1207 # file=First_unshared_subbackfreq,
1208 # quote=FALSE, sep=",",
1209 # row.names=FALSE,col.names = FALSE, na="", append=TRUE)
1210 #
1211 # write.table(x=FTLFinalMatrix,
1212 # file=First_unshared_subbackfreq,
1213 # quote=FALSE, sep=",",
1214 # row.names=FALSE,col.names = FALSE, na="", append=TRUE)
1215 #
1216 #
1217 #
1218 #
1219 #
1220 #
1221 #
1222 #
1223 #
1224 #
1225 #
1226 #
1227 # write.table(x=D835YmotifsFINAL,
1228 # file=Second_unshared_motifs_table,
1229 # quote=FALSE, sep=",",
1230 # row.names=FALSE,col.names = FALSE, na="", append=TRUE)
1231 #
1232 # columnalheader<-c("Accession Numbers",as.character(Thirdsubbackfreq[1:35,1]))
1233 # columnalheader<-matrix(columnalheader,nrow = 1)
1234 # write.table(x=columnalheader,
1235 # file=Second_unshared_subbackfreq,
1236 # quote=FALSE, sep=",",
1237 # row.names=FALSE,col.names = FALSE, na="", append=TRUE)
1238 #
1239 # write.table(x=D835YFinalMatrix,
1240 # file=Second_unshared_subbackfreq,
1241 # quote=FALSE, sep=",",
1242 # row.names=FALSE,col.names = FALSE, na="", append=TRUE)
1243 #
1244 #
1245 #
1246 #
1247 #
1248 #
1249 #
1250 #
1251 #
1252 #
1253 #
1254 #
1255 #
1256 #
1257 #
1258 #
1259 # write.table(x=ITDmotifsFINAL,
1260 # file=Third_unshared_motifs_table,
1261 # quote=FALSE, sep=",",
1262 # row.names=FALSE,col.names = FALSE, na="", append=TRUE)
1263 #
1264 # columnalheader<-c("Accession Numbers",as.character(Thirdsubbackfreq[1:35,1]))
1265 # columnalheader<-matrix(columnalheader,nrow = 1)
1266 # write.table(x=columnalheader,
1267 # file=Third_unshared_subbackfreq,
1268 # quote=FALSE, sep=",",
1269 # row.names=FALSE,col.names = FALSE, na="", append=TRUE)
1270 #
1271 # write.table(x=ITDFinalMatrix,
1272 # file=Third_unshared_subbackfreq,
1273 # quote=FALSE, sep=",",
1274 # row.names=FALSE,col.names = FALSE, na="", append=TRUE)
1275 #
1276 #
1277 #
1278 #
1279 #
1280 #
1281 # }