comparison C and D finder/CandD.R @ 0:3e5fdf933646 draft

Uploaded
author jfb
date Fri, 25 May 2018 10:56:10 -0400
parents
children b791e2bee65c
comparison
equal deleted inserted replaced
-1:000000000000 0:3e5fdf933646
# Load the three input sets. Each set is a substrate table (read with its
# header row) plus a background-frequency table (read without a header row,
# so its first row is ordinary data). Strings are kept as character vectors.
FirstSubstrateSet <- read.csv(file = "input1.csv", stringsAsFactors = FALSE)
Firstsubbackfreq <- read.csv(
  file = "input2.csv",
  header = FALSE,
  stringsAsFactors = FALSE
)

SecondSubstrateSet <- read.csv(file = "input3.csv", stringsAsFactors = FALSE)
Secondsubbackfreq <- read.csv(
  file = "input4.csv",
  header = FALSE,
  stringsAsFactors = FALSE
)

ThirdSubstrateSet <- read.csv(file = "input5.csv", stringsAsFactors = FALSE)
Thirdsubbackfreq <- read.csv(
  file = "input6.csv",
  header = FALSE,
  stringsAsFactors = FALSE
)
9
10
# Read the three positional command-line options and echo them to the log.
args <- commandArgs(trailingOnly = TRUE)

print(args[1])
print(args[2])
print(args[3])

# All three options are compared against "YES"/"NO" further down; a missing
# one would be NA and make those `if` tests fail with an unhelpful message,
# so stop here with an explicit explanation instead.
if (length(args) < 3) {
  stop(
    "Expected 3 arguments: <full motifs only> <truncated motifs only> ",
    "<look for commonality>, each given as YES or NO (all caps).",
    call. = FALSE
  )
}

# If you want ONLY FULL MOTIFS, put "YES" here, please use all caps
FullMotifsOnly_questionmark <- args[1]
# If you want ONLY TRUNCATED MOTIFS, put "YES" here, please use all caps
TruncatedMotifsOnly_questionmark <- args[2]
# If you want to find the overlap, put a "YES" here (all caps); if you want to
# find the non-overlap, put "NO" (all caps)
Are_You_Looking_For_Commonality <- args[3]
24
25
# Names of the output files.

# Written when looking for commonality: the shared motifs and their
# background-frequency table.
Shared_motifs_table      <- "sharedmotifs.csv"
Shared_subbackfreq_table <- "sharedSBF.csv"

# Earlier names, kept for reference:
# Shared_motifs_table<-"Shared motifs 7-27-17.csv"
# Shared_subbackfreq_table<-"SubstrateBackgrounFrequency-for-shared-motifs 4 7-27-17.csv"

# One motif table and one background-frequency table per input set for the
# unshared results.
First_unshared_motifs_table  <- "R1 substrates.csv"
First_unshared_subbackfreq   <- "R1 SBF.csv"

Second_unshared_motifs_table <- "R2 subs.csv"
Second_unshared_subbackfreq  <- "R2 SBf.csv"

Third_unshared_motifs_table  <- "R3 subs.csv"
Third_unshared_subbackfreq   <- "R3 SBF.csv"
41
42 #final note, this code is going to be unworkable if you want to make a Venn diagram of more than 3 circles. I think I'll poke around
43 #other languages to see if any of them can do it.
44 ####################################################################################################################################
45
46
47
48
49
# Build one "xY" filler vector per substrate table, as long as that table has
# rows ...
FirstxY  <- rep("xY", times = nrow(FirstSubstrateSet))
SecondxY <- rep("xY", times = nrow(SecondSubstrateSet))
ThirdxY  <- rep("xY", times = nrow(ThirdSubstrateSet))

# ... and overwrite column 11 of each table with it. The lowercase "x" is the
# marker the motif-building code later searches for with match("x", ...).
FirstSubstrateSet[, 11]  <- FirstxY
SecondSubstrateSet[, 11] <- SecondxY
ThirdSubstrateSet[, 11]  <- ThirdxY
58
59
60
61
62
63
64
65
66
67
68
69 ####################################################################################################################################
70 ####################################################################################################################################
71 # better version of this code written in C: what happens when two kinases share a motif, but they found that motif in two
72 # separate proteins thus two separate accession numbers?
73 # It should actually output the shared motif and BOTH accession numbers. Right now it does not, it only maps out the second
74 # accession number. So that needs to be fixed BUT you need to keep the commonality between a motif and its accession number
75 ####################################################################################################################################
76 ####################################################################################################################################
77 ####################################################################################################################################
78 ####################################################################################################################################
79
80 #Create the motif sets, deciding whether or not you're looking for truncated or full here
81 #full only
# Commonality branch: build the motif tables for the three substrate sets,
# find the motifs shared by the second and third sets that also occur in the
# first set, and write
#   * the matching rows of the first substrate table -> Shared_motifs_table
#   * the matching background-frequency columns      -> Shared_subbackfreq_table
if (Are_You_Looking_For_Commonality == "YES") {

  # Build the motif and accession-number columns for one substrate table.
  #
  # substrate_set : data frame; columns `motif_cols` hold the motif cells and
  #                 column `accession_col` holds the accession number.
  # mode          : "full"      keep only motifs with at least `flank_width`
  #                             letters on both sides of the "xY" centre,
  #                 "truncated" keep only motifs that are short on a side,
  #                 "all"       keep both kinds.
  # Returns a list with two nrow(substrate_set) x 1 matrices, `motifs` and
  # `accessions`; rows that were not kept stay NA.
  extract_motifs <- function(substrate_set,
                             mode = c("all", "full", "truncated"),
                             motif_cols = 4:18,
                             accession_col = 3,
                             flank_width = 7) {
    mode <- match.arg(mode)
    n_rows <- nrow(substrate_set)
    motifs <- matrix(NA, nrow = n_rows, ncol = 1)
    accessions <- matrix(NA, nrow = n_rows, ncol = 1)

    # seq_len() so that an empty table yields zero iterations (1:0 would not).
    for (i in seq_len(n_rows)) {
      cells <- substrate_set[i, motif_cols]
      cells <- cells[cells != "XXXXX"]  # "XXXXX" cells are placeholders
      residues <- unlist(strsplit(paste(cells, collapse = ""), split = ""))

      # The lowercase "x" of the "xY" marker locates the centre of the motif.
      marker <- match("x", residues)
      if (is.na(marker)) {
        next
      }
      n_left <- marker - 1
      # "x" is followed by "Y", so one more position is not part of the flank.
      n_right <- length(residues) - marker - 1

      is_full <- n_left >= flank_width && n_right >= flank_width
      keep <- switch(mode,
        full = is_full,
        truncated = !is_full,
        all = TRUE
      )
      if (!keep) {
        next
      }

      # Pad short flanks with blanks so every motif is aligned on its centre.
      # The pad length is never negative, so an over-long flank is left as is
      # instead of making rep() fail.
      padded <- c(
        rep(" ", times = max(0, flank_width - n_left)),
        residues,
        rep(" ", times = max(0, flank_width - n_right))
      )
      padded <- padded[!padded %in% "x"]
      motifs[i, 1] <- paste(padded, collapse = "")
      # Always take the accession number from the table being processed.
      accessions[i, 1] <- substrate_set[i, accession_col]
    }

    list(motifs = motifs, accessions = accessions)
  }

  # "truncated only" wins when both switches are "YES", because the original
  # script ran the truncated pass after the full pass and overwrote it.
  motif_mode <- if (TruncatedMotifsOnly_questionmark == "YES") {
    "truncated"
  } else if (FullMotifsOnly_questionmark == "YES") {
    "full"
  } else {
    "all"
  }

  first_result <- extract_motifs(FirstSubstrateSet, motif_mode)
  FTLwtmotifs <- first_result$motifs
  FTLwtAccessionNumbers <- first_result$accessions

  second_result <- extract_motifs(SecondSubstrateSet, motif_mode)
  D835Ymotifs <- second_result$motifs
  D835YAccessionNumbers <- second_result$accessions

  third_result <- extract_motifs(ThirdSubstrateSet, motif_mode)
  ITDmotifs <- third_result$motifs
  ITDAccessionNumbers <- third_result$accessions

  # now look for commonality

  # Motifs present in both the third and the second set, in order of first
  # appearance in the third set.
  second_motifs <- D835Ymotifs[!is.na(D835Ymotifs[, 1]), 1]
  third_motifs <- ITDmotifs[!is.na(ITDmotifs[, 1]), 1]
  FirstOverlapmotifs <- unique(third_motifs[third_motifs %in% second_motifs])

  # Rows of the first set whose motif is one of the shared motifs.
  first_motifs <- FTLwtmotifs[, 1]
  matched_rows <- unlist(lapply(FirstOverlapmotifs, function(shared_motif) {
    which(first_motifs == shared_motif)  # which() ignores the NA rows
  }))

  # Substrate information (columns 1:20) of every matching row. Each row is
  # converted on its own, then all rows are bound once.
  if (length(matched_rows) > 0) {
    substrate_rows <- do.call(
      rbind,
      lapply(matched_rows, function(k) as.matrix(FirstSubstrateSet[k, 1:20]))
    )
    # drop = FALSE keeps a single remaining row as a 1-row matrix, so it is
    # written as a row and not as a 20-line column.
    TrueFinalMotifsMatrix <-
      substrate_rows[!duplicated(substrate_rows), , drop = FALSE]
  } else {
    # Nothing shared: keep the column names so the header is still written.
    TrueFinalMotifsMatrix <- as.matrix(FirstSubstrateSet[0, 1:20])
  }

  # Background-frequency columns whose first-row accession number is contained
  # in the accession number of a matching substrate row.
  sbf_accessions <- vapply(
    seq_len(ncol(Firstsubbackfreq)),
    function(m) as.character(Firstsubbackfreq[1, m]),
    character(1)
  )
  sbf_columns <- unlist(lapply(matched_rows, function(k) {
    accession <- as.character(FirstSubstrateSet[k, 3])
    is_hit <- vapply(
      sbf_accessions,
      function(an) isTRUE(grepl(pattern = an, x = accession, fixed = TRUE)),
      logical(1)
    )
    which(is_hit)
  }), use.names = FALSE)
  sbf_columns <- unique(sbf_columns)

  if (length(sbf_columns) > 0) {
    sbf_rows <- do.call(
      rbind,
      lapply(sbf_columns, function(m) {
        matrix(as.character(Firstsubbackfreq[, m]), nrow = 1)
      })
    )
    sbf_rows <- sbf_rows[!duplicated(sbf_rows), , drop = FALSE]
  } else {
    sbf_rows <- matrix(character(0), nrow = 0, ncol = 36)
  }

  write.table(
    x = TrueFinalMotifsMatrix,
    file = Shared_motifs_table,
    quote = FALSE,
    sep = ",",
    row.names = FALSE,
    col.names = TRUE,
    na = "",
    append = FALSE
  )

  # The row labels come from the first column of the third
  # background-frequency table, as in the original script.
  columnalheader <- c(as.character(Thirdsubbackfreq[1:36, 1]))
  columnalheader <- matrix(columnalheader, nrow = 1)

  # One column of labels followed by one column per shared substrate.
  TrueMatrix <- t(rbind(columnalheader, sbf_rows))

  # NOTE(review): append = TRUE is kept from the original, so re-running
  # without deleting the file adds a second table to it; confirm whether an
  # overwrite is intended.
  write.table(
    x = TrueMatrix,
    file = Shared_subbackfreq_table,
    quote = FALSE,
    sep = ",",
    row.names = FALSE,
    col.names = FALSE,
    na = "",
    append = TRUE
  )
}
557
558 if (Are_You_Looking_For_Commonality=="NO"){
559 if (FullMotifsOnly_questionmark=="YES"){
560 FTLwtmotifs=rep(NA,times=nrow(FirstSubstrateSet))
561 FTLwtAccessionNumbers=rep(NA,times=nrow(FirstSubstrateSet))
562 leftspaces<-c()
563 rightspaces<-c()
564 for (i in 1:nrow(FirstSubstrateSet)){
565 FTLwtletters<-FirstSubstrateSet[i,4:18]
566 FTLwtletters<-FTLwtletters[FTLwtletters !="XXXXX"]
567 FTLwtletters<-paste(FTLwtletters, sep="", collapse="")
568
569
570 YYYmotif <- unlist(strsplit(FTLwtletters, split = ""))
571 YYYposition <- match(x = "x", table = YYYmotif)
572 #position itself tells me how much is to the left of that X by what it's number is. x at position 4 tells me that there are
573 #just 3 letters to the left of x
574
575 YYYLettersToTheLeft <- YYYposition - 1
576 #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is
577 #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1
578 YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1
579 #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the
580 #variable the user puts in is
581
582 if (YYYLettersToTheLeft > 6 && YYYLettersToTheRight > 6) {
583 motif<-YYYmotif
584 #save that motif, which is the Y and +/- 4 amino acids, including truncation
585 motif<-motif[!motif %in% "x"]
586 motif<-paste(motif, sep="", collapse="")
587 FTLwtletters<-motif
588 FTLwtmotifs[i]<-FTLwtletters
589 FTLwtAccessionNumbers[i]<-FirstSubstrateSet[i,3]
590 }
591
592 }
593 # FTLwtmotifs <- FTLwtmotifs[!is.na(FTLwtmotifs)]
594 # FTLwtmotifs<-matrix(FTLwtmotifs,ncol = 1)
595 #
596
597 D835Ymotifs=rep(NA,times=nrow(FirstSubstrateSet))
598 D835YAccessionNumbers=rep(NA,times=nrow(FirstSubstrateSet))
599
600 for (i in 1:nrow(SecondSubstrateSet)){
601 D835letters<-SecondSubstrateSet[i,4:18]
602 D835letters<-D835letters[D835letters !="XXXXX"]
603 D835letters<-paste(D835letters, sep="", collapse="")
604
605
606 YYYmotif <- unlist(strsplit(D835letters, split = ""))
607 YYYposition <- match(x = "x", table = YYYmotif)
608 #position itself tells me how much is to the left of that X by what it's number is. x at position 4 tells me that there are
609 #just 3 letters to the left of x
610
611 YYYLettersToTheLeft <- YYYposition - 1
612 #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is
613 #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1
614 YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1
615 #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the
616 #variable the user puts in is
617
618 if (YYYLettersToTheLeft > 6 && YYYLettersToTheRight > 6) {
619 motif<-YYYmotif
620 #add blank spaces if the motif has less than 4 letters to the left/right
621 motif<-c(leftspaces,YYYmotif,rightspaces)
622 #save that motif, which is the Y and +/- 4 amino acids, including truncation
623 motif<-motif[!motif %in% "x"]
624 motif<-paste(motif, sep="", collapse="")
625 D835letters<-motif
626 D835Ymotifs[i]<-D835letters
627 D835YAccessionNumbers[i]<-SecondSubstrateSet[i,3]
628 }
629 }
630
631 ITDmotifs=rep(NA,times=nrow(FirstSubstrateSet))
632 ITDAccessionNumbers=rep(NA,times=nrow(FirstSubstrateSet))
633
634 for (i in 1:nrow(ThirdSubstrateSet)){
635 ITDletters<-ThirdSubstrateSet[i,4:18]
636 ITDletters<-ITDletters[ITDletters !="XXXXX"]
637 ITDletters<-paste(ITDletters, sep="", collapse="")
638 YYYmotif <- unlist(strsplit(ITDletters, split = ""))
639 YYYposition <- match(x = "x", table = YYYmotif)
640 #position itself tells me how much is to the left of that X by what it's number is. x at position 4 tells me that there are
641 #just 3 letters to the left of x
642
643 YYYLettersToTheLeft <- YYYposition - 1
644 #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is
645 #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1
646 YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1
647 #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the
648 #variable the user puts in is
649
650 if (YYYLettersToTheLeft > 6 && YYYLettersToTheRight > 6) {
651 motif<-YYYmotif
652 #add blank spaces if the motif has less than 4 letters to the left/right
653 motif<-c(leftspaces,YYYmotif,rightspaces)
654 #save that motif, which is the Y and +/- 4 amino acids, including truncation
655 motif<-motif[!motif %in% "x"]
656 motif<-paste(motif, sep="", collapse="")
657 ITDletters<-motif
658 ITDmotifs[i]<-ITDletters
659 ITDAccessionNumbers[i]<-ThirdSubstrateSet[i,3]
660
661 }
662 }
663 names(ITDmotifs)<-ITDAccessionNumbers
664 names(D835Ymotifs)<-D835YAccessionNumbers
665 names(FTLwtmotifs)<-FTLwtAccessionNumbers
666 }
667
668
669 ##############################################3
670 #Truncated only
671 if (TruncatedMotifsOnly_questionmark=="YES"){
672 FTLwtmotifs=matrix(,nrow = nrow(FirstSubstrateSet),ncol=1)
673 FTLwtAccessionNumbers=matrix(,nrow = nrow(FirstSubstrateSet),ncol=1)
674
675 for (i in 1:nrow(FirstSubstrateSet)){
676 FTLwtletters<-FirstSubstrateSet[i,4:18]
677 FTLwtletters<-FTLwtletters[FTLwtletters !="XXXXX"]
678 FTLwtletters<-paste(FTLwtletters, sep="", collapse="")
679
680
681 YYYmotif <- unlist(strsplit(FTLwtletters, split = ""))
682 YYYposition <- match(x = "x", table = YYYmotif)
683 #position itself tells me how much is to the left of that X by what it's number is. x at position 4 tells me that there are
684 #just 3 letters to the left of x
685
686 YYYLettersToTheLeft <- YYYposition - 1
687 #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is
688 #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1
689 YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1
690 #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the
691 #variable the user puts in is
692
693 if (YYYLettersToTheLeft < 7 | YYYLettersToTheRight < 7) {
694 leftspaces<-rep(" ",times=(7-YYYLettersToTheLeft))
695 rightspaces<-rep(" ",times=7-(YYYLettersToTheRight))
696 #add blank spaces if the motif has less than 4 letters to the left/right
697 motif<-c(leftspaces,YYYmotif,rightspaces)
698 #save that motif, which is the Y and +/- 4 amino acids, including truncation
699 motif<-motif[!motif %in% "x"]
700 motif<-paste(motif, sep="", collapse="")
701 FTLwtletters<-motif
702 FTLwtmotifs[i,1]<-FTLwtletters
703 FTLwtAccessionNumbers[i,1]<-FirstSubstrateSet[i,3]
704 }
705
706 }
707
# Build the truncated motifs of the second substrate set (D835Y).
#
# For every row, columns 4:18 are joined into one string (cells equal to
# "XXXXX" are dropped) and split into single characters. A lowercase "x" in
# that string is the centre marker: the letters before it count as "left",
# the letters after the character that follows it count as "right".
# Only rows with fewer than 7 letters on at least one side are handled here:
# they are padded with blanks to 7 per side, every "x" is removed, and the
# result is stored next to the accession number taken from column 3.
# Rows with 7 letters on both sides are left as NA.
D835Ymotifs <- matrix(NA, nrow = nrow(SecondSubstrateSet), ncol = 1)
D835YAccessionNumbers <- matrix(NA, nrow = nrow(SecondSubstrateSet), ncol = 1)

# seq_len() skips the loop for an empty table; 1:nrow() would visit rows 1
# and 0 and fail on data that does not exist.
for (i in seq_len(nrow(SecondSubstrateSet))) {
  D835letters <- SecondSubstrateSet[i, 4:18]
  D835letters <- D835letters[D835letters != "XXXXX"]
  D835letters <- paste(D835letters, collapse = "")

  YYYmotif <- unlist(strsplit(D835letters, split = ""))
  YYYposition <- match(x = "x", table = YYYmotif)
  if (is.na(YYYposition)) {
    # Without the marker the side lengths are NA and the comparison below
    # could not be evaluated; fail with a message that names the row.
    stop(
      "Row ", i, " of SecondSubstrateSet has no \"x\" marker in columns 4:18",
      call. = FALSE
    )
  }

  # "x" at position p means p - 1 letters sit to its left.
  YYYLettersToTheLeft <- YYYposition - 1
  # The character right after "x" is the centre residue, so it is excluded.
  YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1

  if (YYYLettersToTheLeft < 7 || YYYLettersToTheRight < 7) {
    leftspaces <- rep(" ", times = 7 - YYYLettersToTheLeft)
    rightspaces <- rep(" ", times = 7 - YYYLettersToTheRight)
    motif <- c(leftspaces, YYYmotif, rightspaces)
    motif <- motif[!motif %in% "x"]
    D835letters <- paste(motif, collapse = "")
    D835YAccessionNumbers[i, 1] <- SecondSubstrateSet[i, 3]
    D835Ymotifs[i, 1] <- D835letters
  }
}
742
# Build the truncated motifs of the third substrate set (ITD), then attach
# the accession numbers of all three sets as names of their motif matrices.
#
# For every row, columns 4:18 are joined into one string (cells equal to
# "XXXXX" are dropped) and split into single characters. A lowercase "x" in
# that string is the centre marker: the letters before it count as "left",
# the letters after the character that follows it count as "right".
# Only rows with fewer than 7 letters on at least one side are handled here:
# they are padded with blanks to 7 per side, every "x" is removed, and the
# result is stored next to the accession number taken from column 3.
# Rows with 7 letters on both sides are left as NA.
ITDmotifs <- matrix(NA, nrow = nrow(ThirdSubstrateSet), ncol = 1)
ITDAccessionNumbers <- matrix(NA, nrow = nrow(ThirdSubstrateSet), ncol = 1)

# seq_len() skips the loop for an empty table; 1:nrow() would visit rows 1
# and 0 and fail on data that does not exist.
for (i in seq_len(nrow(ThirdSubstrateSet))) {
  ITDletters <- ThirdSubstrateSet[i, 4:18]
  ITDletters <- ITDletters[ITDletters != "XXXXX"]
  ITDletters <- paste(ITDletters, collapse = "")

  YYYmotif <- unlist(strsplit(ITDletters, split = ""))
  YYYposition <- match(x = "x", table = YYYmotif)
  if (is.na(YYYposition)) {
    # Without the marker the side lengths are NA and the comparison below
    # could not be evaluated; fail with a message that names the row.
    stop(
      "Row ", i, " of ThirdSubstrateSet has no \"x\" marker in columns 4:18",
      call. = FALSE
    )
  }

  # "x" at position p means p - 1 letters sit to its left.
  YYYLettersToTheLeft <- YYYposition - 1
  # The character right after "x" is the centre residue, so it is excluded.
  YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1

  if (YYYLettersToTheLeft < 7 || YYYLettersToTheRight < 7) {
    leftspaces <- rep(" ", times = 7 - YYYLettersToTheLeft)
    rightspaces <- rep(" ", times = 7 - YYYLettersToTheRight)
    motif <- c(leftspaces, YYYmotif, rightspaces)
    motif <- motif[!motif %in% "x"]
    ITDletters <- paste(motif, collapse = "")
    ITDAccessionNumbers[i, 1] <- ThirdSubstrateSet[i, 3]
    ITDmotifs[i, 1] <- ITDletters
  }
}

# The names travel with the motifs through the later vector subsetting and
# are what the background-frequency lookup matches against.
names(FTLwtmotifs) <- FTLwtAccessionNumbers
names(D835Ymotifs) <- D835YAccessionNumbers
names(ITDmotifs) <- ITDAccessionNumbers
778 }
779
780 ###############################################
781 #ALL motifs, full and truncated
782
# ALL motifs (full and truncated): runs when neither "full only" nor
# "truncated only" was requested.
#
# For each of the three substrate tables this builds a one-column matrix of
# motifs and a matching one-column matrix of accession numbers, then attaches
# the accession numbers as names of the motif matrix.
if (FullMotifsOnly_questionmark != "YES" &&
    TruncatedMotifsOnly_questionmark != "YES") {
  # Turn every row of one substrate table into a motif string.
  #
  # substrate_set: data frame with the accession number in column 3 and the
  #                motif cells in columns 4:18.
  # set_label:     name of the table, used only in error messages.
  # Returns a list with `motifs` and `accessions`, both nrow x 1 matrices.
  build_padded_motifs <- function(substrate_set, set_label) {
    n_rows <- nrow(substrate_set)
    motifs <- matrix(NA, nrow = n_rows, ncol = 1)
    accessions <- matrix(NA, nrow = n_rows, ncol = 1)

    # seq_len() skips the loop for an empty table; 1:nrow() would visit rows
    # 1 and 0 and fail on data that does not exist.
    for (i in seq_len(n_rows)) {
      cells <- substrate_set[i, 4:18]
      cells <- cells[cells != "XXXXX"]
      motif_chars <- unlist(strsplit(paste(cells, collapse = ""), split = ""))

      marker <- match("x", motif_chars)
      if (is.na(marker)) {
        # Without the marker the side lengths are NA and the comparison
        # below could not be evaluated; fail with a message naming the row.
        stop(
          "Row ", i, " of ", set_label,
          " has no \"x\" marker in columns 4:18",
          call. = FALSE
        )
      }

      # "x" at position p means p - 1 letters sit to its left; the character
      # right after "x" is the centre residue and is not counted on the right.
      n_left <- marker - 1
      n_right <- length(motif_chars) - marker - 1

      if (n_left < 7 || n_right < 7) {
        # Truncated motif: pad both sides with blanks up to 7 letters.
        left_pad <- rep(" ", times = 7 - n_left)
        right_pad <- rep(" ", times = 7 - n_right)
      } else {
        # Full motif: nothing to pad.
        left_pad <- character(0)
        right_pad <- character(0)
      }

      padded <- c(left_pad, motif_chars, right_pad)
      padded <- padded[!padded %in% "x"]
      motifs[i, 1] <- paste(padded, collapse = "")
      # The accession number comes from the SAME table as the motif. The
      # previous code read FirstSubstrateSet here for all three sets, which
      # attached the wrong accession numbers to the second and third sets.
      accessions[i, 1] <- substrate_set[i, 3]
    }

    list(motifs = motifs, accessions = accessions)
  }

  first_set <- build_padded_motifs(FirstSubstrateSet, "FirstSubstrateSet")
  FTLwtmotifs <- first_set$motifs
  FTLwtAccessionNumbers <- first_set$accessions

  second_set <- build_padded_motifs(SecondSubstrateSet, "SecondSubstrateSet")
  D835Ymotifs <- second_set$motifs
  D835YAccessionNumbers <- second_set$accessions

  third_set <- build_padded_motifs(ThirdSubstrateSet, "ThirdSubstrateSet")
  ITDmotifs <- third_set$motifs
  ITDAccessionNumbers <- third_set$accessions

  # The names travel with the motifs through the later vector subsetting and
  # are what the background-frequency lookup matches against.
  names(FTLwtmotifs) <- FTLwtAccessionNumbers
  names(D835Ymotifs) <- D835YAccessionNumbers
  names(ITDmotifs) <- ITDAccessionNumbers
}
933
934
# Motifs that occur in exactly one of the three sets.
#
# only_in() keeps the entries of `own` that appear in neither of the other two
# sets and then removes repeated motifs, keeping the first occurrence.
# Logical vector indexing and !duplicated() are used (rather than unique())
# so the accession-number names stay attached to the surviving motifs.
only_in <- function(own, other_a, other_b) {
  shared <- own %in% other_a | own %in% other_b
  exclusive <- own[!shared]
  exclusive[!duplicated(exclusive)]
}

FTLwtmotifsFINAL <- only_in(FTLwtmotifs, D835Ymotifs, ITDmotifs)

ITDmotifsFINAL <- only_in(ITDmotifs, D835Ymotifs, FTLwtmotifs)

D835YmotifsFINAL <- only_in(D835Ymotifs, FTLwtmotifs, ITDmotifs)
948
949
# Collect the background-frequency columns that belong to the motifs unique
# to the first substrate set.
#
# Row 1 of Firstsubbackfreq holds one accession number per column. A column is
# selected when its accession number occurs as a literal substring of the
# name (accession number) attached to a unique motif. Each selected column is
# stored as one row of FTLFinalMatrix; row 1 of FTLFinalMatrix is an all-NA
# placeholder of width 36.
columnalheader <- rep(NA, 36)
FTLFinalMatrix <- matrix(data = columnalheader, nrow = 1)

# Matched columns are gathered in a list and bound once at the end instead of
# growing the matrix with rbind() on every hit.
matched_rows <- list()
# seq_along() skips the loop when no unique motif exists; 1:length() would
# visit indices 1 and 0 and fail on the zero-length name.
for (k in seq_along(FTLwtmotifsFINAL)) {
  motif_accession <- names(FTLwtmotifsFINAL[k])
  for (m in seq_len(ncol(Firstsubbackfreq))) {
    AN <- as.character(Firstsubbackfreq[1, m])
    if (grepl(pattern = AN, x = motif_accession, fixed = TRUE)) {
      outputmatrix <- matrix(as.character(Firstsubbackfreq[, m]), nrow = 1)
      matched_rows[[length(matched_rows) + 1]] <- outputmatrix
    }
  }
}
FTLFinalMatrix <- do.call(rbind, c(list(FTLFinalMatrix), matched_rows))

# drop = FALSE keeps a matrix even when only the placeholder row is left.
FTLFinalMatrix <- FTLFinalMatrix[!duplicated(FTLFinalMatrix), , drop = FALSE]
970
# Collect the background-frequency columns that belong to the motifs unique
# to the third substrate set.
#
# Row 1 of Thirdsubbackfreq holds one accession number per column. A column is
# selected when its accession number occurs as a literal substring of the
# name (accession number) attached to a unique motif. Each selected column is
# stored as one row of ITDFinalMatrix; row 1 of ITDFinalMatrix is an all-NA
# placeholder of width 36.
columnalheader <- rep(NA, 36)
ITDFinalMatrix <- matrix(data = columnalheader, nrow = 1)

# Matched columns are gathered in a list and bound once at the end instead of
# growing the matrix with rbind() on every hit.
matched_rows <- list()
# seq_along() skips the loop when no unique motif exists; 1:length() would
# visit indices 1 and 0 and fail on the zero-length name.
for (k in seq_along(ITDmotifsFINAL)) {
  motif_accession <- names(ITDmotifsFINAL[k])
  for (m in seq_len(ncol(Thirdsubbackfreq))) {
    AN <- as.character(Thirdsubbackfreq[1, m])
    if (grepl(pattern = AN, x = motif_accession, fixed = TRUE)) {
      outputmatrix <- matrix(as.character(Thirdsubbackfreq[, m]), nrow = 1)
      matched_rows[[length(matched_rows) + 1]] <- outputmatrix
    }
  }
}
ITDFinalMatrix <- do.call(rbind, c(list(ITDFinalMatrix), matched_rows))

# drop = FALSE keeps a matrix even when only the placeholder row is left.
ITDFinalMatrix <- ITDFinalMatrix[!duplicated(ITDFinalMatrix), , drop = FALSE]
991
# Collect the background-frequency columns that belong to the motifs unique
# to the second substrate set.
#
# Row 1 of Secondsubbackfreq holds one accession number per column. A column
# is selected when its accession number occurs as a literal substring of the
# name (accession number) attached to a unique motif. Each selected column is
# stored as one row of D835YFinalMatrix; row 1 of D835YFinalMatrix is an
# all-NA placeholder of width 36.
columnalheader <- rep(NA, 36)
D835YFinalMatrix <- matrix(data = columnalheader, nrow = 1)

# Matched columns are gathered in a list and bound once at the end instead of
# growing the matrix with rbind() on every hit.
matched_rows <- list()
# seq_along() skips the loop when no unique motif exists; 1:length() would
# visit indices 1 and 0 and fail on the zero-length name.
for (k in seq_along(D835YmotifsFINAL)) {
  motif_accession <- names(D835YmotifsFINAL[k])
  for (m in seq_len(ncol(Secondsubbackfreq))) {
    AN <- as.character(Secondsubbackfreq[1, m])
    if (grepl(pattern = AN, x = motif_accession, fixed = TRUE)) {
      outputmatrix <- matrix(as.character(Secondsubbackfreq[, m]), nrow = 1)
      matched_rows[[length(matched_rows) + 1]] <- outputmatrix
    }
  }
}
D835YFinalMatrix <- do.call(rbind, c(list(D835YFinalMatrix), matched_rows))

# drop = FALSE keeps a matrix even when only the placeholder row is left.
D835YFinalMatrix <- D835YFinalMatrix[
  !duplicated(D835YFinalMatrix), ,
  drop = FALSE
]
1011
# Write the two output files for the motifs unique to the first substrate set.
#
# 1. First_unshared_motifs_table: a header row (the first 23 column names of
#    FirstSubstrateSet) followed by one row per unique motif laid out as
#    two blanks, the accession number, one cell per motif character, and
#    five trailing blanks.
# 2. First_unshared_subbackfreq: a header row followed by the matched
#    background-frequency rows of FTLFinalMatrix (its first row is a
#    placeholder and is not written).
FTLoutputmatrix <- matrix(
  data = c(FTLwtmotifsFINAL, names(FTLwtmotifsFINAL)),
  ncol = 2
)

FLTreference <- FTLoutputmatrix[, 2]

# Keeping the header as a 1-row matrix guarantees it is written as a row even
# when there are no motif rows to bind below it.
header_row <- matrix(colnames(FirstSubstrateSet)[1:23], nrow = 1)
# seq_len() yields no iterations for an empty result; 1:nrow() would index
# row 1 of a 0-row matrix and fail.
motif_rows <- lapply(seq_len(nrow(FTLoutputmatrix)), function(q) {
  thismotif <- unlist(strsplit(FTLoutputmatrix[q, 1], ""))
  c("", "", FTLoutputmatrix[q, 2], thismotif, "", "", "", "", "")
})
FirstLine <- do.call(rbind, c(list(header_row), motif_rows))

write.table(
  x = FirstLine,
  file = First_unshared_motifs_table,
  quote = FALSE, sep = ",",
  row.names = FALSE, col.names = FALSE, na = "", append = TRUE
)

# NOTE(review): the header labels are read from Thirdsubbackfreq for all three
# outputs -- confirm that column 1 is identical in the three SBF inputs.
columnalheader <- matrix(as.character(Thirdsubbackfreq[1:36, 1]), nrow = 1)

write.table(
  x = columnalheader,
  file = First_unshared_subbackfreq,
  quote = FALSE, sep = ",",
  row.names = FALSE, col.names = FALSE, na = "", append = TRUE
)

# The frequency table may arrive as a bare vector if only the placeholder row
# survived de-duplication; restore the matrix shape before slicing.
sbf_table <- FTLFinalMatrix
if (is.null(dim(sbf_table))) {
  sbf_table <- matrix(sbf_table, nrow = 1)
}
# Drop the placeholder row. drop = FALSE keeps a single matched row as a row;
# without it write.table() would emit that row as a 36-line column.
sbf_rows <- sbf_table[-1, , drop = FALSE]
if (nrow(sbf_rows) > 0) {
  write.table(
    x = sbf_rows,
    file = First_unshared_subbackfreq,
    quote = FALSE, sep = ",",
    row.names = FALSE, col.names = FALSE, na = "", append = TRUE
  )
}
1046
1047 ############################################################################################################
1048
# Write the two output files for the motifs unique to the second substrate
# set.
#
# 1. Second_unshared_motifs_table: a header row (the first 23 column names of
#    FirstSubstrateSet) followed by one row per unique motif laid out as
#    two blanks, the accession number, one cell per motif character, and
#    five trailing blanks.
# 2. Second_unshared_subbackfreq: a header row followed by the matched
#    background-frequency rows of D835YFinalMatrix (its first row is a
#    placeholder and is not written).
D835Youtputmatrix <- matrix(
  data = c(D835YmotifsFINAL, names(D835YmotifsFINAL)),
  ncol = 2
)

FLTreference <- D835Youtputmatrix[, 2]

# Keeping the header as a 1-row matrix guarantees it is written as a row even
# when there are no motif rows to bind below it.
header_row <- matrix(colnames(FirstSubstrateSet)[1:23], nrow = 1)
# seq_len() yields no iterations for an empty result; 1:nrow() would index
# row 1 of a 0-row matrix and fail.
motif_rows <- lapply(seq_len(nrow(D835Youtputmatrix)), function(q) {
  thismotif <- unlist(strsplit(D835Youtputmatrix[q, 1], ""))
  c("", "", D835Youtputmatrix[q, 2], thismotif, "", "", "", "", "")
})
FirstLine <- do.call(rbind, c(list(header_row), motif_rows))

write.table(
  x = FirstLine,
  file = Second_unshared_motifs_table,
  quote = FALSE, sep = ",",
  row.names = FALSE, col.names = FALSE, na = "", append = TRUE
)

# NOTE(review): the header labels are read from Thirdsubbackfreq for all three
# outputs -- confirm that column 1 is identical in the three SBF inputs.
columnalheader <- matrix(as.character(Thirdsubbackfreq[1:36, 1]), nrow = 1)

write.table(
  x = columnalheader,
  file = Second_unshared_subbackfreq,
  quote = FALSE, sep = ",",
  row.names = FALSE, col.names = FALSE, na = "", append = TRUE
)

# The frequency table may arrive as a bare vector if only the placeholder row
# survived de-duplication; restore the matrix shape before slicing.
sbf_table <- D835YFinalMatrix
if (is.null(dim(sbf_table))) {
  sbf_table <- matrix(sbf_table, nrow = 1)
}
# Drop the placeholder row. drop = FALSE keeps a single matched row as a row;
# without it write.table() would emit that row as a 36-line column.
sbf_rows <- sbf_table[-1, , drop = FALSE]
if (nrow(sbf_rows) > 0) {
  write.table(
    x = sbf_rows,
    file = Second_unshared_subbackfreq,
    quote = FALSE, sep = ",",
    row.names = FALSE, col.names = FALSE, na = "", append = TRUE
  )
}
1082
1083 ############################################################################################################
1084
# Write the two output files for the motifs unique to the third substrate set.
#
# 1. Third_unshared_motifs_table: a header row (the first 23 column names of
#    FirstSubstrateSet) followed by one row per unique motif laid out as
#    two blanks, the accession number, one cell per motif character, and
#    five trailing blanks.
# 2. Third_unshared_subbackfreq: a header row followed by the matched
#    background-frequency rows of ITDFinalMatrix (its first row is a
#    placeholder and is not written).
ITDoutputmatrix <- matrix(
  data = c(ITDmotifsFINAL, names(ITDmotifsFINAL)),
  ncol = 2
)

FLTreference <- ITDoutputmatrix[, 2]

# Keeping the header as a 1-row matrix guarantees it is written as a row even
# when there are no motif rows to bind below it.
header_row <- matrix(colnames(FirstSubstrateSet)[1:23], nrow = 1)
# seq_len() yields no iterations for an empty result; 1:nrow() would index
# row 1 of a 0-row matrix and fail.
motif_rows <- lapply(seq_len(nrow(ITDoutputmatrix)), function(q) {
  thismotif <- unlist(strsplit(ITDoutputmatrix[q, 1], ""))
  c("", "", ITDoutputmatrix[q, 2], thismotif, "", "", "", "", "")
})
FirstLine <- do.call(rbind, c(list(header_row), motif_rows))

write.table(
  x = FirstLine,
  file = Third_unshared_motifs_table,
  quote = FALSE, sep = ",",
  row.names = FALSE, col.names = FALSE, na = "", append = TRUE
)

columnalheader <- matrix(as.character(Thirdsubbackfreq[1:36, 1]), nrow = 1)

write.table(
  x = columnalheader,
  file = Third_unshared_subbackfreq,
  quote = FALSE, sep = ",",
  row.names = FALSE, col.names = FALSE, na = "", append = TRUE
)

# The frequency table may arrive as a bare vector if only the placeholder row
# survived de-duplication; restore the matrix shape before slicing.
sbf_table <- ITDFinalMatrix
if (is.null(dim(sbf_table))) {
  sbf_table <- matrix(sbf_table, nrow = 1)
}
# Drop the placeholder row. drop = FALSE keeps a single matched row as a row;
# without it write.table() would emit that row as a 36-line column.
sbf_rows <- sbf_table[-1, , drop = FALSE]
if (nrow(sbf_rows) > 0) {
  write.table(
    x = sbf_rows,
    file = Third_unshared_subbackfreq,
    quote = FALSE, sep = ",",
    row.names = FALSE, col.names = FALSE, na = "", append = TRUE
  )
}
1117
1118 }