Mercurial > repos > jfb > commonality_finder
comparison C and D finder/CandD.R @ 0:3e5fdf933646 draft
Uploaded
author | jfb |
---|---|
date | Fri, 25 May 2018 10:56:10 -0400 |
parents | |
children | b791e2bee65c |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:3e5fdf933646 |
---|---|
1 FirstSubstrateSet<- read.csv("input1.csv", stringsAsFactors=FALSE) | |
2 Firstsubbackfreq<- read.csv("input2.csv", header=FALSE, stringsAsFactors=FALSE) | |
3 | |
4 SecondSubstrateSet<- read.csv("input3.csv", stringsAsFactors=FALSE) | |
5 Secondsubbackfreq<- read.csv("input4.csv", header=FALSE, stringsAsFactors=FALSE) | |
6 | |
7 ThirdSubstrateSet<- read.csv("input5.csv", stringsAsFactors=FALSE) | |
8 Thirdsubbackfreq<- read.csv("input6.csv", header=FALSE, stringsAsFactors=FALSE) | |
9 | |
10 | |
11 args = commandArgs(trailingOnly=TRUE) | |
12 | |
13 print(args[1]) | |
14 print(args[2]) | |
15 print(args[3]) | |
16 | |
17 | |
18 #If you want ONLY FULL MOTIFS, put "YES" here, please use all caps | |
19 FullMotifsOnly_questionmark<-args[1] | |
20 #If you want ONLY TRUNCATED MOTIFS, put "YES" here, please use all caps | |
21 TruncatedMotifsOnly_questionmark<-args[2] | |
22 #if you want to find the overlap, put a "YES" here (all caps), if you want to find the non-overlap, put "NO" (all caps) | |
23 Are_You_Looking_For_Commonality<-args[3] | |
24 | |
25 | |
26 #then put the names of your output files here | |
27 Shared_motifs_table<-"sharedmotifs.csv" | |
28 Shared_subbackfreq_table<-"sharedSBF.csv" | |
29 | |
30 # Shared_motifs_table<-"Shared motifs 7-27-17.csv" | |
31 # Shared_subbackfreq_table<-"SubstrateBackgrounFrequency-for-shared-motifs 4 7-27-17.csv" | |
32 | |
33 First_unshared_motifs_table<-"R1 substrates.csv" | |
34 First_unshared_subbackfreq<-"R1 SBF.csv" | |
35 | |
36 Second_unshared_motifs_table<-"R2 subs.csv" | |
37 Second_unshared_subbackfreq<-"R2 SBf.csv" | |
38 | |
39 Third_unshared_motifs_table<-"R3 subs.csv" | |
40 Third_unshared_subbackfreq<-"R3 SBF.csv" | |
41 | |
42 #final note, this code is going to be unworkable if you want to make a Venn diagram of more than 3 circles. I think I'll poke around | |
43 #other languages to see if any of them can do it. | |
44 #################################################################################################################################### | |
45 | |
46 | |
47 | |
48 | |
49 | |
50 FirstxY<-rep("xY",times=nrow(FirstSubstrateSet)) | |
51 FirstSubstrateSet[,11]<-FirstxY | |
52 | |
53 SecondxY<-rep("xY",times=nrow(SecondSubstrateSet)) | |
54 SecondSubstrateSet[,11]<-SecondxY | |
55 | |
56 ThirdxY<-rep("xY",times=nrow(ThirdSubstrateSet)) | |
57 ThirdSubstrateSet[,11]<-ThirdxY | |
58 | |
59 | |
60 | |
61 | |
62 | |
63 | |
64 | |
65 | |
66 | |
67 | |
68 | |
69 #################################################################################################################################### | |
70 #################################################################################################################################### | |
71 # better version of this code written in C: what happens when two kinases share a motif, but they found that motif in two | |
72 # separate proteins thus two separate accession numbers? | |
73 # It should actually output the shared motif and BOTH accession numbers. Right now it does not, it only maps out the second | |
74 # accession number. So that needs to be fixed BUT you need to keep the commonality between a motif and its accession number | |
75 #################################################################################################################################### | |
76 #################################################################################################################################### | |
77 #################################################################################################################################### | |
78 #################################################################################################################################### | |
79 | |
80 #Create the motif sets, deciding whether or not you're looking for truncated or full here | |
81 #full only | |
82 if (Are_You_Looking_For_Commonality=="YES"){ | |
83 if (FullMotifsOnly_questionmark=="YES"){ | |
84 FTLwtmotifs=matrix(,nrow = nrow(FirstSubstrateSet),ncol=1) | |
85 FTLwtAccessionNumbers=matrix(,nrow = nrow(FirstSubstrateSet),ncol=1) | |
86 leftspaces<-c() | |
87 rightspaces<-c() | |
88 for (i in 1:nrow(FirstSubstrateSet)){ | |
89 FTLwtletters<-FirstSubstrateSet[i,4:18] | |
90 FTLwtletters<-FTLwtletters[FTLwtletters !="XXXXX"] | |
91 FTLwtletters<-paste(FTLwtletters, sep="", collapse="") | |
92 | |
93 | |
94 YYYmotif <- unlist(strsplit(FTLwtletters, split = "")) | |
95 YYYposition <- match(x = "x", table = YYYmotif) | |
96 #position itself tells me how much is to the left of that X by what it's number is. x at position 4 tells me that there are | |
97 #just 3 letters to the left of x | |
98 | |
99 YYYLettersToTheLeft <- YYYposition - 1 | |
100 #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is | |
101 #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1 | |
102 YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 | |
103 #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the | |
104 #variable the user puts in is | |
105 | |
106 if (YYYLettersToTheLeft > 6 && YYYLettersToTheRight > 6) { | |
107 motif<-YYYmotif | |
108 #save that motif, which is the Y and +/- 4 amino acids, including truncation | |
109 motif<-motif[!motif %in% "x"] | |
110 motif<-paste(motif, sep="", collapse="") | |
111 FTLwtletters<-motif | |
112 FTLwtmotifs[i,1]<-FTLwtletters | |
113 FTLwtAccessionNumbers[i,1]<-FirstSubstrateSet[i,3] | |
114 | |
115 } | |
116 | |
117 } | |
118 # FTLwtmotifs <- FTLwtmotifs[!is.na(FTLwtmotifs)] | |
119 # FTLwtmotifs<-matrix(FTLwtmotifs,ncol = 1) | |
120 # | |
121 | |
122 D835Ymotifs=matrix(,nrow = nrow(SecondSubstrateSet),ncol=1) | |
123 D835YAccessionNumbers<-matrix(,nrow = nrow(SecondSubstrateSet),ncol = 1) | |
124 | |
125 for (i in 1:nrow(SecondSubstrateSet)){ | |
126 D835letters<-SecondSubstrateSet[i,4:18] | |
127 D835letters<-D835letters[D835letters !="XXXXX"] | |
128 D835letters<-paste(D835letters, sep="", collapse="") | |
129 | |
130 | |
131 YYYmotif <- unlist(strsplit(D835letters, split = "")) | |
132 YYYposition <- match(x = "x", table = YYYmotif) | |
133 #position itself tells me how much is to the left of that X by what it's number is. x at position 4 tells me that there are | |
134 #just 3 letters to the left of x | |
135 | |
136 YYYLettersToTheLeft <- YYYposition - 1 | |
137 #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is | |
138 #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1 | |
139 YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 | |
140 #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the | |
141 #variable the user puts in is | |
142 | |
143 if (YYYLettersToTheLeft > 6 && YYYLettersToTheRight > 6) { | |
144 motif<-YYYmotif | |
145 #add blank spaces if the motif has less than 4 letters to the left/right | |
146 motif<-c(leftspaces,YYYmotif,rightspaces) | |
147 #save that motif, which is the Y and +/- 4 amino acids, including truncation | |
148 motif<-motif[!motif %in% "x"] | |
149 motif<-paste(motif, sep="", collapse="") | |
150 D835letters<-motif | |
151 D835Ymotifs[i,1]<-D835letters | |
152 D835YAccessionNumbers[i,1]<-SecondSubstrateSet[i,3] | |
153 | |
154 } | |
155 } | |
156 | |
157 ITDmotifs=matrix(,nrow = nrow(ThirdSubstrateSet),ncol=1) | |
158 ITDAccessionNumbers<-matrix(,nrow = nrow(ThirdSubstrateSet)) | |
159 | |
160 for (i in 1:nrow(ThirdSubstrateSet)){ | |
161 ITDletters<-ThirdSubstrateSet[i,4:18] | |
162 ITDletters<-ITDletters[ITDletters !="XXXXX"] | |
163 ITDletters<-paste(ITDletters, sep="", collapse="") | |
164 YYYmotif <- unlist(strsplit(ITDletters, split = "")) | |
165 YYYposition <- match(x = "x", table = YYYmotif) | |
166 #position itself tells me how much is to the left of that X by what it's number is. x at position 4 tells me that there are | |
167 #just 3 letters to the left of x | |
168 | |
169 YYYLettersToTheLeft <- YYYposition - 1 | |
170 #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is | |
171 #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1 | |
172 YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 | |
173 #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the | |
174 #variable the user puts in is | |
175 | |
176 if (YYYLettersToTheLeft > 6 && YYYLettersToTheRight > 6) { | |
177 motif<-YYYmotif | |
178 #add blank spaces if the motif has less than 4 letters to the left/right | |
179 motif<-c(leftspaces,YYYmotif,rightspaces) | |
180 #save that motif, which is the Y and +/- 4 amino acids, including truncation | |
181 motif<-motif[!motif %in% "x"] | |
182 motif<-paste(motif, sep="", collapse="") | |
183 ITDletters<-motif | |
184 ITDmotifs[i,1]<-ITDletters | |
185 ITDAccessionNumbers[i,1]<-ThirdSubstrateSet[i,3] | |
186 | |
187 } | |
188 } | |
189 | |
190 } | |
191 | |
192 ##############################################3 | |
193 #Truncated only | |
194 if (TruncatedMotifsOnly_questionmark=="YES"){ | |
195 FTLwtmotifs=matrix(,nrow = nrow(FirstSubstrateSet),ncol=1) | |
196 FTLwtAccessionNumbers=matrix(,nrow = nrow(FirstSubstrateSet),ncol=1) | |
197 | |
198 for (i in 1:nrow(FirstSubstrateSet)){ | |
199 FTLwtletters<-FirstSubstrateSet[i,4:18] | |
200 FTLwtletters<-FTLwtletters[FTLwtletters !="XXXXX"] | |
201 FTLwtletters<-paste(FTLwtletters, sep="", collapse="") | |
202 | |
203 | |
204 YYYmotif <- unlist(strsplit(FTLwtletters, split = "")) | |
205 YYYposition <- match(x = "x", table = YYYmotif) | |
206 #position itself tells me how much is to the left of that X by what it's number is. x at position 4 tells me that there are | |
207 #just 3 letters to the left of x | |
208 | |
209 YYYLettersToTheLeft <- YYYposition - 1 | |
210 #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is | |
211 #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1 | |
212 YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 | |
213 #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the | |
214 #variable the user puts in is | |
215 | |
216 if (YYYLettersToTheLeft < 7 | YYYLettersToTheRight < 7) { | |
217 leftspaces<-rep(" ",times=(7-YYYLettersToTheLeft)) | |
218 rightspaces<-rep(" ",times=7-(YYYLettersToTheRight)) | |
219 #add blank spaces if the motif has less than 4 letters to the left/right | |
220 motif<-c(leftspaces,YYYmotif,rightspaces) | |
221 #save that motif, which is the Y and +/- 4 amino acids, including truncation | |
222 motif<-motif[!motif %in% "x"] | |
223 motif<-paste(motif, sep="", collapse="") | |
224 FTLwtletters<-motif | |
225 FTLwtmotifs[i,1]<-FTLwtletters | |
226 FTLwtAccessionNumbers[i,1]<-FirstSubstrateSet[i,3] | |
227 } | |
228 | |
229 } | |
230 | |
231 D835Ymotifs=matrix(,nrow = nrow(SecondSubstrateSet),ncol=1) | |
232 D835YAccessionNumbers<-matrix(,nrow = nrow(SecondSubstrateSet),ncol = 1) | |
233 | |
234 for (i in 1:nrow(SecondSubstrateSet)){ | |
235 D835letters<-SecondSubstrateSet[i,4:18] | |
236 D835letters<-D835letters[D835letters !="XXXXX"] | |
237 D835letters<-paste(D835letters, sep="", collapse="") | |
238 | |
239 | |
240 YYYmotif <- unlist(strsplit(D835letters, split = "")) | |
241 YYYposition <- match(x = "x", table = YYYmotif) | |
242 #position itself tells me how much is to the left of that X by what it's number is. x at position 4 tells me that there are | |
243 #just 3 letters to the left of x | |
244 | |
245 YYYLettersToTheLeft <- YYYposition - 1 | |
246 #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is | |
247 #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1 | |
248 YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 | |
249 #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the | |
250 #variable the user puts in is | |
251 | |
252 if (YYYLettersToTheLeft < 7 | YYYLettersToTheRight < 7) { | |
253 leftspaces<-rep(" ",times=(7-YYYLettersToTheLeft)) | |
254 rightspaces<-rep(" ",times=7-(YYYLettersToTheRight)) | |
255 #add blank spaces if the motif has less than 4 letters to the left/right | |
256 motif<-c(leftspaces,YYYmotif,rightspaces) | |
257 #save that motif, which is the Y and +/- 4 amino acids, including truncation | |
258 motif<-motif[!motif %in% "x"] | |
259 motif<-paste(motif, sep="", collapse="") | |
260 D835letters<-motif | |
261 D835YAccessionNumbers[i,1]<-SecondSubstrateSet[i,3] | |
262 D835Ymotifs[i,1]<-D835letters | |
263 } | |
264 } | |
265 | |
266 ITDmotifs=matrix(,nrow = nrow(ThirdSubstrateSet),ncol=1) | |
267 ITDAccessionNumbers<-matrix(,nrow = nrow(ThirdSubstrateSet)) | |
268 | |
269 for (i in 1:nrow(ThirdSubstrateSet)){ | |
270 ITDletters<-ThirdSubstrateSet[i,4:18] | |
271 ITDletters<-ITDletters[ITDletters !="XXXXX"] | |
272 ITDletters<-paste(ITDletters, sep="", collapse="") | |
273 YYYmotif <- unlist(strsplit(ITDletters, split = "")) | |
274 YYYposition <- match(x = "x", table = YYYmotif) | |
275 #position itself tells me how much is to the left of that X by what it's number is. x at position 4 tells me that there are | |
276 #just 3 letters to the left of x | |
277 | |
278 YYYLettersToTheLeft <- YYYposition - 1 | |
279 #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is | |
280 #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1 | |
281 YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 | |
282 #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the | |
283 #variable the user puts in is | |
284 | |
285 if (YYYLettersToTheLeft < 7 | YYYLettersToTheRight < 7) { | |
286 leftspaces<-rep(" ",times=(7-YYYLettersToTheLeft)) | |
287 rightspaces<-rep(" ",times=7-(YYYLettersToTheRight)) | |
288 #add blank spaces if the motif has less than 4 letters to the left/right | |
289 motif<-c(leftspaces,YYYmotif,rightspaces) | |
290 #save that motif, which is the Y and +/- 4 amino acids, including truncation | |
291 motif<-motif[!motif %in% "x"] | |
292 motif<-paste(motif, sep="", collapse="") | |
293 ITDletters<-motif | |
294 ITDAccessionNumbers[i,1]<-ThirdSubstrateSet[i,3] | |
295 ITDmotifs[i,1]<-ITDletters | |
296 } | |
297 } | |
298 | |
299 } | |
300 | |
301 ############################################### | |
302 #ALL motifs, full and truncated | |
303 | |
304 if (FullMotifsOnly_questionmark!="YES"&&TruncatedMotifsOnly_questionmark!="YES"){ | |
305 FTLwtmotifs=matrix(,nrow = nrow(FirstSubstrateSet),ncol=1) | |
306 FTLwtAccessionNumbers=matrix(,nrow = nrow(FirstSubstrateSet),ncol=1) | |
307 | |
308 for (i in 1:nrow(FirstSubstrateSet)){ | |
309 FTLwtletters<-FirstSubstrateSet[i,4:18] | |
310 FTLwtletters<-FTLwtletters[FTLwtletters !="XXXXX"] | |
311 FTLwtletters<-paste(FTLwtletters, sep="", collapse="") | |
312 leftspaces<-c() | |
313 rightspaces<-c() | |
314 | |
315 YYYmotif <- unlist(strsplit(FTLwtletters, split = "")) | |
316 YYYposition <- match(x = "x", table = YYYmotif) | |
317 #position itself tells me how much is to the left of that X by what it's number is. x at position 4 tells me that there are | |
318 #just 3 letters to the left of x | |
319 | |
320 YYYLettersToTheLeft <- YYYposition - 1 | |
321 #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is | |
322 #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1 | |
323 YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 | |
324 #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the | |
325 #variable the user puts in is | |
326 | |
327 | |
328 if (YYYLettersToTheLeft < 7 | YYYLettersToTheRight < 7) { | |
329 leftspaces<-rep(" ",times=(7-YYYLettersToTheLeft)) | |
330 rightspaces<-rep(" ",times=7-(YYYLettersToTheRight)) | |
331 #add blank spaces if the motif has less than 4 letters to the left/right | |
332 motif<-c(leftspaces,YYYmotif,rightspaces) | |
333 #save that motif, which is the Y and +/- 4 amino acids, including truncation | |
334 motif<-motif[!motif %in% "x"] | |
335 motif<-paste(motif, sep="", collapse="") | |
336 FTLwtletters<-motif | |
337 FTLwtmotifs[i,1]<-FTLwtletters | |
338 FTLwtAccessionNumbers[i,1]<-FirstSubstrateSet[i,3] | |
339 } | |
340 | |
341 if(YYYLettersToTheLeft>6 && YYYLettersToTheRight>6){ | |
342 motif<-YYYmotif | |
343 #add blank spaces if the motif has less than 4 letters to the left/right | |
344 motif<-c(leftspaces,YYYmotif,rightspaces) | |
345 #save that motif, which is the Y and +/- 4 amino acids, including truncation | |
346 motif<-motif[!motif %in% "x"] | |
347 motif<-paste(motif, sep="", collapse="") | |
348 FTLwtletters<-motif | |
349 FTLwtmotifs[i,1]<-FTLwtletters | |
350 FTLwtAccessionNumbers[i,1]<-FirstSubstrateSet[i,3] | |
351 | |
352 | |
353 } | |
354 | |
355 } | |
356 | |
357 D835Ymotifs=matrix(,nrow = nrow(SecondSubstrateSet),ncol=1) | |
358 D835YAccessionNumbers<-matrix(,nrow = nrow(SecondSubstrateSet),ncol = 1) | |
359 | |
360 for (i in seq_len(nrow(SecondSubstrateSet))){ | |
361 D835letters<-SecondSubstrateSet[i,4:18] | |
362 D835letters<-D835letters[D835letters !="XXXXX"] | |
363 D835letters<-paste(D835letters, sep="", collapse="") | |
364 leftspaces<-c() | |
365 rightspaces<-c() | |
366 | |
367 YYYmotif <- unlist(strsplit(D835letters, split = "")) | |
368 YYYposition <- match(x = "x", table = YYYmotif) | |
369 #the position of the "x" marker tells how many letters sit to its left: an x at position 4 means there are | |
370 #just 3 letters to the left of x | |
371 | |
372 YYYLettersToTheLeft <- YYYposition - 1 | |
373 #letters to the right is length(motif)-position-1: if the motif is 5 long and x is at 3 then Y is at 4 and there is | |
374 #just 1 spot to the right of Y, so LettersToTheRight is 1 because 5-3-1=1 | |
375 YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 | |
376 #a motif with fewer than 7 letters on either side of the xY centre is padded with blanks (first branch); | |
377 #one with 7 letters on both sides is stored unchanged (second branch) | |
378 if (YYYLettersToTheLeft < 7 || YYYLettersToTheRight < 7) { | |
379 leftspaces<-rep(" ",times=(7-YYYLettersToTheLeft)) | |
380 rightspaces<-rep(" ",times=7-(YYYLettersToTheRight)) | |
381 #add blank spaces so that each side of the motif is 7 characters wide | |
382 motif<-c(leftspaces,YYYmotif,rightspaces) | |
383 #drop the "x" marker and save the padded motif | |
384 motif<-motif[!motif %in% "x"] | |
385 motif<-paste(motif, sep="", collapse="") | |
386 D835letters<-motif | |
387 D835Ymotifs[i,1]<-D835letters | |
388 D835YAccessionNumbers[i,1]<-SecondSubstrateSet[i,3] | |
389 } | |
390 | |
391 if(YYYLettersToTheLeft>6 && YYYLettersToTheRight>6){ | |
392 motif<-YYYmotif | |
393 #leftspaces and rightspaces are still empty here, so no padding is added | |
394 motif<-c(leftspaces,YYYmotif,rightspaces) | |
395 #drop the "x" marker and save the full-length motif | |
396 motif<-motif[!motif %in% "x"] | |
397 motif<-paste(motif, sep="", collapse="") | |
398 D835letters<-motif | |
399 D835Ymotifs[i,1]<-D835letters | |
400 D835YAccessionNumbers[i,1]<-SecondSubstrateSet[i,3] | |
401 } | |
402 } | |
403 | |
404 | |
405 ITDmotifs=matrix(,nrow = nrow(ThirdSubstrateSet),ncol=1) | |
406 ITDAccessionNumbers<-matrix(,nrow = nrow(ThirdSubstrateSet)) | |
407 | |
408 for (i in seq_len(nrow(ThirdSubstrateSet))){ | |
409 ITDletters<-ThirdSubstrateSet[i,4:18] | |
410 ITDletters<-ITDletters[ITDletters !="XXXXX"] | |
411 ITDletters<-paste(ITDletters, sep="", collapse="") | |
412 YYYmotif <- unlist(strsplit(ITDletters, split = "")) | |
413 leftspaces<-c() | |
414 rightspaces<-c() | |
415 YYYposition <- match(x = "x", table = YYYmotif) | |
416 #the position of the "x" marker tells how many letters sit to its left: an x at position 4 means there are | |
417 #just 3 letters to the left of x | |
418 | |
419 YYYLettersToTheLeft <- YYYposition - 1 | |
420 #letters to the right is length(motif)-position-1: if the motif is 5 long and x is at 3 then Y is at 4 and there is | |
421 #just 1 spot to the right of Y, so LettersToTheRight is 1 because 5-3-1=1 | |
422 YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 | |
423 #a motif with fewer than 7 letters on either side of the xY centre is padded with blanks (first branch); | |
424 #one with 7 letters on both sides is stored unchanged (second branch) | |
425 if (YYYLettersToTheLeft < 7 || YYYLettersToTheRight < 7) { | |
426 leftspaces<-rep(" ",times=(7-YYYLettersToTheLeft)) | |
427 rightspaces<-rep(" ",times=7-(YYYLettersToTheRight)) | |
428 #add blank spaces so that each side of the motif is 7 characters wide | |
429 motif<-c(leftspaces,YYYmotif,rightspaces) | |
430 #drop the "x" marker and save the padded motif | |
431 motif<-motif[!motif %in% "x"] | |
432 motif<-paste(motif, sep="", collapse="") | |
433 ITDletters<-motif | |
434 ITDmotifs[i,1]<-ITDletters | |
435 ITDAccessionNumbers[i,1]<-ThirdSubstrateSet[i,3] | |
436 } | |
437 | |
438 if(YYYLettersToTheLeft>6 && YYYLettersToTheRight>6){ | |
439 motif<-YYYmotif | |
440 #leftspaces and rightspaces are still empty here, so no padding is added | |
441 motif<-c(leftspaces,YYYmotif,rightspaces) | |
442 #drop the "x" marker and save the full-length motif | |
443 motif<-motif[!motif %in% "x"] | |
444 motif<-paste(motif, sep="", collapse="") | |
445 ITDletters<-motif | |
446 ITDmotifs[i,1]<-ITDletters | |
447 ITDAccessionNumbers[i,1]<-ThirdSubstrateSet[i,3] | |
448 } | |
449 } | |
450 | |
451 } | |
452 ############################################################################################################################# | |
453 ############################################################################################################################# | |
454 ############################################################################################################################# | |
455 ############################################################################################################################# | |
456 ############################################################################################################################# | |
457 | |
458 #now look for either commonality or difference. Actually could you look for both... | |
459 | |
460 if (Are_You_Looking_For_Commonality=="YES"){ | |
461 | |
462 columnalheader<-c(as.character(Thirdsubbackfreq[1:36,1])) | |
463 columnalheader<-matrix(columnalheader,nrow = 1) | |
464 # write.table(x=columnalheader, | |
465 # file=Shared_subbackfreq_table, | |
466 # quote=FALSE, sep=",", | |
467 # row.names=FALSE,col.names = FALSE, na="", append=TRUE) | |
468 | |
469 FirstOverlapmotifs<-c() #collects every motif found in both ITDmotifs and D835Ymotifs | |
470 for (i in seq_len(nrow(ITDmotifs))){ #seq_len() skips the loop when a set has no rows | |
471 for (j in seq_len(nrow(D835Ymotifs))){ | |
472 if (!is.na(ITDmotifs[i,1])&&!is.na(D835Ymotifs[j,1])){ #rows that were never filled stay NA and are ignored | |
473 if (ITDmotifs[i,1]==D835Ymotifs[j,1]){ | |
474 FirstOverlapmotifs<-c(FirstOverlapmotifs,D835Ymotifs[j,1]) | |
475 } | |
476 } | |
477 } | |
478 } | |
479 | |
480 AllAccessionNumbers<-c() | |
481 columnalheader<-c(rep(NA,36)) | |
482 FinalMatrix<-matrix(data =columnalheader,nrow = 1) | |
483 | |
484 FinalMotifs<-c(rep(NA,20)) | |
485 FinalMotifsMatrix<-matrix(data = FinalMotifs,nrow = 1) | |
486 | |
487 | |
488 for (l in seq_along(FirstOverlapmotifs)) { | |
489 AccessionNumber<-00000000000 | |
490 for (k in seq_len(nrow(FTLwtmotifs))) { | |
491 AccessionNumber<-0000000000000 | |
492 if(!is.na(FTLwtmotifs[k])){ | |
493 #the accession number is reset at the top of both loops (a precaution kept from the original code) so a value | |
494 #from an earlier match is not carried over; seq_along()/seq_len() make the loops no-ops when there is no overlap | |
495 if (FirstOverlapmotifs[l] == FTLwtmotifs[k]) { | |
496 substratematrix<-FirstSubstrateSet[k,1:20] | |
497 substratematrix<-as.matrix(substratematrix,nrow=1) | |
498 FinalMotifsMatrix<-rbind(FinalMotifsMatrix,substratematrix) | |
499 #a motif shared by all three sets: save the first 20 columns of its substrate row | |
500 | |
501 AccessionNumber <- as.character(FirstSubstrateSet[k, 3]) | |
502 #then take the accession number from column 3 of that row | |
503 | |
504 for (m in seq_len(ncol(Firstsubbackfreq))) { | |
505 AN <- as.character(Firstsubbackfreq[1, m]) | |
506 if (grepl(pattern = AN, | |
507 x = AccessionNumber, | |
508 fixed = TRUE)) { | |
509 outputmatrix <- as.character(Firstsubbackfreq[, m]) | |
510 outputmatrix <- matrix(outputmatrix, nrow = 1) | |
511 #the first row of this subbackfreq column occurs in the accession number: save the whole column as a row | |
512 FinalMatrix<-rbind(FinalMatrix,outputmatrix) | |
513 } | |
514 } | |
515 } | |
516 } | |
517 } | |
518 } | |
519 | |
520 | |
521 TrueMatrix<-FinalMatrix[!duplicated(FinalMatrix),,drop=FALSE] | |
522 TrueFinalMotifsMatrix<-FinalMotifsMatrix[!duplicated(FinalMotifsMatrix),,drop=FALSE] | |
523 #remove the all-NA placeholder first row; drop=FALSE keeps a matrix even when zero or one match remains | |
524 TrueFinalMotifsMatrix<-TrueFinalMotifsMatrix[-1,,drop=FALSE] | |
525 TrueMatrix<-TrueMatrix[-1,,drop=FALSE] | |
526 #write the shared substrate rows, one row per motif | |
527 write.table( | |
528 x = TrueFinalMotifsMatrix, | |
529 file = Shared_motifs_table, | |
530 quote = FALSE, | |
531 sep = ",", | |
532 row.names = FALSE, | |
533 col.names = TRUE, | |
534 na = "", | |
535 append = FALSE | |
536 ) | |
537 | |
538 #the header is the first 36 entries of column 1 of Thirdsubbackfreq | |
539 columnalheader<-c(as.character(Thirdsubbackfreq[1:36,1])) | |
540 columnalheader<-matrix(columnalheader,nrow = 1) | |
541 #put the header on top, then transpose so the header becomes the first column of the output | |
542 TrueMatrix<-rbind(columnalheader,TrueMatrix) | |
543 TrueMatrix<-t(TrueMatrix) | |
544 #the header is already part of the matrix, so the file is overwritten rather than appended to | |
545 write.table( | |
546 x = TrueMatrix, | |
547 file = Shared_subbackfreq_table, | |
548 quote = FALSE, | |
549 sep = ",", | |
550 row.names = FALSE, | |
551 col.names = FALSE, | |
552 na = "", | |
553 append = FALSE | |
554 ) | |
555 } | |
556 } | |
557 | |
558 if (Are_You_Looking_For_Commonality=="NO"){ | |
559 if (FullMotifsOnly_questionmark=="YES"){ | |
560 FTLwtmotifs=rep(NA,times=nrow(FirstSubstrateSet)) | |
561 FTLwtAccessionNumbers=rep(NA,times=nrow(FirstSubstrateSet)) | |
562 leftspaces<-c() | |
563 rightspaces<-c() | |
564 for (i in 1:nrow(FirstSubstrateSet)){ | |
565 FTLwtletters<-FirstSubstrateSet[i,4:18] | |
566 FTLwtletters<-FTLwtletters[FTLwtletters !="XXXXX"] | |
567 FTLwtletters<-paste(FTLwtletters, sep="", collapse="") | |
568 | |
569 | |
570 YYYmotif <- unlist(strsplit(FTLwtletters, split = "")) | |
571 YYYposition <- match(x = "x", table = YYYmotif) | |
572 #position itself tells me how much is to the left of that X by what it's number is. x at position 4 tells me that there are | |
573 #just 3 letters to the left of x | |
574 | |
575 YYYLettersToTheLeft <- YYYposition - 1 | |
576 #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is | |
577 #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1 | |
578 YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 | |
579 #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the | |
580 #variable the user puts in is | |
581 | |
582 if (YYYLettersToTheLeft > 6 && YYYLettersToTheRight > 6) { | |
583 motif<-YYYmotif | |
584 #save that motif, which is the Y and +/- 4 amino acids, including truncation | |
585 motif<-motif[!motif %in% "x"] | |
586 motif<-paste(motif, sep="", collapse="") | |
587 FTLwtletters<-motif | |
588 FTLwtmotifs[i]<-FTLwtletters | |
589 FTLwtAccessionNumbers[i]<-FirstSubstrateSet[i,3] | |
590 } | |
591 | |
592 } | |
593 # FTLwtmotifs <- FTLwtmotifs[!is.na(FTLwtmotifs)] | |
594 # FTLwtmotifs<-matrix(FTLwtmotifs,ncol = 1) | |
595 # | |
596 | |
597 D835Ymotifs=rep(NA,times=nrow(FirstSubstrateSet)) | |
598 D835YAccessionNumbers=rep(NA,times=nrow(FirstSubstrateSet)) | |
599 | |
600 for (i in 1:nrow(SecondSubstrateSet)){ | |
601 D835letters<-SecondSubstrateSet[i,4:18] | |
602 D835letters<-D835letters[D835letters !="XXXXX"] | |
603 D835letters<-paste(D835letters, sep="", collapse="") | |
604 | |
605 | |
606 YYYmotif <- unlist(strsplit(D835letters, split = "")) | |
607 YYYposition <- match(x = "x", table = YYYmotif) | |
608 #position itself tells me how much is to the left of that X by what it's number is. x at position 4 tells me that there are | |
609 #just 3 letters to the left of x | |
610 | |
611 YYYLettersToTheLeft <- YYYposition - 1 | |
612 #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is | |
613 #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1 | |
614 YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 | |
615 #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the | |
616 #variable the user puts in is | |
617 | |
618 if (YYYLettersToTheLeft > 6 && YYYLettersToTheRight > 6) { | |
619 motif<-YYYmotif | |
620 #add blank spaces if the motif has less than 4 letters to the left/right | |
621 motif<-c(leftspaces,YYYmotif,rightspaces) | |
622 #save that motif, which is the Y and +/- 4 amino acids, including truncation | |
623 motif<-motif[!motif %in% "x"] | |
624 motif<-paste(motif, sep="", collapse="") | |
625 D835letters<-motif | |
626 D835Ymotifs[i]<-D835letters | |
627 D835YAccessionNumbers[i]<-SecondSubstrateSet[i,3] | |
628 } | |
629 } | |
630 | |
631 ITDmotifs=rep(NA,times=nrow(FirstSubstrateSet)) | |
632 ITDAccessionNumbers=rep(NA,times=nrow(FirstSubstrateSet)) | |
633 | |
634 for (i in 1:nrow(ThirdSubstrateSet)){ | |
635 ITDletters<-ThirdSubstrateSet[i,4:18] | |
636 ITDletters<-ITDletters[ITDletters !="XXXXX"] | |
637 ITDletters<-paste(ITDletters, sep="", collapse="") | |
638 YYYmotif <- unlist(strsplit(ITDletters, split = "")) | |
639 YYYposition <- match(x = "x", table = YYYmotif) | |
640 #position itself tells me how much is to the left of that X by what it's number is. x at position 4 tells me that there are | |
641 #just 3 letters to the left of x | |
642 | |
643 YYYLettersToTheLeft <- YYYposition - 1 | |
644 #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is | |
645 #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1 | |
646 YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 | |
647 #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the | |
648 #variable the user puts in is | |
649 | |
650 if (YYYLettersToTheLeft > 6 && YYYLettersToTheRight > 6) { | |
651 motif<-YYYmotif | |
652 #add blank spaces if the motif has less than 4 letters to the left/right | |
653 motif<-c(leftspaces,YYYmotif,rightspaces) | |
654 #save that motif, which is the Y and +/- 4 amino acids, including truncation | |
655 motif<-motif[!motif %in% "x"] | |
656 motif<-paste(motif, sep="", collapse="") | |
657 ITDletters<-motif | |
658 ITDmotifs[i]<-ITDletters | |
659 ITDAccessionNumbers[i]<-ThirdSubstrateSet[i,3] | |
660 | |
661 } | |
662 } | |
663 names(ITDmotifs)<-ITDAccessionNumbers | |
664 names(D835Ymotifs)<-D835YAccessionNumbers | |
665 names(FTLwtmotifs)<-FTLwtAccessionNumbers | |
666 } | |
667 | |
668 | |
669 ##############################################3 | |
670 #Truncated only | |
if (TruncatedMotifsOnly_questionmark == "YES") {
  # Build the truncated motifs of one substrate table.
  #
  # For every row, columns 4:18 are concatenated (cells equal to "XXXXX" are
  # dropped) and split into single characters. A lowercase "x" sits directly
  # in front of the central residue. Rows with fewer than 7 letters on either
  # side of the centre are padded with blanks up to 7 per side, the "x" marker
  # is removed, and the motif is stored together with the value of column 3
  # (the accession number). Rows with at least 7 letters on both sides, and
  # rows without an "x" marker, are left as NA.
  #
  # substrate_set: a substrate table as read from one of the input CSV files.
  # Returns a list with two one-column matrices, `motifs` and `accessions`,
  # each with one row per row of `substrate_set`.
  collect_truncated_motifs <- function(substrate_set) {
    n_rows <- nrow(substrate_set)
    motifs <- matrix(NA, nrow = n_rows, ncol = 1)
    accessions <- matrix(NA, nrow = n_rows, ncol = 1)

    # seq_len() keeps the loop from running when the table is empty.
    for (i in seq_len(n_rows)) {
      cells <- substrate_set[i, 4:18]
      cells <- cells[cells != "XXXXX"]
      residues <- unlist(strsplit(paste(cells, collapse = ""), split = ""))

      marker <- match(x = "x", table = residues)
      if (is.na(marker)) {
        # Without the marker the side lengths are undefined; skip the row
        # rather than abort on an NA condition.
        next
      }

      # Letters before the "x" are to the left of the centre; letters after
      # the "x" and the central residue are to the right.
      letters_left <- marker - 1
      letters_right <- length(residues) - marker - 1

      if (letters_left < 7 || letters_right < 7) {
        # max(0, ...) guards rep() against a negative count when one side is
        # longer than 7 letters.
        left_pad <- rep(" ", times = max(0, 7 - letters_left))
        right_pad <- rep(" ", times = max(0, 7 - letters_right))
        padded <- c(left_pad, residues, right_pad)
        padded <- padded[!padded %in% "x"]
        motifs[i, 1] <- paste(padded, collapse = "")
        accessions[i, 1] <- substrate_set[i, 3]
      }
    }

    list(motifs = motifs, accessions = accessions)
  }

  first_truncated <- collect_truncated_motifs(FirstSubstrateSet)
  FTLwtmotifs <- first_truncated$motifs
  FTLwtAccessionNumbers <- first_truncated$accessions

  second_truncated <- collect_truncated_motifs(SecondSubstrateSet)
  D835Ymotifs <- second_truncated$motifs
  D835YAccessionNumbers <- second_truncated$accessions

  third_truncated <- collect_truncated_motifs(ThirdSubstrateSet)
  ITDmotifs <- third_truncated$motifs
  ITDAccessionNumbers <- third_truncated$accessions

  # Label every motif with its accession number so it can be looked up later.
  names(FTLwtmotifs) <- FTLwtAccessionNumbers
  names(D835Ymotifs) <- D835YAccessionNumbers
  names(ITDmotifs) <- ITDAccessionNumbers
}
779 | |
780 ############################################### | |
781 #ALL motifs, full and truncated | |
782 | |
if (FullMotifsOnly_questionmark != "YES" && TruncatedMotifsOnly_questionmark != "YES") {
  # Build the motifs (full and truncated alike) of one substrate table.
  #
  # For every row, columns 4:18 are concatenated (cells equal to "XXXXX" are
  # dropped) and split into single characters. A lowercase "x" sits directly
  # in front of the central residue. A side with fewer than 7 letters is
  # padded with blanks up to 7; a side that already has 7 or more is left
  # alone. The "x" marker is then removed and the motif is stored together
  # with the value of column 3 (the accession number) of the SAME table, so
  # a motif is always labelled with the accession of its own row. Rows
  # without an "x" marker are left as NA.
  #
  # substrate_set: a substrate table as read from one of the input CSV files.
  # Returns a list with two one-column matrices, `motifs` and `accessions`,
  # each with one row per row of `substrate_set`.
  collect_all_motifs <- function(substrate_set) {
    n_rows <- nrow(substrate_set)
    motifs <- matrix(NA, nrow = n_rows, ncol = 1)
    accessions <- matrix(NA, nrow = n_rows, ncol = 1)

    # seq_len() keeps the loop from running when the table is empty.
    for (i in seq_len(n_rows)) {
      cells <- substrate_set[i, 4:18]
      cells <- cells[cells != "XXXXX"]
      residues <- unlist(strsplit(paste(cells, collapse = ""), split = ""))

      marker <- match(x = "x", table = residues)
      if (is.na(marker)) {
        # Without the marker the side lengths are undefined; skip the row
        # rather than abort on an NA condition.
        next
      }

      # Letters before the "x" are to the left of the centre; letters after
      # the "x" and the central residue are to the right.
      letters_left <- marker - 1
      letters_right <- length(residues) - marker - 1

      # A full side needs no padding (count 0); max() also guards rep()
      # against a negative count when a side is longer than 7 letters.
      left_pad <- rep(" ", times = max(0, 7 - letters_left))
      right_pad <- rep(" ", times = max(0, 7 - letters_right))
      padded <- c(left_pad, residues, right_pad)
      padded <- padded[!padded %in% "x"]

      motifs[i, 1] <- paste(padded, collapse = "")
      accessions[i, 1] <- substrate_set[i, 3]
    }

    list(motifs = motifs, accessions = accessions)
  }

  first_all <- collect_all_motifs(FirstSubstrateSet)
  FTLwtmotifs <- first_all$motifs
  FTLwtAccessionNumbers <- first_all$accessions

  second_all <- collect_all_motifs(SecondSubstrateSet)
  D835Ymotifs <- second_all$motifs
  D835YAccessionNumbers <- second_all$accessions

  third_all <- collect_all_motifs(ThirdSubstrateSet)
  ITDmotifs <- third_all$motifs
  ITDAccessionNumbers <- third_all$accessions

  # Label every motif with its accession number so it can be looked up later.
  names(FTLwtmotifs) <- FTLwtAccessionNumbers
  names(D835Ymotifs) <- D835YAccessionNumbers
  names(ITDmotifs) <- ITDAccessionNumbers
}
933 | |
934 | |
# Keep the entries of `own` that occur in neither of the two other motif
# collections, then drop repeated entries (the first occurrence wins).
# Logical indexing and !duplicated() are used so that the names attached to
# the motifs (their accession numbers) are carried over to the result.
keep_unshared <- function(own, other_a, other_b) {
  is_shared <- own %in% other_a | own %in% other_b
  unshared <- own[!is_shared]
  unshared[!duplicated(unshared)]
}

# Motifs found only in the first set.
FTLwtmotifsFINAL <- keep_unshared(FTLwtmotifs, D835Ymotifs, ITDmotifs)

# Motifs found only in the third set.
ITDmotifsFINAL <- keep_unshared(ITDmotifs, D835Ymotifs, FTLwtmotifs)

# Motifs found only in the second set.
D835YmotifsFINAL <- keep_unshared(D835Ymotifs, FTLwtmotifs, ITDmotifs)
948 | |
949 | |
# Collect the background-frequency rows that belong to a set of motifs.
#
# final_motifs: motif vector whose names hold the accession numbers.
# subbackfreq:  background-frequency table read without a header; the first
#               cell of every column is that column's accession number.
#
# For every motif (in order) and every column (in order), the column is taken
# when its accession number occurs as a fixed substring of the motif's name.
# Each taken column becomes one row of the result. The first row of the
# result is a placeholder of 36 NA cells that the output code skips; repeated
# rows are removed. The result is always a matrix, even when only the
# placeholder row remains.
collect_background_rows <- function(final_motifs, subbackfreq) {
  placeholder <- matrix(data = rep(NA, 36), nrow = 1)
  motif_accessions <- names(final_motifs)
  column_ids <- seq_len(ncol(subbackfreq))

  # The accession numbers do not depend on the motif, so read them once.
  accession_numbers <- vapply(
    column_ids,
    function(m) as.character(subbackfreq[1, m]),
    character(1)
  )

  # seq_along() yields no iterations for an empty motif vector; isTRUE()
  # treats a missing name or a missing accession number as "no match".
  matched_columns <- unlist(lapply(seq_along(final_motifs), function(k) {
    is_hit <- vapply(
      accession_numbers,
      function(accession) {
        isTRUE(grepl(pattern = accession, x = motif_accessions[k], fixed = TRUE))
      },
      logical(1),
      USE.NAMES = FALSE
    )
    column_ids[is_hit]
  }))

  # One single-row matrix per matched column, bound in a single rbind() call
  # instead of growing the result inside the loop.
  matched_rows <- lapply(matched_columns, function(m) {
    matrix(as.character(subbackfreq[, m]), nrow = 1)
  })
  combined <- do.call(rbind, c(list(placeholder), matched_rows))

  # drop = FALSE keeps the matrix shape when a single row is left.
  combined[!duplicated(combined), , drop = FALSE]
}

FTLFinalMatrix <- collect_background_rows(FTLwtmotifsFINAL, Firstsubbackfreq)

ITDFinalMatrix <- collect_background_rows(ITDmotifsFINAL, Thirdsubbackfreq)

D835YFinalMatrix <- collect_background_rows(D835YmotifsFINAL, Secondsubbackfreq)
1011 | |
# Write the two output files for one set of unshared motifs.
#
# final_motifs:     motif vector whose names hold the accession numbers.
# background_rows:  matrix of background-frequency rows for those motifs; its
#                   first row is a placeholder and is not written.
# header_labels:    character vector written as the first line of the
#                   background-frequency file.
# motifs_file:      path of the motif table to append to.
# subbackfreq_file: path of the background-frequency table to append to.
#
# The motif table starts with the first 23 column names of FirstSubstrateSet,
# followed by one line per motif: two empty cells, the accession number, the
# motif split into single characters, and five empty cells.
# Both files are opened with append = TRUE, as before.
write_unshared_outputs <- function(final_motifs, background_rows, header_labels,
                                   motifs_file, subbackfreq_file) {
  # Column 1: motif strings, column 2: their accession numbers.
  output_matrix <- matrix(data = c(final_motifs, names(final_motifs)), ncol = 2)

  header <- colnames(FirstSubstrateSet)[1:23]
  # seq_len() yields no rows (header only) when there are no motifs.
  motif_rows <- lapply(seq_len(nrow(output_matrix)), function(q) {
    residues <- unlist(strsplit(output_matrix[q, 1], ""))
    c("", "", output_matrix[q, 2], residues, "", "", "", "", "")
  })
  motif_table <- do.call(rbind, c(list(header), motif_rows))

  write.table(
    x = motif_table,
    file = motifs_file,
    quote = FALSE, sep = ",",
    row.names = FALSE, col.names = FALSE, na = "", append = TRUE
  )

  write.table(
    x = matrix(header_labels, nrow = 1),
    file = subbackfreq_file,
    quote = FALSE, sep = ",",
    row.names = FALSE, col.names = FALSE, na = "", append = TRUE
  )

  # A matrix that was collapsed to a plain vector holds a single row; restore
  # the shape before dropping the placeholder row.
  if (is.null(dim(background_rows))) {
    background_rows <- matrix(background_rows, nrow = 1)
  }
  # drop = FALSE keeps a single data row as a row, so it is written as one
  # line instead of one value per line.
  data_rows <- background_rows[-1, , drop = FALSE]
  if (nrow(data_rows) > 0) {
    write.table(
      x = data_rows,
      file = subbackfreq_file,
      quote = FALSE, sep = ",",
      row.names = FALSE, col.names = FALSE, na = "", append = TRUE
    )
  }

  invisible(NULL)
}

# Each background-frequency file gets its header from the first column of the
# background-frequency input of the same set.
write_unshared_outputs(
  FTLwtmotifsFINAL, FTLFinalMatrix,
  as.character(Firstsubbackfreq[1:36, 1]),
  First_unshared_motifs_table, First_unshared_subbackfreq
)

############################################################################################################

write_unshared_outputs(
  D835YmotifsFINAL, D835YFinalMatrix,
  as.character(Secondsubbackfreq[1:36, 1]),
  Second_unshared_motifs_table, Second_unshared_subbackfreq
)

############################################################################################################

write_unshared_outputs(
  ITDmotifsFINAL, ITDFinalMatrix,
  as.character(Thirdsubbackfreq[1:36, 1]),
  Third_unshared_motifs_table, Third_unshared_subbackfreq
)
1117 | |
1118 } |