0
|
1 #I should make an SOP for this. Problems we encountered: no x in the xY motif, and the kilodemon
|
|
# TODO: the output files contain both Y and xY, which they shouldn't. Find out why that is happening and make it not happen.
|
|
3 #make sure that accession numbers stay locked to each motif, somehow
|
|
4 #output should look just like the KALIP input
|
|
5
|
|
# Run-mode switches. Each one is compared against the exact string "YES"
# (all caps) further down, so anything else counts as "not YES".

# Put "YES" here if you want ONLY FULL MOTIFS.
FullMotifsOnly_questionmark <- "NO"

# Put "YES" here if you want ONLY TRUNCATED MOTIFS.
TruncatedMotifsOnly_questionmark <- "NO"

# Put "YES" here to find the overlap between the substrate sets, or "NO" to
# find the non-overlap.
Are_You_Looking_For_Commonality <- "NO"
|
|
12
|
|
13
|
|
# Input files: for each of the three replicates, one substrates table (read
# with its header row) and one substrate-background-frequency table (read
# without a header, so its first row stays available as data).
FirstSubstrateSet <- read.csv(
  file = "Galaxy63-BTK_PLUS-R1_Substrates.csv",
  stringsAsFactors = FALSE
)
Firstsubbackfreq <- read.csv(
  file = "Galaxy64-BTK_PLUS-R1_SubstrateBackgroundFrequency.csv",
  header = FALSE,
  stringsAsFactors = FALSE
)

SecondSubstrateSet <- read.csv(
  file = "Galaxy65-BTK_PLUS_R2_Substrates.csv",
  stringsAsFactors = FALSE
)
Secondsubbackfreq <- read.csv(
  file = "Galaxy66-BTK_PLUS_R2_SubstrateBackgroundFrequency.csv",
  header = FALSE,
  stringsAsFactors = FALSE
)

ThirdSubstrateSet <- read.csv(
  file = "Galaxy69-BTK_PLUS_R3_Substrates.csv",
  stringsAsFactors = FALSE
)
Thirdsubbackfreq <- read.csv(
  file = "Galaxy70-BTK_PLUS_R3_SubstrateBackgroundFrequency.csv",
  header = FALSE,
  stringsAsFactors = FALSE
)
|
|
23
|
|
# Output file names.

# Written when looking for commonality: the shared substrates and their
# substrate-background-frequency rows.
Shared_motifs_table <- "180719_GALAXY-BTK-plus-rep-OVLP-7to7-substrates.csv"
Shared_subbackfreq_table <- "180719_GALAXY-BTK-plus-rep-OVLP-7to7-SubBackFreq.csv"

# Earlier output names, kept for reference:
# Shared_motifs_table<-"Shared motifs 7-27-17.csv"
# Shared_subbackfreq_table<-"SubstrateBackgrounFrequency-for-shared-motifs 4 7-27-17.csv"

# Per-replicate outputs for the unshared (non-overlapping) motifs.
First_unshared_motifs_table <- "R1 substrates.csv"
First_unshared_subbackfreq <- "R1 SBF.csv"

Second_unshared_motifs_table <- "R2 subs.csv"
Second_unshared_subbackfreq <- "R2 SBf.csv"

Third_unshared_motifs_table <- "R3 subs.csv"
Third_unshared_subbackfreq <- "R3 SBF.csv"
|
|
39
|
|
# Final note: this code is going to be unworkable if you want to make a Venn diagram of more than 3 circles
# (i.e. compare more than three substrate sets). I think I'll poke around other languages to see if any of them can do it.
|
|
42 ####################################################################################################################################
|
|
43
|
|
44
|
|
45
|
|
46
|
|
47
|
|
# Overwrite column 11 of every substrate set with the literal "xY", one value
# per row. The motif-building code below locates the lowercase "x" inside the
# pasted columns 4:18, and column 11 falls in that range.
FirstxY <- rep_len("xY", nrow(FirstSubstrateSet))
SecondxY <- rep_len("xY", nrow(SecondSubstrateSet))
ThirdxY <- rep_len("xY", nrow(ThirdSubstrateSet))

FirstSubstrateSet[, 11] <- FirstxY
SecondSubstrateSet[, 11] <- SecondxY
ThirdSubstrateSet[, 11] <- ThirdxY
|
|
56
|
|
57
|
|
58
|
|
59
|
|
60
|
|
61
|
|
62
|
|
63
|
|
64
|
|
65
|
|
66
|
|
67 ####################################################################################################################################
|
|
68 ####################################################################################################################################
|
|
69 # better version of this code written in C: what happens when two kinases share a motif, but they found that motif in two
|
|
70 # separate proteins thus two separate accession numbers?
|
|
71 # It should actually output the shared motif and BOTH accession numbers. Right now it does not, it only maps out the second
|
|
72 # accession number. So that needs to be fixed BUT you need to keep the commonality between a motif and its accession number
|
|
73 ####################################################################################################################################
|
|
74 ####################################################################################################################################
|
|
75 ####################################################################################################################################
|
|
76 ####################################################################################################################################
|
|
77
|
|
# Commonality (overlap) analysis.
#
# Step 1: create the motif sets, deciding whether to keep full motifs,
#         truncated motifs, or both.
# Step 2: find the motifs present in the second AND third set, look them up in
#         the first set, and write the matching substrate rows plus their
#         substrate-background-frequency (SBF) columns to the shared outputs.
if (Are_You_Looking_For_Commonality == "YES") {

  # Build one motif per row of a substrate table.
  #
  # Arguments:
  #   substrate_set   data frame; columns 4:18 are pasted together to form the
  #                   motif (cells equal to "XXXXX" are dropped) and column 3
  #                   is recorded as the accession number.
  #   keep_full       record motifs with at least `flank` letters on both
  #                   sides of the "xY" centre.
  #   keep_truncated  record motifs with fewer than `flank` letters on either
  #                   side; these are padded with spaces up to `flank`.
  #   flank           number of letters expected on each side (default 7).
  #
  # Returns a list with two nrow(substrate_set) x 1 matrices, `motifs` and
  # `accessions`. Rows that were not recorded stay NA, so row i of both
  # matrices always refers to row i of `substrate_set`.
  extract_motif_set <- function(substrate_set, keep_full, keep_truncated,
                                flank = 7) {
    n_rows <- nrow(substrate_set)
    motifs <- matrix(NA, nrow = n_rows, ncol = 1)
    accessions <- matrix(NA, nrow = n_rows, ncol = 1)
    n_unmarked <- 0L

    # seq_len() so an empty table produces zero iterations (1:0 would not).
    for (i in seq_len(n_rows)) {
      cells <- substrate_set[i, 4:18]
      cells <- cells[cells != "XXXXX"]
      residues <- unlist(strsplit(paste(cells, collapse = ""), split = ""))

      # The lowercase "x" marks the centre; everything before it is "left".
      x_position <- match("x", residues)
      if (is.na(x_position)) {
        # Without the marker the flank sizes are undefined; skip the row
        # instead of failing on `if (NA)`.
        n_unmarked <- n_unmarked + 1L
        next
      }
      n_left <- x_position - 1
      # length - position - 1: the letter right after "x" (the Y) is the
      # centre itself and does not count as a right-hand letter.
      n_right <- length(residues) - x_position - 1

      is_full <- n_left >= flank && n_right >= flank
      if (is_full && !keep_full) next
      if (!is_full && !keep_truncated) next

      if (!is_full) {
        # Pad truncated motifs with blanks so the centre stays aligned.
        residues <- c(
          rep(" ", times = max(0, flank - n_left)),
          residues,
          rep(" ", times = max(0, flank - n_right))
        )
      }

      # Drop the "x" marker and store the motif with ITS OWN accession number.
      motifs[i, 1] <- paste(residues[!residues %in% "x"], collapse = "")
      accessions[i, 1] <- substrate_set[i, 3]
    }

    if (n_unmarked > 0L) {
      warning(n_unmarked, " row(s) had no 'x' marker and were skipped",
              call. = FALSE)
    }
    list(motifs = motifs, accessions = accessions)
  }

  # Mode selection. When both switches are "YES" the truncated-only mode wins,
  # which matches the previous behaviour (the truncated section ran last and
  # overwrote the full-only result).
  keep_truncated <- TruncatedMotifsOnly_questionmark == "YES" ||
    FullMotifsOnly_questionmark != "YES"
  keep_full <- TruncatedMotifsOnly_questionmark != "YES"

  first_set <- extract_motif_set(FirstSubstrateSet, keep_full, keep_truncated)
  FTLwtmotifs <- first_set$motifs
  FTLwtAccessionNumbers <- first_set$accessions

  # Accession numbers now come from the set the motif was read from; the
  # "all motifs" mode used to take them from FirstSubstrateSet for every set.
  second_set <- extract_motif_set(SecondSubstrateSet, keep_full, keep_truncated)
  D835Ymotifs <- second_set$motifs
  D835YAccessionNumbers <- second_set$accessions

  third_set <- extract_motif_set(ThirdSubstrateSet, keep_full, keep_truncated)
  ITDmotifs <- third_set$motifs
  ITDAccessionNumbers <- third_set$accessions

  ###########################################################################
  # Now look for the commonality.

  # Header row of the shared SBF output: first 36 entries of column 1 of the
  # third SBF table. append = TRUE is kept, so re-running adds to the file.
  columnalheader <- matrix(as.character(Thirdsubbackfreq[1:36, 1]), nrow = 1)
  write.table(
    x = columnalheader,
    file = Shared_subbackfreq_table,
    quote = FALSE,
    sep = ",",
    row.names = FALSE,
    col.names = FALSE,
    na = "",
    append = TRUE
  )

  # Motifs found in both the third and the second set, in order of first
  # appearance in the third set (the order the nested loops used to visit).
  itd_found <- ITDmotifs[!is.na(ITDmotifs[, 1]), 1]
  d835y_found <- D835Ymotifs[!is.na(D835Ymotifs[, 1]), 1]
  FirstOverlapmotifs <- unique(itd_found[itd_found %in% d835y_found])

  # Not used in this block; retained in case later code reads it.
  AllAccessionNumbers <- c()

  # Row 1 of the first SBF table holds the accession label of each column.
  sbf_accessions <- vapply(
    seq_len(ncol(Firstsubbackfreq)),
    function(m) as.character(Firstsubbackfreq[1, m]),
    character(1)
  )

  first_motifs <- FTLwtmotifs[, 1]
  substrate_rows <- list()
  sbf_rows <- list()

  for (shared_motif in FirstOverlapmotifs) {
    for (k in which(!is.na(first_motifs) & first_motifs == shared_motif)) {
      # Save the substrate info of every first-set row carrying the motif.
      # Converted one row at a time so numeric cells are formatted per row.
      substrate_rows[[length(substrate_rows) + 1L]] <-
        as.matrix(FirstSubstrateSet[k, 1:20])

      # Then take that row's accession number and pull every SBF column whose
      # label is contained in it.
      accession <- as.character(FirstSubstrateSet[k, 3])
      label_matches <- vapply(
        sbf_accessions,
        function(an) isTRUE(grepl(pattern = an, x = accession, fixed = TRUE)),
        logical(1),
        USE.NAMES = FALSE
      )
      for (m in which(label_matches)) {
        sbf_rows[[length(sbf_rows) + 1L]] <-
          matrix(as.character(Firstsubbackfreq[, m]), nrow = 1)
      }
    }
  }

  if (length(substrate_rows) > 0) {
    FinalMotifsMatrix <- do.call(rbind, substrate_rows)
    # drop = FALSE keeps a single surviving row as a 1 x 20 matrix, so it is
    # written as one row instead of one column.
    TrueFinalMotifsMatrix <-
      FinalMotifsMatrix[!duplicated(FinalMotifsMatrix), , drop = FALSE]
    write.table(
      x = TrueFinalMotifsMatrix,
      file = Shared_motifs_table,
      quote = FALSE,
      sep = ",",
      row.names = FALSE,
      col.names = TRUE,
      na = "",
      append = TRUE
    )
  } else {
    message("No shared motifs found; '", Shared_motifs_table,
            "' was not written.")
  }

  if (length(sbf_rows) > 0) {
    FinalMatrix <- do.call(rbind, sbf_rows)
    TrueMatrix <- FinalMatrix[!duplicated(FinalMatrix), , drop = FALSE]
    write.table(
      x = TrueMatrix,
      file = Shared_subbackfreq_table,
      quote = FALSE,
      sep = ",",
      row.names = FALSE,
      col.names = FALSE,
      na = "",
      append = TRUE
    )
  } else {
    message("No background-frequency columns matched the shared motifs; ",
            "only the header row was written to '",
            Shared_subbackfreq_table, "'.")
  }
}
|
|
549
|
|
550 if (Are_You_Looking_For_Commonality=="NO"){
|
|
551 if (FullMotifsOnly_questionmark=="YES"){
|
|
552 FTLwtmotifs=rep(NA,times=nrow(FirstSubstrateSet))
|
|
553 FTLwtAccessionNumbers=rep(NA,times=nrow(FirstSubstrateSet))
|
|
554 leftspaces<-c()
|
|
555 rightspaces<-c()
|
|
556 for (i in 1:nrow(FirstSubstrateSet)){
|
|
557 FTLwtletters<-FirstSubstrateSet[i,4:18]
|
|
558 FTLwtletters<-FTLwtletters[FTLwtletters !="XXXXX"]
|
|
559 FTLwtletters<-paste(FTLwtletters, sep="", collapse="")
|
|
560
|
|
561
|
|
562 YYYmotif <- unlist(strsplit(FTLwtletters, split = ""))
|
|
563 YYYposition <- match(x = "x", table = YYYmotif)
|
|
564 #position itself tells me how much is to the left of that X by what it's number is. x at position 4 tells me that there are
|
|
565 #just 3 letters to the left of x
|
|
566
|
|
567 YYYLettersToTheLeft <- YYYposition - 1
|
|
568 #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is
|
|
569 #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1
|
|
570 YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1
|
|
571 #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the
|
|
572 #variable the user puts in is
|
|
573
|
|
574 if (YYYLettersToTheLeft > 6 && YYYLettersToTheRight > 6) {
|
|
575 motif<-YYYmotif
|
|
576 #save that motif, which is the Y and +/- 4 amino acids, including truncation
|
|
577 motif<-motif[!motif %in% "x"]
|
|
578 motif<-paste(motif, sep="", collapse="")
|
|
579 FTLwtletters<-motif
|
|
580 FTLwtmotifs[i]<-FTLwtletters
|
|
581 FTLwtAccessionNumbers[i]<-FirstSubstrateSet[i,3]
|
|
582 }
|
|
583
|
|
584 }
|
|
585 # FTLwtmotifs <- FTLwtmotifs[!is.na(FTLwtmotifs)]
|
|
586 # FTLwtmotifs<-matrix(FTLwtmotifs,ncol = 1)
|
|
587 #
|
|
588
|
|
589 D835Ymotifs=rep(NA,times=nrow(FirstSubstrateSet))
|
|
590 D835YAccessionNumbers=rep(NA,times=nrow(FirstSubstrateSet))
|
|
591
|
|
592 for (i in 1:nrow(SecondSubstrateSet)){
|
|
593 D835letters<-SecondSubstrateSet[i,4:18]
|
|
594 D835letters<-D835letters[D835letters !="XXXXX"]
|
|
595 D835letters<-paste(D835letters, sep="", collapse="")
|
|
596
|
|
597
|
|
598 YYYmotif <- unlist(strsplit(D835letters, split = ""))
|
|
599 YYYposition <- match(x = "x", table = YYYmotif)
|
|
600 #position itself tells me how much is to the left of that X by what it's number is. x at position 4 tells me that there are
|
|
601 #just 3 letters to the left of x
|
|
602
|
|
603 YYYLettersToTheLeft <- YYYposition - 1
|
|
604 #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is
|
|
605 #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1
|
|
606 YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1
|
|
607 #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the
|
|
608 #variable the user puts in is
|
|
609
|
|
610 if (YYYLettersToTheLeft > 6 && YYYLettersToTheRight > 6) {
|
|
611 motif<-YYYmotif
|
|
612 #add blank spaces if the motif has less than 4 letters to the left/right
|
|
613 motif<-c(leftspaces,YYYmotif,rightspaces)
|
|
614 #save that motif, which is the Y and +/- 4 amino acids, including truncation
|
|
615 motif<-motif[!motif %in% "x"]
|
|
616 motif<-paste(motif, sep="", collapse="")
|
|
617 D835letters<-motif
|
|
618 D835Ymotifs[i]<-D835letters
|
|
619 D835YAccessionNumbers[i]<-SecondSubstrateSet[i,3]
|
|
620 }
|
|
621 }
|
|
622
|
|
623 ITDmotifs=rep(NA,times=nrow(FirstSubstrateSet))
|
|
624 ITDAccessionNumbers=rep(NA,times=nrow(FirstSubstrateSet))
|
|
625
|
|
626 for (i in 1:nrow(ThirdSubstrateSet)){
|
|
627 ITDletters<-ThirdSubstrateSet[i,4:18]
|
|
628 ITDletters<-ITDletters[ITDletters !="XXXXX"]
|
|
629 ITDletters<-paste(ITDletters, sep="", collapse="")
|
|
630 YYYmotif <- unlist(strsplit(ITDletters, split = ""))
|
|
631 YYYposition <- match(x = "x", table = YYYmotif)
|
|
632 #position itself tells me how much is to the left of that X by what it's number is. x at position 4 tells me that there are
|
|
633 #just 3 letters to the left of x
|
|
634
|
|
635 YYYLettersToTheLeft <- YYYposition - 1
|
|
636 #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is
|
|
637 #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1
|
|
638 YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1
|
|
639 #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the
|
|
640 #variable the user puts in is
|
|
641
|
|
642 if (YYYLettersToTheLeft > 6 && YYYLettersToTheRight > 6) {
|
|
643 motif<-YYYmotif
|
|
644 #add blank spaces if the motif has less than 4 letters to the left/right
|
|
645 motif<-c(leftspaces,YYYmotif,rightspaces)
|
|
646 #save that motif, which is the Y and +/- 4 amino acids, including truncation
|
|
647 motif<-motif[!motif %in% "x"]
|
|
648 motif<-paste(motif, sep="", collapse="")
|
|
649 ITDletters<-motif
|
|
650 ITDmotifs[i]<-ITDletters
|
|
651 ITDAccessionNumbers[i]<-ThirdSubstrateSet[i,3]
|
|
652
|
|
653 }
|
|
654 }
|
|
655 names(ITDmotifs)<-ITDAccessionNumbers
|
|
656 names(D835Ymotifs)<-D835YAccessionNumbers
|
|
657 names(FTLwtmotifs)<-FTLwtAccessionNumbers
|
|
658 }
|
|
659
|
|
660
|
|
661 ##############################################3
|
|
662 #Truncated only
|
|
if (TruncatedMotifsOnly_questionmark == "YES") {
  # Build the truncated motifs of one substrate table.
  #
  # substrate_set: data frame whose columns 4:18 hold the motif pieces
  #                ("XXXXX" cells are placeholders) and whose column 3 is
  #                stored as the accession number.
  # set_label:     name used in warnings only.
  #
  # Returns a list with two one-column matrices, `motifs` and `accessions`,
  # one row per row of substrate_set. Rows whose motif is NOT truncated
  # (7 or more letters on both sides of the central Y) are left NA, as are
  # rows that have no "x" marker.
  collect_truncated_motifs <- function(substrate_set, set_label) {
    row_count <- nrow(substrate_set)
    motifs <- matrix(NA, nrow = row_count, ncol = 1)
    accessions <- matrix(NA, nrow = row_count, ncol = 1)

    for (i in seq_len(row_count)) {
      cells <- substrate_set[i, 4:18]
      cells <- cells[cells != "XXXXX"]
      residues <- unlist(strsplit(paste(cells, collapse = ""), split = ""))

      # The lowercase "x" sits immediately before the central Y.
      x_position <- match("x", residues)
      if (is.na(x_position)) {
        # Flank lengths are undefined without the marker; skip the row
        # rather than letting if (NA) abort the script.
        warning(set_label, " row ", i,
                " has no 'x' marker in its motif; row skipped.",
                call. = FALSE)
        next
      }

      # x at position p: p - 1 letters to the left of it, and
      # length - p - 1 letters to the right of the Y that follows it.
      letters_left <- x_position - 1
      letters_right <- length(residues) - x_position - 1

      if (letters_left < 7 || letters_right < 7) {
        # Pad the short side(s) with blanks so that each side is 7 wide,
        # then drop the "x" marker and re-join the letters.
        left_pad <- rep(" ", times = 7 - letters_left)
        right_pad <- rep(" ", times = 7 - letters_right)
        padded <- c(left_pad, residues, right_pad)
        motifs[i, 1] <- paste(padded[!padded %in% "x"], collapse = "")
        accessions[i, 1] <- substrate_set[i, 3]
      }
    }

    list(motifs = motifs, accessions = accessions)
  }

  first_truncated <- collect_truncated_motifs(FirstSubstrateSet,
                                              "FirstSubstrateSet")
  FTLwtmotifs <- first_truncated$motifs
  FTLwtAccessionNumbers <- first_truncated$accessions

  second_truncated <- collect_truncated_motifs(SecondSubstrateSet,
                                               "SecondSubstrateSet")
  D835Ymotifs <- second_truncated$motifs
  D835YAccessionNumbers <- second_truncated$accessions

  third_truncated <- collect_truncated_motifs(ThirdSubstrateSet,
                                              "ThirdSubstrateSet")
  ITDmotifs <- third_truncated$motifs
  ITDAccessionNumbers <- third_truncated$accessions

  # Name every motif by its accession number so the pairing survives the
  # subsetting done further down.
  names(FTLwtmotifs) <- FTLwtAccessionNumbers
  names(D835Ymotifs) <- D835YAccessionNumbers
  names(ITDmotifs) <- ITDAccessionNumbers
}
|
|
771
|
|
772 ###############################################
|
|
773 #ALL motifs, full and truncated
|
|
774
|
|
if (FullMotifsOnly_questionmark != "YES" &&
    TruncatedMotifsOnly_questionmark != "YES") {
  # Build the motifs (full and truncated alike) of one substrate table.
  #
  # substrate_set: data frame whose columns 4:18 hold the motif pieces
  #                ("XXXXX" cells are placeholders) and whose column 3 is
  #                stored as the accession number.
  # set_label:     name used in warnings only.
  #
  # Returns a list with two one-column matrices, `motifs` and `accessions`,
  # one row per row of substrate_set. The accession number is always read
  # from the SAME table as the motif, so the two cannot drift apart.
  # Rows without an "x" marker are left NA.
  collect_all_motifs <- function(substrate_set, set_label) {
    row_count <- nrow(substrate_set)
    motifs <- matrix(NA, nrow = row_count, ncol = 1)
    accessions <- matrix(NA, nrow = row_count, ncol = 1)

    for (i in seq_len(row_count)) {
      cells <- substrate_set[i, 4:18]
      cells <- cells[cells != "XXXXX"]
      residues <- unlist(strsplit(paste(cells, collapse = ""), split = ""))

      # The lowercase "x" sits immediately before the central Y.
      x_position <- match("x", residues)
      if (is.na(x_position)) {
        # Flank lengths are undefined without the marker; skip the row
        # rather than letting if (NA) abort the script.
        warning(set_label, " row ", i,
                " has no 'x' marker in its motif; row skipped.",
                call. = FALSE)
        next
      }

      # x at position p: p - 1 letters to the left of it, and
      # length - p - 1 letters to the right of the Y that follows it.
      letters_left <- x_position - 1
      letters_right <- length(residues) - x_position - 1

      if (letters_left < 7 || letters_right < 7) {
        # Truncated motif: pad with blanks so that each side is 7 wide.
        left_pad <- rep(" ", times = 7 - letters_left)
        right_pad <- rep(" ", times = 7 - letters_right)
      } else {
        # Full motif: nothing to pad.
        left_pad <- character(0)
        right_pad <- character(0)
      }

      # Drop the "x" marker and re-join the letters into one string.
      padded <- c(left_pad, residues, right_pad)
      motifs[i, 1] <- paste(padded[!padded %in% "x"], collapse = "")
      accessions[i, 1] <- substrate_set[i, 3]
    }

    list(motifs = motifs, accessions = accessions)
  }

  first_all <- collect_all_motifs(FirstSubstrateSet, "FirstSubstrateSet")
  FTLwtmotifs <- first_all$motifs
  FTLwtAccessionNumbers <- first_all$accessions

  # Accession numbers for the second and third sets come from their own
  # tables; they were previously read from FirstSubstrateSet by mistake.
  second_all <- collect_all_motifs(SecondSubstrateSet, "SecondSubstrateSet")
  D835Ymotifs <- second_all$motifs
  D835YAccessionNumbers <- second_all$accessions

  third_all <- collect_all_motifs(ThirdSubstrateSet, "ThirdSubstrateSet")
  ITDmotifs <- third_all$motifs
  ITDAccessionNumbers <- third_all$accessions

  # Name every motif by its accession number so the pairing survives the
  # subsetting done further down.
  names(FTLwtmotifs) <- FTLwtAccessionNumbers
  names(D835Ymotifs) <- D835YAccessionNumbers
  names(ITDmotifs) <- ITDAccessionNumbers
}
|
|
925
|
|
926
|
|
# Motifs unique to each substrate set: keep only the entries that occur in
# neither of the other two sets, then keep the first occurrence of each
# remaining motif. Logical subsetting (rather than setdiff/unique) is used
# so the accession-number names stay attached to the surviving motifs.

# First set minus (second, third).
FTLwtmotifsFINAL <- FTLwtmotifs[
  !(FTLwtmotifs %in% c(D835Ymotifs, ITDmotifs))
]
FTLwtmotifsFINAL <- FTLwtmotifsFINAL[!duplicated(FTLwtmotifsFINAL)]

# Third set minus (second, first).
ITDmotifsFINAL <- ITDmotifs[
  !(ITDmotifs %in% c(D835Ymotifs, FTLwtmotifs))
]
ITDmotifsFINAL <- ITDmotifsFINAL[!duplicated(ITDmotifsFINAL)]

# Second set minus (first, third).
D835YmotifsFINAL <- D835Ymotifs[
  !(D835Ymotifs %in% c(FTLwtmotifs, ITDmotifs))
]
D835YmotifsFINAL <- D835YmotifsFINAL[!duplicated(D835YmotifsFINAL)]
|
|
940
|
|
941
|
|
# Background-frequency rows for the motifs unique to the first set.
# The matrix is seeded with one all-NA row of 35 cells; every matching
# column of Firstsubbackfreq is appended below it as a row.
columnalheader <- rep(NA, 35)
FTLFinalMatrix <- matrix(data = columnalheader, nrow = 1)

# Matches are gathered in a list and bound once, instead of growing the
# matrix with rbind() on every hit.
FTLmatchedrows <- list()
for (k in seq_along(FTLwtmotifsFINAL)) {
  for (m in seq_len(ncol(Firstsubbackfreq))) {
    # Row 1 of each background-frequency column is its accession number.
    AN <- as.character(Firstsubbackfreq[1, m])
    # A column matches when its accession number occurs, as a literal
    # substring, in the name carried by motif k. isTRUE() treats an NA or
    # empty result (NA accession, unnamed motif) as "no match" instead of
    # letting if () abort.
    if (isTRUE(grepl(pattern = AN,
                     x = names(FTLwtmotifsFINAL[k]),
                     fixed = TRUE))) {
      outputmatrix <- matrix(as.character(Firstsubbackfreq[, m]), nrow = 1)
      FTLmatchedrows[[length(FTLmatchedrows) + 1L]] <- outputmatrix
    }
  }
}
if (length(FTLmatchedrows) > 0) {
  FTLFinalMatrix <- do.call(rbind, c(list(FTLFinalMatrix), FTLmatchedrows))
}
# drop = FALSE keeps a matrix even when a single row survives, so that
# write.table() still emits it as one row.
FTLFinalMatrix <- FTLFinalMatrix[!duplicated(FTLFinalMatrix), , drop = FALSE]
|
|
962
|
|
# Background-frequency rows for the motifs unique to the third set.
# The matrix is seeded with one all-NA row of 35 cells; every matching
# column of Thirdsubbackfreq is appended below it as a row.
columnalheader <- rep(NA, 35)
ITDFinalMatrix <- matrix(data = columnalheader, nrow = 1)

# Matches are gathered in a list and bound once, instead of growing the
# matrix with rbind() on every hit.
ITDmatchedrows <- list()
for (k in seq_along(ITDmotifsFINAL)) {
  for (m in seq_len(ncol(Thirdsubbackfreq))) {
    # Row 1 of each background-frequency column is its accession number.
    AN <- as.character(Thirdsubbackfreq[1, m])
    # A column matches when its accession number occurs, as a literal
    # substring, in the name carried by motif k. isTRUE() treats an NA or
    # empty result (NA accession, unnamed motif) as "no match" instead of
    # letting if () abort.
    if (isTRUE(grepl(pattern = AN,
                     x = names(ITDmotifsFINAL[k]),
                     fixed = TRUE))) {
      outputmatrix <- matrix(as.character(Thirdsubbackfreq[, m]), nrow = 1)
      ITDmatchedrows[[length(ITDmatchedrows) + 1L]] <- outputmatrix
    }
  }
}
if (length(ITDmatchedrows) > 0) {
  ITDFinalMatrix <- do.call(rbind, c(list(ITDFinalMatrix), ITDmatchedrows))
}
# drop = FALSE keeps a matrix even when a single row survives, so that
# write.table() still emits it as one row.
ITDFinalMatrix <- ITDFinalMatrix[!duplicated(ITDFinalMatrix), , drop = FALSE]
|
|
983
|
|
# Background-frequency rows for the motifs unique to the second set.
# The matrix is seeded with one all-NA row of 35 cells; every matching
# column of Secondsubbackfreq is appended below it as a row.
columnalheader <- rep(NA, 35)
D835YFinalMatrix <- matrix(data = columnalheader, nrow = 1)

# Matches are gathered in a list and bound once, instead of growing the
# matrix with rbind() on every hit.
D835Ymatchedrows <- list()
for (k in seq_along(D835YmotifsFINAL)) {
  for (m in seq_len(ncol(Secondsubbackfreq))) {
    # Row 1 of each background-frequency column is its accession number.
    AN <- as.character(Secondsubbackfreq[1, m])
    # A column matches when its accession number occurs, as a literal
    # substring, in the name carried by motif k. isTRUE() treats an NA or
    # empty result (NA accession, unnamed motif) as "no match" instead of
    # letting if () abort.
    if (isTRUE(grepl(pattern = AN,
                     x = names(D835YmotifsFINAL[k]),
                     fixed = TRUE))) {
      outputmatrix <- matrix(as.character(Secondsubbackfreq[, m]), nrow = 1)
      D835Ymatchedrows[[length(D835Ymatchedrows) + 1L]] <- outputmatrix
    }
  }
}
if (length(D835Ymatchedrows) > 0) {
  D835YFinalMatrix <- do.call(
    rbind,
    c(list(D835YFinalMatrix), D835Ymatchedrows)
  )
}
# drop = FALSE keeps a matrix even when a single row survives, so that
# write.table() still emits it as one row.
D835YFinalMatrix <- D835YFinalMatrix[
  !duplicated(D835YFinalMatrix), , drop = FALSE
]
|
|
1003
|
|
# ---- Output for the first substrate set --------------------------------
# Two-column table: the unique motifs, followed by the names they carry.
FTLoutputmatrix <- matrix(
  data = c(FTLwtmotifsFINAL, names(FTLwtmotifsFINAL)),
  ncol = 2
)

# Every write below appends to its target file rather than replacing it.
write.table(
  x = FTLoutputmatrix,
  file = First_unshared_motifs_table,
  quote = FALSE,
  sep = ",",
  row.names = FALSE,
  col.names = FALSE,
  na = "",
  append = TRUE
)

# Header line for the background-frequency file.
# NOTE(review): the labels are read from Thirdsubbackfreq although this is
# the first set's file - confirm column 1 is the same in all three inputs.
columnalheader <- c(
  "Accession Numbers",
  as.character(Thirdsubbackfreq[1:35, 1])
)
columnalheader <- matrix(columnalheader, nrow = 1)
write.table(
  x = columnalheader,
  file = First_unshared_subbackfreq,
  quote = FALSE,
  sep = ",",
  row.names = FALSE,
  col.names = FALSE,
  na = "",
  append = TRUE
)

# Background-frequency rows matched to the unique motifs.
write.table(
  x = FTLFinalMatrix,
  file = First_unshared_subbackfreq,
  quote = FALSE,
  sep = ",",
  row.names = FALSE,
  col.names = FALSE,
  na = "",
  append = TRUE
)
|
|
1023
|
|
1024 ############################################################################################################
|
|
1025
|
|
# ---- Output for the second substrate set -------------------------------
# Two-column table: the unique motifs, followed by the names they carry.
D835Youtputmatrix <- matrix(
  data = c(D835YmotifsFINAL, names(D835YmotifsFINAL)),
  ncol = 2
)

# Every write below appends to its target file rather than replacing it.
write.table(
  x = D835Youtputmatrix,
  file = Second_unshared_motifs_table,
  quote = FALSE,
  sep = ",",
  row.names = FALSE,
  col.names = FALSE,
  na = "",
  append = TRUE
)

# Header line for the background-frequency file.
# NOTE(review): the labels are read from Thirdsubbackfreq although this is
# the second set's file - confirm column 1 is the same in all three inputs.
columnalheader <- c(
  "Accession Numbers",
  as.character(Thirdsubbackfreq[1:35, 1])
)
columnalheader <- matrix(columnalheader, nrow = 1)
write.table(
  x = columnalheader,
  file = Second_unshared_subbackfreq,
  quote = FALSE,
  sep = ",",
  row.names = FALSE,
  col.names = FALSE,
  na = "",
  append = TRUE
)

# Background-frequency rows matched to the unique motifs.
write.table(
  x = D835YFinalMatrix,
  file = Second_unshared_subbackfreq,
  quote = FALSE,
  sep = ",",
  row.names = FALSE,
  col.names = FALSE,
  na = "",
  append = TRUE
)
|
|
1044
|
|
1045 ############################################################################################################
|
|
1046
|
|
# ---- Output for the third substrate set --------------------------------
# Two-column table: the unique motifs, followed by the names they carry.
ITDoutputmatrix <- matrix(
  data = c(ITDmotifsFINAL, names(ITDmotifsFINAL)),
  ncol = 2
)

# Every write below appends to its target file rather than replacing it.
write.table(
  x = ITDoutputmatrix,
  file = Third_unshared_motifs_table,
  quote = FALSE,
  sep = ",",
  row.names = FALSE,
  col.names = FALSE,
  na = "",
  append = TRUE
)

# Header line for the background-frequency file: a fixed label followed by
# the first 35 entries of column 1 of Thirdsubbackfreq.
columnalheader <- c(
  "Accession Numbers",
  as.character(Thirdsubbackfreq[1:35, 1])
)
columnalheader <- matrix(columnalheader, nrow = 1)
write.table(
  x = columnalheader,
  file = Third_unshared_subbackfreq,
  quote = FALSE,
  sep = ",",
  row.names = FALSE,
  col.names = FALSE,
  na = "",
  append = TRUE
)

# Background-frequency rows matched to the unique motifs.
write.table(
  x = ITDFinalMatrix,
  file = Third_unshared_subbackfreq,
  quote = FALSE,
  sep = ",",
  row.names = FALSE,
  col.names = FALSE,
  na = "",
  append = TRUE
)
|
|
1065
|
|
1066 }
|
|
1067
|
|
1068 # if (Are_You_Looking_For_Commonality=="NO"){
|
|
1069 #
|
|
1070 #
|
|
1071 # FTLwtmotifsFULLMATRIX<-cbind(FTLwtmotifs,FTLwtAccessionNumbers)
|
|
1072 # ITDmotifsFULLMATRIX<-cbind(ITDmotifs,ITDAccessionNumbers)
|
|
1073 # D835YmotifsFULLMATRIX<-cbind(D835Ymotifs,D835YAccessionNumbers)
|
|
1074 # # ?duplicated
|
|
1075 #
|
|
1076 # FTLwtmotifsFINAL<-FTLwtmotifs[!FTLwtmotifs %in% D835Ymotifs]
|
|
1077 # FTLwtmotifsFINAL<-FTLwtmotifsFINAL[!FTLwtmotifsFINAL %in% ITDmotifs]
|
|
1078 # FTLwtmotifsFINAL<-matrix(data=FTLwtmotifsFINAL, ncol = 1)
|
|
1079 #
|
|
1080 # ITDmotifsFINAL<-ITDmotifs[!ITDmotifs %in% D835Ymotifs]
|
|
1081 # ITDmotifsFINAL<-ITDmotifsFINAL[!ITDmotifsFINAL %in% FTLwtmotifs]
|
|
1082 # ITDmotifsFINAL<-matrix(data=ITDmotifsFINAL, ncol = 1)
|
|
1083 #
|
|
1084 # D835YmotifsFINAL<-D835Ymotifs[!D835Ymotifs %in% FTLwtmotifs]
|
|
1085 # D835YmotifsFINAL<-D835YmotifsFINAL[!D835YmotifsFINAL %in% ITDmotifs]
|
|
1086 # D835YmotifsFINAL<-matrix(data=D835YmotifsFINAL, ncol = 1)
|
|
1087 #
|
|
1088 # FTLnondupeAccessionNumbers<-c()
|
|
1089 #
|
|
1090 # for (z in 1:nrow(FTLwtmotifsFINAL)) {
|
|
1091 # for (w in 1:nrow(FTLwtmotifsFULLMATRIX)) {
|
|
1092 # if (is.na(FTLwtmotifsFULLMATRIX[w,1])!=TRUE)
|
|
1093 # if (FTLwtmotifsFINAL[z]==FTLwtmotifsFULLMATRIX[w,1]){
|
|
1094 # FTLnondupeAccessionNumbers<-c(FTLnondupeAccessionNumbers,FTLwtmotifsFULLMATRIX[w,2])
|
|
1095 # }
|
|
1096 # }
|
|
1097 # }
|
|
1098 #
|
|
1099 #
|
|
1100 # #find accession numbers here, put a matrix of those things, amino acid %, but only after I've unduped them
|
|
1101 # FTLnondupeAccessionNumbers<-FTLnondupeAccessionNumbers[!duplicated(FTLnondupeAccessionNumbers)]
|
|
1102 #
|
|
1103 # columnalheader<-c(rep(NA,35))
|
|
1104 # FTLFinalMatrix<-matrix(data =columnalheader,nrow = 1)
|
|
1105 #
|
|
1106 # for (k in 1:length(FTLnondupeAccessionNumbers)) {
|
|
1107 # #I don't remember why, but I felt it necessary to destroy the accession number multiple times to ensure it is
|
|
1108 # #destroyed immediately after use
|
|
1109 # for (m in 1:ncol(Firstsubbackfreq)) {
|
|
1110 # AN <- as.character(Firstsubbackfreq[1, m])
|
|
1111 # if (grepl(pattern = AN,
|
|
1112 # x = FTLnondupeAccessionNumbers[k],
|
|
1113 # fixed = TRUE) == TRUE) {
|
|
1114 # outputmatrix <- as.character(Firstsubbackfreq[, m])
|
|
1115 # outputmatrix <- matrix(outputmatrix, nrow = 1)
|
|
1116 # #with that accession number, find a match in the subbackfreq file and save it here
|
|
1117 # FTLFinalMatrix<-rbind(FTLFinalMatrix,outputmatrix)
|
|
1118 # }
|
|
1119 # }
|
|
1120 # }
|
|
1121 # ITDnondupeAccessionNumbers<-c()
|
|
1122 #
|
|
1123 # for (z in 1:nrow(ITDmotifsFINAL)) {
|
|
1124 # for (w in 1:nrow(ITDmotifsFULLMATRIX)) {
|
|
1125 # if (is.na(ITDmotifsFULLMATRIX[w,1])!=TRUE)
|
|
1126 # if (ITDmotifsFINAL[z]==ITDmotifsFULLMATRIX[w,1]){
|
|
1127 # ITDnondupeAccessionNumbers<-c(ITDnondupeAccessionNumbers,ITDmotifsFULLMATRIX[w,2])
|
|
1128 # }
|
|
1129 # }
|
|
1130 # }
|
|
1131 #
|
|
1132 #
|
|
1133 # #find accession numbers here, put a matrix of those things, amino acid %, but only after I've unduped them
|
|
1134 # ITDnondupeAccessionNumbers<-ITDnondupeAccessionNumbers[!duplicated(ITDnondupeAccessionNumbers)]
|
|
1135 #
|
|
1136 # columnalheader<-c(rep(NA,35))
|
|
1137 # ITDFinalMatrix<-matrix(data =columnalheader,nrow = 1)
|
|
1138 #
|
|
1139 # for (k in 1:length(ITDnondupeAccessionNumbers)) {
|
|
1140 # #I don't remember why, but I felt it necessary to destroy the accession number multiple times to ensure it is
|
|
1141 # #destroyed immediately after use
|
|
1142 # for (m in 1:ncol(Thirdsubbackfreq)) {
|
|
1143 # AN <- as.character(Thirdsubbackfreq[1, m])
|
|
1144 # if (grepl(pattern = AN,
|
|
1145 # x = ITDnondupeAccessionNumbers[k],
|
|
1146 # fixed = TRUE) == TRUE) {
|
|
1147 # outputmatrix <- as.character(Thirdsubbackfreq[, m])
|
|
1148 # outputmatrix <- matrix(outputmatrix, nrow = 1)
|
|
1149 # #with that accession number, find a match in the subbackfreq file and save it here
|
|
1150 # ITDFinalMatrix<-rbind(ITDFinalMatrix,outputmatrix)
|
|
1151 # }
|
|
1152 # }
|
|
1153 # }
|
|
1154 #
|
|
1155 #
|
|
1156 # D835YnondupeAccessionNumbers<-c()
|
|
1157 #
|
|
1158 # for (z in 1:nrow(D835YmotifsFINAL)) {
|
|
1159 # for (w in 1:nrow(D835YmotifsFULLMATRIX)) {
|
|
1160 # if (is.na(D835YmotifsFULLMATRIX[w,1])!=TRUE)
|
|
1161 # if (D835YmotifsFINAL[z]==D835YmotifsFULLMATRIX[w,1]){
|
|
1162 # D835YnondupeAccessionNumbers<-c(D835YnondupeAccessionNumbers,D835YmotifsFULLMATRIX[w,2])
|
|
1163 # }
|
|
1164 # }
|
|
1165 # }
|
|
1166 #
|
|
1167 #
|
|
1168 # #find accession numbers here, put a matrix of those things, amino acid %, but only after I've unduped them
|
|
1169 # D835YnondupeAccessionNumbers<-D835YnondupeAccessionNumbers[!duplicated(D835YnondupeAccessionNumbers)]
|
|
1170 #
|
|
1171 # columnalheader<-c(rep(NA,35))
|
|
1172 # D835YFinalMatrix<-matrix(data =columnalheader,nrow = 1)
|
|
1173 #
|
|
1174 # for (k in 1:length(D835YnondupeAccessionNumbers)) {
|
|
1175 # #I don't remember why, but I felt it necessary to destroy the accession number multiple times to ensure it is
|
|
1176 # #destroyed immediately after use
|
|
1177 # for (m in 1:ncol(Secondsubbackfreq)) {
|
|
1178 # AN <- as.character(Secondsubbackfreq[1, m])
|
|
1179 # if (grepl(pattern = AN,
|
|
1180 # x = D835YnondupeAccessionNumbers[k],
|
|
1181 # fixed = TRUE) == TRUE) {
|
|
1182 # outputmatrix <- as.character(Secondsubbackfreq[, m])
|
|
1183 # outputmatrix <- matrix(outputmatrix, nrow = 1)
|
|
1184 # #with that accession number, find a match in the subbackfreq file and save it here
|
|
1185 # D835YFinalMatrix<-rbind(D835YFinalMatrix,outputmatrix)
|
|
1186 # }
|
|
1187 # }
|
|
1188 # }
|
|
1189 #
|
|
1190 #
|
|
1191 #
|
|
1192 # # FinalFTLmotifs<-FTLwtmotifsFINAL[!duplicated(FTLwtmotifsFINAL)]
|
|
1193 # # FinalFTLAccessionNumbers<-FTLnondupeAccessionNumbers[!duplicated(FTLnondupeAccessionNumbers)]
|
|
1194 # # necessaryNAs<-rep(NA,times=(length(FinalFTLmotifs)-length(FinalFTLAccessionNumbers)))
|
|
1195 # # FinalFTLAccessionNumbers<-c(FinalFTLAccessionNumbers,necessaryNAs)
|
|
1196 # # TRUEFTLoutputmatrix<-cbind(FinalFTLmotifs,FinalFTLAccessionNumbers)
|
|
1197 # # TRUEFTLoutputmatrix
|
|
1198 #
|
|
1199 # write.table(x=FTLwtmotifsFINAL,
|
|
1200 # file=First_unshared_motifs_table,
|
|
1201 # quote=FALSE, sep=",",
|
|
1202 # row.names=FALSE,col.names = FALSE, na="", append=TRUE)
|
|
1203 #
|
|
1204 # columnalheader<-c("Accession Numbers",as.character(Thirdsubbackfreq[1:35,1]))
|
|
1205 # columnalheader<-matrix(columnalheader,nrow = 1)
|
|
1206 # write.table(x=columnalheader,
|
|
1207 # file=First_unshared_subbackfreq,
|
|
1208 # quote=FALSE, sep=",",
|
|
1209 # row.names=FALSE,col.names = FALSE, na="", append=TRUE)
|
|
1210 #
|
|
1211 # write.table(x=FTLFinalMatrix,
|
|
1212 # file=First_unshared_subbackfreq,
|
|
1213 # quote=FALSE, sep=",",
|
|
1214 # row.names=FALSE,col.names = FALSE, na="", append=TRUE)
|
|
1215 #
|
|
1216 #
|
|
1217 #
|
|
1218 #
|
|
1219 #
|
|
1220 #
|
|
1221 #
|
|
1222 #
|
|
1223 #
|
|
1224 #
|
|
1225 #
|
|
1226 #
|
|
1227 # write.table(x=D835YmotifsFINAL,
|
|
1228 # file=Second_unshared_motifs_table,
|
|
1229 # quote=FALSE, sep=",",
|
|
1230 # row.names=FALSE,col.names = FALSE, na="", append=TRUE)
|
|
1231 #
|
|
1232 # columnalheader<-c("Accession Numbers",as.character(Thirdsubbackfreq[1:35,1]))
|
|
1233 # columnalheader<-matrix(columnalheader,nrow = 1)
|
|
1234 # write.table(x=columnalheader,
|
|
1235 # file=Second_unshared_subbackfreq,
|
|
1236 # quote=FALSE, sep=",",
|
|
1237 # row.names=FALSE,col.names = FALSE, na="", append=TRUE)
|
|
1238 #
|
|
1239 # write.table(x=D835YFinalMatrix,
|
|
1240 # file=Second_unshared_subbackfreq,
|
|
1241 # quote=FALSE, sep=",",
|
|
1242 # row.names=FALSE,col.names = FALSE, na="", append=TRUE)
|
|
1243 #
|
|
1244 #
|
|
1245 #
|
|
1246 #
|
|
1247 #
|
|
1248 #
|
|
1249 #
|
|
1250 #
|
|
1251 #
|
|
1252 #
|
|
1253 #
|
|
1254 #
|
|
1255 #
|
|
1256 #
|
|
1257 #
|
|
1258 #
|
|
1259 # write.table(x=ITDmotifsFINAL,
|
|
1260 # file=Third_unshared_motifs_table,
|
|
1261 # quote=FALSE, sep=",",
|
|
1262 # row.names=FALSE,col.names = FALSE, na="", append=TRUE)
|
|
1263 #
|
|
1264 # columnalheader<-c("Accession Numbers",as.character(Thirdsubbackfreq[1:35,1]))
|
|
1265 # columnalheader<-matrix(columnalheader,nrow = 1)
|
|
1266 # write.table(x=columnalheader,
|
|
1267 # file=Third_unshared_subbackfreq,
|
|
1268 # quote=FALSE, sep=",",
|
|
1269 # row.names=FALSE,col.names = FALSE, na="", append=TRUE)
|
|
1270 #
|
|
1271 # write.table(x=ITDFinalMatrix,
|
|
1272 # file=Third_unshared_subbackfreq,
|
|
1273 # quote=FALSE, sep=",",
|
|
1274 # row.names=FALSE,col.names = FALSE, na="", append=TRUE)
|
|
1275 #
|
|
1276 #
|
|
1277 #
|
|
1278 #
|
|
1279 #
|
|
1280 #
|
|
1281 # }
|