Mercurial > repos > jfb > difference_finder
comparison all stuff/difference finder for 2 overlaps proper names 7-7_1-15-2019.R @ 0:23eea82f5192 draft
Uploaded
author | jfb |
---|---|
date | Wed, 16 Jan 2019 13:55:22 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:23eea82f5192 |
---|---|
1 #Difference finder for only 2 substrate sets | |
2 | |
3 #If you want ONLY FULL MOTIFS, put "YES" here, please use all caps | |
4 FullMotifsOnly_questionmark<-"NO" | |
5 #If you want ONLY TRUNCATED MOTIFS, put "YES" here, please use all caps | |
6 TruncatedMotifsOnly_questionmark<-"NO" | |
7 #Inputs: each set has a Substrates table and a SubBackFreq table (the SubBackFreq tables are read without a header row) | |
8 FirstSubstrateSet<- read.csv("170922-BTK-MINUS-COMBO FILES_Substrates.csv", stringsAsFactors=FALSE) | |
9 Firstsubbackfreq<- read.csv("170922-BTK-MINUS-COMBO FILES_SubBackFreq.csv", header=FALSE, stringsAsFactors=FALSE) | |
10 | |
11 SecondSubstrateSet<- read.csv("170922-btk-rep OVLP-plus_Substrates.csv", stringsAsFactors=FALSE) | |
12 Secondsubbackfreq<- read.csv("170922-btk-rep OVLP-plus_SubBackFreq.csv", header=FALSE, stringsAsFactors=FALSE) | |
13 #Outputs: the motifs (and their SubBackFreq columns) that appear in one set but not in the other | |
14 First_unshared_motifs_table<-"170922-BTK-MINUS-COMBO FILES_Substrates-unique.csv" | |
15 First_unshared_subbackfreq<-"170922-BTK-MINUS-COMBO FILES_SubBackFreq-unique.csv" | |
16 | |
17 Second_unshared_motifs_table<-"170922-btk-rep OVLP-plus_Substrates-unique.csv" | |
18 Second_unshared_subbackfreq<-"170922-btk-rep OVLP-plus_SubBackFreq-unique.csv" | |
19 | |
20 #NOTE(review): these two values are not referenced by any other line of this listing; the padding width used below is a literal 4 | |
21 LeftOfYLetters<-7 | |
22 RightOfYLetters<-7 | |
23 | |
24 if (FullMotifsOnly_questionmark=="YES"){ | |
25 FirstMotifs<-rep(NA,times=nrow(FirstSubstrateSet)) | |
26 FirstAccessionNumbers<-rep(NA,times=nrow(FirstSubstrateSet)) | |
27 leftspaces<-c() | |
28 rightspaces<-c() | |
29 for (i in seq_len(nrow(FirstSubstrateSet))){ | |
30 FirstLetters<-FirstSubstrateSet[i,7:15] | |
31 FirstLetters<-FirstLetters[FirstLetters !="XXXXX"] | |
32 FirstLetters<-paste(FirstLetters, sep="", collapse="") | |
33 | |
34 | |
35 YYYmotif <- unlist(strsplit(FirstLetters, split = "")) | |
36 YYYposition <- match(x = "x", table = YYYmotif) | |
37 #the position of the x marker gives the count of letters before it: x at position 4 means there are | |
38 #just 3 letters to the left of x | |
39 | |
40 YYYLettersToTheLeft <- YYYposition - 1 | |
41 #letters to the right = length(motif)-position-1: if the motif is 5 long and x is at 3 then Y is at 4 and there is | |
42 #just 1 spot to the right of Y, because 5-3-1=1 | |
43 YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 | |
44 #full motifs only: a row is kept when more than 3 letters sit on each side; | |
45 #rows failing the check leave NA in the result vectors | |
46 | |
47 if (YYYLettersToTheLeft > 3 && YYYLettersToTheRight > 3) { | |
48 motif<-YYYmotif | |
49 #save the motif without the x marker | |
50 motif<-motif[!motif %in% "x"] | |
51 motif<-paste(motif, sep="", collapse="") | |
52 FirstLetters<-motif | |
53 FirstMotifs[i]<-FirstLetters | |
54 FirstAccessionNumbers[i]<-FirstSubstrateSet[i,3] | |
55 } | |
56 | |
57 } | |
58 # FirstMotifs <- FirstMotifs[!is.na(FirstMotifs)] | |
59 # FirstMotifs<-matrix(FirstMotifs,ncol = 1) | |
60 # | |
61 #the second set's result vectors are sized from the second table (they used to be sized from the first) | |
62 SecondMotifs<-rep(NA,times=nrow(SecondSubstrateSet)) | |
63 SecondAccessionNumbers<-rep(NA,times=nrow(SecondSubstrateSet)) | |
64 | |
65 for (i in seq_len(nrow(SecondSubstrateSet))){ | |
66 SecondLetters<-SecondSubstrateSet[i,7:15] | |
67 SecondLetters<-SecondLetters[SecondLetters !="XXXXX"] | |
68 SecondLetters<-paste(SecondLetters, sep="", collapse="") | |
69 | |
70 | |
71 YYYmotif <- unlist(strsplit(SecondLetters, split = "")) | |
72 YYYposition <- match(x = "x", table = YYYmotif) | |
73 #the position of the x marker gives the count of letters before it: x at position 4 means there are | |
74 #just 3 letters to the left of x | |
75 | |
76 YYYLettersToTheLeft <- YYYposition - 1 | |
77 #letters to the right = length(motif)-position-1: if the motif is 5 long and x is at 3 then Y is at 4 and there is | |
78 #just 1 spot to the right of Y, because 5-3-1=1 | |
79 YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 | |
80 #full motifs only: a row is kept when more than 3 letters sit on each side; | |
81 #rows failing the check leave NA in the result vectors | |
82 | |
83 if (YYYLettersToTheLeft > 3 && YYYLettersToTheRight > 3) { | |
84 motif<-YYYmotif | |
85 #leftspaces and rightspaces are both empty in this branch, so nothing is padded here | |
86 motif<-c(leftspaces,YYYmotif,rightspaces) | |
87 #save the motif without the x marker | |
88 motif<-motif[!motif %in% "x"] | |
89 motif<-paste(motif, sep="", collapse="") | |
90 SecondLetters<-motif | |
91 SecondMotifs[i]<-SecondLetters | |
92 SecondAccessionNumbers[i]<-SecondSubstrateSet[i,3] | |
93 } | |
94 } | |
95 names(FirstMotifs)<-FirstAccessionNumbers | |
96 names(SecondMotifs)<-SecondAccessionNumbers | |
97 | |
98 | |
99 # ITDmotifs=rep(NA,times=nrow(FirstSubstrateSet)) | |
100 # ITDAccessionNumbers=rep(NA,times=nrow(FirstSubstrateSet)) | |
101 if(1==0){ | |
102 for (i in 1:nrow(ThirdSubstrateSet)){ | |
103 ITDletters<-ThirdSubstrateSet[i,7:15] | |
104 ITDletters<-ITDletters[ITDletters !="XXXXX"] | |
105 ITDletters<-paste(ITDletters, sep="", collapse="") | |
106 YYYmotif <- unlist(strsplit(ITDletters, split = "")) | |
107 YYYposition <- match(x = "x", table = YYYmotif) | |
108 #disabled block (1==0 is never TRUE): same steps as the two loops above, written for a third set | |
109 #whose inputs are not loaded by any active line of this listing | |
110 | |
111 YYYLettersToTheLeft <- YYYposition - 1 | |
112 #letters to the right = length(motif)-position-1: if the motif is 5 long and x is at 3 then Y is at 4 and there is | |
113 #just 1 spot to the right of Y, because 5-3-1=1 | |
114 YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 | |
115 #full motifs only: a row is kept when more than 3 letters sit on each side; | |
116 #rows failing the check would leave NA in the result vectors | |
117 | |
118 if (YYYLettersToTheLeft > 3 && YYYLettersToTheRight > 3) { | |
119 motif<-YYYmotif | |
120 #leftspaces and rightspaces are both empty in this branch, so nothing is padded here | |
121 motif<-c(leftspaces,YYYmotif,rightspaces) | |
122 #save the motif without the x marker | |
123 motif<-motif[!motif %in% "x"] | |
124 motif<-paste(motif, sep="", collapse="") | |
125 ITDletters<-motif | |
126 ITDmotifs[i]<-ITDletters | |
127 ITDAccessionNumbers[i]<-ThirdSubstrateSet[i,3] | |
128 | |
129 } | |
130 } | |
131 } | |
132 #names(ITDmotifs)<-ITDAccessionNumbers | |
133 names(SecondMotifs)<-SecondAccessionNumbers | |
134 names(FirstMotifs)<-FirstAccessionNumbers | |
135 } | |
136 | |
137 | |
138 ##############################################3 | |
139 #Truncated only | |
140 if (TruncatedMotifsOnly_questionmark=="YES"){ | |
141 FirstMotifs<-matrix(NA,nrow = nrow(FirstSubstrateSet),ncol=1) | |
142 FirstAccessionNumbers<-matrix(NA,nrow = nrow(FirstSubstrateSet),ncol=1) | |
143 | |
144 for (i in seq_len(nrow(FirstSubstrateSet))){ | |
145 FirstLetters<-FirstSubstrateSet[i,7:15] | |
146 FirstLetters<-FirstLetters[FirstLetters !="XXXXX"] | |
147 FirstLetters<-paste(FirstLetters, sep="", collapse="") | |
148 | |
149 | |
150 YYYmotif <- unlist(strsplit(FirstLetters, split = "")) | |
151 YYYposition <- match(x = "x", table = YYYmotif) | |
152 #the position of the x marker gives the count of letters before it: x at position 4 means there are | |
153 #just 3 letters to the left of x | |
154 | |
155 YYYLettersToTheLeft <- YYYposition - 1 | |
156 #letters to the right = length(motif)-position-1: if the motif is 5 long and x is at 3 then Y is at 4 and there is | |
157 #just 1 spot to the right of Y, because 5-3-1=1 | |
158 YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 | |
159 #truncated motifs only: a row is kept when either side has fewer than 4 letters; | |
160 #rows failing the check leave NA in the result matrices | |
161 | |
162 if (YYYLettersToTheLeft < 4 || YYYLettersToTheRight < 4) { | |
163 leftspaces<-rep(" ",times=max(0,4-YYYLettersToTheLeft)) | |
164 rightspaces<-rep(" ",times=max(0,4-YYYLettersToTheRight)) | |
165 #pad each short side out to 4 letters; max(0, ) keeps rep() from stopping when the other side is longer than 4 | |
166 motif<-c(leftspaces,YYYmotif,rightspaces) | |
167 #save the padded motif without the x marker | |
168 motif<-motif[!motif %in% "x"] | |
169 motif<-paste(motif, sep="", collapse="") | |
170 FirstLetters<-motif | |
171 FirstMotifs[i,1]<-FirstLetters | |
172 FirstAccessionNumbers[i,1]<-FirstSubstrateSet[i,3] | |
173 } | |
174 | |
175 } | |
176 | |
177 SecondMotifs<-matrix(NA,nrow = nrow(SecondSubstrateSet),ncol=1) | |
178 SecondAccessionNumbers<-matrix(NA,nrow = nrow(SecondSubstrateSet),ncol = 1) | |
179 | |
180 for (i in seq_len(nrow(SecondSubstrateSet))){ | |
181 SecondLetters<-SecondSubstrateSet[i,7:15] | |
182 SecondLetters<-SecondLetters[SecondLetters !="XXXXX"] | |
183 SecondLetters<-paste(SecondLetters, sep="", collapse="") | |
184 | |
185 | |
186 YYYmotif <- unlist(strsplit(SecondLetters, split = "")) | |
187 YYYposition <- match(x = "x", table = YYYmotif) | |
188 #the position of the x marker gives the count of letters before it: x at position 4 means there are | |
189 #just 3 letters to the left of x | |
190 | |
191 YYYLettersToTheLeft <- YYYposition - 1 | |
192 #letters to the right = length(motif)-position-1: if the motif is 5 long and x is at 3 then Y is at 4 and there is | |
193 #just 1 spot to the right of Y, because 5-3-1=1 | |
194 YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 | |
195 #truncated motifs only: a row is kept when either side has fewer than 4 letters; | |
196 #rows failing the check leave NA in the result matrices | |
197 | |
198 if (YYYLettersToTheLeft < 4 || YYYLettersToTheRight < 4) { | |
199 leftspaces<-rep(" ",times=max(0,4-YYYLettersToTheLeft)) | |
200 rightspaces<-rep(" ",times=max(0,4-YYYLettersToTheRight)) | |
201 #pad each short side out to 4 letters; max(0, ) keeps rep() from stopping when the other side is longer than 4 | |
202 motif<-c(leftspaces,YYYmotif,rightspaces) | |
203 #save the padded motif without the x marker | |
204 motif<-motif[!motif %in% "x"] | |
205 motif<-paste(motif, sep="", collapse="") | |
206 SecondLetters<-motif | |
207 SecondAccessionNumbers[i,1]<-SecondSubstrateSet[i,3] | |
208 SecondMotifs[i,1]<-SecondLetters | |
209 } | |
210 } | |
211 | |
212 # ITDmotifs=matrix(,nrow = nrow(ThirdSubstrateSet),ncol=1) | |
213 # ITDAccessionNumbers<-matrix(,nrow = nrow(ThirdSubstrateSet)) | |
214 if(1==0){ | |
215 for (i in 1:nrow(ThirdSubstrateSet)){ | |
216 ITDletters<-ThirdSubstrateSet[i,7:15] | |
217 ITDletters<-ITDletters[ITDletters !="XXXXX"] | |
218 ITDletters<-paste(ITDletters, sep="", collapse="") | |
219 YYYmotif <- unlist(strsplit(ITDletters, split = "")) | |
220 YYYposition <- match(x = "x", table = YYYmotif) | |
221 #disabled block (1==0 is never TRUE): same steps as the two loops above, written for a third set | |
222 #whose inputs are not loaded by any active line of this listing | |
223 | |
224 YYYLettersToTheLeft <- YYYposition - 1 | |
225 #letters to the right = length(motif)-position-1: if the motif is 5 long and x is at 3 then Y is at 4 and there is | |
226 #just 1 spot to the right of Y, because 5-3-1=1 | |
227 YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 | |
228 #truncated motifs only: a row is kept when either side has fewer than 4 letters; | |
229 #rows failing the check would leave NA in the result matrices | |
230 | |
231 if (YYYLettersToTheLeft < 4 || YYYLettersToTheRight < 4) { | |
232 leftspaces<-rep(" ",times=max(0,4-YYYLettersToTheLeft)) | |
233 rightspaces<-rep(" ",times=max(0,4-YYYLettersToTheRight)) | |
234 #pad each short side out to 4 letters; max(0, ) keeps rep() from stopping when the other side is longer than 4 | |
235 motif<-c(leftspaces,YYYmotif,rightspaces) | |
236 #save the padded motif without the x marker | |
237 motif<-motif[!motif %in% "x"] | |
238 motif<-paste(motif, sep="", collapse="") | |
239 ITDletters<-motif | |
240 ITDAccessionNumbers[i,1]<-ThirdSubstrateSet[i,3] | |
241 ITDmotifs[i,1]<-ITDletters | |
242 } | |
243 } | |
244 } | |
245 names(FirstMotifs)<-FirstAccessionNumbers | |
246 names(SecondMotifs)<-SecondAccessionNumbers | |
247 #names(ITDmotifs)<-ITDAccessionNumbers | |
248 } | |
249 | |
250 ############################################### | |
251 #ALL motifs, full and truncated | |
252 | |
253 if (FullMotifsOnly_questionmark!="YES"&&TruncatedMotifsOnly_questionmark!="YES"){ | |
254 #print("!")} | |
255 FirstMotifs<-matrix(NA,nrow = nrow(FirstSubstrateSet),ncol=1) | |
256 FirstAccessionNumbers<-matrix(NA,nrow = nrow(FirstSubstrateSet),ncol=1) | |
257 | |
258 for (i in seq_len(nrow(FirstSubstrateSet))){ | |
259 FirstLetters<-FirstSubstrateSet[i,7:15] | |
260 FirstLetters<-FirstLetters[FirstLetters !="XXXXX"] | |
261 FirstLetters<-paste(FirstLetters, sep="", collapse="") | |
262 leftspaces<-c() | |
263 rightspaces<-c() | |
264 | |
265 YYYmotif <- unlist(strsplit(FirstLetters, split = "")) | |
266 YYYposition <- match(x = "x", table = YYYmotif) | |
267 #the position of the x marker gives the count of letters before it: x at position 4 means there are | |
268 #just 3 letters to the left of x | |
269 | |
270 YYYLettersToTheLeft <- YYYposition - 1 | |
271 #letters to the right = length(motif)-position-1: if the motif is 5 long and x is at 3 then Y is at 4 and there is | |
272 #just 1 spot to the right of Y, because 5-3-1=1 | |
273 YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 | |
274 #every row is kept: the first check pads the short sides of truncated motifs out to 4 letters, | |
275 #the second check passes motifs with more than 3 letters on both sides through unpadded | |
276 | |
277 | |
278 if (YYYLettersToTheLeft < 4 || YYYLettersToTheRight < 4) { | |
279 leftspaces<-rep(" ",times=max(0,4-YYYLettersToTheLeft)) | |
280 rightspaces<-rep(" ",times=max(0,4-YYYLettersToTheRight)) | |
281 #pad each short side out to 4 letters; max(0, ) keeps rep() from stopping when the other side is longer than 4 | |
282 motif<-c(leftspaces,YYYmotif,rightspaces) | |
283 #save the padded motif without the x marker | |
284 motif<-motif[!motif %in% "x"] | |
285 motif<-paste(motif, sep="", collapse="") | |
286 FirstLetters<-motif | |
287 FirstMotifs[i,1]<-FirstLetters | |
288 FirstAccessionNumbers[i,1]<-FirstSubstrateSet[i,3] | |
289 } | |
290 | |
291 if(YYYLettersToTheLeft>3 && YYYLettersToTheRight>3){ | |
292 motif<-YYYmotif | |
293 #leftspaces and rightspaces were reset to empty at the top of this iteration, so nothing is padded here | |
294 motif<-c(leftspaces,YYYmotif,rightspaces) | |
295 #save the motif without the x marker | |
296 motif<-motif[!motif %in% "x"] | |
297 motif<-paste(motif, sep="", collapse="") | |
298 FirstLetters<-motif | |
299 FirstMotifs[i,1]<-FirstLetters | |
300 FirstAccessionNumbers[i,1]<-FirstSubstrateSet[i,3] | |
301 | |
302 | |
303 } | |
304 | |
305 } | |
306 | |
307 SecondMotifs<-matrix(NA,nrow = nrow(SecondSubstrateSet),ncol=1) | |
308 SecondAccessionNumbers<-matrix(NA,nrow = nrow(SecondSubstrateSet),ncol = 1) | |
309 | |
310 for (i in seq_len(nrow(SecondSubstrateSet))){ | |
311 SecondLetters<-SecondSubstrateSet[i,7:15] | |
312 SecondLetters<-SecondLetters[SecondLetters !="XXXXX"] | |
313 SecondLetters<-paste(SecondLetters, sep="", collapse="") | |
314 leftspaces<-c() | |
315 rightspaces<-c() | |
316 | |
317 YYYmotif <- unlist(strsplit(SecondLetters, split = "")) | |
318 YYYposition <- match(x = "x", table = YYYmotif) | |
319 #the position of the x marker gives the count of letters before it: x at position 4 means there are | |
320 #just 3 letters to the left of x | |
321 | |
322 YYYLettersToTheLeft <- YYYposition - 1 | |
323 #letters to the right = length(motif)-position-1: if the motif is 5 long and x is at 3 then Y is at 4 and there is | |
324 #just 1 spot to the right of Y, because 5-3-1=1 | |
325 YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 | |
326 #every row is kept: the first check pads the short sides of truncated motifs out to 4 letters, | |
327 #the second check passes motifs with more than 3 letters on both sides through unpadded | |
328 if (YYYLettersToTheLeft < 4 || YYYLettersToTheRight < 4) { | |
329 leftspaces<-rep(" ",times=max(0,4-YYYLettersToTheLeft)) | |
330 rightspaces<-rep(" ",times=max(0,4-YYYLettersToTheRight)) | |
331 #pad each short side out to 4 letters; max(0, ) keeps rep() from stopping when the other side is longer than 4 | |
332 motif<-c(leftspaces,YYYmotif,rightspaces) | |
333 #save the padded motif without the x marker | |
334 motif<-motif[!motif %in% "x"] | |
335 motif<-paste(motif, sep="", collapse="") | |
336 SecondLetters<-motif | |
337 SecondMotifs[i,1]<-SecondLetters | |
338 SecondAccessionNumbers[i,1]<-SecondSubstrateSet[i,3] | |
339 } | |
340 | |
341 if(YYYLettersToTheLeft>3 && YYYLettersToTheRight>3){ | |
342 motif<-YYYmotif | |
343 #leftspaces and rightspaces were reset to empty at the top of this iteration, so nothing is padded here | |
344 motif<-c(leftspaces,YYYmotif,rightspaces) | |
345 #save the motif without the x marker | |
346 motif<-motif[!motif %in% "x"] | |
347 motif<-paste(motif, sep="", collapse="") | |
348 SecondLetters<-motif | |
349 SecondMotifs[i,1]<-SecondLetters | |
350 SecondAccessionNumbers[i,1]<-SecondSubstrateSet[i,3] | |
351 } | |
352 } | |
353 | |
354 | |
355 #ITDmotifs=matrix(,nrow = nrow(ThirdSubstrateSet),ncol=1) | |
356 #ITDAccessionNumbers<-matrix(,nrow = nrow(ThirdSubstrateSet)) | |
357 if(1==0){ | |
358 for (i in 1:nrow(ThirdSubstrateSet)){ | |
359 ITDletters<-ThirdSubstrateSet[i,7:15] | |
360 ITDletters<-ITDletters[ITDletters !="XXXXX"] | |
361 ITDletters<-paste(ITDletters, sep="", collapse="") | |
362 YYYmotif <- unlist(strsplit(ITDletters, split = "")) | |
363 leftspaces<-c() | |
364 rightspaces<-c() | |
365 YYYposition <- match(x = "x", table = YYYmotif) | |
366 #disabled block (1==0 is never TRUE): same steps as the two loops above, written for a third set | |
367 #whose inputs are not loaded by any active line of this listing | |
368 | |
369 YYYLettersToTheLeft <- YYYposition - 1 | |
370 #letters to the right = length(motif)-position-1: if the motif is 5 long and x is at 3 then Y is at 4 and there is | |
371 #just 1 spot to the right of Y, because 5-3-1=1 | |
372 YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 | |
373 #every row would be kept: short sides padded by the first check, | |
374 #motifs with more than 3 letters on both sides passed through by the second check | |
375 if (YYYLettersToTheLeft < 4 || YYYLettersToTheRight < 4) { | |
376 leftspaces<-rep(" ",times=max(0,4-YYYLettersToTheLeft)) | |
377 rightspaces<-rep(" ",times=max(0,4-YYYLettersToTheRight)) | |
378 #pad each short side out to 4 letters; max(0, ) keeps rep() from stopping when the other side is longer than 4 | |
379 motif<-c(leftspaces,YYYmotif,rightspaces) | |
380 #save the padded motif without the x marker | |
381 motif<-motif[!motif %in% "x"] | |
382 motif<-paste(motif, sep="", collapse="") | |
383 ITDletters<-motif | |
384 ITDmotifs[i,1]<-ITDletters | |
385 ITDAccessionNumbers[i,1]<-ThirdSubstrateSet[i,3] | |
386 } | |
387 | |
388 if(YYYLettersToTheLeft>3 && YYYLettersToTheRight>3){ | |
389 motif<-YYYmotif | |
390 #leftspaces and rightspaces were reset to empty at the top of this iteration, so nothing is padded here | |
391 motif<-c(leftspaces,YYYmotif,rightspaces) | |
392 #save the motif without the x marker | |
393 motif<-motif[!motif %in% "x"] | |
394 motif<-paste(motif, sep="", collapse="") | |
395 ITDletters<-motif | |
396 ITDmotifs[i,1]<-ITDletters | |
397 ITDAccessionNumbers[i,1]<-ThirdSubstrateSet[i,3] | |
398 } | |
399 } | |
400 } | |
401 names(FirstMotifs)<-FirstAccessionNumbers | |
402 names(SecondMotifs)<-SecondAccessionNumbers | |
403 #names(ITDmotifs)<-ITDAccessionNumbers | |
404 } | |
405 #Keep, for each set, the motifs that do not occur in the other set, one copy of each. | |
406 #NA entries are placeholders for rows skipped above, not motifs, so they are dropped first | |
407 FirstMotifsFINAL<-FirstMotifs[!is.na(FirstMotifs) & !FirstMotifs %in% SecondMotifs] | |
408 #FirstMotifsFINAL<-FirstMotifsFINAL[!FirstMotifsFINAL %in% ITDmotifs] | |
409 FirstMotifsFINAL<-FirstMotifsFINAL[!duplicated(FirstMotifsFINAL)] | |
410 | |
411 | |
412 # ITDmotifsFINAL<-ITDmotifs[!ITDmotifs %in% SecondMotifs] | |
413 # ITDmotifsFINAL<-ITDmotifsFINAL[!ITDmotifsFINAL %in% FirstMotifs] | |
414 # ITDmotifsFINAL<-ITDmotifsFINAL[!duplicated(ITDmotifsFINAL)] | |
415 | |
416 | |
417 SecondMotifsFINAL<-SecondMotifs[!is.na(SecondMotifs) & !SecondMotifs %in% FirstMotifs] | |
418 #SecondMotifsFINAL<-SecondMotifsFINAL[!SecondMotifsFINAL %in% ITDmotifs] | |
419 SecondMotifsFINAL<-SecondMotifsFINAL[!duplicated(SecondMotifsFINAL)] | |
420 | |
421 | |
422 columnalheader<-c(rep(NA,36)) | |
423 FirstFinalMatrix<-matrix(data =columnalheader,nrow = 1) | |
424 #row 1 of FirstFinalMatrix is an all-NA seed row; every matching column of Firstsubbackfreq is appended below it as one row | |
425 for (k in seq_along(FirstMotifsFINAL)) { | |
426 #seq_along() skips the loop when no unique motif is left (1:length() would have run with k = 1 and k = 0) | |
427 #NOTE(review): the lookup for the second set reads accession numbers from row 1 starting at column 1; | |
428 #this loop reads row 2 starting at column 2 -- confirm against the layout of the file loaded into Firstsubbackfreq | |
429 for (m in seq_len(ncol(Firstsubbackfreq))[-1]) { | |
430 AN <- as.character(Firstsubbackfreq[2, m]) | |
431 #print(AN)} | |
432 if (grepl(pattern = AN, | |
433 x = names(FirstMotifsFINAL[k]), | |
434 fixed = TRUE)) { | |
435 outputmatrix <- as.character(Firstsubbackfreq[, m]) | |
436 outputmatrix <- matrix(outputmatrix, nrow = 1) | |
437 #the accession number occurs in this motif's name, so keep the whole column as one output row | |
438 FirstFinalMatrix<-rbind(FirstFinalMatrix,outputmatrix) | |
439 # print(AN,outputmatrix)} | |
440 } | |
441 } | |
442 } | |
443 FirstFinalMatrix<-FirstFinalMatrix[!duplicated(FirstFinalMatrix),,drop=FALSE] | |
444 | |
445 #columnalheader<-c(rep(NA,36)) | |
446 #ITDFinalMatrix<-matrix(data =columnalheader,nrow = 1) | |
447 | |
448 if(1==0){ | |
449 for (k in 1:length(ITDmotifsFINAL)) { | |
450 AN<-00000 | |
451 #disabled block: 1==0 is never TRUE, so nothing inside these braces runs. ITDmotifsFINAL, Thirdsubbackfreq and | |
452 #ITDFinalMatrix are not assigned by any active line of this listing, so enabling it as written would fail | |
453 for (m in 1:ncol(Thirdsubbackfreq)) { | |
454 AN <- as.character(Thirdsubbackfreq[1, m]) | |
455 if (grepl(pattern = AN, | |
456 x = names(ITDmotifsFINAL[k]), | |
457 fixed = TRUE) == TRUE) { | |
458 outputmatrix <- as.character(Thirdsubbackfreq[, m]) | |
459 outputmatrix <- matrix(outputmatrix, nrow = 1) | |
460 #the accession number occurs in this motif's name, so the whole column is appended as one output row | |
461 ITDFinalMatrix<-rbind(ITDFinalMatrix,outputmatrix) | |
462 } | |
463 } | |
464 } | |
465 ITDFinalMatrix<-ITDFinalMatrix[!duplicated(ITDFinalMatrix),] | |
466 } | |
467 | |
468 columnalheader<-c(rep(NA,36)) | |
469 SecondFinalMatrix<-matrix(data =columnalheader,nrow = 1) | |
470 #row 1 of SecondFinalMatrix is an all-NA seed row; every matching column of Secondsubbackfreq is appended below it as one row | |
471 for (k in seq_along(SecondMotifsFINAL)) { | |
472 #seq_along()/seq_len() skip the loops on empty input (1:length() and 1:ncol() would count down from 1 to 0); | |
473 #row 1 of Secondsubbackfreq is read as the accession number of each column | |
474 for (m in seq_len(ncol(Secondsubbackfreq))) { | |
475 AN <- as.character(Secondsubbackfreq[1, m]) | |
476 if (grepl(pattern = AN, | |
477 x = names(SecondMotifsFINAL[k]), | |
478 fixed = TRUE)) { | |
479 outputmatrix <- as.character(Secondsubbackfreq[, m]) | |
480 outputmatrix <- matrix(outputmatrix, nrow = 1) | |
481 #the accession number occurs in this motif's name, so keep the whole column as one output row | |
482 SecondFinalMatrix<-rbind(SecondFinalMatrix,outputmatrix) | |
483 } | |
484 } | |
485 } | |
486 SecondFinalMatrix<-SecondFinalMatrix[!duplicated(SecondFinalMatrix),,drop=FALSE] | |
487 FTLoutputmatrix<-matrix(data=c(FirstMotifsFINAL,names(FirstMotifsFINAL)),ncol = 2) | |
488 #Write the first set's unique motifs: column 1 is the motif, column 2 is its name (the accession number). | |
489 #Every write below uses append=TRUE, so rerunning the script adds to existing output files instead of replacing them | |
490 write.table(x=FTLoutputmatrix, | |
491 file=First_unshared_motifs_table, | |
492 quote=FALSE, sep=",", | |
493 row.names=FALSE,col.names = FALSE, na="", append=TRUE) | |
494 #the first column of Firstsubbackfreq (rows 1 to 36) is written as the header row of the SubBackFreq output | |
495 columnalheader<-c(as.character(Firstsubbackfreq[1:36,1])) | |
496 columnalheader<-matrix(columnalheader,nrow = 1) | |
497 write.table(x=columnalheader, | |
498 file=First_unshared_subbackfreq, | |
499 quote=FALSE, sep=",", | |
500 row.names=FALSE,col.names = FALSE, na="", append=TRUE) | |
501 FirstFinalMatrix<-matrix(FirstFinalMatrix,ncol=ncol(columnalheader))[-1,,drop=FALSE] | |
502 write.table(x=FirstFinalMatrix, | |
503 file=First_unshared_subbackfreq, | |
504 quote=FALSE, sep=",", | |
505 row.names=FALSE,col.names = FALSE, na="", append=TRUE) | |
506 | |
507 ############################################################################################################ | |
508 #Write the second set's unique motifs: column 1 is the motif, column 2 is its name (the accession number) | |
509 D835Youtputmatrix<-matrix(data=c(SecondMotifsFINAL,names(SecondMotifsFINAL)),ncol = 2) | |
510 | |
511 write.table(x=D835Youtputmatrix, | |
512 file=Second_unshared_motifs_table, | |
513 quote=FALSE, sep=",", | |
514 row.names=FALSE,col.names = FALSE, na="", append=TRUE) | |
515 #the header row for the second set's output comes from the second set's own SubBackFreq table | |
516 columnalheader<-c(as.character(Secondsubbackfreq[1:36,1])) | |
517 columnalheader<-matrix(columnalheader,nrow = 1) | |
518 write.table(x=columnalheader, | |
519 file=Second_unshared_subbackfreq, | |
520 quote=FALSE, sep=",", | |
521 row.names=FALSE,col.names = FALSE, na="", append=TRUE) | |
522 SecondFinalMatrix<-matrix(SecondFinalMatrix,ncol=ncol(columnalheader))[-1,,drop=FALSE] | |
523 write.table(x=SecondFinalMatrix, | |
524 file=Second_unshared_subbackfreq, | |
525 quote=FALSE, sep=",", | |
526 row.names=FALSE,col.names = FALSE, na="", append=TRUE) | |
527 | |
528 ############################################################################################################ | |
529 | |
530 # ITDoutputmatrix<-matrix(data = c(ITDmotifsFINAL,names(ITDmotifsFINAL)),ncol = 2) | |
531 # | |
532 # write.table(x=ITDoutputmatrix, | |
533 # file=Third_unshared_motifs_table, | |
534 # quote=FALSE, sep=",", | |
535 # row.names=FALSE,col.names = FALSE, na="", append=TRUE) | |
536 # | |
537 # columnalheader<-c(as.character(Thirdsubbackfreq[1:36,1])) | |
538 # columnalheader<-matrix(columnalheader,nrow = 1) | |
539 # write.table(x=columnalheader, | |
540 # file=Third_unshared_subbackfreq, | |
541 # quote=FALSE, sep=",", | |
542 # row.names=FALSE,col.names = FALSE, na="", append=TRUE) | |
543 # ITDFinalMatrix<-ITDFinalMatrix[2:nrow(ITDFinalMatrix),] | |
544 # write.table(x=ITDFinalMatrix, | |
545 # file=Third_unshared_subbackfreq, | |
546 # quote=FALSE, sep=",", | |
547 # row.names=FALSE,col.names = FALSE, na="", append=TRUE) |