Mercurial > repos > jfb > difference_finder
comparison all stuff/Difference finder for GalaxyP july 2020.R @ 11:77e47268b650 draft default tip
Uploaded
author | jfb |
---|---|
date | Tue, 14 Jul 2020 19:53:34 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
10:78f16bc92ba6 | 11:77e47268b650 |
---|---|
# ---- Input files ----
# Two substrate tables (every column read as character) and their companion
# "SBF" tables, which are read without a header row.
FirstSubstrateSet <- read.csv(
  "S1.csv",
  colClasses = "character",
  stringsAsFactors = FALSE
)
Firstsubbackfreq <- read.csv(
  "SBF1.csv",
  header = FALSE,
  stringsAsFactors = FALSE
)
SecondSubstrateSet <- read.csv(
  "S2.csv",
  colClasses = "character",
  stringsAsFactors = FALSE
)
Secondsubbackfreq <- read.csv(
  "SBF2.csv",
  header = FALSE,
  stringsAsFactors = FALSE
)

# ---- Output file names ----
First_unshared_motifs_table <- "1RS.csv"
First_unshared_subbackfreq <- "1RSBF.csv"
Second_unshared_motifs_table <- "2RS.csv"
Second_unshared_subbackfreq <- "2RSBF.csv"

# ---- Headers copied from the inputs ----
# The outputs reuse the input headers so that they have the same layout as the
# files that were read in. EmptySubHeader is a one-row matrix of the first
# substrate table's column names; EmptySBFHeader is the first column of the
# first SBF table.
EmptySubHeader <- matrix(colnames(FirstSubstrateSet), nrow = 1)
EmptySBFHeader <- Firstsubbackfreq[, 1]
18 | |
19 | |
# Mark the central residue of every substrate with a leading "x" so that
# phospho-acceptor amino acids (S, T, Y) are written as "xS", "xT", "xY".
# The central residue (position 0) lives in column 11 of the substrate tables.
FirstCentralLetters <- FirstSubstrateSet[, 11]
SecondCentralLetters <- SecondSubstrateSet[, 11]

# grepl() is already vectorised over its input, so it is called directly.
# Wrapping it in sapply() returns a list for a zero-row table, which makes the
# replace() calls below fail. Each mask is TRUE where the corresponding letter
# (case-insensitively) occurs in the central cell.
FirstEsses <- grepl("S", FirstCentralLetters, ignore.case = TRUE)
FirstTees <- grepl("T", FirstCentralLetters, ignore.case = TRUE)
FirstWys <- grepl("Y", FirstCentralLetters, ignore.case = TRUE)

# Same masks for the second substrate set.
SecondEsses <- grepl("S", SecondCentralLetters, ignore.case = TRUE)
SecondTees <- grepl("T", SecondCentralLetters, ignore.case = TRUE)
SecondWys <- grepl("Y", SecondCentralLetters, ignore.case = TRUE)

# All masks were computed from the untouched letters, so the three replacements
# do not interfere with each other. Cells that already carry the marker
# (e.g. "xS") are simply rewritten to the same value.
FirstCentralLetters <- replace(FirstCentralLetters, FirstEsses, "xS")
FirstCentralLetters <- replace(FirstCentralLetters, FirstTees, "xT")
FirstCentralLetters <- replace(FirstCentralLetters, FirstWys, "xY")

SecondCentralLetters <- replace(SecondCentralLetters, SecondEsses, "xS")
SecondCentralLetters <- replace(SecondCentralLetters, SecondTees, "xT")
SecondCentralLetters <- replace(SecondCentralLetters, SecondWys, "xY")

# Put the x-marked letters back where they came from: column 11.
FirstSubstrateSet[, 11] <- FirstCentralLetters
SecondSubstrateSet[, 11] <- SecondCentralLetters
49 | |
# Build one fixed-width motif string per row of a substrate table.
#
# For every row, columns 4:18 (positions -7 .. 7) are concatenated after
# dropping "XXXXX" cells. The lowercase "x" that marks the central residue is
# used to count how many letters sit on each side of it; a side with fewer than
# `flank` letters is padded with blanks so the central residue always lands in
# the same place. The "x" marker itself is then removed from the motif.
#
# Arguments:
#   substrate_set  data frame laid out like the substrate input tables, with
#                  the central residue already prefixed by "x".
#   flank          number of residues expected on each side of the centre.
#
# Returns a list with two one-column matrices, one row per input row:
#   motifs      the padded motif strings
#   accessions  the value of column 3 ("Reference") for the same row
#
# Stops with an explicit message when a row contains no "x" marker, because the
# motif cannot be aligned without it (previously this surfaced as the opaque
# "missing value where TRUE/FALSE needed").
build_aligned_motifs <- function(substrate_set, flank = 7) {
  n_rows <- nrow(substrate_set)
  motifs <- matrix(NA_character_, nrow = n_rows, ncol = 1)
  accessions <- matrix(NA_character_, nrow = n_rows, ncol = 1)

  # seq_len() makes an empty table produce empty results instead of iterating
  # over c(1, 0).
  for (i in seq_len(n_rows)) {
    residues <- substrate_set[i, 4:18]
    residues <- residues[residues != "XXXXX"]
    chars <- unlist(strsplit(paste(residues, collapse = ""), split = ""))

    marker_position <- match("x", chars)
    if (is.na(marker_position)) {
      stop(
        "Row ", i, " has no 'x'-marked central residue; ",
        "cannot align its motif.",
        call. = FALSE
      )
    }

    # The marker at position p has p - 1 letters to its left. To its right are
    # the central residue itself plus length - p - 1 further letters.
    letters_left <- marker_position - 1
    letters_right <- length(chars) - marker_position - 1

    # Clamp at zero so an over-long side never asks rep() for a negative count.
    left_pad <- rep(" ", times = max(0, flank - letters_left))
    right_pad <- rep(" ", times = max(0, flank - letters_right))

    padded <- c(left_pad, chars, right_pad)
    padded <- padded[!padded %in% "x"]

    motifs[i, 1] <- paste(padded, collapse = "")
    accessions[i, 1] <- substrate_set[i, 3]
  }

  list(motifs = motifs, accessions = accessions)
}

# Motifs and accession numbers for the first substrate set.
first_motif_set <- build_aligned_motifs(FirstSubstrateSet)
FTLwtmotifs <- first_motif_set$motifs
FTLwtAccessionNumbers <- first_motif_set$accessions

# Motifs and accession numbers for the second substrate set.
second_motif_set <- build_aligned_motifs(SecondSubstrateSet)
D835Ymotifs <- second_motif_set$motifs
D835YAccessionNumbers <- second_motif_set$accessions
148 | |
# Attach each motif's accession number as its name; the names survive the
# vector-style subsetting below and are used later to look up the matching
# background-frequency column.
names(FTLwtmotifs) <- FTLwtAccessionNumbers
names(D835Ymotifs) <- D835YAccessionNumbers

# Motifs present in the first set but absent from the second, keeping only the
# first occurrence of each. duplicated() is used rather than unique() so the
# accession names are retained.
FTLwtmotifsFINAL <- FTLwtmotifs[is.na(match(FTLwtmotifs, D835Ymotifs))]
FTLwtmotifsFINAL <- FTLwtmotifsFINAL[which(!duplicated(FTLwtmotifsFINAL))]

# And the reverse: motifs present only in the second set.
D835YmotifsFINAL <- D835Ymotifs[is.na(match(D835Ymotifs, FTLwtmotifs))]
D835YmotifsFINAL <- D835YmotifsFINAL[which(!duplicated(D835YmotifsFINAL))]
158 | |
159 | |
# ---- Outputs for motifs found only in the first substrate set ----
# Writes two files (both opened with append = TRUE):
#   First_unshared_motifs_table  header row + one row per unshared motif
#   First_unshared_subbackfreq   header row + one row per matching column of
#                                Firstsubbackfreq
# When there are no unshared motifs only the header rows are written.

# Seed row of 36 NAs so rbind() has something to append to; it is removed again
# before writing. 36 is the row count the background-frequency table is
# expected to have.
columnalheader <- rep(NA, 36)
FTLFinalMatrix <- matrix(data = columnalheader, nrow = 1)

FLTheader <- c(
  "Substrate", "Species", "Reference",
  "-7", "-6", "-5", "-4", "-3", "-2", "-1", "0",
  "1", "2", "3", "4", "5", "6", "7",
  "Phosphite"
)

if (length(FTLwtmotifsFINAL) > 0) {
  # For every unshared motif, collect each background-frequency column whose
  # first-row accession occurs (as a literal substring) in the motif's name.
  for (k in seq_along(FTLwtmotifsFINAL)) {
    motif_accession <- names(FTLwtmotifsFINAL[k])
    for (m in seq_len(ncol(Firstsubbackfreq))) {
      AN <- as.character(Firstsubbackfreq[1, m])
      # isTRUE() treats an NA accession cell as "no match" instead of
      # aborting the if() with a missing-value error.
      if (isTRUE(grepl(pattern = AN, x = motif_accession, fixed = TRUE))) {
        outputmatrix <- matrix(as.character(Firstsubbackfreq[, m]), nrow = 1)
        FTLFinalMatrix <- rbind(FTLFinalMatrix, outputmatrix)
      }
    }
  }

  # drop = FALSE keeps a matrix even when a single row is left. Without it a
  # lone match collapsed to a vector and was written as a 36-line column, and
  # zero matches made nrow() return NULL and crashed the script.
  FTLFinalMatrix <- FTLFinalMatrix[!duplicated(FTLFinalMatrix), , drop = FALSE]
  # Remove the NA seed row (always the first row).
  FTLFinalMatrix <- FTLFinalMatrix[-1, , drop = FALSE]

  # Column 1: motif string, column 2: accession number.
  FTLoutputmatrix <- matrix(
    data = c(FTLwtmotifsFINAL, names(FTLwtmotifsFINAL)),
    ncol = 2
  )
  motif_count <- nrow(FTLoutputmatrix)

  # Blank "Substrate"/"Species" columns on the left and a blank "Phosphite"
  # column on the right (NA is written as an empty field).
  lefthandFLT <- matrix(NA, nrow = motif_count, ncol = 2)
  righthandFLT <- matrix(NA, nrow = motif_count, ncol = 1)
  FLTaccessionset <- FTLoutputmatrix[, 2]

  # One character per cell: one row per motif, one column per position.
  FTLmeat <- do.call(rbind, strsplit(FTLoutputmatrix[, 1], split = ""))

  FTLoutputmatrix2 <- cbind(lefthandFLT, FLTaccessionset, FTLmeat, righthandFLT)
  dimnames(FTLoutputmatrix2) <- NULL

  # Re-apply the "x" phospho marker to the central residue (column 11), which
  # was stripped while the motifs were being compared.
  FirstCentralLettersAGAIN <- FTLoutputmatrix2[, 11]

  FirstEsses <- grepl("S", FirstCentralLettersAGAIN, ignore.case = TRUE)
  FirstTees <- grepl("T", FirstCentralLettersAGAIN, ignore.case = TRUE)
  FirstWys <- grepl("Y", FirstCentralLettersAGAIN, ignore.case = TRUE)

  FirstCentralLettersAGAIN <- replace(FirstCentralLettersAGAIN, FirstEsses, "xS")
  FirstCentralLettersAGAIN <- replace(FirstCentralLettersAGAIN, FirstTees, "xT")
  FirstCentralLettersAGAIN <- replace(FirstCentralLettersAGAIN, FirstWys, "xY")

  FTLoutputmatrix2[, 11] <- FirstCentralLettersAGAIN

  FTLoutputmatrix2 <- rbind(FLTheader, FTLoutputmatrix2)

  write.table(
    x = FTLoutputmatrix2,
    file = First_unshared_motifs_table,
    quote = FALSE, sep = ",",
    row.names = FALSE, col.names = FALSE, na = "", append = TRUE
  )

  # Header row of the background-frequency output: the first column of the
  # input table, laid out as a single row.
  columnalheader <- as.character(Firstsubbackfreq[1:36, 1])
  columnalheader <- matrix(columnalheader, nrow = 1)
  write.table(
    x = columnalheader,
    file = First_unshared_subbackfreq,
    quote = FALSE, sep = ",",
    row.names = FALSE, col.names = FALSE, na = "", append = TRUE
  )

  write.table(
    x = FTLFinalMatrix,
    file = First_unshared_subbackfreq,
    quote = FALSE, sep = ",",
    row.names = FALSE, col.names = FALSE, na = "", append = TRUE
  )
} else {
  # Nothing is unique to the first set: emit header-only files.
  FTLFinalMatrix <- columnalheader
  write.table(
    x = EmptySubHeader,
    file = First_unshared_motifs_table,
    quote = FALSE, sep = ",",
    row.names = FALSE, col.names = FALSE, na = "", append = TRUE
  )

  columnalheader <- as.character(Firstsubbackfreq[1:36, 1])
  columnalheader <- matrix(columnalheader, nrow = 1)
  write.table(
    x = columnalheader,
    file = First_unshared_subbackfreq,
    quote = FALSE, sep = ",",
    row.names = FALSE, col.names = FALSE, na = "", append = TRUE
  )
}
247 | |
248 | |
# ---- Outputs for motifs found only in the second substrate set ----
# Writes two files (both opened with append = TRUE):
#   Second_unshared_motifs_table  header row + one row per unshared motif
#   Second_unshared_subbackfreq   header row + one row per matching column of
#                                 Secondsubbackfreq
# When there are no unshared motifs only the header rows are written.

# Seed row of 36 NAs so rbind() has something to append to; it is removed again
# before writing. 36 is the row count the background-frequency table is
# expected to have.
columnalheader <- rep(NA, 36)
D835YFinalMatrix <- matrix(data = columnalheader, nrow = 1)

if (length(D835YmotifsFINAL) > 0) {
  # For every unshared motif, collect each background-frequency column whose
  # first-row accession occurs (as a literal substring) in the motif's name.
  for (k in seq_along(D835YmotifsFINAL)) {
    motif_accession <- names(D835YmotifsFINAL[k])
    for (m in seq_len(ncol(Secondsubbackfreq))) {
      AN <- as.character(Secondsubbackfreq[1, m])
      # isTRUE() treats an NA accession cell as "no match" instead of
      # aborting the if() with a missing-value error.
      if (isTRUE(grepl(pattern = AN, x = motif_accession, fixed = TRUE))) {
        outputmatrix <- matrix(as.character(Secondsubbackfreq[, m]), nrow = 1)
        D835YFinalMatrix <- rbind(D835YFinalMatrix, outputmatrix)
      }
    }
  }

  # drop = FALSE keeps a matrix even when a single row is left. Without it a
  # lone match collapsed to a vector and was written as a 36-line column, and
  # zero matches made nrow() return NULL and crashed the script.
  D835YFinalMatrix <- D835YFinalMatrix[!duplicated(D835YFinalMatrix), , drop = FALSE]
  # Remove the NA seed row (always the first row).
  D835YFinalMatrix <- D835YFinalMatrix[-1, , drop = FALSE]

  # Column 1: motif string, column 2: accession number.
  D835Youtputmatrix <- matrix(
    data = c(D835YmotifsFINAL, names(D835YmotifsFINAL)),
    ncol = 2
  )
  motif_count <- nrow(D835Youtputmatrix)

  D835Yheader <- c(
    "Substrate", "Species", "Reference",
    "-7", "-6", "-5", "-4", "-3", "-2", "-1", "0",
    "1", "2", "3", "4", "5", "6", "7",
    "Phosphite"
  )

  # Blank "Substrate"/"Species" columns on the left and a blank "Phosphite"
  # column on the right (NA is written as an empty field).
  lefthandD835 <- matrix(NA, nrow = motif_count, ncol = 2)
  righthandD835 <- matrix(NA, nrow = motif_count, ncol = 1)
  D835Yaset <- D835Youtputmatrix[, 2]

  # One character per cell: one row per motif, one column per position.
  D835meat <- do.call(rbind, strsplit(D835Youtputmatrix[, 1], split = ""))

  D835Youtputmatrix2 <- cbind(lefthandD835, D835Yaset, D835meat, righthandD835)
  dimnames(D835Youtputmatrix2) <- NULL

  # Re-apply the "x" phospho marker to the central residue (column 11), which
  # was stripped while the motifs were being compared.
  SecondCentralLettersAGAIN <- D835Youtputmatrix2[, 11]

  SecondEsses <- grepl("S", SecondCentralLettersAGAIN, ignore.case = TRUE)
  SecondTees <- grepl("T", SecondCentralLettersAGAIN, ignore.case = TRUE)
  SecondWys <- grepl("Y", SecondCentralLettersAGAIN, ignore.case = TRUE)

  SecondCentralLettersAGAIN <- replace(SecondCentralLettersAGAIN, SecondEsses, "xS")
  SecondCentralLettersAGAIN <- replace(SecondCentralLettersAGAIN, SecondTees, "xT")
  SecondCentralLettersAGAIN <- replace(SecondCentralLettersAGAIN, SecondWys, "xY")

  D835Youtputmatrix2[, 11] <- SecondCentralLettersAGAIN

  D835Youtputmatrix2 <- rbind(D835Yheader, D835Youtputmatrix2)

  write.table(
    x = D835Youtputmatrix2,
    file = Second_unshared_motifs_table,
    quote = FALSE, sep = ",",
    row.names = FALSE, col.names = FALSE, na = "", append = TRUE
  )

  # Header row of the background-frequency output. It is taken from the SECOND
  # input table so the labels always describe the rows written beneath them
  # (the original read them from Firstsubbackfreq, a copy-paste slip that only
  # worked while both inputs shared identical row labels).
  columnalheader <- as.character(Secondsubbackfreq[1:36, 1])
  columnalheader <- matrix(columnalheader, nrow = 1)
  write.table(
    x = columnalheader,
    file = Second_unshared_subbackfreq,
    quote = FALSE, sep = ",",
    row.names = FALSE, col.names = FALSE, na = "", append = TRUE
  )

  write.table(
    x = D835YFinalMatrix,
    file = Second_unshared_subbackfreq,
    quote = FALSE, sep = ",",
    row.names = FALSE, col.names = FALSE, na = "", append = TRUE
  )
} else {
  # Nothing is unique to the second set: emit header-only files.
  D835YFinalMatrix <- columnalheader
  write.table(
    x = EmptySubHeader,
    file = Second_unshared_motifs_table,
    quote = FALSE, sep = ",",
    row.names = FALSE, col.names = FALSE, na = "", append = TRUE
  )

  # Same source as in the non-empty branch: the second table's own labels.
  columnalheader <- as.character(Secondsubbackfreq[1:36, 1])
  columnalheader <- matrix(columnalheader, nrow = 1)
  write.table(
    x = columnalheader,
    file = Second_unshared_subbackfreq,
    quote = FALSE, sep = ",",
    row.names = FALSE, col.names = FALSE, na = "", append = TRUE
  )
}
333 } |