comparison all stuff/Difference finder for GalaxyP july 2020.R @ 11:77e47268b650 draft default tip

Uploaded
author jfb
date Tue, 14 Jul 2020 19:53:34 -0400
parents
children
comparison
equal deleted inserted replaced
10:78f16bc92ba6 11:77e47268b650
# ---- Inputs ----
# Two substrate tables (every column read as text) and their substrate
# background frequency (SBF) tables, which are read without a header row.
# check.names = FALSE keeps header cells such as "-7" or "0" exactly as they
# appear in the file; with the default, read.csv() rewrites them to
# syntactic names ("X.7", "X0"), and EmptySubHeader below would then no longer
# reproduce the input header. All later access to these tables is positional,
# so the column names themselves are only used for EmptySubHeader.
FirstSubstrateSet <- read.csv("S1.csv", stringsAsFactors = FALSE,
                              colClasses = "character", check.names = FALSE)
Firstsubbackfreq <- read.csv("SBF1.csv", header = FALSE,
                             stringsAsFactors = FALSE)
SecondSubstrateSet <- read.csv("S2.csv", stringsAsFactors = FALSE,
                               colClasses = "character", check.names = FALSE)
Secondsubbackfreq <- read.csv("SBF2.csv", header = FALSE,
                              stringsAsFactors = FALSE)

# ---- Output file names ----
First_unshared_motifs_table <- "1RS.csv"
First_unshared_subbackfreq <- "1RSBF.csv"
Second_unshared_motifs_table <- "2RS.csv"
Second_unshared_subbackfreq <- "2RSBF.csv"

# ---- Header templates taken from the inputs ----
# EmptySubHeader is the substrate header as a one-row matrix, so that
# write.table() emits it as a single line; it is what gets written when a set
# has no unshared motifs. EmptySBFHeader is the first column of the first SBF
# table.
EmptySubHeader <- matrix(colnames(FirstSubstrateSet), nrow = 1)
EmptySBFHeader <- Firstsubbackfreq[, 1]

# ---- Mark the central residue as phosphorylated ----
# Column 11 of a substrate table holds the central residue of the motif
# (columns 4:18 are read as the motif further down, so 11 is the middle one).
# Any cell in that column containing S, T or Y (case-insensitive) is replaced
# by "xS", "xT" or "xY"; the "x" prefix is what the motif-building step later
# searches for to locate the centre.
#
# central_letters: character vector of central-residue cells.
# Returns a character vector of the same length with the marked cells replaced.
mark_phospho_centre <- function(central_letters) {
  marked <- central_letters
  for (residue in c("S", "T", "Y")) {
    # The test always runs against the untouched input, so a cell matching
    # more than one residue ends up with the last one (S, then T, then Y).
    # grepl() is vectorised and returns logical(0) for empty input, so a
    # table with no rows passes through unchanged instead of failing.
    hits <- grepl(residue, central_letters, ignore.case = TRUE)
    marked[hits] <- paste0("x", residue)
  }
  marked
}

FirstCentralLetters <- mark_phospho_centre(FirstSubstrateSet[, 11])
SecondCentralLetters <- mark_phospho_centre(SecondSubstrateSet[, 11])

# Put the marked letters back where they came from.
FirstSubstrateSet[, 11] <- FirstCentralLetters
SecondSubstrateSet[, 11] <- SecondCentralLetters

# ---- Build one fixed-width motif string per substrate row ----
# For every row, the cells of columns 4:18 are concatenated (cells equal to
# "XXXXX" are dropped), the "x" phospho marker is located, and the sequence is
# padded with spaces so that at least `flank` characters sit on each side of
# the central residue. The marker itself is then removed.
#
# substrate_set: data frame whose columns 4:18 hold the motif cells, with the
#                central residue already prefixed by "x".
# flank:         number of positions wanted on each side of the centre.
# Returns an n x 1 character matrix of motif strings (n = nrow(substrate_set)).
# Stops with an error if a row contains no "x" marker.
build_padded_motifs <- function(substrate_set, flank = 7) {
  n_rows <- nrow(substrate_set)
  motifs <- matrix(NA_character_, nrow = n_rows, ncol = 1)

  # seq_len() yields an empty loop for a zero-row table, where 1:nrow() would
  # iterate over c(1, 0).
  for (i in seq_len(n_rows)) {
    cells <- substrate_set[i, 4:18]
    cells <- cells[cells != "XXXXX"]
    residues <- unlist(strsplit(paste(cells, collapse = ""), split = ""))

    marker <- match("x", residues)
    if (is.na(marker)) {
      stop("substrate row ", i, " has no \"x\" phospho marker in columns 4:18",
           call. = FALSE)
    }

    # The marker sits immediately before the central residue, so everything
    # before it is "left" and everything after the residue is "right".
    n_left <- marker - 1
    n_right <- length(residues) - marker - 1

    # Pad whichever side is short; a side that is already long enough gets no
    # padding.
    padded <- c(rep(" ", max(0, flank - n_left)),
                residues,
                rep(" ", max(0, flank - n_right)))
    motifs[i, 1] <- paste(padded[!padded %in% "x"], collapse = "")
  }

  motifs
}

# Motifs and the matching accession numbers (column 3) for both substrate
# sets, each as an n x 1 matrix.
FTLwtmotifs <- build_padded_motifs(FirstSubstrateSet)
FTLwtAccessionNumbers <- matrix(FirstSubstrateSet[, 3], ncol = 1)

D835Ymotifs <- build_padded_motifs(SecondSubstrateSet)
D835YAccessionNumbers <- matrix(SecondSubstrateSet[, 3], ncol = 1)

# ---- Motifs found in only one of the two sets ----
# Label every motif with its accession number so the label survives the
# subsetting below.
names(FTLwtmotifs) <- FTLwtAccessionNumbers
names(D835Ymotifs) <- D835YAccessionNumbers

# Motifs of the first set that never occur in the second, first occurrence
# only.
absent_from_second <- !is.element(FTLwtmotifs, D835Ymotifs)
FTLwtmotifsFINAL <- FTLwtmotifs[absent_from_second]
FTLwtmotifsFINAL <- FTLwtmotifsFINAL[!duplicated(FTLwtmotifsFINAL)]

# Motifs of the second set that never occur in the first, first occurrence
# only.
absent_from_first <- !is.element(D835Ymotifs, FTLwtmotifs)
D835YmotifsFINAL <- D835Ymotifs[absent_from_first]
D835YmotifsFINAL <- D835YmotifsFINAL[!duplicated(D835YmotifsFINAL)]

# ---- Write the unshared motifs and their background-frequency columns ----

# Append `rows` to `path` as comma-separated text: no quoting, no row or
# column names, NA written as an empty cell. Nothing is written for an object
# with zero rows.
append_csv_rows <- function(rows, path) {
  if (NROW(rows) == 0) {
    return(invisible(NULL))
  }
  write.table(x = rows, file = path, quote = FALSE, sep = ",",
              row.names = FALSE, col.names = FALSE, na = "", append = TRUE)
}

# Collect the SBF columns whose accession (row 1) occurs as a fixed substring
# of an unshared motif's name, one output row per matched column.
#
# unshared_motifs: named character vector; the names are accession strings.
# subbackfreq:     SBF table read without a header; row 1 holds accessions.
# n_fields:        expected number of rows in an SBF column.
# Returns a character matrix with n_fields columns, rows in order of first
# match (motif by motif, then column by column) with duplicate rows removed;
# zero rows if nothing matched.
collect_sbf_rows <- function(unshared_motifs, subbackfreq, n_fields = 36) {
  accessions <- vapply(subbackfreq[1, ], as.character, character(1))

  matched_columns <- unlist(lapply(names(unshared_motifs), function(motif_name) {
    # isTRUE() skips columns whose accession cell is NA instead of failing
    # on an NA condition.
    # NOTE(review): an empty-string accession matches every name under
    # fixed = TRUE - confirm that such columns cannot occur in the input.
    which(vapply(accessions, function(accession) {
      isTRUE(grepl(accession, motif_name, fixed = TRUE))
    }, logical(1)))
  }), use.names = FALSE)
  matched_columns <- unique(matched_columns)

  if (length(matched_columns) == 0) {
    return(matrix(character(0), nrow = 0, ncol = n_fields))
  }

  rows <- do.call(rbind, lapply(matched_columns, function(m) {
    as.character(subbackfreq[, m])
  }))
  if (ncol(rows) != n_fields) {
    stop("background frequency table has ", ncol(rows), " rows, expected ",
         n_fields, call. = FALSE)
  }

  # drop = FALSE keeps a single surviving row as a 1 x n matrix, so it is
  # written as one line rather than one value per line.
  rows[!duplicated(rows), , drop = FALSE]
}

# Lay the unshared motifs out in the substrate-table format: two empty
# columns, the accession, one column per motif character, one empty column,
# with `header` as the first row. The central column (11) gets its "x" marker
# back for S, T and Y.
build_unshared_table <- function(unshared_motifs, header) {
  pieces <- strsplit(unname(unshared_motifs), split = "")
  if (length(unique(lengths(pieces))) != 1) {
    stop("unshared motifs do not all have the same length", call. = FALSE)
  }
  residues <- do.call(rbind, pieces)

  body <- cbind(NA, NA, names(unshared_motifs), residues, NA)
  dimnames(body) <- NULL

  centre <- body[, 11]
  marked <- centre
  for (residue in c("S", "T", "Y")) {
    marked[grepl(residue, centre, ignore.case = TRUE)] <- paste0("x", residue)
  }
  body[, 11] <- marked

  rbind(header, body, deparse.level = 0)
}

# Produce both output files for one substrate set.
#
# unshared_motifs:  named character vector of motifs unique to this set.
# subbackfreq:      this set's SBF table.
# motif_header:     header row used when there are motifs to report.
# empty_header:     header row written on its own when there are none.
# motifs_path:      output file for the motif table.
# subbackfreq_path: output file for the SBF rows.
# Returns (invisibly) the matrix of SBF rows that was written.
write_unshared_outputs <- function(unshared_motifs, subbackfreq, motif_header,
                                   empty_header, motifs_path,
                                   subbackfreq_path) {
  # Everything is computed before the first write, so a failure leaves no
  # partially written output behind.
  if (length(unshared_motifs) > 0) {
    sbf_rows <- collect_sbf_rows(unshared_motifs, subbackfreq)
    motif_rows <- build_unshared_table(unshared_motifs, motif_header)
  } else {
    sbf_rows <- matrix(character(0), nrow = 0, ncol = 36)
    motif_rows <- empty_header
  }

  # The SBF header is the first 36 cells of the table's first column, written
  # as a single row. It is taken from the same table the data rows come from.
  sbf_header <- matrix(as.character(subbackfreq[1:36, 1]), nrow = 1)

  append_csv_rows(motif_rows, motifs_path)
  append_csv_rows(sbf_header, subbackfreq_path)
  append_csv_rows(sbf_rows, subbackfreq_path)

  invisible(sbf_rows)
}

FLTheader <- c("Substrate", "Species", "Reference",
               "-7", "-6", "-5", "-4", "-3", "-2", "-1", "0",
               "1", "2", "3", "4", "5", "6", "7", "Phosphite")
D835Yheader <- FLTheader

FTLFinalMatrix <- write_unshared_outputs(
  unshared_motifs = FTLwtmotifsFINAL,
  subbackfreq = Firstsubbackfreq,
  motif_header = FLTheader,
  empty_header = EmptySubHeader,
  motifs_path = First_unshared_motifs_table,
  subbackfreq_path = First_unshared_subbackfreq
)

D835YFinalMatrix <- write_unshared_outputs(
  unshared_motifs = D835YmotifsFINAL,
  subbackfreq = Secondsubbackfreq,
  motif_header = D835Yheader,
  empty_header = EmptySubHeader,
  motifs_path = Second_unshared_motifs_table,
  subbackfreq_path = Second_unshared_subbackfreq
)