# HG changeset patch # User jfb # Date 1555685848 14400 # Node ID dff99bed3f5687007b7f91bf89eb697abb731efe # Parent beba4066121ebd9d078cf60e8fa19a573b6ca9ec Uploaded diff -r beba4066121e -r dff99bed3f56 all stuff/Commonality and Difference finderMADE 7 TO 7 1-15-2019.R --- a/all stuff/Commonality and Difference finderMADE 7 TO 7 1-15-2019.R Wed Jan 16 14:33:39 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1281 +0,0 @@ -#I should make an SOP for this. Problems we encountered: no x in the xY motif, and the kilodemon -#the output files have both Y and xY, they shouldn't why is that happening? make it not happen -#make sure that accession numbers stay locked to each motif, somehow -#output should look just like the KALIP input - -#ff you want ONLY FULL MOTIFS, put "YES" here, please use all caps -FullMotifsOnly_questionmark<-"NO" -#If you want ONLY TRUNCATED MOTIFS, put "YES" here, please use all caps -TruncatedMotifsOnly_questionmark<-"NO" -#if you want to find the overlap, put a "YES" here (all caps), if you want to find the non-overlap, put "NO" (all caps) -Are_You_Looking_For_Commonality<-"NO" - - -#put the names of your input files here -FirstSubstrateSet<- read.csv("Galaxy63-BTK_PLUS-R1_Substrates.csv", stringsAsFactors=FALSE) -Firstsubbackfreq<- read.csv("Galaxy64-BTK_PLUS-R1_SubstrateBackgroundFrequency.csv", header=FALSE, stringsAsFactors=FALSE) - -SecondSubstrateSet<- read.csv("Galaxy65-BTK_PLUS_R2_Substrates.csv", stringsAsFactors=FALSE) -Secondsubbackfreq<- read.csv("Galaxy66-BTK_PLUS_R2_SubstrateBackgroundFrequency.csv", header=FALSE, stringsAsFactors=FALSE) - -ThirdSubstrateSet<- read.csv("Galaxy69-BTK_PLUS_R3_Substrates.csv", stringsAsFactors=FALSE) -Thirdsubbackfreq<- read.csv("Galaxy70-BTK_PLUS_R3_SubstrateBackgroundFrequency.csv", header=FALSE, stringsAsFactors=FALSE) - -#then put the names of your output files here -Shared_motifs_table<-"180719_GALAXY-BTK-plus-rep-OVLP-7to7-substrates.csv" 
-Shared_subbackfreq_table<-"180719_GALAXY-BTK-plus-rep-OVLP-7to7-SubBackFreq.csv" - -# Shared_motifs_table<-"Shared motifs 7-27-17.csv" -# Shared_subbackfreq_table<-"SubstrateBackgrounFrequency-for-shared-motifs 4 7-27-17.csv" - -First_unshared_motifs_table<-"R1 substrates.csv" -First_unshared_subbackfreq<-"R1 SBF.csv" - -Second_unshared_motifs_table<-"R2 subs.csv" -Second_unshared_subbackfreq<-"R2 SBf.csv" - -Third_unshared_motifs_table<-"R3 subs.csv" -Third_unshared_subbackfreq<-"R3 SBF.csv" - -#final note, this code is going to be unworkable if you want to make a Venn diagram of more than 3 circles. I think I'll poke around -#other languages to see if any of them can do it. -#################################################################################################################################### - - - - - -FirstxY<-rep("xY",times=nrow(FirstSubstrateSet)) -FirstSubstrateSet[,11]<-FirstxY - -SecondxY<-rep("xY",times=nrow(SecondSubstrateSet)) -SecondSubstrateSet[,11]<-SecondxY - -ThirdxY<-rep("xY",times=nrow(ThirdSubstrateSet)) -ThirdSubstrateSet[,11]<-ThirdxY - - - - - - - - - - - -#################################################################################################################################### -#################################################################################################################################### -# better version of this code written in C: what happens when two kinases share a motif, but they found that motif in two -# separate proteins thus two separate accession numbers? -# It should actually output the shared motif and BOTH accession numbers. Right now it does not, it only maps out the second -# accession number. 
So that needs to be fixed BUT you need to keep the commonality between a motif and its accession number -#################################################################################################################################### -#################################################################################################################################### -#################################################################################################################################### -#################################################################################################################################### - -#Create the motif sets, deciding wether or not you're looking for truncated or full here -#full only -if (Are_You_Looking_For_Commonality=="YES"){ - if (FullMotifsOnly_questionmark=="YES"){ - FTLwtmotifs=matrix(,nrow = nrow(FirstSubstrateSet),ncol=1) - FTLwtAccessionNumbers=matrix(,nrow = nrow(FirstSubstrateSet),ncol=1) - leftspaces<-c() - rightspaces<-c() - for (i in 1:nrow(FirstSubstrateSet)){ - FTLwtletters<-FirstSubstrateSet[i,4:18] - FTLwtletters<-FTLwtletters[FTLwtletters !="XXXXX"] - FTLwtletters<-paste(FTLwtletters, sep="", collapse="") - - - YYYmotif <- unlist(strsplit(FTLwtletters, split = "")) - YYYposition <- match(x = "x", table = YYYmotif) - #position itself tells me how much is to the left of that X by what it's number is. 
x at position 4 tells me that there are - #just 3 letters to the left of x - - YYYLettersToTheLeft <- YYYposition - 1 - #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is - #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1 - YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 - #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the - #variable the user puts in is - - if (YYYLettersToTheLeft > 6 && YYYLettersToTheRight > 6) { - motif<-YYYmotif - #save that motif, which is the Y and +/- 4 amino acids, including truncation - motif<-motif[!motif %in% "x"] - motif<-paste(motif, sep="", collapse="") - FTLwtletters<-motif - FTLwtmotifs[i,1]<-FTLwtletters - FTLwtAccessionNumbers[i,1]<-FirstSubstrateSet[i,3] - - } - - } - # FTLwtmotifs <- FTLwtmotifs[!is.na(FTLwtmotifs)] - # FTLwtmotifs<-matrix(FTLwtmotifs,ncol = 1) - # - - D835Ymotifs=matrix(,nrow = nrow(SecondSubstrateSet),ncol=1) - D835YAccessionNumbers<-matrix(,nrow = nrow(SecondSubstrateSet),ncol = 1) - - for (i in 1:nrow(SecondSubstrateSet)){ - D835letters<-SecondSubstrateSet[i,4:18] - D835letters<-D835letters[D835letters !="XXXXX"] - D835letters<-paste(D835letters, sep="", collapse="") - - - YYYmotif <- unlist(strsplit(D835letters, split = "")) - YYYposition <- match(x = "x", table = YYYmotif) - #position itself tells me how much is to the left of that X by what it's number is. 
x at position 4 tells me that there are - #just 3 letters to the left of x - - YYYLettersToTheLeft <- YYYposition - 1 - #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is - #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1 - YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 - #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the - #variable the user puts in is - - if (YYYLettersToTheLeft > 6 && YYYLettersToTheRight > 6) { - motif<-YYYmotif - #add blank spaces if the motif has less than 4 letters to the left/right - motif<-c(leftspaces,YYYmotif,rightspaces) - #save that motif, which is the Y and +/- 4 amino acids, including truncation - motif<-motif[!motif %in% "x"] - motif<-paste(motif, sep="", collapse="") - D835letters<-motif - D835Ymotifs[i,1]<-D835letters - D835YAccessionNumbers[i,1]<-SecondSubstrateSet[i,3] - - } - } - - ITDmotifs=matrix(,nrow = nrow(ThirdSubstrateSet),ncol=1) - ITDAccessionNumbers<-matrix(,nrow = nrow(ThirdSubstrateSet)) - - for (i in 1:nrow(ThirdSubstrateSet)){ - ITDletters<-ThirdSubstrateSet[i,4:18] - ITDletters<-ITDletters[ITDletters !="XXXXX"] - ITDletters<-paste(ITDletters, sep="", collapse="") - YYYmotif <- unlist(strsplit(ITDletters, split = "")) - YYYposition <- match(x = "x", table = YYYmotif) - #position itself tells me how much is to the left of that X by what it's number is. 
x at position 4 tells me that there are - #just 3 letters to the left of x - - YYYLettersToTheLeft <- YYYposition - 1 - #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is - #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1 - YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 - #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the - #variable the user puts in is - - if (YYYLettersToTheLeft > 6 && YYYLettersToTheRight > 6) { - motif<-YYYmotif - #add blank spaces if the motif has less than 4 letters to the left/right - motif<-c(leftspaces,YYYmotif,rightspaces) - #save that motif, which is the Y and +/- 4 amino acids, including truncation - motif<-motif[!motif %in% "x"] - motif<-paste(motif, sep="", collapse="") - ITDletters<-motif - ITDmotifs[i,1]<-ITDletters - ITDAccessionNumbers[i,1]<-ThirdSubstrateSet[i,3] - - } - } - - } - - ##############################################3 - #Truncated only - if (TruncatedMotifsOnly_questionmark=="YES"){ - FTLwtmotifs=matrix(,nrow = nrow(FirstSubstrateSet),ncol=1) - FTLwtAccessionNumbers=matrix(,nrow = nrow(FirstSubstrateSet),ncol=1) - - for (i in 1:nrow(FirstSubstrateSet)){ - FTLwtletters<-FirstSubstrateSet[i,4:18] - FTLwtletters<-FTLwtletters[FTLwtletters !="XXXXX"] - FTLwtletters<-paste(FTLwtletters, sep="", collapse="") - - - YYYmotif <- unlist(strsplit(FTLwtletters, split = "")) - YYYposition <- match(x = "x", table = YYYmotif) - #position itself tells me how much is to the left of that X by what it's number is. 
x at position 4 tells me that there are - #just 3 letters to the left of x - - YYYLettersToTheLeft <- YYYposition - 1 - #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is - #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1 - YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 - #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the - #variable the user puts in is - - if (YYYLettersToTheLeft < 7 | YYYLettersToTheRight < 7) { - leftspaces<-rep(" ",times=(7-YYYLettersToTheLeft)) - rightspaces<-rep(" ",times=7-(YYYLettersToTheRight)) - #add blank spaces if the motif has less than 4 letters to the left/right - motif<-c(leftspaces,YYYmotif,rightspaces) - #save that motif, which is the Y and +/- 4 amino acids, including truncation - motif<-motif[!motif %in% "x"] - motif<-paste(motif, sep="", collapse="") - FTLwtletters<-motif - FTLwtmotifs[i,1]<-FTLwtletters - FTLwtAccessionNumbers[i,1]<-FirstSubstrateSet[i,3] - } - - } - - D835Ymotifs=matrix(,nrow = nrow(SecondSubstrateSet),ncol=1) - D835YAccessionNumbers<-matrix(,nrow = nrow(SecondSubstrateSet),ncol = 1) - - for (i in 1:nrow(SecondSubstrateSet)){ - D835letters<-SecondSubstrateSet[i,4:18] - D835letters<-D835letters[D835letters !="XXXXX"] - D835letters<-paste(D835letters, sep="", collapse="") - - - YYYmotif <- unlist(strsplit(D835letters, split = "")) - YYYposition <- match(x = "x", table = YYYmotif) - #position itself tells me how much is to the left of that X by what it's number is. 
x at position 4 tells me that there are - #just 3 letters to the left of x - - YYYLettersToTheLeft <- YYYposition - 1 - #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is - #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1 - YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 - #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the - #variable the user puts in is - - if (YYYLettersToTheLeft < 7 | YYYLettersToTheRight < 7) { - leftspaces<-rep(" ",times=(7-YYYLettersToTheLeft)) - rightspaces<-rep(" ",times=7-(YYYLettersToTheRight)) - #add blank spaces if the motif has less than 4 letters to the left/right - motif<-c(leftspaces,YYYmotif,rightspaces) - #save that motif, which is the Y and +/- 4 amino acids, including truncation - motif<-motif[!motif %in% "x"] - motif<-paste(motif, sep="", collapse="") - D835letters<-motif - D835YAccessionNumbers[i,1]<-SecondSubstrateSet[i,3] - D835Ymotifs[i,1]<-D835letters - } - } - - ITDmotifs=matrix(,nrow = nrow(ThirdSubstrateSet),ncol=1) - ITDAccessionNumbers<-matrix(,nrow = nrow(ThirdSubstrateSet)) - - for (i in 1:nrow(ThirdSubstrateSet)){ - ITDletters<-ThirdSubstrateSet[i,4:18] - ITDletters<-ITDletters[ITDletters !="XXXXX"] - ITDletters<-paste(ITDletters, sep="", collapse="") - YYYmotif <- unlist(strsplit(ITDletters, split = "")) - YYYposition <- match(x = "x", table = YYYmotif) - #position itself tells me how much is to the left of that X by what it's number is. 
x at position 4 tells me that there are - #just 3 letters to the left of x - - YYYLettersToTheLeft <- YYYposition - 1 - #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is - #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1 - YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 - #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the - #variable the user puts in is - - if (YYYLettersToTheLeft < 7 | YYYLettersToTheRight < 7) { - leftspaces<-rep(" ",times=(7-YYYLettersToTheLeft)) - rightspaces<-rep(" ",times=7-(YYYLettersToTheRight)) - #add blank spaces if the motif has less than 4 letters to the left/right - motif<-c(leftspaces,YYYmotif,rightspaces) - #save that motif, which is the Y and +/- 4 amino acids, including truncation - motif<-motif[!motif %in% "x"] - motif<-paste(motif, sep="", collapse="") - ITDletters<-motif - ITDAccessionNumbers[i,1]<-ThirdSubstrateSet[i,3] - ITDmotifs[i,1]<-ITDletters - } - } - - } - - ############################################### - #ALL motifs, full and truncated - - if (FullMotifsOnly_questionmark!="YES"&&TruncatedMotifsOnly_questionmark!="YES"){ - FTLwtmotifs=matrix(,nrow = nrow(FirstSubstrateSet),ncol=1) - FTLwtAccessionNumbers=matrix(,nrow = nrow(FirstSubstrateSet),ncol=1) - - for (i in 1:nrow(FirstSubstrateSet)){ - FTLwtletters<-FirstSubstrateSet[i,4:18] - FTLwtletters<-FTLwtletters[FTLwtletters !="XXXXX"] - FTLwtletters<-paste(FTLwtletters, sep="", collapse="") - leftspaces<-c() - rightspaces<-c() - - YYYmotif <- unlist(strsplit(FTLwtletters, split = "")) - YYYposition <- match(x = "x", table = YYYmotif) - #position itself tells me how much is to the left of that X by what it's number is. 
x at position 4 tells me that there are - #just 3 letters to the left of x - - YYYLettersToTheLeft <- YYYposition - 1 - #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is - #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1 - YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 - #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the - #variable the user puts in is - - - if (YYYLettersToTheLeft < 7 | YYYLettersToTheRight < 7) { - leftspaces<-rep(" ",times=(7-YYYLettersToTheLeft)) - rightspaces<-rep(" ",times=7-(YYYLettersToTheRight)) - #add blank spaces if the motif has less than 4 letters to the left/right - motif<-c(leftspaces,YYYmotif,rightspaces) - #save that motif, which is the Y and +/- 4 amino acids, including truncation - motif<-motif[!motif %in% "x"] - motif<-paste(motif, sep="", collapse="") - FTLwtletters<-motif - FTLwtmotifs[i,1]<-FTLwtletters - FTLwtAccessionNumbers[i,1]<-FirstSubstrateSet[i,3] - } - - if(YYYLettersToTheLeft>6 && YYYLettersToTheRight>6){ - motif<-YYYmotif - #add blank spaces if the motif has less than 4 letters to the left/right - motif<-c(leftspaces,YYYmotif,rightspaces) - #save that motif, which is the Y and +/- 4 amino acids, including truncation - motif<-motif[!motif %in% "x"] - motif<-paste(motif, sep="", collapse="") - FTLwtletters<-motif - FTLwtmotifs[i,1]<-FTLwtletters - FTLwtAccessionNumbers[i,1]<-FirstSubstrateSet[i,3] - - - } - - } - - D835Ymotifs=matrix(,nrow = nrow(SecondSubstrateSet),ncol=1) - D835YAccessionNumbers<-matrix(,nrow = nrow(SecondSubstrateSet),ncol = 1) - - for (i in 1:nrow(SecondSubstrateSet)){ - D835letters<-SecondSubstrateSet[i,4:18] - D835letters<-D835letters[D835letters !="XXXXX"] - D835letters<-paste(D835letters, sep="", collapse="") - leftspaces<-c() - rightspaces<-c() - - YYYmotif <- unlist(strsplit(D835letters, split = "")) - 
YYYposition <- match(x = "x", table = YYYmotif) - #position itself tells me how much is to the left of that X by what it's number is. x at position 4 tells me that there are - #just 3 letters to the left of x - - YYYLettersToTheLeft <- YYYposition - 1 - #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is - #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1 - YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 - #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the - #variable the user puts in is - if (YYYLettersToTheLeft < 7 | YYYLettersToTheRight < 7) { - leftspaces<-rep(" ",times=(7-YYYLettersToTheLeft)) - rightspaces<-rep(" ",times=7-(YYYLettersToTheRight)) - #add blank spaces if the motif has less than 4 letters to the left/right - motif<-c(leftspaces,YYYmotif,rightspaces) - #save that motif, which is the Y and +/- 4 amino acids, including truncation - motif<-motif[!motif %in% "x"] - motif<-paste(motif, sep="", collapse="") - D835letters<-motif - D835Ymotifs[i,1]<-D835letters - D835YAccessionNumbers[i,1]<-FirstSubstrateSet[i,3] - } - - if(YYYLettersToTheLeft>6 && YYYLettersToTheRight>6){ - motif<-YYYmotif - #add blank spaces if the motif has less than 4 letters to the left/right - motif<-c(leftspaces,YYYmotif,rightspaces) - #save that motif, which is the Y and +/- 4 amino acids, including truncation - motif<-motif[!motif %in% "x"] - motif<-paste(motif, sep="", collapse="") - D835letters<-motif - D835Ymotifs[i,1]<-D835letters - D835YAccessionNumbers[i,1]<-FirstSubstrateSet[i,3] - } - } - - - ITDmotifs=matrix(,nrow = nrow(ThirdSubstrateSet),ncol=1) - ITDAccessionNumbers<-matrix(,nrow = nrow(ThirdSubstrateSet)) - - for (i in 1:nrow(ThirdSubstrateSet)){ - ITDletters<-ThirdSubstrateSet[i,4:18] - ITDletters<-ITDletters[ITDletters !="XXXXX"] - ITDletters<-paste(ITDletters, sep="", collapse="") 
- YYYmotif <- unlist(strsplit(ITDletters, split = "")) - leftspaces<-c() - rightspaces<-c() - YYYposition <- match(x = "x", table = YYYmotif) - #position itself tells me how much is to the left of that X by what it's number is. x at position 4 tells me that there are - #just 3 letters to the left of x - - YYYLettersToTheLeft <- YYYposition - 1 - #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is - #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1 - YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 - #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the - #variable the user puts in is - if (YYYLettersToTheLeft < 7 | YYYLettersToTheRight < 7) { - leftspaces<-rep(" ",times=(7-YYYLettersToTheLeft)) - rightspaces<-rep(" ",times=7-(YYYLettersToTheRight)) - #add blank spaces if the motif has less than 4 letters to the left/right - motif<-c(leftspaces,YYYmotif,rightspaces) - #save that motif, which is the Y and +/- 4 amino acids, including truncation - motif<-motif[!motif %in% "x"] - motif<-paste(motif, sep="", collapse="") - ITDletters<-motif - ITDmotifs[i,1]<-ITDletters - ITDAccessionNumbers[i,1]<-FirstSubstrateSet[i,3] - } - - if(YYYLettersToTheLeft>6 && YYYLettersToTheRight>6){ - motif<-YYYmotif - #add blank spaces if the motif has less than 4 letters to the left/right - motif<-c(leftspaces,YYYmotif,rightspaces) - #save that motif, which is the Y and +/- 4 amino acids, including truncation - motif<-motif[!motif %in% "x"] - motif<-paste(motif, sep="", collapse="") - ITDletters<-motif - ITDmotifs[i,1]<-ITDletters - ITDAccessionNumbers[i,1]<-FirstSubstrateSet[i,3] - } - } - - } - ############################################################################################################################# - 
############################################################################################################################# - ############################################################################################################################# - ############################################################################################################################# - ############################################################################################################################# - - #now look for either commonality or difference. Actually could you look for both... - - if (Are_You_Looking_For_Commonality=="YES"){ - - columnalheader<-c(as.character(Thirdsubbackfreq[1:36,1])) - columnalheader<-matrix(columnalheader,nrow = 1) - write.table(x=columnalheader, - file=Shared_subbackfreq_table, - quote=FALSE, sep=",", - row.names=FALSE,col.names = FALSE, na="", append=TRUE) - - FirstOverlapmotifs<-c() - for (i in 1:nrow(ITDmotifs)){ - for (j in 1:nrow(D835Ymotifs)){ - if (is.na(ITDmotifs[i,1])!=TRUE&&is.na(D835Ymotifs[j,1])!=TRUE){ - if (ITDmotifs[i,1]==D835Ymotifs[j,1]){ - FirstOverlapmotifs<-c(FirstOverlapmotifs,D835Ymotifs[j,1]) - } - } - } - } - - AllAccessionNumbers<-c() - columnalheader<-c(rep(NA,36)) - FinalMatrix<-matrix(data =columnalheader,nrow = 1) - - FinalMotifs<-c(rep(NA,20)) - FinalMotifsMatrix<-matrix(data = FinalMotifs,nrow = 1) - - - for (l in 1:length(FirstOverlapmotifs)) { - AccessionNumber<-00000000000 - for (k in 1:nrow(FTLwtmotifs)) { - AccessionNumber<-0000000000000 - if(is.na(FTLwtmotifs[k])!=TRUE){ - #I don't remember why, but I felt it necessary to destroy the accession number multiple times to ensure it is - #destroyed immediately after use - if (FirstOverlapmotifs[l] == FTLwtmotifs[k]) { - substratematrix<-FirstSubstrateSet[k,1:20] - substratematrix<-as.matrix(substratematrix,nrow=1) - FinalMotifsMatrix<-rbind(FinalMotifsMatrix,substratematrix) - #when you find a match between the venn diagrams, save the substrate info 
you get into a matrix - - AccessionNumber <- as.character(FirstSubstrateSet[k, 3]) - #then take the accession number - - for (m in 1:ncol(Firstsubbackfreq)) { - AN <- as.character(Firstsubbackfreq[1, m]) - if (grepl(pattern = AN, - x = AccessionNumber, - fixed = TRUE) == TRUE) { - outputmatrix <- as.character(Firstsubbackfreq[, m]) - outputmatrix <- matrix(outputmatrix, nrow = 1) - #with that accession number, find a match in the subbackfreq file and save it here - FinalMatrix<-rbind(FinalMatrix,outputmatrix) - } - } - } - } - } - } - - - TrueMatrix<-FinalMatrix[!duplicated(FinalMatrix),] - TrueFinalMotifsMatrix<-FinalMotifsMatrix[!duplicated(FinalMotifsMatrix),] - - TrueFinalMotifsMatrix<-TrueFinalMotifsMatrix[2:nrow(TrueFinalMotifsMatrix),] - TrueMatrix<-TrueMatrix[2:nrow(TrueMatrix),] - - write.table( - x = TrueFinalMotifsMatrix, - file = Shared_motifs_table, - quote = FALSE, - sep = ",", - row.names = FALSE, - col.names = TRUE, - na = "", - append = TRUE - ) - - - write.table( - x = TrueMatrix, - file = Shared_subbackfreq_table, - quote = FALSE, - sep = ",", - row.names = FALSE, - col.names = FALSE, - na = "", - append = TRUE - ) - } -} - -if (Are_You_Looking_For_Commonality=="NO"){ - if (FullMotifsOnly_questionmark=="YES"){ - FTLwtmotifs=rep(NA,times=nrow(FirstSubstrateSet)) - FTLwtAccessionNumbers=rep(NA,times=nrow(FirstSubstrateSet)) - leftspaces<-c() - rightspaces<-c() - for (i in 1:nrow(FirstSubstrateSet)){ - FTLwtletters<-FirstSubstrateSet[i,4:18] - FTLwtletters<-FTLwtletters[FTLwtletters !="XXXXX"] - FTLwtletters<-paste(FTLwtletters, sep="", collapse="") - - - YYYmotif <- unlist(strsplit(FTLwtletters, split = "")) - YYYposition <- match(x = "x", table = YYYmotif) - #position itself tells me how much is to the left of that X by what it's number is. 
x at position 4 tells me that there are - #just 3 letters to the left of x - - YYYLettersToTheLeft <- YYYposition - 1 - #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is - #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1 - YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 - #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the - #variable the user puts in is - - if (YYYLettersToTheLeft > 6 && YYYLettersToTheRight > 6) { - motif<-YYYmotif - #save that motif, which is the Y and +/- 4 amino acids, including truncation - motif<-motif[!motif %in% "x"] - motif<-paste(motif, sep="", collapse="") - FTLwtletters<-motif - FTLwtmotifs[i]<-FTLwtletters - FTLwtAccessionNumbers[i]<-FirstSubstrateSet[i,3] - } - - } - # FTLwtmotifs <- FTLwtmotifs[!is.na(FTLwtmotifs)] - # FTLwtmotifs<-matrix(FTLwtmotifs,ncol = 1) - # - - D835Ymotifs=rep(NA,times=nrow(FirstSubstrateSet)) - D835YAccessionNumbers=rep(NA,times=nrow(FirstSubstrateSet)) - - for (i in 1:nrow(SecondSubstrateSet)){ - D835letters<-SecondSubstrateSet[i,4:18] - D835letters<-D835letters[D835letters !="XXXXX"] - D835letters<-paste(D835letters, sep="", collapse="") - - - YYYmotif <- unlist(strsplit(D835letters, split = "")) - YYYposition <- match(x = "x", table = YYYmotif) - #position itself tells me how much is to the left of that X by what it's number is. 
x at position 4 tells me that there are - #just 3 letters to the left of x - - YYYLettersToTheLeft <- YYYposition - 1 - #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is - #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1 - YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 - #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the - #variable the user puts in is - - if (YYYLettersToTheLeft > 6 && YYYLettersToTheRight > 6) { - motif<-YYYmotif - #add blank spaces if the motif has less than 4 letters to the left/right - motif<-c(leftspaces,YYYmotif,rightspaces) - #save that motif, which is the Y and +/- 4 amino acids, including truncation - motif<-motif[!motif %in% "x"] - motif<-paste(motif, sep="", collapse="") - D835letters<-motif - D835Ymotifs[i]<-D835letters - D835YAccessionNumbers[i]<-SecondSubstrateSet[i,3] - } - } - - ITDmotifs=rep(NA,times=nrow(FirstSubstrateSet)) - ITDAccessionNumbers=rep(NA,times=nrow(FirstSubstrateSet)) - - for (i in 1:nrow(ThirdSubstrateSet)){ - ITDletters<-ThirdSubstrateSet[i,4:18] - ITDletters<-ITDletters[ITDletters !="XXXXX"] - ITDletters<-paste(ITDletters, sep="", collapse="") - YYYmotif <- unlist(strsplit(ITDletters, split = "")) - YYYposition <- match(x = "x", table = YYYmotif) - #position itself tells me how much is to the left of that X by what it's number is. 
x at position 4 tells me that there are - #just 3 letters to the left of x - - YYYLettersToTheLeft <- YYYposition - 1 - #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is - #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1 - YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 - #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the - #variable the user puts in is - - if (YYYLettersToTheLeft > 6 && YYYLettersToTheRight > 6) { - motif<-YYYmotif - #add blank spaces if the motif has less than 4 letters to the left/right - motif<-c(leftspaces,YYYmotif,rightspaces) - #save that motif, which is the Y and +/- 4 amino acids, including truncation - motif<-motif[!motif %in% "x"] - motif<-paste(motif, sep="", collapse="") - ITDletters<-motif - ITDmotifs[i]<-ITDletters - ITDAccessionNumbers[i]<-ThirdSubstrateSet[i,3] - - } - } - names(ITDmotifs)<-ITDAccessionNumbers - names(D835Ymotifs)<-D835YAccessionNumbers - names(FTLwtmotifs)<-FTLwtAccessionNumbers - } - - - ##############################################3 - #Truncated only - if (TruncatedMotifsOnly_questionmark=="YES"){ - FTLwtmotifs=matrix(,nrow = nrow(FirstSubstrateSet),ncol=1) - FTLwtAccessionNumbers=matrix(,nrow = nrow(FirstSubstrateSet),ncol=1) - - for (i in 1:nrow(FirstSubstrateSet)){ - FTLwtletters<-FirstSubstrateSet[i,4:18] - FTLwtletters<-FTLwtletters[FTLwtletters !="XXXXX"] - FTLwtletters<-paste(FTLwtletters, sep="", collapse="") - - - YYYmotif <- unlist(strsplit(FTLwtletters, split = "")) - YYYposition <- match(x = "x", table = YYYmotif) - #position itself tells me how much is to the left of that X by what it's number is. 
x at position 4 tells me that there are - #just 3 letters to the left of x - - YYYLettersToTheLeft <- YYYposition - 1 - #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is - #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1 - YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 - #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the - #variable the user puts in is - - if (YYYLettersToTheLeft < 7 | YYYLettersToTheRight < 7) { - leftspaces<-rep(" ",times=(7-YYYLettersToTheLeft)) - rightspaces<-rep(" ",times=7-(YYYLettersToTheRight)) - #add blank spaces if the motif has less than 4 letters to the left/right - motif<-c(leftspaces,YYYmotif,rightspaces) - #save that motif, which is the Y and +/- 4 amino acids, including truncation - motif<-motif[!motif %in% "x"] - motif<-paste(motif, sep="", collapse="") - FTLwtletters<-motif - FTLwtmotifs[i,1]<-FTLwtletters - FTLwtAccessionNumbers[i,1]<-FirstSubstrateSet[i,3] - } - - } - - D835Ymotifs=matrix(,nrow = nrow(SecondSubstrateSet),ncol=1) - D835YAccessionNumbers<-matrix(,nrow = nrow(SecondSubstrateSet),ncol = 1) - i=2 - for (i in 1:nrow(SecondSubstrateSet)){ - D835letters<-SecondSubstrateSet[i,4:18] - D835letters<-D835letters[D835letters !="XXXXX"] - D835letters<-paste(D835letters, sep="", collapse="") - - - YYYmotif <- unlist(strsplit(D835letters, split = "")) - YYYposition <- match(x = "x", table = YYYmotif) - #position itself tells me how much is to the left of that X by what it's number is. 
x at position 4 tells me that there are - #just 3 letters to the left of x - - YYYLettersToTheLeft <- YYYposition - 1 - #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is - #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1 - YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 - #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the - #variable the user puts in is - - if (YYYLettersToTheLeft < 7 | YYYLettersToTheRight < 7) { - leftspaces<-rep(" ",times=(7-YYYLettersToTheLeft)) - rightspaces<-rep(" ",times=7-(YYYLettersToTheRight)) - #add blank spaces if the motif has less than 4 letters to the left/right - motif<-c(leftspaces,YYYmotif,rightspaces) - #save that motif, which is the Y and +/- 4 amino acids, including truncation - motif<-motif[!motif %in% "x"] - motif<-paste(motif, sep="", collapse="") - D835letters<-motif - D835YAccessionNumbers[i,1]<-SecondSubstrateSet[i,3] - D835Ymotifs[i,1]<-D835letters - } - } - - ITDmotifs=matrix(,nrow = nrow(ThirdSubstrateSet),ncol=1) - ITDAccessionNumbers<-matrix(,nrow = nrow(ThirdSubstrateSet)) - - for (i in 1:nrow(ThirdSubstrateSet)){ - ITDletters<-ThirdSubstrateSet[i,4:18] - ITDletters<-ITDletters[ITDletters !="XXXXX"] - ITDletters<-paste(ITDletters, sep="", collapse="") - YYYmotif <- unlist(strsplit(ITDletters, split = "")) - YYYposition <- match(x = "x", table = YYYmotif) - #position itself tells me how much is to the left of that X by what it's number is. 
x at position 4 tells me that there are - #just 3 letters to the left of x - - YYYLettersToTheLeft <- YYYposition - 1 - #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is - #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1 - YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 - #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the - #variable the user puts in is - - if (YYYLettersToTheLeft < 7 | YYYLettersToTheRight < 7) { - leftspaces<-rep(" ",times=(7-YYYLettersToTheLeft)) - rightspaces<-rep(" ",times=7-(YYYLettersToTheRight)) - #add blank spaces if the motif has less than 4 letters to the left/right - motif<-c(leftspaces,YYYmotif,rightspaces) - #save that motif, which is the Y and +/- 4 amino acids, including truncation - motif<-motif[!motif %in% "x"] - motif<-paste(motif, sep="", collapse="") - ITDletters<-motif - ITDAccessionNumbers[i,1]<-ThirdSubstrateSet[i,3] - ITDmotifs[i,1]<-ITDletters - } - } - names(FTLwtmotifs)<-FTLwtAccessionNumbers - names(D835Ymotifs)<-D835YAccessionNumbers - names(ITDmotifs)<-ITDAccessionNumbers - } - - ############################################### - #ALL motifs, full and truncated - - if (FullMotifsOnly_questionmark!="YES"&&TruncatedMotifsOnly_questionmark!="YES"){ - FTLwtmotifs=matrix(,nrow = nrow(FirstSubstrateSet),ncol=1) - FTLwtAccessionNumbers=matrix(,nrow = nrow(FirstSubstrateSet),ncol=1) - - for (i in 1:nrow(FirstSubstrateSet)){ - FTLwtletters<-FirstSubstrateSet[i,4:18] - FTLwtletters<-FTLwtletters[FTLwtletters !="XXXXX"] - FTLwtletters<-paste(FTLwtletters, sep="", collapse="") - leftspaces<-c() - rightspaces<-c() - - YYYmotif <- unlist(strsplit(FTLwtletters, split = "")) - YYYposition <- match(x = "x", table = YYYmotif) - #position itself tells me how much is to the left of that X by what it's number is. 
x at position 4 tells me that there are - #just 3 letters to the left of x - - YYYLettersToTheLeft <- YYYposition - 1 - #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is - #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1 - YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 - #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the - #variable the user puts in is - - - if (YYYLettersToTheLeft < 7 | YYYLettersToTheRight < 7) { - leftspaces<-rep(" ",times=(7-YYYLettersToTheLeft)) - rightspaces<-rep(" ",times=7-(YYYLettersToTheRight)) - #add blank spaces if the motif has less than 4 letters to the left/right - motif<-c(leftspaces,YYYmotif,rightspaces) - #save that motif, which is the Y and +/- 4 amino acids, including truncation - motif<-motif[!motif %in% "x"] - motif<-paste(motif, sep="", collapse="") - FTLwtletters<-motif - FTLwtmotifs[i,1]<-FTLwtletters - FTLwtAccessionNumbers[i,1]<-FirstSubstrateSet[i,3] - } - - if(YYYLettersToTheLeft>6 && YYYLettersToTheRight>6){ - motif<-YYYmotif - #add blank spaces if the motif has less than 4 letters to the left/right - motif<-c(leftspaces,YYYmotif,rightspaces) - #save that motif, which is the Y and +/- 4 amino acids, including truncation - motif<-motif[!motif %in% "x"] - motif<-paste(motif, sep="", collapse="") - FTLwtletters<-motif - FTLwtmotifs[i,1]<-FTLwtletters - FTLwtAccessionNumbers[i,1]<-FirstSubstrateSet[i,3] - - - } - - } - - D835Ymotifs=matrix(,nrow = nrow(SecondSubstrateSet),ncol=1) - D835YAccessionNumbers<-matrix(,nrow = nrow(SecondSubstrateSet),ncol = 1) - - for (i in 1:nrow(SecondSubstrateSet)){ - D835letters<-SecondSubstrateSet[i,4:18] - D835letters<-D835letters[D835letters !="XXXXX"] - D835letters<-paste(D835letters, sep="", collapse="") - leftspaces<-c() - rightspaces<-c() - - YYYmotif <- unlist(strsplit(D835letters, split = "")) - 
YYYposition <- match(x = "x", table = YYYmotif) - #position itself tells me how much is to the left of that X by what it's number is. x at position 4 tells me that there are - #just 3 letters to the left of x - - YYYLettersToTheLeft <- YYYposition - 1 - #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is - #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1 - YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 - #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the - #variable the user puts in is - if (YYYLettersToTheLeft < 7 | YYYLettersToTheRight < 7) { - leftspaces<-rep(" ",times=(7-YYYLettersToTheLeft)) - rightspaces<-rep(" ",times=7-(YYYLettersToTheRight)) - #add blank spaces if the motif has less than 4 letters to the left/right - motif<-c(leftspaces,YYYmotif,rightspaces) - #save that motif, which is the Y and +/- 4 amino acids, including truncation - motif<-motif[!motif %in% "x"] - motif<-paste(motif, sep="", collapse="") - D835letters<-motif - D835Ymotifs[i,1]<-D835letters - D835YAccessionNumbers[i,1]<-FirstSubstrateSet[i,3] - } - - if(YYYLettersToTheLeft>6 && YYYLettersToTheRight>6){ - motif<-YYYmotif - #add blank spaces if the motif has less than 4 letters to the left/right - motif<-c(leftspaces,YYYmotif,rightspaces) - #save that motif, which is the Y and +/- 4 amino acids, including truncation - motif<-motif[!motif %in% "x"] - motif<-paste(motif, sep="", collapse="") - D835letters<-motif - D835Ymotifs[i,1]<-D835letters - D835YAccessionNumbers[i,1]<-FirstSubstrateSet[i,3] - } - } - - - ITDmotifs=matrix(,nrow = nrow(ThirdSubstrateSet),ncol=1) - ITDAccessionNumbers<-matrix(,nrow = nrow(ThirdSubstrateSet)) - - for (i in 1:nrow(ThirdSubstrateSet)){ - ITDletters<-ThirdSubstrateSet[i,4:18] - ITDletters<-ITDletters[ITDletters !="XXXXX"] - ITDletters<-paste(ITDletters, sep="", collapse="") 
- YYYmotif <- unlist(strsplit(ITDletters, split = "")) - leftspaces<-c() - rightspaces<-c() - YYYposition <- match(x = "x", table = YYYmotif) - #position itself tells me how much is to the left of that X by what it's number is. x at position 4 tells me that there are - #just 3 letters to the left of x - - YYYLettersToTheLeft <- YYYposition - 1 - #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is - #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1 - YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 - #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the - #variable the user puts in is - if (YYYLettersToTheLeft < 7 | YYYLettersToTheRight < 7) { - leftspaces<-rep(" ",times=(7-YYYLettersToTheLeft)) - rightspaces<-rep(" ",times=7-(YYYLettersToTheRight)) - #add blank spaces if the motif has less than 4 letters to the left/right - motif<-c(leftspaces,YYYmotif,rightspaces) - #save that motif, which is the Y and +/- 4 amino acids, including truncation - motif<-motif[!motif %in% "x"] - motif<-paste(motif, sep="", collapse="") - ITDletters<-motif - ITDmotifs[i,1]<-ITDletters - ITDAccessionNumbers[i,1]<-FirstSubstrateSet[i,3] - } - - if(YYYLettersToTheLeft>6 && YYYLettersToTheRight>6){ - motif<-YYYmotif - #add blank spaces if the motif has less than 4 letters to the left/right - motif<-c(leftspaces,YYYmotif,rightspaces) - #save that motif, which is the Y and +/- 4 amino acids, including truncation - motif<-motif[!motif %in% "x"] - motif<-paste(motif, sep="", collapse="") - ITDletters<-motif - ITDmotifs[i,1]<-ITDletters - ITDAccessionNumbers[i,1]<-FirstSubstrateSet[i,3] - } - } - names(FTLwtmotifs)<-FTLwtAccessionNumbers - names(D835Ymotifs)<-D835YAccessionNumbers - names(ITDmotifs)<-ITDAccessionNumbers - } - - - FTLwtmotifsFINAL<-FTLwtmotifs[!FTLwtmotifs %in% D835Ymotifs] - 
FTLwtmotifsFINAL<-FTLwtmotifsFINAL[!FTLwtmotifsFINAL %in% ITDmotifs] - FTLwtmotifsFINAL<-FTLwtmotifsFINAL[!duplicated(FTLwtmotifsFINAL)] - - - ITDmotifsFINAL<-ITDmotifs[!ITDmotifs %in% D835Ymotifs] - ITDmotifsFINAL<-ITDmotifsFINAL[!ITDmotifsFINAL %in% FTLwtmotifs] - ITDmotifsFINAL<-ITDmotifsFINAL[!duplicated(ITDmotifsFINAL)] - - - D835YmotifsFINAL<-D835Ymotifs[!D835Ymotifs %in% FTLwtmotifs] - D835YmotifsFINAL<-D835YmotifsFINAL[!D835YmotifsFINAL %in% ITDmotifs] - D835YmotifsFINAL<-D835YmotifsFINAL[!duplicated(D835YmotifsFINAL)] - - - columnalheader<-c(rep(NA,35)) - FTLFinalMatrix<-matrix(data =columnalheader,nrow = 1) - - for (k in 1:length(FTLwtmotifsFINAL)) { - AN<-00000 - #I don't remember why, but I felt it necessary to destroy the accession number multiple times to ensure it is - #destroyed immediately after use - for (m in 1:ncol(Firstsubbackfreq)) { - AN <- as.character(Firstsubbackfreq[1, m]) - if (grepl(pattern = AN, - x = names(FTLwtmotifsFINAL[k]), - fixed = TRUE) == TRUE) { - outputmatrix <- as.character(Firstsubbackfreq[, m]) - outputmatrix <- matrix(outputmatrix, nrow = 1) - #with that accession number, find a match in the subbackfreq file and save it here - FTLFinalMatrix<-rbind(FTLFinalMatrix,outputmatrix) - } - } - } - FTLFinalMatrix<-FTLFinalMatrix[!duplicated(FTLFinalMatrix),] - - columnalheader<-c(rep(NA,35)) - ITDFinalMatrix<-matrix(data =columnalheader,nrow = 1) - - for (k in 1:length(ITDmotifsFINAL)) { - AN<-00000 - #I don't remember why, but I felt it necessary to destroy the accession number multiple times to ensure it is - #destroyed immediately after use - for (m in 1:ncol(Thirdsubbackfreq)) { - AN <- as.character(Thirdsubbackfreq[1, m]) - if (grepl(pattern = AN, - x = names(ITDmotifsFINAL[k]), - fixed = TRUE) == TRUE) { - outputmatrix <- as.character(Thirdsubbackfreq[, m]) - outputmatrix <- matrix(outputmatrix, nrow = 1) - #with that accession number, find a match in the subbackfreq file and save it here - 
ITDFinalMatrix<-rbind(ITDFinalMatrix,outputmatrix) - } - } - } - ITDFinalMatrix<-ITDFinalMatrix[!duplicated(ITDFinalMatrix),] - - columnalheader<-c(rep(NA,35)) - D835YFinalMatrix<-matrix(data =columnalheader,nrow = 1) - - for (k in 1:length(D835YmotifsFINAL)) { - #I don't remember why, but I felt it necessary to destroy the accession number multiple times to ensure it is - #destroyed immediately after use - for (m in 1:ncol(Secondsubbackfreq)) { - AN <- as.character(Secondsubbackfreq[1, m]) - if (grepl(pattern = AN, - x = names(D835YmotifsFINAL[k]), - fixed = TRUE) == TRUE) { - outputmatrix <- as.character(Secondsubbackfreq[, m]) - outputmatrix <- matrix(outputmatrix, nrow = 1) - #with that accession number, find a match in the subbackfreq file and save it here - D835YFinalMatrix<-rbind(D835YFinalMatrix,outputmatrix) - } - } - } - D835YFinalMatrix<-D835YFinalMatrix[!duplicated(D835YFinalMatrix),] - - FTLoutputmatrix<-matrix(data=c(FTLwtmotifsFINAL,names(FTLwtmotifsFINAL)),ncol = 2) - - - write.table(x=FTLoutputmatrix, - file=First_unshared_motifs_table, - quote=FALSE, sep=",", - row.names=FALSE,col.names = FALSE, na="", append=TRUE) - - columnalheader<-c("Accession Numbers",as.character(Thirdsubbackfreq[1:35,1])) - columnalheader<-matrix(columnalheader,nrow = 1) - write.table(x=columnalheader, - file=First_unshared_subbackfreq, - quote=FALSE, sep=",", - row.names=FALSE,col.names = FALSE, na="", append=TRUE) - - write.table(x=FTLFinalMatrix, - file=First_unshared_subbackfreq, - quote=FALSE, sep=",", - row.names=FALSE,col.names = FALSE, na="", append=TRUE) - - ############################################################################################################ - - D835Youtputmatrix<-matrix(data=c(D835YmotifsFINAL,names(D835YmotifsFINAL)),ncol = 2) - - write.table(x=D835Youtputmatrix, - file=Second_unshared_motifs_table, - quote=FALSE, sep=",", - row.names=FALSE,col.names = FALSE, na="", append=TRUE) - - columnalheader<-c("Accession 
Numbers",as.character(Thirdsubbackfreq[1:35,1])) - columnalheader<-matrix(columnalheader,nrow = 1) - write.table(x=columnalheader, - file=Second_unshared_subbackfreq, - quote=FALSE, sep=",", - row.names=FALSE,col.names = FALSE, na="", append=TRUE) - - write.table(x=D835YFinalMatrix, - file=Second_unshared_subbackfreq, - quote=FALSE, sep=",", - row.names=FALSE,col.names = FALSE, na="", append=TRUE) - - ############################################################################################################ - - ITDoutputmatrix<-matrix(data = c(ITDmotifsFINAL,names(ITDmotifsFINAL)),ncol = 2) - - write.table(x=ITDoutputmatrix, - file=Third_unshared_motifs_table, - quote=FALSE, sep=",", - row.names=FALSE,col.names = FALSE, na="", append=TRUE) - - columnalheader<-c("Accession Numbers",as.character(Thirdsubbackfreq[1:35,1])) - columnalheader<-matrix(columnalheader,nrow = 1) - write.table(x=columnalheader, - file=Third_unshared_subbackfreq, - quote=FALSE, sep=",", - row.names=FALSE,col.names = FALSE, na="", append=TRUE) - - write.table(x=ITDFinalMatrix, - file=Third_unshared_subbackfreq, - quote=FALSE, sep=",", - row.names=FALSE,col.names = FALSE, na="", append=TRUE) - -} - -# if (Are_You_Looking_For_Commonality=="NO"){ -# -# -# FTLwtmotifsFULLMATRIX<-cbind(FTLwtmotifs,FTLwtAccessionNumbers) -# ITDmotifsFULLMATRIX<-cbind(ITDmotifs,ITDAccessionNumbers) -# D835YmotifsFULLMATRIX<-cbind(D835Ymotifs,D835YAccessionNumbers) -# # ?duplicated -# -# FTLwtmotifsFINAL<-FTLwtmotifs[!FTLwtmotifs %in% D835Ymotifs] -# FTLwtmotifsFINAL<-FTLwtmotifsFINAL[!FTLwtmotifsFINAL %in% ITDmotifs] -# FTLwtmotifsFINAL<-matrix(data=FTLwtmotifsFINAL, ncol = 1) -# -# ITDmotifsFINAL<-ITDmotifs[!ITDmotifs %in% D835Ymotifs] -# ITDmotifsFINAL<-ITDmotifsFINAL[!ITDmotifsFINAL %in% FTLwtmotifs] -# ITDmotifsFINAL<-matrix(data=ITDmotifsFINAL, ncol = 1) -# -# D835YmotifsFINAL<-D835Ymotifs[!D835Ymotifs %in% FTLwtmotifs] -# D835YmotifsFINAL<-D835YmotifsFINAL[!D835YmotifsFINAL %in% ITDmotifs] -# 
D835YmotifsFINAL<-matrix(data=D835YmotifsFINAL, ncol = 1) -# -# FTLnondupeAccessionNumbers<-c() -# -# for (z in 1:nrow(FTLwtmotifsFINAL)) { -# for (w in 1:nrow(FTLwtmotifsFULLMATRIX)) { -# if (is.na(FTLwtmotifsFULLMATRIX[w,1])!=TRUE) -# if (FTLwtmotifsFINAL[z]==FTLwtmotifsFULLMATRIX[w,1]){ -# FTLnondupeAccessionNumbers<-c(FTLnondupeAccessionNumbers,FTLwtmotifsFULLMATRIX[w,2]) -# } -# } -# } -# -# -# #find accession numbers here, put a matrix of those things, amino acid %, but only after I've unduped them -# FTLnondupeAccessionNumbers<-FTLnondupeAccessionNumbers[!duplicated(FTLnondupeAccessionNumbers)] -# -# columnalheader<-c(rep(NA,35)) -# FTLFinalMatrix<-matrix(data =columnalheader,nrow = 1) -# -# for (k in 1:length(FTLnondupeAccessionNumbers)) { -# #I don't remember why, but I felt it necessary to destroy the accession number multiple times to ensure it is -# #destroyed immediately after use -# for (m in 1:ncol(Firstsubbackfreq)) { -# AN <- as.character(Firstsubbackfreq[1, m]) -# if (grepl(pattern = AN, -# x = FTLnondupeAccessionNumbers[k], -# fixed = TRUE) == TRUE) { -# outputmatrix <- as.character(Firstsubbackfreq[, m]) -# outputmatrix <- matrix(outputmatrix, nrow = 1) -# #with that accession number, find a match in the subbackfreq file and save it here -# FTLFinalMatrix<-rbind(FTLFinalMatrix,outputmatrix) -# } -# } -# } -# ITDnondupeAccessionNumbers<-c() -# -# for (z in 1:nrow(ITDmotifsFINAL)) { -# for (w in 1:nrow(ITDmotifsFULLMATRIX)) { -# if (is.na(ITDmotifsFULLMATRIX[w,1])!=TRUE) -# if (ITDmotifsFINAL[z]==ITDmotifsFULLMATRIX[w,1]){ -# ITDnondupeAccessionNumbers<-c(ITDnondupeAccessionNumbers,ITDmotifsFULLMATRIX[w,2]) -# } -# } -# } -# -# -# #find accession numbers here, put a matrix of those things, amino acid %, but only after I've unduped them -# ITDnondupeAccessionNumbers<-ITDnondupeAccessionNumbers[!duplicated(ITDnondupeAccessionNumbers)] -# -# columnalheader<-c(rep(NA,35)) -# ITDFinalMatrix<-matrix(data =columnalheader,nrow = 1) -# -# for (k in 
1:length(ITDnondupeAccessionNumbers)) { -# #I don't remember why, but I felt it necessary to destroy the accession number multiple times to ensure it is -# #destroyed immediately after use -# for (m in 1:ncol(Thirdsubbackfreq)) { -# AN <- as.character(Thirdsubbackfreq[1, m]) -# if (grepl(pattern = AN, -# x = ITDnondupeAccessionNumbers[k], -# fixed = TRUE) == TRUE) { -# outputmatrix <- as.character(Thirdsubbackfreq[, m]) -# outputmatrix <- matrix(outputmatrix, nrow = 1) -# #with that accession number, find a match in the subbackfreq file and save it here -# ITDFinalMatrix<-rbind(ITDFinalMatrix,outputmatrix) -# } -# } -# } -# -# -# D835YnondupeAccessionNumbers<-c() -# -# for (z in 1:nrow(D835YmotifsFINAL)) { -# for (w in 1:nrow(D835YmotifsFULLMATRIX)) { -# if (is.na(D835YmotifsFULLMATRIX[w,1])!=TRUE) -# if (D835YmotifsFINAL[z]==D835YmotifsFULLMATRIX[w,1]){ -# D835YnondupeAccessionNumbers<-c(D835YnondupeAccessionNumbers,D835YmotifsFULLMATRIX[w,2]) -# } -# } -# } -# -# -# #find accession numbers here, put a matrix of those things, amino acid %, but only after I've unduped them -# D835YnondupeAccessionNumbers<-D835YnondupeAccessionNumbers[!duplicated(D835YnondupeAccessionNumbers)] -# -# columnalheader<-c(rep(NA,35)) -# D835YFinalMatrix<-matrix(data =columnalheader,nrow = 1) -# -# for (k in 1:length(D835YnondupeAccessionNumbers)) { -# #I don't remember why, but I felt it necessary to destroy the accession number multiple times to ensure it is -# #destroyed immediately after use -# for (m in 1:ncol(Secondsubbackfreq)) { -# AN <- as.character(Secondsubbackfreq[1, m]) -# if (grepl(pattern = AN, -# x = D835YnondupeAccessionNumbers[k], -# fixed = TRUE) == TRUE) { -# outputmatrix <- as.character(Secondsubbackfreq[, m]) -# outputmatrix <- matrix(outputmatrix, nrow = 1) -# #with that accession number, find a match in the subbackfreq file and save it here -# D835YFinalMatrix<-rbind(D835YFinalMatrix,outputmatrix) -# } -# } -# } -# -# -# -# # 
FinalFTLmotifs<-FTLwtmotifsFINAL[!duplicated(FTLwtmotifsFINAL)] -# # FinalFTLAccessionNumbers<-FTLnondupeAccessionNumbers[!duplicated(FTLnondupeAccessionNumbers)] -# # necessaryNAs<-rep(NA,times=(length(FinalFTLmotifs)-length(FinalFTLAccessionNumbers))) -# # FinalFTLAccessionNumbers<-c(FinalFTLAccessionNumbers,necessaryNAs) -# # TRUEFTLoutputmatrix<-cbind(FinalFTLmotifs,FinalFTLAccessionNumbers) -# # TRUEFTLoutputmatrix -# -# write.table(x=FTLwtmotifsFINAL, -# file=First_unshared_motifs_table, -# quote=FALSE, sep=",", -# row.names=FALSE,col.names = FALSE, na="", append=TRUE) -# -# columnalheader<-c("Accession Numbers",as.character(Thirdsubbackfreq[1:35,1])) -# columnalheader<-matrix(columnalheader,nrow = 1) -# write.table(x=columnalheader, -# file=First_unshared_subbackfreq, -# quote=FALSE, sep=",", -# row.names=FALSE,col.names = FALSE, na="", append=TRUE) -# -# write.table(x=FTLFinalMatrix, -# file=First_unshared_subbackfreq, -# quote=FALSE, sep=",", -# row.names=FALSE,col.names = FALSE, na="", append=TRUE) -# -# -# -# -# -# -# -# -# -# -# -# -# write.table(x=D835YmotifsFINAL, -# file=Second_unshared_motifs_table, -# quote=FALSE, sep=",", -# row.names=FALSE,col.names = FALSE, na="", append=TRUE) -# -# columnalheader<-c("Accession Numbers",as.character(Thirdsubbackfreq[1:35,1])) -# columnalheader<-matrix(columnalheader,nrow = 1) -# write.table(x=columnalheader, -# file=Second_unshared_subbackfreq, -# quote=FALSE, sep=",", -# row.names=FALSE,col.names = FALSE, na="", append=TRUE) -# -# write.table(x=D835YFinalMatrix, -# file=Second_unshared_subbackfreq, -# quote=FALSE, sep=",", -# row.names=FALSE,col.names = FALSE, na="", append=TRUE) -# -# -# -# -# -# -# -# -# -# -# -# -# -# -# -# -# write.table(x=ITDmotifsFINAL, -# file=Third_unshared_motifs_table, -# quote=FALSE, sep=",", -# row.names=FALSE,col.names = FALSE, na="", append=TRUE) -# -# columnalheader<-c("Accession Numbers",as.character(Thirdsubbackfreq[1:35,1])) -# columnalheader<-matrix(columnalheader,nrow = 1) 
-# write.table(x=columnalheader, -# file=Third_unshared_subbackfreq, -# quote=FALSE, sep=",", -# row.names=FALSE,col.names = FALSE, na="", append=TRUE) -# -# write.table(x=ITDFinalMatrix, -# file=Third_unshared_subbackfreq, -# quote=FALSE, sep=",", -# row.names=FALSE,col.names = FALSE, na="", append=TRUE) -# -# -# -# -# -# -# } diff -r beba4066121e -r dff99bed3f56 all stuff/Commonality and Difference finderMADE 7 TO 7.R --- a/all stuff/Commonality and Difference finderMADE 7 TO 7.R Wed Jan 16 14:33:39 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1281 +0,0 @@ -#I should make an SOP for this. Problems we encountered: no x in the xY motif, and the kilodemon -#the output files have both Y and xY, they shouldn't why is that happening? make it not happen -#make sure that accession numbers stay locked to each motif, somehow -#output should look just like the KALIP input - -#ff you want ONLY FULL MOTIFS, put "YES" here, please use all caps -FullMotifsOnly_questionmark<-"NO" -#If you want ONLY TRUNCATED MOTIFS, put "YES" here, please use all caps -TruncatedMotifsOnly_questionmark<-"NO" -#if you want to find the overlap, put a "YES" here (all caps), if you want to find the non-overlap, put "NO" (all caps) -Are_You_Looking_For_Commonality<-"YES" - - -#put the names of your input files here -FirstSubstrateSet<- read.csv("Galaxy63-BTK_PLUS-R1_Substrates.csv", stringsAsFactors=FALSE) -Firstsubbackfreq<- read.csv("Galaxy64-BTK_PLUS-R1_SubstrateBackgroundFrequency.csv", header=FALSE, stringsAsFactors=FALSE) - -SecondSubstrateSet<- read.csv("Galaxy65-BTK_PLUS_R2_Substrates.csv", stringsAsFactors=FALSE) -Secondsubbackfreq<- read.csv("Galaxy66-BTK_PLUS_R2_SubstrateBackgroundFrequency.csv", header=FALSE, stringsAsFactors=FALSE) - -ThirdSubstrateSet<- read.csv("Galaxy69-BTK_PLUS_R3_Substrates.csv", stringsAsFactors=FALSE) -Thirdsubbackfreq<- read.csv("Galaxy70-BTK_PLUS_R3_SubstrateBackgroundFrequency.csv", header=FALSE, stringsAsFactors=FALSE) - -#then put the names of 
your output files here -Shared_motifs_table<-"180719_GALAXY-BTK-plus-rep-OVLP-7to7-substrates.csv" -Shared_subbackfreq_table<-"180719_GALAXY-BTK-plus-rep-OVLP-7to7-SubBackFreq.csv" - -# Shared_motifs_table<-"Shared motifs 7-27-17.csv" -# Shared_subbackfreq_table<-"SubstrateBackgrounFrequency-for-shared-motifs 4 7-27-17.csv" - -First_unshared_motifs_table<-"R1 substrates.csv" -First_unshared_subbackfreq<-"R1 SBF.csv" - -Second_unshared_motifs_table<-"R2 subs.csv" -Second_unshared_subbackfreq<-"R2 SBf.csv" - -Third_unshared_motifs_table<-"R3 subs.csv" -Third_unshared_subbackfreq<-"R3 SBF.csv" - -#final note, this code is going to be unworkable if you want to make a Venn diagram of more than 3 circles. I think I'll poke around -#other languages to see if any of them can do it. -#################################################################################################################################### - - - - - -FirstxY<-rep("xY",times=nrow(FirstSubstrateSet)) -FirstSubstrateSet[,11]<-FirstxY - -SecondxY<-rep("xY",times=nrow(SecondSubstrateSet)) -SecondSubstrateSet[,11]<-SecondxY - -ThirdxY<-rep("xY",times=nrow(ThirdSubstrateSet)) -ThirdSubstrateSet[,11]<-ThirdxY - - - - - - - - - - - -#################################################################################################################################### -#################################################################################################################################### -# better version of this code written in C: what happens when two kinases share a motif, but they found that motif in two -# separate proteins thus two separate accession numbers? -# It should actually output the shared motif and BOTH accession numbers. Right now it does not, it only maps out the second -# accession number. 
So that needs to be fixed BUT you need to keep the commonality between a motif and its accession number -#################################################################################################################################### -#################################################################################################################################### -#################################################################################################################################### -#################################################################################################################################### - -#Create the motif sets, deciding wether or not you're looking for truncated or full here -#full only -if (Are_You_Looking_For_Commonality=="YES"){ - if (FullMotifsOnly_questionmark=="YES"){ - FTLwtmotifs=matrix(,nrow = nrow(FirstSubstrateSet),ncol=1) - FTLwtAccessionNumbers=matrix(,nrow = nrow(FirstSubstrateSet),ncol=1) - leftspaces<-c() - rightspaces<-c() - for (i in 1:nrow(FirstSubstrateSet)){ - FTLwtletters<-FirstSubstrateSet[i,4:18] - FTLwtletters<-FTLwtletters[FTLwtletters !="XXXXX"] - FTLwtletters<-paste(FTLwtletters, sep="", collapse="") - - - YYYmotif <- unlist(strsplit(FTLwtletters, split = "")) - YYYposition <- match(x = "x", table = YYYmotif) - #position itself tells me how much is to the left of that X by what it's number is. 
x at position 4 tells me that there are - #just 3 letters to the left of x - - YYYLettersToTheLeft <- YYYposition - 1 - #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is - #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1 - YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 - #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the - #variable the user puts in is - - if (YYYLettersToTheLeft > 6 && YYYLettersToTheRight > 6) { - motif<-YYYmotif - #save that motif, which is the Y and +/- 4 amino acids, including truncation - motif<-motif[!motif %in% "x"] - motif<-paste(motif, sep="", collapse="") - FTLwtletters<-motif - FTLwtmotifs[i,1]<-FTLwtletters - FTLwtAccessionNumbers[i,1]<-FirstSubstrateSet[i,3] - - } - - } - # FTLwtmotifs <- FTLwtmotifs[!is.na(FTLwtmotifs)] - # FTLwtmotifs<-matrix(FTLwtmotifs,ncol = 1) - # - - D835Ymotifs=matrix(,nrow = nrow(SecondSubstrateSet),ncol=1) - D835YAccessionNumbers<-matrix(,nrow = nrow(SecondSubstrateSet),ncol = 1) - - for (i in 1:nrow(SecondSubstrateSet)){ - D835letters<-SecondSubstrateSet[i,4:18] - D835letters<-D835letters[D835letters !="XXXXX"] - D835letters<-paste(D835letters, sep="", collapse="") - - - YYYmotif <- unlist(strsplit(D835letters, split = "")) - YYYposition <- match(x = "x", table = YYYmotif) - #position itself tells me how much is to the left of that X by what it's number is. 
x at position 4 tells me that there are - #just 3 letters to the left of x - - YYYLettersToTheLeft <- YYYposition - 1 - #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is - #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1 - YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 - #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the - #variable the user puts in is - - if (YYYLettersToTheLeft > 6 && YYYLettersToTheRight > 6) { - motif<-YYYmotif - #add blank spaces if the motif has less than 4 letters to the left/right - motif<-c(leftspaces,YYYmotif,rightspaces) - #save that motif, which is the Y and +/- 4 amino acids, including truncation - motif<-motif[!motif %in% "x"] - motif<-paste(motif, sep="", collapse="") - D835letters<-motif - D835Ymotifs[i,1]<-D835letters - D835YAccessionNumbers[i,1]<-SecondSubstrateSet[i,3] - - } - } - - ITDmotifs=matrix(,nrow = nrow(ThirdSubstrateSet),ncol=1) - ITDAccessionNumbers<-matrix(,nrow = nrow(ThirdSubstrateSet)) - - for (i in 1:nrow(ThirdSubstrateSet)){ - ITDletters<-ThirdSubstrateSet[i,4:18] - ITDletters<-ITDletters[ITDletters !="XXXXX"] - ITDletters<-paste(ITDletters, sep="", collapse="") - YYYmotif <- unlist(strsplit(ITDletters, split = "")) - YYYposition <- match(x = "x", table = YYYmotif) - #position itself tells me how much is to the left of that X by what it's number is. 
x at position 4 tells me that there are - #just 3 letters to the left of x - - YYYLettersToTheLeft <- YYYposition - 1 - #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is - #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1 - YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 - #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the - #variable the user puts in is - - if (YYYLettersToTheLeft > 6 && YYYLettersToTheRight > 6) { - motif<-YYYmotif - #add blank spaces if the motif has less than 4 letters to the left/right - motif<-c(leftspaces,YYYmotif,rightspaces) - #save that motif, which is the Y and +/- 4 amino acids, including truncation - motif<-motif[!motif %in% "x"] - motif<-paste(motif, sep="", collapse="") - ITDletters<-motif - ITDmotifs[i,1]<-ITDletters - ITDAccessionNumbers[i,1]<-ThirdSubstrateSet[i,3] - - } - } - - } - - ##############################################3 - #Truncated only - if (TruncatedMotifsOnly_questionmark=="YES"){ - FTLwtmotifs=matrix(,nrow = nrow(FirstSubstrateSet),ncol=1) - FTLwtAccessionNumbers=matrix(,nrow = nrow(FirstSubstrateSet),ncol=1) - - for (i in 1:nrow(FirstSubstrateSet)){ - FTLwtletters<-FirstSubstrateSet[i,4:18] - FTLwtletters<-FTLwtletters[FTLwtletters !="XXXXX"] - FTLwtletters<-paste(FTLwtletters, sep="", collapse="") - - - YYYmotif <- unlist(strsplit(FTLwtletters, split = "")) - YYYposition <- match(x = "x", table = YYYmotif) - #position itself tells me how much is to the left of that X by what it's number is. 
x at position 4 tells me that there are - #just 3 letters to the left of x - - YYYLettersToTheLeft <- YYYposition - 1 - #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is - #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1 - YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 - #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the - #variable the user puts in is - - if (YYYLettersToTheLeft < 7 | YYYLettersToTheRight < 7) { - leftspaces<-rep(" ",times=(7-YYYLettersToTheLeft)) - rightspaces<-rep(" ",times=7-(YYYLettersToTheRight)) - #add blank spaces if the motif has less than 4 letters to the left/right - motif<-c(leftspaces,YYYmotif,rightspaces) - #save that motif, which is the Y and +/- 4 amino acids, including truncation - motif<-motif[!motif %in% "x"] - motif<-paste(motif, sep="", collapse="") - FTLwtletters<-motif - FTLwtmotifs[i,1]<-FTLwtletters - FTLwtAccessionNumbers[i,1]<-FirstSubstrateSet[i,3] - } - - } - - D835Ymotifs=matrix(,nrow = nrow(SecondSubstrateSet),ncol=1) - D835YAccessionNumbers<-matrix(,nrow = nrow(SecondSubstrateSet),ncol = 1) - - for (i in 1:nrow(SecondSubstrateSet)){ - D835letters<-SecondSubstrateSet[i,4:18] - D835letters<-D835letters[D835letters !="XXXXX"] - D835letters<-paste(D835letters, sep="", collapse="") - - - YYYmotif <- unlist(strsplit(D835letters, split = "")) - YYYposition <- match(x = "x", table = YYYmotif) - #position itself tells me how much is to the left of that X by what it's number is. 
x at position 4 tells me that there are - #just 3 letters to the left of x - - YYYLettersToTheLeft <- YYYposition - 1 - #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is - #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1 - YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 - #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the - #variable the user puts in is - - if (YYYLettersToTheLeft < 7 | YYYLettersToTheRight < 7) { - leftspaces<-rep(" ",times=(7-YYYLettersToTheLeft)) - rightspaces<-rep(" ",times=7-(YYYLettersToTheRight)) - #add blank spaces if the motif has less than 4 letters to the left/right - motif<-c(leftspaces,YYYmotif,rightspaces) - #save that motif, which is the Y and +/- 4 amino acids, including truncation - motif<-motif[!motif %in% "x"] - motif<-paste(motif, sep="", collapse="") - D835letters<-motif - D835YAccessionNumbers[i,1]<-SecondSubstrateSet[i,3] - D835Ymotifs[i,1]<-D835letters - } - } - - ITDmotifs=matrix(,nrow = nrow(ThirdSubstrateSet),ncol=1) - ITDAccessionNumbers<-matrix(,nrow = nrow(ThirdSubstrateSet)) - - for (i in 1:nrow(ThirdSubstrateSet)){ - ITDletters<-ThirdSubstrateSet[i,4:18] - ITDletters<-ITDletters[ITDletters !="XXXXX"] - ITDletters<-paste(ITDletters, sep="", collapse="") - YYYmotif <- unlist(strsplit(ITDletters, split = "")) - YYYposition <- match(x = "x", table = YYYmotif) - #position itself tells me how much is to the left of that X by what it's number is. 
x at position 4 tells me that there are - #just 3 letters to the left of x - - YYYLettersToTheLeft <- YYYposition - 1 - #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is - #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1 - YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 - #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the - #variable the user puts in is - - if (YYYLettersToTheLeft < 7 | YYYLettersToTheRight < 7) { - leftspaces<-rep(" ",times=(7-YYYLettersToTheLeft)) - rightspaces<-rep(" ",times=7-(YYYLettersToTheRight)) - #add blank spaces if the motif has less than 4 letters to the left/right - motif<-c(leftspaces,YYYmotif,rightspaces) - #save that motif, which is the Y and +/- 4 amino acids, including truncation - motif<-motif[!motif %in% "x"] - motif<-paste(motif, sep="", collapse="") - ITDletters<-motif - ITDAccessionNumbers[i,1]<-ThirdSubstrateSet[i,3] - ITDmotifs[i,1]<-ITDletters - } - } - - } - - ############################################### - #ALL motifs, full and truncated - - if (FullMotifsOnly_questionmark!="YES"&&TruncatedMotifsOnly_questionmark!="YES"){ - FTLwtmotifs=matrix(,nrow = nrow(FirstSubstrateSet),ncol=1) - FTLwtAccessionNumbers=matrix(,nrow = nrow(FirstSubstrateSet),ncol=1) - - for (i in 1:nrow(FirstSubstrateSet)){ - FTLwtletters<-FirstSubstrateSet[i,4:18] - FTLwtletters<-FTLwtletters[FTLwtletters !="XXXXX"] - FTLwtletters<-paste(FTLwtletters, sep="", collapse="") - leftspaces<-c() - rightspaces<-c() - - YYYmotif <- unlist(strsplit(FTLwtletters, split = "")) - YYYposition <- match(x = "x", table = YYYmotif) - #position itself tells me how much is to the left of that X by what it's number is. 
x at position 4 tells me that there are - #just 3 letters to the left of x - - YYYLettersToTheLeft <- YYYposition - 1 - #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is - #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1 - YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 - #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the - #variable the user puts in is - - - if (YYYLettersToTheLeft < 7 | YYYLettersToTheRight < 7) { - leftspaces<-rep(" ",times=(7-YYYLettersToTheLeft)) - rightspaces<-rep(" ",times=7-(YYYLettersToTheRight)) - #add blank spaces if the motif has less than 4 letters to the left/right - motif<-c(leftspaces,YYYmotif,rightspaces) - #save that motif, which is the Y and +/- 4 amino acids, including truncation - motif<-motif[!motif %in% "x"] - motif<-paste(motif, sep="", collapse="") - FTLwtletters<-motif - FTLwtmotifs[i,1]<-FTLwtletters - FTLwtAccessionNumbers[i,1]<-FirstSubstrateSet[i,3] - } - - if(YYYLettersToTheLeft>6 && YYYLettersToTheRight>6){ - motif<-YYYmotif - #add blank spaces if the motif has less than 4 letters to the left/right - motif<-c(leftspaces,YYYmotif,rightspaces) - #save that motif, which is the Y and +/- 4 amino acids, including truncation - motif<-motif[!motif %in% "x"] - motif<-paste(motif, sep="", collapse="") - FTLwtletters<-motif - FTLwtmotifs[i,1]<-FTLwtletters - FTLwtAccessionNumbers[i,1]<-FirstSubstrateSet[i,3] - - - } - - } - - D835Ymotifs=matrix(,nrow = nrow(SecondSubstrateSet),ncol=1) - D835YAccessionNumbers<-matrix(,nrow = nrow(SecondSubstrateSet),ncol = 1) - - for (i in 1:nrow(SecondSubstrateSet)){ - D835letters<-SecondSubstrateSet[i,4:18] - D835letters<-D835letters[D835letters !="XXXXX"] - D835letters<-paste(D835letters, sep="", collapse="") - leftspaces<-c() - rightspaces<-c() - - YYYmotif <- unlist(strsplit(D835letters, split = "")) - 
YYYposition <- match(x = "x", table = YYYmotif) - #position itself tells me how much is to the left of that X by what it's number is. x at position 4 tells me that there are - #just 3 letters to the left of x - - YYYLettersToTheLeft <- YYYposition - 1 - #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is - #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1 - YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 - #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the - #variable the user puts in is - if (YYYLettersToTheLeft < 7 | YYYLettersToTheRight < 7) { - leftspaces<-rep(" ",times=(7-YYYLettersToTheLeft)) - rightspaces<-rep(" ",times=7-(YYYLettersToTheRight)) - #add blank spaces if the motif has less than 4 letters to the left/right - motif<-c(leftspaces,YYYmotif,rightspaces) - #save that motif, which is the Y and +/- 4 amino acids, including truncation - motif<-motif[!motif %in% "x"] - motif<-paste(motif, sep="", collapse="") - D835letters<-motif - D835Ymotifs[i,1]<-D835letters - D835YAccessionNumbers[i,1]<-FirstSubstrateSet[i,3] - } - - if(YYYLettersToTheLeft>6 && YYYLettersToTheRight>6){ - motif<-YYYmotif - #add blank spaces if the motif has less than 4 letters to the left/right - motif<-c(leftspaces,YYYmotif,rightspaces) - #save that motif, which is the Y and +/- 4 amino acids, including truncation - motif<-motif[!motif %in% "x"] - motif<-paste(motif, sep="", collapse="") - D835letters<-motif - D835Ymotifs[i,1]<-D835letters - D835YAccessionNumbers[i,1]<-FirstSubstrateSet[i,3] - } - } - - - ITDmotifs=matrix(,nrow = nrow(ThirdSubstrateSet),ncol=1) - ITDAccessionNumbers<-matrix(,nrow = nrow(ThirdSubstrateSet)) - - for (i in 1:nrow(ThirdSubstrateSet)){ - ITDletters<-ThirdSubstrateSet[i,4:18] - ITDletters<-ITDletters[ITDletters !="XXXXX"] - ITDletters<-paste(ITDletters, sep="", collapse="") 
- YYYmotif <- unlist(strsplit(ITDletters, split = "")) - leftspaces<-c() - rightspaces<-c() - YYYposition <- match(x = "x", table = YYYmotif) - #position itself tells me how much is to the left of that X by what it's number is. x at position 4 tells me that there are - #just 3 letters to the left of x - - YYYLettersToTheLeft <- YYYposition - 1 - #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is - #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1 - YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 - #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the - #variable the user puts in is - if (YYYLettersToTheLeft < 7 | YYYLettersToTheRight < 7) { - leftspaces<-rep(" ",times=(7-YYYLettersToTheLeft)) - rightspaces<-rep(" ",times=7-(YYYLettersToTheRight)) - #add blank spaces if the motif has less than 4 letters to the left/right - motif<-c(leftspaces,YYYmotif,rightspaces) - #save that motif, which is the Y and +/- 4 amino acids, including truncation - motif<-motif[!motif %in% "x"] - motif<-paste(motif, sep="", collapse="") - ITDletters<-motif - ITDmotifs[i,1]<-ITDletters - ITDAccessionNumbers[i,1]<-FirstSubstrateSet[i,3] - } - - if(YYYLettersToTheLeft>6 && YYYLettersToTheRight>6){ - motif<-YYYmotif - #add blank spaces if the motif has less than 4 letters to the left/right - motif<-c(leftspaces,YYYmotif,rightspaces) - #save that motif, which is the Y and +/- 4 amino acids, including truncation - motif<-motif[!motif %in% "x"] - motif<-paste(motif, sep="", collapse="") - ITDletters<-motif - ITDmotifs[i,1]<-ITDletters - ITDAccessionNumbers[i,1]<-FirstSubstrateSet[i,3] - } - } - - } - ############################################################################################################################# - 
############################################################################################################################# - ############################################################################################################################# - ############################################################################################################################# - ############################################################################################################################# - - #now look for either commonality or difference. Actually could you look for both... - - if (Are_You_Looking_For_Commonality=="YES"){ - - columnalheader<-c(as.character(Thirdsubbackfreq[1:36,1])) - columnalheader<-matrix(columnalheader,nrow = 1) - write.table(x=columnalheader, - file=Shared_subbackfreq_table, - quote=FALSE, sep=",", - row.names=FALSE,col.names = FALSE, na="", append=TRUE) - - FirstOverlapmotifs<-c() - for (i in 1:nrow(ITDmotifs)){ - for (j in 1:nrow(D835Ymotifs)){ - if (is.na(ITDmotifs[i,1])!=TRUE&&is.na(D835Ymotifs[j,1])!=TRUE){ - if (ITDmotifs[i,1]==D835Ymotifs[j,1]){ - FirstOverlapmotifs<-c(FirstOverlapmotifs,D835Ymotifs[j,1]) - } - } - } - } - - AllAccessionNumbers<-c() - columnalheader<-c(rep(NA,36)) - FinalMatrix<-matrix(data =columnalheader,nrow = 1) - - FinalMotifs<-c(rep(NA,20)) - FinalMotifsMatrix<-matrix(data = FinalMotifs,nrow = 1) - - - for (l in 1:length(FirstOverlapmotifs)) { - AccessionNumber<-00000000000 - for (k in 1:nrow(FTLwtmotifs)) { - AccessionNumber<-0000000000000 - if(is.na(FTLwtmotifs[k])!=TRUE){ - #I don't remember why, but I felt it necessary to destroy the accession number multiple times to ensure it is - #destroyed immediately after use - if (FirstOverlapmotifs[l] == FTLwtmotifs[k]) { - substratematrix<-FirstSubstrateSet[k,1:20] - substratematrix<-as.matrix(substratematrix,nrow=1) - FinalMotifsMatrix<-rbind(FinalMotifsMatrix,substratematrix) - #when you find a match between the venn diagrams, save the substrate info 
you get into a matrix - - AccessionNumber <- as.character(FirstSubstrateSet[k, 3]) - #then take the accession number - - for (m in 1:ncol(Firstsubbackfreq)) { - AN <- as.character(Firstsubbackfreq[1, m]) - if (grepl(pattern = AN, - x = AccessionNumber, - fixed = TRUE) == TRUE) { - outputmatrix <- as.character(Firstsubbackfreq[, m]) - outputmatrix <- matrix(outputmatrix, nrow = 1) - #with that accession number, find a match in the subbackfreq file and save it here - FinalMatrix<-rbind(FinalMatrix,outputmatrix) - } - } - } - } - } - } - - - TrueMatrix<-FinalMatrix[!duplicated(FinalMatrix),] - TrueFinalMotifsMatrix<-FinalMotifsMatrix[!duplicated(FinalMotifsMatrix),] - - TrueFinalMotifsMatrix<-TrueFinalMotifsMatrix[2:nrow(TrueFinalMotifsMatrix),] - TrueMatrix<-TrueMatrix[2:nrow(TrueMatrix),] - - write.table( - x = TrueFinalMotifsMatrix, - file = Shared_motifs_table, - quote = FALSE, - sep = ",", - row.names = FALSE, - col.names = TRUE, - na = "", - append = TRUE - ) - - - write.table( - x = TrueMatrix, - file = Shared_subbackfreq_table, - quote = FALSE, - sep = ",", - row.names = FALSE, - col.names = FALSE, - na = "", - append = TRUE - ) - } -} -

# ---- Non-overlap ("difference") branch --------------------------------------
# Finds the motifs that occur in exactly one of the three substrate sets and
# writes, per set, the unshared motifs plus the matching columns of that set's
# substrate-background-frequency table.

# Build one motif string per substrate row.
#
# substrate_set: data frame read from a substrates CSV. Columns 4:18 are pasted
#   together to form the motif (cells equal to "XXXXX" are skipped) and
#   column 3 supplies the accession number.
# mode: "all" keeps every row; "full" keeps only rows with at least `flank`
#   letters on each side of the centre; "truncated" keeps only the other rows.
# flank: letters expected on each side of the centre. The centre is located by
#   the lower-case "x" marker; the letter right after it is not counted as
#   flank.
#
# Returns a character vector of motifs named by accession number. Rows that
# `mode` rejects are left out, so no NA motif can reach the output files.
# Stops with the offending row number when a row has no "x" marker.
extract_motifs <- function(substrate_set,
                           mode = c("all", "full", "truncated"),
                           flank = 7L) {
  mode <- match.arg(mode)
  n_rows <- nrow(substrate_set)
  motifs <- rep(NA_character_, n_rows)

  for (i in seq_len(n_rows)) {
    cells <- substrate_set[i, 4:18]
    cells <- cells[cells != "XXXXX"]
    residues <- unlist(strsplit(paste(cells, collapse = ""), split = ""))

    marker <- match("x", residues)
    if (is.na(marker)) {
      stop("Substrate row ", i, " has no 'x' marker in columns 4:18",
           call. = FALSE)
    }

    # A marker at position p has p - 1 letters to its left; to the right we
    # skip both the marker and the letter that follows it.
    n_left <- marker - 1L
    n_right <- length(residues) - marker - 1L
    is_full <- n_left >= flank && n_right >= flank

    if ((mode == "full" && !is_full) || (mode == "truncated" && is_full)) {
      next
    }

    # The marker itself is never part of the stored motif.
    letters_only <- paste(residues[residues != "x"], collapse = "")
    if (is_full) {
      motifs[i] <- letters_only
    } else {
      # Pad truncated motifs with blanks so the centre stays aligned.
      motifs[i] <- paste0(
        strrep(" ", flank - n_left),
        letters_only,
        strrep(" ", flank - n_right)
      )
    }
  }

  # The accession number always comes from the same table as the motif.
  names(motifs) <- as.character(substrate_set[[3]])
  motifs[!is.na(motifs)]
}

# Keep the motifs of `own` that appear in neither of the other two sets,
# one entry per distinct motif (the first occurrence keeps its accession name).
drop_shared_motifs <- function(own, other_a, other_b) {
  own <- own[!own %in% c(other_a, other_b)]
  own[!duplicated(own)]
}

# Collect the background-frequency columns that belong to a set of motifs.
#
# unshared_motifs: character vector named by accession number.
# subbackfreq: background-frequency table; row 1 of each column holds an
#   accession number. A column is selected when that accession number occurs
#   as a literal substring of a motif's accession name.
#
# Returns a character matrix with one row per selected column (duplicates
# removed). The first row is an all-NA placeholder, which the output files
# have always started with; its width follows the table rather than a fixed
# column count, so rbind() does not depend on the table's row count.
collect_background_rows <- function(unshared_motifs, subbackfreq) {
  accession_ids <- vapply(
    seq_len(ncol(subbackfreq)),
    function(m) as.character(subbackfreq[1, m]),
    character(1)
  )

  rows <- list(matrix(NA_character_, nrow = 1, ncol = nrow(subbackfreq)))
  for (accession in names(unshared_motifs)) {
    # which() also discards NA results produced by empty header cells.
    hits <- which(vapply(
      accession_ids, grepl, logical(1),
      x = accession, fixed = TRUE, USE.NAMES = FALSE
    ))
    rows <- c(rows, lapply(hits, function(m) {
      matrix(as.character(subbackfreq[, m]), nrow = 1)
    }))
  }

  background <- do.call(rbind, rows)
  # drop = FALSE keeps a single remaining row as a row when it is written.
  background[!duplicated(background), , drop = FALSE]
}

# Append one set's results to its two output files: the motif/accession pairs
# go to `motifs_file`; `header` followed by `background` go to
# `background_file`.
write_unshared_set <- function(unshared_motifs, background,
                               motifs_file, background_file, header) {
  csv_append <- function(x, file) {
    write.table(
      x = x, file = file,
      quote = FALSE, sep = ",",
      row.names = FALSE, col.names = FALSE, na = "", append = TRUE
    )
  }
  csv_append(
    matrix(c(unshared_motifs, names(unshared_motifs)), ncol = 2),
    motifs_file
  )
  csv_append(header, background_file)
  csv_append(background, background_file)
  invisible(NULL)
}

if (Are_You_Looking_For_Commonality == "NO") {
  # When both switches are "YES" the truncated-only setting wins, because the
  # truncated pass used to run last and overwrite the full-only results.
  motif_mode <- if (TruncatedMotifsOnly_questionmark == "YES") {
    "truncated"
  } else if (FullMotifsOnly_questionmark == "YES") {
    "full"
  } else {
    "all"
  }

  # The result objects keep their original names.
  FTLwtmotifs <- extract_motifs(FirstSubstrateSet, motif_mode)
  D835Ymotifs <- extract_motifs(SecondSubstrateSet, motif_mode)
  ITDmotifs <- extract_motifs(ThirdSubstrateSet, motif_mode)

  FTLwtmotifsFINAL <- drop_shared_motifs(FTLwtmotifs, D835Ymotifs, ITDmotifs)
  ITDmotifsFINAL <- drop_shared_motifs(ITDmotifs, D835Ymotifs, FTLwtmotifs)
  D835YmotifsFINAL <- drop_shared_motifs(D835Ymotifs, FTLwtmotifs, ITDmotifs)

  FTLFinalMatrix <- collect_background_rows(FTLwtmotifsFINAL, Firstsubbackfreq)
  ITDFinalMatrix <- collect_background_rows(ITDmotifsFINAL, Thirdsubbackfreq)
  D835YFinalMatrix <- collect_background_rows(D835YmotifsFINAL,
                                              Secondsubbackfreq)

  # All three background files take their header labels from the first
  # column of the third table.
  columnalheader <- matrix(
    c("Accession Numbers", as.character(Thirdsubbackfreq[1:35, 1])),
    nrow = 1
  )

  write_unshared_set(FTLwtmotifsFINAL, FTLFinalMatrix,
                     First_unshared_motifs_table, First_unshared_subbackfreq,
                     columnalheader)
  write_unshared_set(D835YmotifsFINAL, D835YFinalMatrix,
                     Second_unshared_motifs_table, Second_unshared_subbackfreq,
                     columnalheader)
  write_unshared_set(ITDmotifsFINAL, ITDFinalMatrix,
                     Third_unshared_motifs_table, Third_unshared_subbackfreq,
                     columnalheader)
}

-# if (Are_You_Looking_For_Commonality=="NO"){ -# -# -# FTLwtmotifsFULLMATRIX<-cbind(FTLwtmotifs,FTLwtAccessionNumbers) -# ITDmotifsFULLMATRIX<-cbind(ITDmotifs,ITDAccessionNumbers) -# D835YmotifsFULLMATRIX<-cbind(D835Ymotifs,D835YAccessionNumbers) -# # ?duplicated -# -# FTLwtmotifsFINAL<-FTLwtmotifs[!FTLwtmotifs %in% D835Ymotifs] -# FTLwtmotifsFINAL<-FTLwtmotifsFINAL[!FTLwtmotifsFINAL %in% ITDmotifs] -# FTLwtmotifsFINAL<-matrix(data=FTLwtmotifsFINAL, ncol = 1) -# -# ITDmotifsFINAL<-ITDmotifs[!ITDmotifs %in% D835Ymotifs] -# ITDmotifsFINAL<-ITDmotifsFINAL[!ITDmotifsFINAL %in% FTLwtmotifs] -# ITDmotifsFINAL<-matrix(data=ITDmotifsFINAL, ncol = 1) -# -# D835YmotifsFINAL<-D835Ymotifs[!D835Ymotifs %in% FTLwtmotifs] -# D835YmotifsFINAL<-D835YmotifsFINAL[!D835YmotifsFINAL %in% ITDmotifs] -#
D835YmotifsFINAL<-matrix(data=D835YmotifsFINAL, ncol = 1) -# -# FTLnondupeAccessionNumbers<-c() -# -# for (z in 1:nrow(FTLwtmotifsFINAL)) { -# for (w in 1:nrow(FTLwtmotifsFULLMATRIX)) { -# if (is.na(FTLwtmotifsFULLMATRIX[w,1])!=TRUE) -# if (FTLwtmotifsFINAL[z]==FTLwtmotifsFULLMATRIX[w,1]){ -# FTLnondupeAccessionNumbers<-c(FTLnondupeAccessionNumbers,FTLwtmotifsFULLMATRIX[w,2]) -# } -# } -# } -# -# -# #find accession numbers here, put a matrix of those things, amino acid %, but only after I've unduped them -# FTLnondupeAccessionNumbers<-FTLnondupeAccessionNumbers[!duplicated(FTLnondupeAccessionNumbers)] -# -# columnalheader<-c(rep(NA,35)) -# FTLFinalMatrix<-matrix(data =columnalheader,nrow = 1) -# -# for (k in 1:length(FTLnondupeAccessionNumbers)) { -# #I don't remember why, but I felt it necessary to destroy the accession number multiple times to ensure it is -# #destroyed immediately after use -# for (m in 1:ncol(Firstsubbackfreq)) { -# AN <- as.character(Firstsubbackfreq[1, m]) -# if (grepl(pattern = AN, -# x = FTLnondupeAccessionNumbers[k], -# fixed = TRUE) == TRUE) { -# outputmatrix <- as.character(Firstsubbackfreq[, m]) -# outputmatrix <- matrix(outputmatrix, nrow = 1) -# #with that accession number, find a match in the subbackfreq file and save it here -# FTLFinalMatrix<-rbind(FTLFinalMatrix,outputmatrix) -# } -# } -# } -# ITDnondupeAccessionNumbers<-c() -# -# for (z in 1:nrow(ITDmotifsFINAL)) { -# for (w in 1:nrow(ITDmotifsFULLMATRIX)) { -# if (is.na(ITDmotifsFULLMATRIX[w,1])!=TRUE) -# if (ITDmotifsFINAL[z]==ITDmotifsFULLMATRIX[w,1]){ -# ITDnondupeAccessionNumbers<-c(ITDnondupeAccessionNumbers,ITDmotifsFULLMATRIX[w,2]) -# } -# } -# } -# -# -# #find accession numbers here, put a matrix of those things, amino acid %, but only after I've unduped them -# ITDnondupeAccessionNumbers<-ITDnondupeAccessionNumbers[!duplicated(ITDnondupeAccessionNumbers)] -# -# columnalheader<-c(rep(NA,35)) -# ITDFinalMatrix<-matrix(data =columnalheader,nrow = 1) -# -# for (k in 
1:length(ITDnondupeAccessionNumbers)) { -# #I don't remember why, but I felt it necessary to destroy the accession number multiple times to ensure it is -# #destroyed immediately after use -# for (m in 1:ncol(Thirdsubbackfreq)) { -# AN <- as.character(Thirdsubbackfreq[1, m]) -# if (grepl(pattern = AN, -# x = ITDnondupeAccessionNumbers[k], -# fixed = TRUE) == TRUE) { -# outputmatrix <- as.character(Thirdsubbackfreq[, m]) -# outputmatrix <- matrix(outputmatrix, nrow = 1) -# #with that accession number, find a match in the subbackfreq file and save it here -# ITDFinalMatrix<-rbind(ITDFinalMatrix,outputmatrix) -# } -# } -# } -# -# -# D835YnondupeAccessionNumbers<-c() -# -# for (z in 1:nrow(D835YmotifsFINAL)) { -# for (w in 1:nrow(D835YmotifsFULLMATRIX)) { -# if (is.na(D835YmotifsFULLMATRIX[w,1])!=TRUE) -# if (D835YmotifsFINAL[z]==D835YmotifsFULLMATRIX[w,1]){ -# D835YnondupeAccessionNumbers<-c(D835YnondupeAccessionNumbers,D835YmotifsFULLMATRIX[w,2]) -# } -# } -# } -# -# -# #find accession numbers here, put a matrix of those things, amino acid %, but only after I've unduped them -# D835YnondupeAccessionNumbers<-D835YnondupeAccessionNumbers[!duplicated(D835YnondupeAccessionNumbers)] -# -# columnalheader<-c(rep(NA,35)) -# D835YFinalMatrix<-matrix(data =columnalheader,nrow = 1) -# -# for (k in 1:length(D835YnondupeAccessionNumbers)) { -# #I don't remember why, but I felt it necessary to destroy the accession number multiple times to ensure it is -# #destroyed immediately after use -# for (m in 1:ncol(Secondsubbackfreq)) { -# AN <- as.character(Secondsubbackfreq[1, m]) -# if (grepl(pattern = AN, -# x = D835YnondupeAccessionNumbers[k], -# fixed = TRUE) == TRUE) { -# outputmatrix <- as.character(Secondsubbackfreq[, m]) -# outputmatrix <- matrix(outputmatrix, nrow = 1) -# #with that accession number, find a match in the subbackfreq file and save it here -# D835YFinalMatrix<-rbind(D835YFinalMatrix,outputmatrix) -# } -# } -# } -# -# -# -# # 
FinalFTLmotifs<-FTLwtmotifsFINAL[!duplicated(FTLwtmotifsFINAL)] -# # FinalFTLAccessionNumbers<-FTLnondupeAccessionNumbers[!duplicated(FTLnondupeAccessionNumbers)] -# # necessaryNAs<-rep(NA,times=(length(FinalFTLmotifs)-length(FinalFTLAccessionNumbers))) -# # FinalFTLAccessionNumbers<-c(FinalFTLAccessionNumbers,necessaryNAs) -# # TRUEFTLoutputmatrix<-cbind(FinalFTLmotifs,FinalFTLAccessionNumbers) -# # TRUEFTLoutputmatrix -# -# write.table(x=FTLwtmotifsFINAL, -# file=First_unshared_motifs_table, -# quote=FALSE, sep=",", -# row.names=FALSE,col.names = FALSE, na="", append=TRUE) -# -# columnalheader<-c("Accession Numbers",as.character(Thirdsubbackfreq[1:35,1])) -# columnalheader<-matrix(columnalheader,nrow = 1) -# write.table(x=columnalheader, -# file=First_unshared_subbackfreq, -# quote=FALSE, sep=",", -# row.names=FALSE,col.names = FALSE, na="", append=TRUE) -# -# write.table(x=FTLFinalMatrix, -# file=First_unshared_subbackfreq, -# quote=FALSE, sep=",", -# row.names=FALSE,col.names = FALSE, na="", append=TRUE) -# -# -# -# -# -# -# -# -# -# -# -# -# write.table(x=D835YmotifsFINAL, -# file=Second_unshared_motifs_table, -# quote=FALSE, sep=",", -# row.names=FALSE,col.names = FALSE, na="", append=TRUE) -# -# columnalheader<-c("Accession Numbers",as.character(Thirdsubbackfreq[1:35,1])) -# columnalheader<-matrix(columnalheader,nrow = 1) -# write.table(x=columnalheader, -# file=Second_unshared_subbackfreq, -# quote=FALSE, sep=",", -# row.names=FALSE,col.names = FALSE, na="", append=TRUE) -# -# write.table(x=D835YFinalMatrix, -# file=Second_unshared_subbackfreq, -# quote=FALSE, sep=",", -# row.names=FALSE,col.names = FALSE, na="", append=TRUE) -# -# -# -# -# -# -# -# -# -# -# -# -# -# -# -# -# write.table(x=ITDmotifsFINAL, -# file=Third_unshared_motifs_table, -# quote=FALSE, sep=",", -# row.names=FALSE,col.names = FALSE, na="", append=TRUE) -# -# columnalheader<-c("Accession Numbers",as.character(Thirdsubbackfreq[1:35,1])) -# columnalheader<-matrix(columnalheader,nrow = 1) 
-# write.table(x=columnalheader, -# file=Third_unshared_subbackfreq, -# quote=FALSE, sep=",", -# row.names=FALSE,col.names = FALSE, na="", append=TRUE) -# -# write.table(x=ITDFinalMatrix, -# file=Third_unshared_subbackfreq, -# quote=FALSE, sep=",", -# row.names=FALSE,col.names = FALSE, na="", append=TRUE) -# -# -# -# -# -# -# } diff -r beba4066121e -r dff99bed3f56 all stuff/Difference finder MADE 7 TO 7 4-18-2019.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/all stuff/Difference finder MADE 7 TO 7 4-18-2019.R Fri Apr 19 10:57:28 2019 -0400 @@ -0,0 +1,351 @@ +#I should make an SOP for this. Problems we encountered: no x in the xY motif, and the kilodemon +#the output files have both Y and xY, they shouldn't why is that happening? make it not happen +#make sure that accession numbers stay locked to each motif, somehow +#output should look just like the KALIP input + +#ff you want ONLY FULL MOTIFS, put "YES" here, please use all caps +FullMotifsOnly_questionmark<-"NO" +#If you want ONLY TRUNCATED MOTIFS, put "YES" here, please use all caps +TruncatedMotifsOnly_questionmark<-"NO" +#if you want to find the overlap, put a "YES" here (all caps), if you want to find the non-overlap, put "NO" (all caps) +Are_You_Looking_For_Commonality<-"NO" + + +#put the names of your input files here +FirstSubstrateSet<- read.csv("Galaxy1071-(17A_TiO2_Substrates.csv).csv", stringsAsFactors=FALSE) +Firstsubbackfreq<- read.csv("Galaxy1072-(17A_TiO2_SubstrateBackgroundFrequency.csv).csv", header=FALSE, stringsAsFactors=FALSE) + +SecondSubstrateSet<- read.csv("Galaxy1073-(17A_FeNTA_Substrates.csv) (1).csv", stringsAsFactors=FALSE) +Secondsubbackfreq<- read.csv("Galaxy1074-(17A_FeNTA_SubstrateBackgroundFrequency.csv).csv", header=FALSE, stringsAsFactors=FALSE) + +# ThirdSubstrateSet<- read.csv("Galaxy69-BTK_PLUS_R3_Substrates.csv", stringsAsFactors=FALSE) +# Thirdsubbackfreq<- read.csv("Galaxy70-BTK_PLUS_R3_SubstrateBackgroundFrequency.csv", header=FALSE, stringsAsFactors=FALSE) + 
+#then put the names of your output files here +# Shared_motifs_table<-"180719_GALAXY-BTK-plus-rep-OVLP-7to7-substrates.csv" +# Shared_subbackfreq_table<-"180719_GALAXY-BTK-plus-rep-OVLP-7to7-SubBackFreq.csv" + +# Shared_motifs_table<-"Shared motifs 7-27-17.csv" +# Shared_subbackfreq_table<-"SubstrateBackgrounFrequency-for-shared-motifs 4 7-27-17.csv" + +First_unshared_motifs_table<-"17A TIo2 without FeNTA.csv" +First_unshared_subbackfreq<-"17A TIo2 without FeNTA sbf.csv" + +Second_unshared_motifs_table<-"17A FeNTA without TIo2.csv" +Second_unshared_subbackfreq<-"17A FeNTA without TIo2 sbf.csv" + +# Third_unshared_motifs_table<-"R3 subs.csv" +# Third_unshared_subbackfreq<-"R3 SBF.csv" + +#final note, this code is going to be unworkable if you want to make a Venn diagram of more than 3 circles. I think I'll poke around +#other languages to see if any of them can do it. +#################################################################################################################################### + + + + + +# grepl(pattern = "S", x=asdf, ignore.case = TRUE) + +FirstCentralLetters<-FirstSubstrateSet[,11] +SecondCentralLetters<-SecondSubstrateSet[,11] + +FirstEsses<-sapply(FirstCentralLetters, grepl, pattern="S", ignore.case=TRUE) +FirstTees<-sapply(FirstCentralLetters, grepl, pattern="T", ignore.case=TRUE) +FirstWys<-sapply(FirstCentralLetters, grepl, pattern="Y", ignore.case=TRUE) + +SecondEsses<-sapply(SecondCentralLetters, grepl, pattern="S", ignore.case=TRUE) +SecondTees<-sapply(SecondCentralLetters, grepl, pattern="T", ignore.case=TRUE) +SecondWys<-sapply(SecondCentralLetters, grepl, pattern="Y", ignore.case=TRUE) + +FirstCentralLetters<-replace(FirstCentralLetters,FirstEsses,"xS") +FirstCentralLetters<-replace(FirstCentralLetters,FirstTees,"xT") +FirstCentralLetters<-replace(FirstCentralLetters,FirstWys,"xY") + +SecondCentralLetters<-replace(SecondCentralLetters,SecondEsses,"xS") +SecondCentralLetters<-replace(SecondCentralLetters,SecondTees,"xT") 
+SecondCentralLetters<-replace(SecondCentralLetters,SecondWys,"xY") + +FirstCentralLetters->FirstSubstrateSet[,11] +SecondCentralLetters->SecondSubstrateSet[,11] + +#################################################################################################################################### +#################################################################################################################################### +# better version of this code written in C: what happens when two kinases share a motif, but they found that motif in two +# separate proteins thus two separate accession numbers? +# It should actually output the shared motif and BOTH accession numbers. Right now it does not, it only maps out the second +# accession number. So that needs to be fixed BUT you need to keep the commonality between a motif and its accession number +#################################################################################################################################### +#################################################################################################################################### +#################################################################################################################################### +#################################################################################################################################### + +#Create the motif sets, deciding wether or not you're looking for truncated or full here +#full only + + +if (Are_You_Looking_For_Commonality=="NO"){ + + ############################################### + #ALL motifs, full and truncated + + if (FullMotifsOnly_questionmark!="YES"&&TruncatedMotifsOnly_questionmark!="YES"){ + FTLwtmotifs=matrix(,nrow = nrow(FirstSubstrateSet),ncol=1) + FTLwtAccessionNumbers=matrix(,nrow = nrow(FirstSubstrateSet),ncol=1) + + for (i in 1:nrow(FirstSubstrateSet)){ + FTLwtletters<-FirstSubstrateSet[i,4:18] + FTLwtletters<-FTLwtletters[FTLwtletters !="XXXXX"] + 
FTLwtletters<-paste(FTLwtletters, sep="", collapse="") + leftspaces<-c() + rightspaces<-c() + + YYYmotif <- unlist(strsplit(FTLwtletters, split = "")) + YYYposition <- match(x = "x", table = YYYmotif) + #position itself tells me how much is to the left of that X by what it's number is. x at position 4 tells me that there are + #just 3 letters to the left of x + + YYYLettersToTheLeft <- YYYposition - 1 + #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is + #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1 + YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 + #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the + #variable the user puts in is + + + if (YYYLettersToTheLeft < 7 | YYYLettersToTheRight < 7) { + leftspaces<-rep(" ",times=(7-YYYLettersToTheLeft)) + rightspaces<-rep(" ",times=7-(YYYLettersToTheRight)) + #add blank spaces if the motif has less than 4 letters to the left/right + motif<-c(leftspaces,YYYmotif,rightspaces) + #save that motif, which is the Y and +/- 4 amino acids, including truncation + motif<-motif[!motif %in% "x"] + motif<-paste(motif, sep="", collapse="") + FTLwtletters<-motif + FTLwtmotifs[i,1]<-FTLwtletters + FTLwtAccessionNumbers[i,1]<-FirstSubstrateSet[i,3] + } + + if(YYYLettersToTheLeft>6 && YYYLettersToTheRight>6){ + motif<-YYYmotif + #add blank spaces if the motif has less than 4 letters to the left/right + motif<-c(leftspaces,YYYmotif,rightspaces) + #save that motif, which is the Y and +/- 4 amino acids, including truncation + motif<-motif[!motif %in% "x"] + motif<-paste(motif, sep="", collapse="") + FTLwtletters<-motif + FTLwtmotifs[i,1]<-FTLwtletters + FTLwtAccessionNumbers[i,1]<-FirstSubstrateSet[i,3] + + + } + + } + + D835Ymotifs=matrix(,nrow = nrow(SecondSubstrateSet),ncol=1) + D835YAccessionNumbers<-matrix(,nrow = nrow(SecondSubstrateSet),ncol = 1) + 
+ for (i in 1:nrow(SecondSubstrateSet)){ + D835letters<-SecondSubstrateSet[i,4:18] + D835letters<-D835letters[D835letters !="XXXXX"] + D835letters<-paste(D835letters, sep="", collapse="") + leftspaces<-c() + rightspaces<-c() + + YYYmotif <- unlist(strsplit(D835letters, split = "")) + YYYposition <- match(x = "x", table = YYYmotif) + #position itself tells me how much is to the left of that X by what it's number is. x at position 4 tells me that there are + #just 3 letters to the left of x + + YYYLettersToTheLeft <- YYYposition - 1 + #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is + #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1 + YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 + #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the + #variable the user puts in is + if (YYYLettersToTheLeft < 7 | YYYLettersToTheRight < 7) { + leftspaces<-rep(" ",times=(7-YYYLettersToTheLeft)) + rightspaces<-rep(" ",times=7-(YYYLettersToTheRight)) + #add blank spaces if the motif has less than 4 letters to the left/right + motif<-c(leftspaces,YYYmotif,rightspaces) + #save that motif, which is the Y and +/- 4 amino acids, including truncation + motif<-motif[!motif %in% "x"] + motif<-paste(motif, sep="", collapse="") + D835letters<-motif + D835Ymotifs[i,1]<-D835letters + D835YAccessionNumbers[i,1]<-SecondSubstrateSet[i,3] + } + + if(YYYLettersToTheLeft>6 && YYYLettersToTheRight>6){ + motif<-YYYmotif + #add blank spaces if the motif has less than 4 letters to the left/right + motif<-c(leftspaces,YYYmotif,rightspaces) + #save that motif, which is the Y and +/- 4 amino acids, including truncation + motif<-motif[!motif %in% "x"] + motif<-paste(motif, sep="", collapse="") + D835letters<-motif + D835Ymotifs[i,1]<-D835letters + D835YAccessionNumbers[i,1]<-SecondSubstrateSet[i,3] + } + } + + 
names(FTLwtmotifs)<-FTLwtAccessionNumbers + names(D835Ymotifs)<-D835YAccessionNumbers + + } + + + FTLwtmotifsFINAL<-FTLwtmotifs[!FTLwtmotifs %in% D835Ymotifs] + FTLwtmotifsFINAL<-FTLwtmotifsFINAL[!duplicated(FTLwtmotifsFINAL)] + + D835YmotifsFINAL<-D835Ymotifs[!D835Ymotifs %in% FTLwtmotifs] + D835YmotifsFINAL<-D835YmotifsFINAL[!duplicated(D835YmotifsFINAL)] + + + columnalheader<-c(rep(NA,36)) + FTLFinalMatrix<-matrix(data =columnalheader,nrow = 1) + + for (k in 1:length(FTLwtmotifsFINAL)) { + AN<-00000 + #I don't remember why, but I felt it necessary to destroy the accession number multiple times to ensure it is + #destroyed immediately after use + for (m in 1:ncol(Firstsubbackfreq)) { + AN <- as.character(Firstsubbackfreq[1, m]) + if (grepl(pattern = AN, + x = names(FTLwtmotifsFINAL[k]), + fixed = TRUE) == TRUE) { + outputmatrix <- as.character(Firstsubbackfreq[, m]) + outputmatrix <- matrix(outputmatrix, nrow = 1) + #with that accession number, find a match in the subbackfreq file and save it here + FTLFinalMatrix<-rbind(FTLFinalMatrix,outputmatrix) + } + } + } + FTLFinalMatrix<-FTLFinalMatrix[!duplicated(FTLFinalMatrix),] + FTLFinalMatrix<-FTLFinalMatrix[2:nrow(FTLFinalMatrix),] + + columnalheader<-c(rep(NA,36)) + D835YFinalMatrix<-matrix(data =columnalheader,nrow = 1) + + for (k in 1:length(D835YmotifsFINAL)) { + #I don't remember why, but I felt it necessary to destroy the accession number multiple times to ensure it is + #destroyed immediately after use + for (m in 1:ncol(Secondsubbackfreq)) { + AN <- as.character(Secondsubbackfreq[1, m]) + if (grepl(pattern = AN, + x = names(D835YmotifsFINAL[k]), + fixed = TRUE) == TRUE) { + outputmatrix <- as.character(Secondsubbackfreq[, m]) + outputmatrix <- matrix(outputmatrix, nrow = 1) + #with that accession number, find a match in the subbackfreq file and save it here + D835YFinalMatrix<-rbind(D835YFinalMatrix,outputmatrix) + } + } + } + D835YFinalMatrix<-D835YFinalMatrix[!duplicated(D835YFinalMatrix),] + 
D835YFinalMatrix<-D835YFinalMatrix[2:nrow(D835YFinalMatrix),] + + FTLoutputmatrix<-matrix(data=c(FTLwtmotifsFINAL,names(FTLwtmotifsFINAL)),ncol = 2) + + + FLTheader<-c("Substrate","Species","Reference","-7","-6","-5","-4","-3","-2","-1","0","1","2","3","4","5","6","7","Phosphite") + # FLTheader<-unlist(FLTheader) + lefthandFLT<-matrix(data = rep(NA,times=2*nrow(FTLoutputmatrix)),nrow=nrow(FTLoutputmatrix)) + righthandFLT<-matrix(data = rep(NA,times=1*nrow(FTLoutputmatrix)),nrow=nrow(FTLoutputmatrix)) + FLTaccessionset<-FTLoutputmatrix[,2] + FTLmeat<-sapply(FTLoutputmatrix[,1], strsplit, "") + FTLmeat<-sapply(FTLmeat, unlist) + colnames(FTLmeat)<-NULL + FTLmeat<-t(FTLmeat) + + FTLoutputmatrix2<-cbind(lefthandFLT,FLTaccessionset,FTLmeat,righthandFLT) + colnames(FTLoutputmatrix2)<-NULL + rownames(FTLoutputmatrix2)<-NULL + colnames(FLTheader)<-NULL + rownames(FLTheader)<-NULL + + + FirstCentralLettersAGAIN<-FTLoutputmatrix2[,11] + + FirstEsses<-sapply(FirstCentralLettersAGAIN, grepl, pattern="S", ignore.case=TRUE) + FirstTees<-sapply(FirstCentralLettersAGAIN, grepl, pattern="T", ignore.case=TRUE) + FirstWys<-sapply(FirstCentralLettersAGAIN, grepl, pattern="Y", ignore.case=TRUE) + + FirstCentralLettersAGAIN<-replace(FirstCentralLettersAGAIN,FirstEsses,"xS") + FirstCentralLettersAGAIN<-replace(FirstCentralLettersAGAIN,FirstTees,"xT") + FirstCentralLettersAGAIN<-replace(FirstCentralLettersAGAIN,FirstWys,"xY") + + FirstCentralLettersAGAIN->FTLoutputmatrix2[,11] + + FTLoutputmatrix2<-rbind(FLTheader,FTLoutputmatrix2) + + write.table(x=FTLoutputmatrix2, + file=First_unshared_motifs_table, + quote=FALSE, sep=",", + row.names=FALSE,col.names = FALSE, na="", append=TRUE) + + columnalheader<-c("Accession Numbers",as.character(Firstsubbackfreq[1:35,1])) + columnalheader<-matrix(columnalheader,nrow = 1) + write.table(x=columnalheader, + file=First_unshared_subbackfreq, + quote=FALSE, sep=",", + row.names=FALSE,col.names = FALSE, na="", append=TRUE) + + 
write.table(x=FTLFinalMatrix, + file=First_unshared_subbackfreq, + quote=FALSE, sep=",", + row.names=FALSE,col.names = FALSE, na="", append=TRUE) + + ############################################################################################################ + + D835Youtputmatrix<-matrix(data=c(D835YmotifsFINAL,names(D835YmotifsFINAL)),ncol = 2) + + D835Yheader<-c("Substrate","Species","Reference","-7","-6","-5","-4","-3","-2","-1","0","1","2","3","4","5","6","7","Phosphite") + # D835Yheader<-unlist(D835Yheader) + lefthandD835<-matrix(data = rep(NA,times=2*nrow(D835Youtputmatrix)),nrow=nrow(D835Youtputmatrix)) + righthandD835<-matrix(data = rep(NA,times=1*nrow(D835Youtputmatrix)),nrow=nrow(D835Youtputmatrix)) + D835Yaset<-D835Youtputmatrix[,2] + D835meat<-sapply(D835Youtputmatrix[,1], strsplit, "") + D835meat<-sapply(D835meat, unlist) + colnames(D835meat)<-NULL + D835meat<-t(D835meat) + + D835Youtputmatrix2<-cbind(lefthandD835,D835Yaset,D835meat,righthandD835) + colnames(D835Youtputmatrix2)<-NULL + rownames(D835Youtputmatrix2)<-NULL + colnames(D835Yheader)<-NULL + rownames(D835Yheader)<-NULL + + + SecondCentralLettersAGAIN<-D835Youtputmatrix2[,11] + + SecondEsses<-sapply(SecondCentralLettersAGAIN, grepl, pattern="S", ignore.case=TRUE) + SecondTees<-sapply(SecondCentralLettersAGAIN, grepl, pattern="T", ignore.case=TRUE) + SecondWys<-sapply(SecondCentralLettersAGAIN, grepl, pattern="Y", ignore.case=TRUE) + + SecondCentralLettersAGAIN<-replace(SecondCentralLettersAGAIN,SecondEsses,"xS") + SecondCentralLettersAGAIN<-replace(SecondCentralLettersAGAIN,SecondTees,"xT") + SecondCentralLettersAGAIN<-replace(SecondCentralLettersAGAIN,SecondWys,"xY") + + SecondCentralLettersAGAIN->D835Youtputmatrix2[,11] + + D835Youtputmatrix2<-rbind(D835Yheader,D835Youtputmatrix2) + + write.table(x=D835Youtputmatrix2, + file=Second_unshared_motifs_table, + quote=FALSE, sep=",", + row.names=FALSE,col.names = FALSE, na="", append=TRUE) + + columnalheader<-c("Accession 
Numbers",as.character(Firstsubbackfreq[1:35,1])) + columnalheader<-matrix(columnalheader,nrow = 1) + write.table(x=columnalheader, + file=Second_unshared_subbackfreq, + quote=FALSE, sep=",", + row.names=FALSE,col.names = FALSE, na="", append=TRUE) + + write.table(x=D835YFinalMatrix, + file=Second_unshared_subbackfreq, + quote=FALSE, sep=",", + row.names=FALSE,col.names = FALSE, na="", append=TRUE) +} + diff -r beba4066121e -r dff99bed3f56 all stuff/Difference finderMADE 7 TO 7 1-15-2019.R --- a/all stuff/Difference finderMADE 7 TO 7 1-15-2019.R Wed Jan 16 14:33:39 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,351 +0,0 @@ -#I should make an SOP for this. Problems we encountered: no x in the xY motif, and the kilodemon -#the output files have both Y and xY, they shouldn't why is that happening? make it not happen -#make sure that accession numbers stay locked to each motif, somehow -#output should look just like the KALIP input - -#ff you want ONLY FULL MOTIFS, put "YES" here, please use all caps -FullMotifsOnly_questionmark<-"NO" -#If you want ONLY TRUNCATED MOTIFS, put "YES" here, please use all caps -TruncatedMotifsOnly_questionmark<-"NO" -#if you want to find the overlap, put a "YES" here (all caps), if you want to find the non-overlap, put "NO" (all caps) -Are_You_Looking_For_Commonality<-"NO" - - -#put the names of your input files here -FirstSubstrateSet<- read.csv("S1.csv", stringsAsFactors=FALSE) -Firstsubbackfreq<- read.csv("SBF1.csv", header=FALSE, stringsAsFactors=FALSE) - -SecondSubstrateSet<- read.csv("S2.csv", stringsAsFactors=FALSE) -Secondsubbackfreq<- read.csv("SBF2.csv", header=FALSE, stringsAsFactors=FALSE) - -# ThirdSubstrateSet<- read.csv("Galaxy69-BTK_PLUS_R3_Substrates.csv", stringsAsFactors=FALSE) -# Thirdsubbackfreq<- read.csv("Galaxy70-BTK_PLUS_R3_SubstrateBackgroundFrequency.csv", header=FALSE, stringsAsFactors=FALSE) - -#then put the names of your output files here -# 
Shared_motifs_table<-"180719_GALAXY-BTK-plus-rep-OVLP-7to7-substrates.csv" -# Shared_subbackfreq_table<-"180719_GALAXY-BTK-plus-rep-OVLP-7to7-SubBackFreq.csv" - -# Shared_motifs_table<-"Shared motifs 7-27-17.csv" -# Shared_subbackfreq_table<-"SubstrateBackgrounFrequency-for-shared-motifs 4 7-27-17.csv" - -First_unshared_motifs_table<-"1RS.csv" -First_unshared_subbackfreq<-"1RSBF.csv" - -Second_unshared_motifs_table<-"2RS.csv" -Second_unshared_subbackfreq<-"2RSBF.csv" - -# Third_unshared_motifs_table<-"R3 subs.csv" -# Third_unshared_subbackfreq<-"R3 SBF.csv" - -#final note, this code is going to be unworkable if you want to make a Venn diagram of more than 3 circles. I think I'll poke around -#other languages to see if any of them can do it. -#################################################################################################################################### - - - - - -# grepl(pattern = "S", x=asdf, ignore.case = TRUE) - -FirstCentralLetters<-FirstSubstrateSet[,11] -SecondCentralLetters<-SecondSubstrateSet[,11] - -FirstEsses<-sapply(FirstCentralLetters, grepl, pattern="S", ignore.case=TRUE) -FirstTees<-sapply(FirstCentralLetters, grepl, pattern="T", ignore.case=TRUE) -FirstWys<-sapply(FirstCentralLetters, grepl, pattern="Y", ignore.case=TRUE) - -SecondEsses<-sapply(SecondCentralLetters, grepl, pattern="S", ignore.case=TRUE) -SecondTees<-sapply(SecondCentralLetters, grepl, pattern="T", ignore.case=TRUE) -SecondWys<-sapply(SecondCentralLetters, grepl, pattern="Y", ignore.case=TRUE) - -FirstCentralLetters<-replace(FirstCentralLetters,FirstEsses,"xS") -FirstCentralLetters<-replace(FirstCentralLetters,FirstTees,"xT") -FirstCentralLetters<-replace(FirstCentralLetters,FirstWys,"xY") - -SecondCentralLetters<-replace(SecondCentralLetters,SecondEsses,"xS") -SecondCentralLetters<-replace(SecondCentralLetters,SecondTees,"xT") -SecondCentralLetters<-replace(SecondCentralLetters,SecondWys,"xY") - -FirstCentralLetters->FirstSubstrateSet[,11] 
-SecondCentralLetters->SecondSubstrateSet[,11] - -#################################################################################################################################### -#################################################################################################################################### -# better version of this code written in C: what happens when two kinases share a motif, but they found that motif in two -# separate proteins thus two separate accession numbers? -# It should actually output the shared motif and BOTH accession numbers. Right now it does not, it only maps out the second -# accession number. So that needs to be fixed BUT you need to keep the commonality between a motif and its accession number -#################################################################################################################################### -#################################################################################################################################### -#################################################################################################################################### -#################################################################################################################################### - -#Create the motif sets, deciding wether or not you're looking for truncated or full here -#full only - - -if (Are_You_Looking_For_Commonality=="NO"){ - - ############################################### - #ALL motifs, full and truncated - - if (FullMotifsOnly_questionmark!="YES"&&TruncatedMotifsOnly_questionmark!="YES"){ - FTLwtmotifs=matrix(,nrow = nrow(FirstSubstrateSet),ncol=1) - FTLwtAccessionNumbers=matrix(,nrow = nrow(FirstSubstrateSet),ncol=1) - - for (i in 1:nrow(FirstSubstrateSet)){ - FTLwtletters<-FirstSubstrateSet[i,4:18] - FTLwtletters<-FTLwtletters[FTLwtletters !="XXXXX"] - FTLwtletters<-paste(FTLwtletters, sep="", collapse="") - leftspaces<-c() - rightspaces<-c() - - YYYmotif <- 
unlist(strsplit(FTLwtletters, split = "")) - YYYposition <- match(x = "x", table = YYYmotif) - #position itself tells me how much is to the left of that X by what it's number is. x at position 4 tells me that there are - #just 3 letters to the left of x - - YYYLettersToTheLeft <- YYYposition - 1 - #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is - #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1 - YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 - #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the - #variable the user puts in is - - - if (YYYLettersToTheLeft < 7 | YYYLettersToTheRight < 7) { - leftspaces<-rep(" ",times=(7-YYYLettersToTheLeft)) - rightspaces<-rep(" ",times=7-(YYYLettersToTheRight)) - #add blank spaces if the motif has less than 4 letters to the left/right - motif<-c(leftspaces,YYYmotif,rightspaces) - #save that motif, which is the Y and +/- 4 amino acids, including truncation - motif<-motif[!motif %in% "x"] - motif<-paste(motif, sep="", collapse="") - FTLwtletters<-motif - FTLwtmotifs[i,1]<-FTLwtletters - FTLwtAccessionNumbers[i,1]<-FirstSubstrateSet[i,3] - } - - if(YYYLettersToTheLeft>6 && YYYLettersToTheRight>6){ - motif<-YYYmotif - #add blank spaces if the motif has less than 4 letters to the left/right - motif<-c(leftspaces,YYYmotif,rightspaces) - #save that motif, which is the Y and +/- 4 amino acids, including truncation - motif<-motif[!motif %in% "x"] - motif<-paste(motif, sep="", collapse="") - FTLwtletters<-motif - FTLwtmotifs[i,1]<-FTLwtletters - FTLwtAccessionNumbers[i,1]<-FirstSubstrateSet[i,3] - - - } - - } - - D835Ymotifs=matrix(,nrow = nrow(SecondSubstrateSet),ncol=1) - D835YAccessionNumbers<-matrix(,nrow = nrow(SecondSubstrateSet),ncol = 1) - - for (i in 1:nrow(SecondSubstrateSet)){ - D835letters<-SecondSubstrateSet[i,4:18] - 
D835letters<-D835letters[D835letters !="XXXXX"] - D835letters<-paste(D835letters, sep="", collapse="") - leftspaces<-c() - rightspaces<-c() - - YYYmotif <- unlist(strsplit(D835letters, split = "")) - YYYposition <- match(x = "x", table = YYYmotif) - #position itself tells me how much is to the left of that X by what it's number is. x at position 4 tells me that there are - #just 3 letters to the left of x - - YYYLettersToTheLeft <- YYYposition - 1 - #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is - #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1 - YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 - #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the - #variable the user puts in is - if (YYYLettersToTheLeft < 7 | YYYLettersToTheRight < 7) { - leftspaces<-rep(" ",times=(7-YYYLettersToTheLeft)) - rightspaces<-rep(" ",times=7-(YYYLettersToTheRight)) - #add blank spaces if the motif has less than 4 letters to the left/right - motif<-c(leftspaces,YYYmotif,rightspaces) - #save that motif, which is the Y and +/- 4 amino acids, including truncation - motif<-motif[!motif %in% "x"] - motif<-paste(motif, sep="", collapse="") - D835letters<-motif - D835Ymotifs[i,1]<-D835letters - D835YAccessionNumbers[i,1]<-FirstSubstrateSet[i,3] - } - - if(YYYLettersToTheLeft>6 && YYYLettersToTheRight>6){ - motif<-YYYmotif - #add blank spaces if the motif has less than 4 letters to the left/right - motif<-c(leftspaces,YYYmotif,rightspaces) - #save that motif, which is the Y and +/- 4 amino acids, including truncation - motif<-motif[!motif %in% "x"] - motif<-paste(motif, sep="", collapse="") - D835letters<-motif - D835Ymotifs[i,1]<-D835letters - D835YAccessionNumbers[i,1]<-FirstSubstrateSet[i,3] - } - } - - names(FTLwtmotifs)<-FTLwtAccessionNumbers - names(D835Ymotifs)<-D835YAccessionNumbers - - } - - - 
FTLwtmotifsFINAL<-FTLwtmotifs[!FTLwtmotifs %in% D835Ymotifs] - FTLwtmotifsFINAL<-FTLwtmotifsFINAL[!duplicated(FTLwtmotifsFINAL)] - - D835YmotifsFINAL<-D835Ymotifs[!D835Ymotifs %in% FTLwtmotifs] - D835YmotifsFINAL<-D835YmotifsFINAL[!duplicated(D835YmotifsFINAL)] - - - columnalheader<-c(rep(NA,36)) - FTLFinalMatrix<-matrix(data =columnalheader,nrow = 1) - - for (k in 1:length(FTLwtmotifsFINAL)) { - AN<-00000 - #I don't remember why, but I felt it necessary to destroy the accession number multiple times to ensure it is - #destroyed immediately after use - for (m in 1:ncol(Firstsubbackfreq)) { - AN <- as.character(Firstsubbackfreq[1, m]) - if (grepl(pattern = AN, - x = names(FTLwtmotifsFINAL[k]), - fixed = TRUE) == TRUE) { - outputmatrix <- as.character(Firstsubbackfreq[, m]) - outputmatrix <- matrix(outputmatrix, nrow = 1) - #with that accession number, find a match in the subbackfreq file and save it here - FTLFinalMatrix<-rbind(FTLFinalMatrix,outputmatrix) - } - } - } - FTLFinalMatrix<-FTLFinalMatrix[!duplicated(FTLFinalMatrix),] - FTLFinalMatrix<-FTLFinalMatrix[2:nrow(FTLFinalMatrix),] - - columnalheader<-c(rep(NA,36)) - D835YFinalMatrix<-matrix(data =columnalheader,nrow = 1) - - for (k in 1:length(D835YmotifsFINAL)) { - #I don't remember why, but I felt it necessary to destroy the accession number multiple times to ensure it is - #destroyed immediately after use - for (m in 1:ncol(Secondsubbackfreq)) { - AN <- as.character(Secondsubbackfreq[1, m]) - if (grepl(pattern = AN, - x = names(D835YmotifsFINAL[k]), - fixed = TRUE) == TRUE) { - outputmatrix <- as.character(Secondsubbackfreq[, m]) - outputmatrix <- matrix(outputmatrix, nrow = 1) - #with that accession number, find a match in the subbackfreq file and save it here - D835YFinalMatrix<-rbind(D835YFinalMatrix,outputmatrix) - } - } - } - D835YFinalMatrix<-D835YFinalMatrix[!duplicated(D835YFinalMatrix),] - D835YFinalMatrix<-D835YFinalMatrix[2:nrow(D835YFinalMatrix),] - - 
FTLoutputmatrix<-matrix(data=c(FTLwtmotifsFINAL,names(FTLwtmotifsFINAL)),ncol = 2) - - - FLTheader<-c("Substrate","Species","Reference","-7","-6","-5","-4","-3","-2","-1","0","1","2","3","4","5","6","7","Phosphite") - # FLTheader<-unlist(FLTheader) - lefthandFLT<-matrix(data = rep(NA,times=2*nrow(FTLoutputmatrix)),nrow=nrow(FTLoutputmatrix)) - righthandFLT<-matrix(data = rep(NA,times=1*nrow(FTLoutputmatrix)),nrow=nrow(FTLoutputmatrix)) - FLTaccessionset<-FTLoutputmatrix[,2] - FTLmeat<-sapply(FTLoutputmatrix[,1], strsplit, "") - FTLmeat<-sapply(FTLmeat, unlist) - colnames(FTLmeat)<-NULL - FTLmeat<-t(FTLmeat) - - FTLoutputmatrix2<-cbind(lefthandFLT,FLTaccessionset,FTLmeat,righthandFLT) - colnames(FTLoutputmatrix2)<-NULL - rownames(FTLoutputmatrix2)<-NULL - colnames(FLTheader)<-NULL - rownames(FLTheader)<-NULL - - - FirstCentralLettersAGAIN<-FTLoutputmatrix2[,11] - - FirstEsses<-sapply(FirstCentralLettersAGAIN, grepl, pattern="S", ignore.case=TRUE) - FirstTees<-sapply(FirstCentralLettersAGAIN, grepl, pattern="T", ignore.case=TRUE) - FirstWys<-sapply(FirstCentralLettersAGAIN, grepl, pattern="Y", ignore.case=TRUE) - - FirstCentralLettersAGAIN<-replace(FirstCentralLettersAGAIN,FirstEsses,"xS") - FirstCentralLettersAGAIN<-replace(FirstCentralLettersAGAIN,FirstTees,"xT") - FirstCentralLettersAGAIN<-replace(FirstCentralLettersAGAIN,FirstWys,"xY") - - FirstCentralLettersAGAIN->FTLoutputmatrix2[,11] - - FTLoutputmatrix2<-rbind(FLTheader,FTLoutputmatrix2) - - write.table(x=FTLoutputmatrix2, - file=First_unshared_motifs_table, - quote=FALSE, sep=",", - row.names=FALSE,col.names = FALSE, na="", append=TRUE) - - columnalheader<-c("Accession Numbers",as.character(Firstsubbackfreq[1:35,1])) - columnalheader<-matrix(columnalheader,nrow = 1) - write.table(x=columnalheader, - file=First_unshared_subbackfreq, - quote=FALSE, sep=",", - row.names=FALSE,col.names = FALSE, na="", append=TRUE) - - write.table(x=FTLFinalMatrix, - file=First_unshared_subbackfreq, - quote=FALSE, sep=",", - 
row.names=FALSE,col.names = FALSE, na="", append=TRUE) - - ############################################################################################################ - - D835Youtputmatrix<-matrix(data=c(D835YmotifsFINAL,names(D835YmotifsFINAL)),ncol = 2) - - D835Yheader<-c("Substrate","Species","Reference","-7","-6","-5","-4","-3","-2","-1","0","1","2","3","4","5","6","7","Phosphite") - # D835Yheader<-unlist(D835Yheader) - lefthandD835<-matrix(data = rep(NA,times=2*nrow(D835Youtputmatrix)),nrow=nrow(D835Youtputmatrix)) - righthandD835<-matrix(data = rep(NA,times=1*nrow(D835Youtputmatrix)),nrow=nrow(D835Youtputmatrix)) - D835Yaset<-D835Youtputmatrix[,2] - D835meat<-sapply(D835Youtputmatrix[,1], strsplit, "") - D835meat<-sapply(D835meat, unlist) - colnames(D835meat)<-NULL - D835meat<-t(D835meat) - - D835Youtputmatrix2<-cbind(lefthandD835,D835Yaset,D835meat,righthandD835) - colnames(D835Youtputmatrix2)<-NULL - rownames(D835Youtputmatrix2)<-NULL - colnames(D835Yheader)<-NULL - rownames(D835Yheader)<-NULL - - - SecondCentralLettersAGAIN<-D835Youtputmatrix2[,11] - - SecondEsses<-sapply(SecondCentralLettersAGAIN, grepl, pattern="S", ignore.case=TRUE) - SecondTees<-sapply(SecondCentralLettersAGAIN, grepl, pattern="T", ignore.case=TRUE) - SecondWys<-sapply(SecondCentralLettersAGAIN, grepl, pattern="Y", ignore.case=TRUE) - - SecondCentralLettersAGAIN<-replace(SecondCentralLettersAGAIN,SecondEsses,"xS") - SecondCentralLettersAGAIN<-replace(SecondCentralLettersAGAIN,SecondTees,"xT") - SecondCentralLettersAGAIN<-replace(SecondCentralLettersAGAIN,SecondWys,"xY") - - SecondCentralLettersAGAIN->D835Youtputmatrix2[,11] - - D835Youtputmatrix2<-rbind(D835Yheader,D835Youtputmatrix2) - - write.table(x=D835Youtputmatrix2, - file=Second_unshared_motifs_table, - quote=FALSE, sep=",", - row.names=FALSE,col.names = FALSE, na="", append=TRUE) - - columnalheader<-c("Accession Numbers",as.character(Firstsubbackfreq[1:35,1])) - columnalheader<-matrix(columnalheader,nrow = 1) - 
write.table(x=columnalheader, - file=Second_unshared_subbackfreq, - quote=FALSE, sep=",", - row.names=FALSE,col.names = FALSE, na="", append=TRUE) - - write.table(x=D835YFinalMatrix, - file=Second_unshared_subbackfreq, - quote=FALSE, sep=",", - row.names=FALSE,col.names = FALSE, na="", append=TRUE) -} - diff -r beba4066121e -r dff99bed3f56 all stuff/difference finder for 2 overlaps proper names 7-7_1-15-2019.R --- a/all stuff/difference finder for 2 overlaps proper names 7-7_1-15-2019.R Wed Jan 16 14:33:39 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,547 +0,0 @@ -#Difference finder for only 2 - -#ff you want ONLY FULL MOTIFS, put "YES" here, please use all caps -FullMotifsOnly_questionmark<-"NO" -#If you want ONLY TRUNCATED MOTIFS, put "YES" here, please use all caps -TruncatedMotifsOnly_questionmark<-"NO" - -FirstSubstrateSet<- read.csv("170922-BTK-MINUS-COMBO FILES_Substrates.csv", stringsAsFactors=FALSE) -Firstsubbackfreq<- read.csv("170922-BTK-MINUS-COMBO FILES_Substrates.csv", header=FALSE, stringsAsFactors=FALSE) - -SecondSubstrateSet<- read.csv("170922-btk-rep OVLP-plus_Substrates.csv", stringsAsFactors=FALSE) -Secondsubbackfreq<- read.csv("170922-btk-rep OVLP-plus_SubBackFreq.csv", header=FALSE, stringsAsFactors=FALSE) - -First_unshared_motifs_table<-"170922-BTK-MINUS-COMBO FILES_Substrates-unique.csv" -First_unshared_subbackfreq<-"170922-BTK-MINUS-COMBO FILES_SubBackFreq-unique.csv" - -Second_unshared_motifs_table<-"170922-btk-rep OVLP-plus_Substrates-unique.csv" -Second_unshared_subbackfreq<-"170922-btk-rep OVLP-plus_SubBackFreq-unique.csv" - - -LeftOfYLetters<-7 -RightOfYLetters<-7 - -if (FullMotifsOnly_questionmark=="YES"){ - FirstMotifs=rep(NA,times=nrow(FirstSubstrateSet)) - FirstAccessionNumbers=rep(NA,times=nrow(FirstSubstrateSet)) - leftspaces<-c() - rightspaces<-c() - for (i in 1:nrow(FirstSubstrateSet)){ - FirstLetters<-FirstSubstrateSet[i,7:15] - FirstLetters<-FirstLetters[FirstLetters !="XXXXX"] - 
FirstLetters<-paste(FirstLetters, sep="", collapse="") - - - YYYmotif <- unlist(strsplit(FirstLetters, split = "")) - YYYposition <- match(x = "x", table = YYYmotif) - #position itself tells me how much is to the left of that X by what it's number is. x at position 4 tells me that there are - #just 3 letters to the left of x - - YYYLettersToTheLeft <- YYYposition - 1 - #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is - #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1 - YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 - #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the - #variable the user puts in is - - if (YYYLettersToTheLeft > 3 && YYYLettersToTheRight > 3) { - motif<-YYYmotif - #save that motif, which is the Y and +/- 4 amino acids, including truncation - motif<-motif[!motif %in% "x"] - motif<-paste(motif, sep="", collapse="") - FirstLetters<-motif - FirstMotifs[i]<-FirstLetters - FirstAccessionNumbers[i]<-FirstSubstrateSet[i,3] - } - - } - # FirstMotifs <- FirstMotifs[!is.na(FirstMotifs)] - # FirstMotifs<-matrix(FirstMotifs,ncol = 1) - # - - SecondMotifs=rep(NA,times=nrow(FirstSubstrateSet)) - SecondAccessionNumbers=rep(NA,times=nrow(FirstSubstrateSet)) - - for (i in 1:nrow(SecondSubstrateSet)){ - SecondLetters<-SecondSubstrateSet[i,7:15] - SecondLetters<-SecondLetters[SecondLetters !="XXXXX"] - SecondLetters<-paste(SecondLetters, sep="", collapse="") - - - YYYmotif <- unlist(strsplit(SecondLetters, split = "")) - YYYposition <- match(x = "x", table = YYYmotif) - #position itself tells me how much is to the left of that X by what it's number is. 
x at position 4 tells me that there are - #just 3 letters to the left of x - - YYYLettersToTheLeft <- YYYposition - 1 - #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is - #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1 - YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 - #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the - #variable the user puts in is - - if (YYYLettersToTheLeft > 3 && YYYLettersToTheRight > 3) { - motif<-YYYmotif - #add blank spaces if the motif has less than 4 letters to the left/right - motif<-c(leftspaces,YYYmotif,rightspaces) - #save that motif, which is the Y and +/- 4 amino acids, including truncation - motif<-motif[!motif %in% "x"] - motif<-paste(motif, sep="", collapse="") - SecondLetters<-motif - SecondMotifs[i]<-SecondLetters - SecondAccessionNumbers[i]<-SecondSubstrateSet[i,3] - } - } - names(FirstMotifs)<-FirstAccessionNumbers - names(SecondMotifs)<-SecondAccessionNumbers - - -# ITDmotifs=rep(NA,times=nrow(FirstSubstrateSet)) -# ITDAccessionNumbers=rep(NA,times=nrow(FirstSubstrateSet)) - if(1==0){ - for (i in 1:nrow(ThirdSubstrateSet)){ - ITDletters<-ThirdSubstrateSet[i,7:15] - ITDletters<-ITDletters[ITDletters !="XXXXX"] - ITDletters<-paste(ITDletters, sep="", collapse="") - YYYmotif <- unlist(strsplit(ITDletters, split = "")) - YYYposition <- match(x = "x", table = YYYmotif) - #position itself tells me how much is to the left of that X by what it's number is. 
x at position 4 tells me that there are - #just 3 letters to the left of x - - YYYLettersToTheLeft <- YYYposition - 1 - #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is - #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1 - YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 - #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the - #variable the user puts in is - - if (YYYLettersToTheLeft > 3 && YYYLettersToTheRight > 3) { - motif<-YYYmotif - #add blank spaces if the motif has less than 4 letters to the left/right - motif<-c(leftspaces,YYYmotif,rightspaces) - #save that motif, which is the Y and +/- 4 amino acids, including truncation - motif<-motif[!motif %in% "x"] - motif<-paste(motif, sep="", collapse="") - ITDletters<-motif - ITDmotifs[i]<-ITDletters - ITDAccessionNumbers[i]<-ThirdSubstrateSet[i,3] - - } - } - } - #names(ITDmotifs)<-ITDAccessionNumbers - names(SecondMotifs)<-SecondAccessionNumbers - names(FirstMotifs)<-FirstAccessionNumbers -} - - -##############################################3 -#Truncated only -if (TruncatedMotifsOnly_questionmark=="YES"){ - FirstMotifs=matrix(,nrow = nrow(FirstSubstrateSet),ncol=1) - FirstAccessionNumbers=matrix(,nrow = nrow(FirstSubstrateSet),ncol=1) - - for (i in 1:nrow(FirstSubstrateSet)){ - FirstLetters<-FirstSubstrateSet[i,7:15] - FirstLetters<-FirstLetters[FirstLetters !="XXXXX"] - FirstLetters<-paste(FirstLetters, sep="", collapse="") - - - YYYmotif <- unlist(strsplit(FirstLetters, split = "")) - YYYposition <- match(x = "x", table = YYYmotif) - #position itself tells me how much is to the left of that X by what it's number is. 
x at position 4 tells me that there are - #just 3 letters to the left of x - - YYYLettersToTheLeft <- YYYposition - 1 - #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is - #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1 - YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 - #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the - #variable the user puts in is - - if (YYYLettersToTheLeft < 4 | YYYLettersToTheRight < 4) { - leftspaces<-rep(" ",times=(4-YYYLettersToTheLeft)) - rightspaces<-rep(" ",times=4-(YYYLettersToTheRight)) - #add blank spaces if the motif has less than 4 letters to the left/right - motif<-c(leftspaces,YYYmotif,rightspaces) - #save that motif, which is the Y and +/- 4 amino acids, including truncation - motif<-motif[!motif %in% "x"] - motif<-paste(motif, sep="", collapse="") - FirstLetters<-motif - FirstMotifs[i,1]<-FirstLetters - FirstAccessionNumbers[i,1]<-FirstSubstrateSet[i,3] - } - - } - - SecondMotifs=matrix(,nrow = nrow(SecondSubstrateSet),ncol=1) - SecondAccessionNumbers<-matrix(,nrow = nrow(SecondSubstrateSet),ncol = 1) - i=2 - for (i in 1:nrow(SecondSubstrateSet)){ - SecondLetters<-SecondSubstrateSet[i,7:15] - SecondLetters<-SecondLetters[SecondLetters !="XXXXX"] - SecondLetters<-paste(SecondLetters, sep="", collapse="") - - - YYYmotif <- unlist(strsplit(SecondLetters, split = "")) - YYYposition <- match(x = "x", table = YYYmotif) - #position itself tells me how much is to the left of that X by what it's number is. 
x at position 4 tells me that there are - #just 3 letters to the left of x - - YYYLettersToTheLeft <- YYYposition - 1 - #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is - #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1 - YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 - #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the - #variable the user puts in is - - if (YYYLettersToTheLeft < 4 | YYYLettersToTheRight < 4) { - leftspaces<-rep(" ",times=(4-YYYLettersToTheLeft)) - rightspaces<-rep(" ",times=4-(YYYLettersToTheRight)) - #add blank spaces if the motif has less than 4 letters to the left/right - motif<-c(leftspaces,YYYmotif,rightspaces) - #save that motif, which is the Y and +/- 4 amino acids, including truncation - motif<-motif[!motif %in% "x"] - motif<-paste(motif, sep="", collapse="") - SecondLetters<-motif - SecondAccessionNumbers[i,1]<-SecondSubstrateSet[i,3] - SecondMotifs[i,1]<-SecondLetters - } - } - -# ITDmotifs=matrix(,nrow = nrow(ThirdSubstrateSet),ncol=1) -# ITDAccessionNumbers<-matrix(,nrow = nrow(ThirdSubstrateSet)) - if(1==0){ - for (i in 1:nrow(ThirdSubstrateSet)){ - ITDletters<-ThirdSubstrateSet[i,7:15] - ITDletters<-ITDletters[ITDletters !="XXXXX"] - ITDletters<-paste(ITDletters, sep="", collapse="") - YYYmotif <- unlist(strsplit(ITDletters, split = "")) - YYYposition <- match(x = "x", table = YYYmotif) - #position itself tells me how much is to the left of that X by what it's number is. 
x at position 4 tells me that there are - #just 3 letters to the left of x - - YYYLettersToTheLeft <- YYYposition - 1 - #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is - #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1 - YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 - #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the - #variable the user puts in is - - if (YYYLettersToTheLeft < 4 | YYYLettersToTheRight < 4) { - leftspaces<-rep(" ",times=(4-YYYLettersToTheLeft)) - rightspaces<-rep(" ",times=4-(YYYLettersToTheRight)) - #add blank spaces if the motif has less than 4 letters to the left/right - motif<-c(leftspaces,YYYmotif,rightspaces) - #save that motif, which is the Y and +/- 4 amino acids, including truncation - motif<-motif[!motif %in% "x"] - motif<-paste(motif, sep="", collapse="") - ITDletters<-motif - ITDAccessionNumbers[i,1]<-ThirdSubstrateSet[i,3] - ITDmotifs[i,1]<-ITDletters - } - } - } - names(FirstMotifs)<-FirstAccessionNumbers - names(SecondMotifs)<-SecondAccessionNumbers - #names(ITDmotifs)<-ITDAccessionNumbers -} - -############################################### -#ALL motifs, full and truncated - -if (FullMotifsOnly_questionmark!="YES"&&TruncatedMotifsOnly_questionmark!="YES"){ - #print("!")} - FirstMotifs=matrix(,nrow = nrow(FirstSubstrateSet),ncol=1) - FirstAccessionNumbers=matrix(,nrow = nrow(FirstSubstrateSet),ncol=1) - - for (i in 1:nrow(FirstSubstrateSet)){ - FirstLetters<-FirstSubstrateSet[i,7:15] - FirstLetters<-FirstLetters[FirstLetters !="XXXXX"] - FirstLetters<-paste(FirstLetters, sep="", collapse="") - leftspaces<-c() - rightspaces<-c() - - YYYmotif <- unlist(strsplit(FirstLetters, split = "")) - YYYposition <- match(x = "x", table = YYYmotif) - #position itself tells me how much is to the left of that X by what it's number is. 
x at position 4 tells me that there are - #just 3 letters to the left of x - - YYYLettersToTheLeft <- YYYposition - 1 - #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is - #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1 - YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 - #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the - #variable the user puts in is - - - if (YYYLettersToTheLeft < 4 | YYYLettersToTheRight < 4) { - leftspaces<-rep(" ",times=(4-YYYLettersToTheLeft)) - rightspaces<-rep(" ",times=4-(YYYLettersToTheRight)) - #add blank spaces if the motif has less than 4 letters to the left/right - motif<-c(leftspaces,YYYmotif,rightspaces) - #save that motif, which is the Y and +/- 4 amino acids, including truncation - motif<-motif[!motif %in% "x"] - motif<-paste(motif, sep="", collapse="") - FirstLetters<-motif - FirstMotifs[i,1]<-FirstLetters - FirstAccessionNumbers[i,1]<-FirstSubstrateSet[i,3] - } - - if(YYYLettersToTheLeft>3 && YYYLettersToTheRight>3){ - motif<-YYYmotif - #add blank spaces if the motif has less than 4 letters to the left/right - motif<-c(leftspaces,YYYmotif,rightspaces) - #save that motif, which is the Y and +/- 4 amino acids, including truncation - motif<-motif[!motif %in% "x"] - motif<-paste(motif, sep="", collapse="") - FirstLetters<-motif - FirstMotifs[i,1]<-FirstLetters - FirstAccessionNumbers[i,1]<-FirstSubstrateSet[i,3] - - - } - - } - - SecondMotifs=matrix(,nrow = nrow(SecondSubstrateSet),ncol=1) - SecondAccessionNumbers<-matrix(,nrow = nrow(SecondSubstrateSet),ncol = 1) - - for (i in 1:nrow(SecondSubstrateSet)){ - SecondLetters<-SecondSubstrateSet[i,7:15] - SecondLetters<-SecondLetters[SecondLetters !="XXXXX"] - SecondLetters<-paste(SecondLetters, sep="", collapse="") - leftspaces<-c() - rightspaces<-c() - - YYYmotif <- unlist(strsplit(SecondLetters, 
split = "")) - YYYposition <- match(x = "x", table = YYYmotif) - #position itself tells me how much is to the left of that X by what it's number is. x at position 4 tells me that there are - #just 3 letters to the left of x - - YYYLettersToTheLeft <- YYYposition - 1 - #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is - #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1 - YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 - #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the - #variable the user puts in is - if (YYYLettersToTheLeft < 4 | YYYLettersToTheRight < 4) { - leftspaces<-rep(" ",times=(4-YYYLettersToTheLeft)) - rightspaces<-rep(" ",times=4-(YYYLettersToTheRight)) - #add blank spaces if the motif has less than 4 letters to the left/right - motif<-c(leftspaces,YYYmotif,rightspaces) - #save that motif, which is the Y and +/- 4 amino acids, including truncation - motif<-motif[!motif %in% "x"] - motif<-paste(motif, sep="", collapse="") - SecondLetters<-motif - SecondMotifs[i,1]<-SecondLetters - SecondAccessionNumbers[i,1]<-SecondSubstrateSet[i,3] - } - - if(YYYLettersToTheLeft>3 && YYYLettersToTheRight>3){ - motif<-YYYmotif - #add blank spaces if the motif has less than 4 letters to the left/right - motif<-c(leftspaces,YYYmotif,rightspaces) - #save that motif, which is the Y and +/- 4 amino acids, including truncation - motif<-motif[!motif %in% "x"] - motif<-paste(motif, sep="", collapse="") - SecondLetters<-motif - SecondMotifs[i,1]<-SecondLetters - SecondAccessionNumbers[i,1]<-SecondSubstrateSet[i,3] - } - } - - - #ITDmotifs=matrix(,nrow = nrow(ThirdSubstrateSet),ncol=1) - #ITDAccessionNumbers<-matrix(,nrow = nrow(ThirdSubstrateSet)) - if(1==0){ - for (i in 1:nrow(ThirdSubstrateSet)){ - ITDletters<-ThirdSubstrateSet[i,7:15] - ITDletters<-ITDletters[ITDletters !="XXXXX"] - 
ITDletters<-paste(ITDletters, sep="", collapse="") - YYYmotif <- unlist(strsplit(ITDletters, split = "")) - leftspaces<-c() - rightspaces<-c() - YYYposition <- match(x = "x", table = YYYmotif) - #position itself tells me how much is to the left of that X by what it's number is. x at position 4 tells me that there are - #just 3 letters to the left of x - - YYYLettersToTheLeft <- YYYposition - 1 - #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is - #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1 - YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 - #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the - #variable the user puts in is - if (YYYLettersToTheLeft < 4 | YYYLettersToTheRight < 4) { - leftspaces<-rep(" ",times=(4-YYYLettersToTheLeft)) - rightspaces<-rep(" ",times=4-(YYYLettersToTheRight)) - #add blank spaces if the motif has less than 4 letters to the left/right - motif<-c(leftspaces,YYYmotif,rightspaces) - #save that motif, which is the Y and +/- 4 amino acids, including truncation - motif<-motif[!motif %in% "x"] - motif<-paste(motif, sep="", collapse="") - ITDletters<-motif - ITDmotifs[i,1]<-ITDletters - ITDAccessionNumbers[i,1]<-FirstSubstrateSet[i,3] - } - - if(YYYLettersToTheLeft>3 && YYYLettersToTheRight>3){ - motif<-YYYmotif - #add blank spaces if the motif has less than 4 letters to the left/right - motif<-c(leftspaces,YYYmotif,rightspaces) - #save that motif, which is the Y and +/- 4 amino acids, including truncation - motif<-motif[!motif %in% "x"] - motif<-paste(motif, sep="", collapse="") - ITDletters<-motif - ITDmotifs[i,1]<-ITDletters - ITDAccessionNumbers[i,1]<-FirstSubstrateSet[i,3] - } - } - } - names(FirstMotifs)<-FirstAccessionNumbers - names(SecondMotifs)<-SecondAccessionNumbers - #names(ITDmotifs)<-ITDAccessionNumbers -} - - 
-FirstMotifsFINAL<-FirstMotifs[!FirstMotifs %in% SecondMotifs] -#FirstMotifsFINAL<-FirstMotifsFINAL[!FirstMotifsFINAL %in% ITDmotifs] -FirstMotifsFINAL<-FirstMotifsFINAL[!duplicated(FirstMotifsFINAL)] - - -# ITDmotifsFINAL<-ITDmotifs[!ITDmotifs %in% SecondMotifs] -# ITDmotifsFINAL<-ITDmotifsFINAL[!ITDmotifsFINAL %in% FirstMotifs] -# ITDmotifsFINAL<-ITDmotifsFINAL[!duplicated(ITDmotifsFINAL)] - - -SecondMotifsFINAL<-SecondMotifs[!SecondMotifs %in% FirstMotifs] -#SecondMotifsFINAL<-SecondMotifsFINAL[!SecondMotifsFINAL %in% ITDmotifs] -SecondMotifsFINAL<-SecondMotifsFINAL[!duplicated(SecondMotifsFINAL)] - - -columnalheader<-c(rep(NA,36)) -FirstFinalMatrix<-matrix(data =columnalheader,nrow = 1) - -for (k in 1:length(FirstMotifsFINAL)) { - AN<-00000 - #I don't remember why, but I felt it necessary to destroy the accession number multiple times to ensure it is - #destroyed immediately after use - for (m in 2:ncol(Firstsubbackfreq)) { - AN <- as.character(Firstsubbackfreq[2, m]) - #print(AN)} - if (grepl(pattern = AN, - x = names(FirstMotifsFINAL[k]), - fixed = TRUE) == TRUE) { - outputmatrix <- as.character(Firstsubbackfreq[, m]) - outputmatrix <- matrix(outputmatrix, nrow = 1) - #with that accession number, find a match in the subbackfreq file and save it here - FirstFinalMatrix<-rbind(FirstFinalMatrix,outputmatrix) - # print(AN,outputmatrix)} - } - } -} -FirstFinalMatrix<-FirstFinalMatrix[!duplicated(FirstFinalMatrix),] - -#columnalheader<-c(rep(NA,36)) -#ITDFinalMatrix<-matrix(data =columnalheader,nrow = 1) - -if(1==0){ - for (k in 1:length(ITDmotifsFINAL)) { - AN<-00000 - #I don't remember why, but I felt it necessary to destroy the accession number multiple times to ensure it is - #destroyed immediately after use - for (m in 1:ncol(Thirdsubbackfreq)) { - AN <- as.character(Thirdsubbackfreq[1, m]) - if (grepl(pattern = AN, - x = names(ITDmotifsFINAL[k]), - fixed = TRUE) == TRUE) { - outputmatrix <- as.character(Thirdsubbackfreq[, m]) - outputmatrix <- 
matrix(outputmatrix, nrow = 1) - #with that accession number, find a match in the subbackfreq file and save it here - ITDFinalMatrix<-rbind(ITDFinalMatrix,outputmatrix) - } - } - } - ITDFinalMatrix<-ITDFinalMatrix[!duplicated(ITDFinalMatrix),] -} - -columnalheader<-c(rep(NA,36)) -SecondFinalMatrix<-matrix(data =columnalheader,nrow = 1) - -for (k in 1:length(SecondMotifsFINAL)) { - #I don't remember why, but I felt it necessary to destroy the accession number multiple times to ensure it is - #destroyed immediately after use - for (m in 1:ncol(Secondsubbackfreq)) { - AN <- as.character(Secondsubbackfreq[1, m]) - if (grepl(pattern = AN, - x = names(SecondMotifsFINAL[k]), - fixed = TRUE) == TRUE) { - outputmatrix <- as.character(Secondsubbackfreq[, m]) - outputmatrix <- matrix(outputmatrix, nrow = 1) - #with that accession number, find a match in the subbackfreq file and save it here - SecondFinalMatrix<-rbind(SecondFinalMatrix,outputmatrix) - } - } -} -SecondFinalMatrix<-SecondFinalMatrix[!duplicated(SecondFinalMatrix),] -FTLoutputmatrix<-matrix(data=c(FirstMotifsFINAL,names(FirstMotifsFINAL)),ncol = 2) - - -write.table(x=FTLoutputmatrix, - file=First_unshared_motifs_table, - quote=FALSE, sep=",", - row.names=FALSE,col.names = FALSE, na="", append=TRUE) - -columnalheader<-c(as.character(Firstsubbackfreq[1:36,1])) -columnalheader<-matrix(columnalheader,nrow = 1) -write.table(x=columnalheader, - file=First_unshared_subbackfreq, - quote=FALSE, sep=",", - row.names=FALSE,col.names = FALSE, na="", append=TRUE) -FirstFinalMatrix<-FirstFinalMatrix[2:nrow(FirstFinalMatrix),] -write.table(x=FirstFinalMatrix, - file=First_unshared_subbackfreq, - quote=FALSE, sep=",", - row.names=FALSE,col.names = FALSE, na="", append=TRUE) - -############################################################################################################ - -D835Youtputmatrix<-matrix(data=c(SecondMotifsFINAL,names(SecondMotifsFINAL)),ncol = 2) - -write.table(x=D835Youtputmatrix, - 
file=Second_unshared_motifs_table, - quote=FALSE, sep=",", - row.names=FALSE,col.names = FALSE, na="", append=TRUE) - -columnalheader<-c(as.character(Firstsubbackfreq[1:36,1])) -columnalheader<-matrix(columnalheader,nrow = 1) -write.table(x=columnalheader, - file=Second_unshared_subbackfreq, - quote=FALSE, sep=",", - row.names=FALSE,col.names = FALSE, na="", append=TRUE) -SecondFinalMatrix<-SecondFinalMatrix[2:nrow(SecondFinalMatrix),] -write.table(x=SecondFinalMatrix, - file=Second_unshared_subbackfreq, - quote=FALSE, sep=",", - row.names=FALSE,col.names = FALSE, na="", append=TRUE) - -############################################################################################################ - -# ITDoutputmatrix<-matrix(data = c(ITDmotifsFINAL,names(ITDmotifsFINAL)),ncol = 2) -# -# write.table(x=ITDoutputmatrix, -# file=Third_unshared_motifs_table, -# quote=FALSE, sep=",", -# row.names=FALSE,col.names = FALSE, na="", append=TRUE) -# -# columnalheader<-c(as.character(Thirdsubbackfreq[1:36,1])) -# columnalheader<-matrix(columnalheader,nrow = 1) -# write.table(x=columnalheader, -# file=Third_unshared_subbackfreq, -# quote=FALSE, sep=",", -# row.names=FALSE,col.names = FALSE, na="", append=TRUE) -# ITDFinalMatrix<-ITDFinalMatrix[2:nrow(ITDFinalMatrix),] -# write.table(x=ITDFinalMatrix, -# file=Third_unshared_subbackfreq, -# quote=FALSE, sep=",", -# row.names=FALSE,col.names = FALSE, na="", append=TRUE) \ No newline at end of file diff -r beba4066121e -r dff99bed3f56 all stuff/differenceFinder.xml --- a/all stuff/differenceFinder.xml Wed Jan 16 14:33:39 2019 -0500 +++ b/all stuff/differenceFinder.xml Fri Apr 19 10:57:28 2019 -0400 @@ -9,7 +9,7 @@ ln -s '$input3' S2.csv && ln -s '$input4' SBF2.csv && - Rscript '$__tool_directory__/Difference finderMADE 7 TO 7 1-15-2019.R' + Rscript '$__tool_directory__/Difference finder MADE 7 TO 7 4-18-2019.R' ]]>