Mercurial > repos > jfb > difference_finder

diff all stuff/.Rhistory @ 9:033dd86d3e0c draft
Uploaded
author: jfb
date: Fri, 21 Feb 2020 13:07:45 -0500
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/all stuff/.Rhistory	Fri Feb 21 13:07:45 2020 -0500
@@ -0,0 +1,288 @@
+?rnorm
+?rnorm
+?rnorm
+pwr
+?pwr
+??pwr
+power.t.test(n=6, power = .7, type = "two.sample", alternative = "two.sided")
+power.t.test(n=6, power = .9, type = "two.sample", alternative = "two.sided")
+?power.t.test
+power.t.test(n=5, sd=1000, power = .7, type = "paired", alternative = "one.sided")
+10^4
+power.t.test(n=5, sd=10000, power = .7, type = "paired", alternative = "one.sided")
+power.t.test(n=10, delta = 7, sd=10, type = "paired", alternative = "one.sided")
+setwd("C:/Users/John Blankenhor/Downloads/difference_finder-4dd15c41d9e7/difference_finder-4dd15c41d9e7/all stuff")
+#put the names of your input files here
+FirstSubstrateSet<- read.csv("S1.csv", stringsAsFactors=FALSE)
+Firstsubbackfreq<- read.csv("SBF1.csv", header=FALSE, stringsAsFactors=FALSE)
+FirstSubstrateSet
+EmptySubHeader<-First_unshared_motifs_table[1,]
+First_unshared_motifs_table<-"1RS.csv"
+First_unshared_subbackfreq<-"1RSBF.csv"
+#put the names of your input files here
+FirstSubstrateSet<- read.csv("S1.csv", stringsAsFactors=FALSE)
+Firstsubbackfreq<- read.csv("SBF1.csv", header=FALSE, stringsAsFactors=FALSE)
+EmptySubHeader<-FirstSubstrateSet[1,]
+head(Firstsubbackfreq)
+EmptySBFHeader<-Firstsubbackfreq[,1]
+#If you want ONLY FULL MOTIFS, put "YES" here, please use all caps
+FullMotifsOnly_questionmark<-"NO"
+#If you want ONLY TRUNCATED MOTIFS, put "YES" here, please use all caps
+TruncatedMotifsOnly_questionmark<-"NO"
+#if you want to find the overlap, put a "YES" here (all caps), if you want to find the non-overlap, put "NO" (all caps)
+Are_You_Looking_For_Commonality<-"NO"
+#put the names of your input files here
+FirstSubstrateSet<- read.csv("S1.csv", stringsAsFactors=FALSE)
+Firstsubbackfreq<- read.csv("SBF1.csv", header=FALSE, stringsAsFactors=FALSE)
+SecondSubstrateSet<- read.csv("S2.csv", stringsAsFactors=FALSE)
+Secondsubbackfreq<- read.csv("SBF2.csv", header=FALSE, stringsAsFactors=FALSE)
+First_unshared_motifs_table<-"1RS.csv"
+First_unshared_subbackfreq<-"1RSBF.csv"
+Second_unshared_motifs_table<-"2RS.csv"
+Second_unshared_subbackfreq<-"2RSBF.csv"
+EmptySubHeader<-FirstSubstrateSet[1,]
+EmptySBFHeader<-Firstsubbackfreq[,1]
+FirstCentralLetters<-FirstSubstrateSet[,11]
+SecondCentralLetters<-SecondSubstrateSet[,11]
+FirstEsses<-sapply(FirstCentralLetters, grepl, pattern="S", ignore.case=TRUE)
+FirstTees<-sapply(FirstCentralLetters, grepl, pattern="T", ignore.case=TRUE)
+FirstWys<-sapply(FirstCentralLetters, grepl, pattern="Y", ignore.case=TRUE)
+SecondEsses<-sapply(SecondCentralLetters, grepl, pattern="S", ignore.case=TRUE)
+SecondTees<-sapply(SecondCentralLetters, grepl, pattern="T", ignore.case=TRUE)
+SecondWys<-sapply(SecondCentralLetters, grepl, pattern="Y", ignore.case=TRUE)
+FirstCentralLetters<-replace(FirstCentralLetters,FirstEsses,"xS")
+FirstCentralLetters<-replace(FirstCentralLetters,FirstTees,"xT")
+FirstCentralLetters<-replace(FirstCentralLetters,FirstWys,"xY")
+SecondCentralLetters<-replace(SecondCentralLetters,SecondEsses,"xS")
+SecondCentralLetters<-replace(SecondCentralLetters,SecondTees,"xT")
+SecondCentralLetters<-replace(SecondCentralLetters,SecondWys,"xY")
+FirstCentralLetters->FirstSubstrateSet[,11]
+SecondCentralLetters->SecondSubstrateSet[,11]
+if (FullMotifsOnly_questionmark!="YES"&&TruncatedMotifsOnly_questionmark!="YES"){
+FTLwtmotifs=matrix(,nrow = nrow(FirstSubstrateSet),ncol=1)
+FTLwtAccessionNumbers=matrix(,nrow = nrow(FirstSubstrateSet),ncol=1)
+for (i in 1:nrow(FirstSubstrateSet)){
+FTLwtletters<-FirstSubstrateSet[i,4:18]
+FTLwtletters<-FTLwtletters[FTLwtletters !="XXXXX"]
+FTLwtletters<-paste(FTLwtletters, sep="", collapse="")
+leftspaces<-c()
+rightspaces<-c()
+YYYmotif <- unlist(strsplit(FTLwtletters, split = ""))
+YYYposition <- match(x = "x", table = YYYmotif)
+#the position of the "x" marker tells me how many letters are to its left: an x at position 4 means that there are
+#just 3 letters to the left of x
+YYYLettersToTheLeft <- YYYposition - 1
+#the number of letters to the right should be length(motif)-position-1: if the motif is 5 long and x is at 3, then the central
+#letter (e.g. Y) is at 4 and there is just 1 spot to its right, so LettersToTheRight is 1 because 5-3-1=1
+YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1
+#then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the
+#variable the user puts in is
+if (YYYLettersToTheLeft < 7 | YYYLettersToTheRight < 7) {
+leftspaces<-rep(" ",times=(7-YYYLettersToTheLeft))
+rightspaces<-rep(" ",times=7-(YYYLettersToTheRight))
+#add blank spaces so that the motif has 7 positions on each side of the central letter
+motif<-c(leftspaces,YYYmotif,rightspaces)
+#drop the "x" marker and save the motif: the central letter plus 7 positions on each side, with spaces where it is truncated
+motif<-motif[!motif %in% "x"]
+motif<-paste(motif, sep="", collapse="")
+FTLwtletters<-motif
+FTLwtmotifs[i,1]<-FTLwtletters
+FTLwtAccessionNumbers[i,1]<-FirstSubstrateSet[i,3]
+}
+if(YYYLettersToTheLeft>6 && YYYLettersToTheRight>6){
+motif<-YYYmotif
+#no padding is needed here: leftspaces and rightspaces are still empty because both sides already have at least 7 letters
+motif<-c(leftspaces,YYYmotif,rightspaces)
+#drop the "x" marker and save the full-length motif: the central letter plus the letters on each side
+motif<-motif[!motif %in% "x"]
+motif<-paste(motif, sep="", collapse="")
+FTLwtletters<-motif
+FTLwtmotifs[i,1]<-FTLwtletters
+FTLwtAccessionNumbers[i,1]<-FirstSubstrateSet[i,3]
+}
+}
+D835Ymotifs=matrix(,nrow = nrow(SecondSubstrateSet),ncol=1)
+D835YAccessionNumbers<-matrix(,nrow = nrow(SecondSubstrateSet),ncol = 1)
+for (i in 1:nrow(SecondSubstrateSet)){
+D835letters<-SecondSubstrateSet[i,4:18]
+D835letters<-D835letters[D835letters !="XXXXX"]
+D835letters<-paste(D835letters, sep="", collapse="")
+leftspaces<-c()
+rightspaces<-c()
+YYYmotif <- unlist(strsplit(D835letters, split = ""))
+YYYposition <- match(x = "x", table = YYYmotif)
+#the position of the "x" marker tells me how many letters are to its left: an x at position 4 means that there are
+#just 3 letters to the left of x
+YYYLettersToTheLeft <- YYYposition - 1
+#the number of letters to the right should be length(motif)-position-1: if the motif is 5 long and x is at 3, then the central
+#letter (e.g. Y) is at 4 and there is just 1 spot to its right, so LettersToTheRight is 1 because 5-3-1=1
+YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1
+#then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the
+#variable the user puts in is
+if (YYYLettersToTheLeft < 7 | YYYLettersToTheRight < 7) {
+leftspaces<-rep(" ",times=(7-YYYLettersToTheLeft))
+rightspaces<-rep(" ",times=7-(YYYLettersToTheRight))
+#add blank spaces so that the motif has 7 positions on each side of the central letter
+motif<-c(leftspaces,YYYmotif,rightspaces)
+#drop the "x" marker and save the motif: the central letter plus 7 positions on each side, with spaces where it is truncated
+motif<-motif[!motif %in% "x"]
+motif<-paste(motif, sep="", collapse="")
+D835letters<-motif
+D835Ymotifs[i,1]<-D835letters
+D835YAccessionNumbers[i,1]<-SecondSubstrateSet[i,3]
+}
+if(YYYLettersToTheLeft>6 && YYYLettersToTheRight>6){
+motif<-YYYmotif
+#no padding is needed here: leftspaces and rightspaces are still empty because both sides already have at least 7 letters
+motif<-c(leftspaces,YYYmotif,rightspaces)
+#drop the "x" marker and save the full-length motif: the central letter plus the letters on each side
+motif<-motif[!motif %in% "x"]
+motif<-paste(motif, sep="", collapse="")
+D835letters<-motif
+D835Ymotifs[i,1]<-D835letters
+D835YAccessionNumbers[i,1]<-SecondSubstrateSet[i,3]
+}
+}
+names(FTLwtmotifs)<-FTLwtAccessionNumbers
+names(D835Ymotifs)<-D835YAccessionNumbers
+}
+FTLwtmotifsFINAL<-FTLwtmotifs[!FTLwtmotifs %in% D835Ymotifs]
+FTLwtmotifsFINAL<-FTLwtmotifsFINAL[!duplicated(FTLwtmotifsFINAL)]
+D835YmotifsFINAL<-D835Ymotifs[!D835Ymotifs %in% FTLwtmotifs]
+D835YmotifsFINAL<-D835YmotifsFINAL[!duplicated(D835YmotifsFINAL)]
+if (FullMotifsOnly_questionmark!="YES"&&TruncatedMotifsOnly_questionmark!="YES"){
+FTLwtmotifs=matrix(,nrow = nrow(FirstSubstrateSet),ncol=1)
+FTLwtAccessionNumbers=matrix(,nrow = nrow(FirstSubstrateSet),ncol=1)
+for (i in 1:nrow(FirstSubstrateSet)){
+FTLwtletters<-FirstSubstrateSet[i,4:18]
+FTLwtletters<-FTLwtletters[FTLwtletters !="XXXXX"]
+FTLwtletters<-paste(FTLwtletters, sep="", collapse="")
+leftspaces<-c()
+rightspaces<-c()
+YYYmotif <- unlist(strsplit(FTLwtletters, split = ""))
+YYYposition <- match(x = "x", table = YYYmotif)
+#the position of the "x" marker tells me how many letters are to its left: an x at position 4 means that there are
+#just 3 letters to the left of x
+YYYLettersToTheLeft <- YYYposition - 1
+#the number of letters to the right should be length(motif)-position-1: if the motif is 5 long and x is at 3, then the central
+#letter (e.g. Y) is at 4 and there is just 1 spot to its right, so LettersToTheRight is 1 because 5-3-1=1
+YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1
+#then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the
+#variable the user puts in is
+if (YYYLettersToTheLeft < 7 | YYYLettersToTheRight < 7) {
+leftspaces<-rep(" ",times=(7-YYYLettersToTheLeft))
+rightspaces<-rep(" ",times=7-(YYYLettersToTheRight))
+#add blank spaces so that the motif has 7 positions on each side of the central letter
+motif<-c(leftspaces,YYYmotif,rightspaces)
+#drop the "x" marker and save the motif: the central letter plus 7 positions on each side, with spaces where it is truncated
+motif<-motif[!motif %in% "x"]
+motif<-paste(motif, sep="", collapse="")
+FTLwtletters<-motif
+FTLwtmotifs[i,1]<-FTLwtletters
+FTLwtAccessionNumbers[i,1]<-FirstSubstrateSet[i,3]
+}
+if(YYYLettersToTheLeft>6 && YYYLettersToTheRight>6){
+motif<-YYYmotif
+#no padding is needed here: leftspaces and rightspaces are still empty because both sides already have at least 7 letters
+motif<-c(leftspaces,YYYmotif,rightspaces)
+#drop the "x" marker and save the full-length motif: the central letter plus the letters on each side
+motif<-motif[!motif %in% "x"]
+motif<-paste(motif, sep="", collapse="")
+FTLwtletters<-motif
+FTLwtmotifs[i,1]<-FTLwtletters
+FTLwtAccessionNumbers[i,1]<-FirstSubstrateSet[i,3]
+}
+}
+D835Ymotifs=matrix(,nrow = nrow(SecondSubstrateSet),ncol=1)
+D835YAccessionNumbers<-matrix(,nrow = nrow(SecondSubstrateSet),ncol = 1)
+for (i in 1:nrow(SecondSubstrateSet)){
+D835letters<-SecondSubstrateSet[i,4:18]
+D835letters<-D835letters[D835letters !="XXXXX"]
+D835letters<-paste(D835letters, sep="", collapse="")
+leftspaces<-c()
+rightspaces<-c()
+YYYmotif <- unlist(strsplit(D835letters, split = ""))
+YYYposition <- match(x = "x", table = YYYmotif)
+#the position of the "x" marker tells me how many letters are to its left: an x at position 4 means that there are
+#just 3 letters to the left of x
+YYYLettersToTheLeft <- YYYposition - 1
+#the number of letters to the right should be length(motif)-position-1: if the motif is 5 long and x is at 3, then the central
+#letter (e.g. Y) is at 4 and there is just 1 spot to its right, so LettersToTheRight is 1 because 5-3-1=1
+YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1
+#then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the
+#variable the user puts in is
+if (YYYLettersToTheLeft < 7 | YYYLettersToTheRight < 7) {
+leftspaces<-rep(" ",times=(7-YYYLettersToTheLeft))
+rightspaces<-rep(" ",times=7-(YYYLettersToTheRight))
+#add blank spaces so that the motif has 7 positions on each side of the central letter
+motif<-c(leftspaces,YYYmotif,rightspaces)
+#drop the "x" marker and save the motif: the central letter plus 7 positions on each side, with spaces where it is truncated
+motif<-motif[!motif %in% "x"]
+motif<-paste(motif, sep="", collapse="")
+D835letters<-motif
+D835Ymotifs[i,1]<-D835letters
+D835YAccessionNumbers[i,1]<-SecondSubstrateSet[i,3]
+}
+if(YYYLettersToTheLeft>6 && YYYLettersToTheRight>6){
+motif<-YYYmotif
+#no padding is needed here: leftspaces and rightspaces are still empty because both sides already have at least 7 letters
+motif<-c(leftspaces,YYYmotif,rightspaces)
+#drop the "x" marker and save the full-length motif: the central letter plus the letters on each side
+motif<-motif[!motif %in% "x"]
+motif<-paste(motif, sep="", collapse="")
+D835letters<-motif
+D835Ymotifs[i,1]<-D835letters
+D835YAccessionNumbers[i,1]<-SecondSubstrateSet[i,3]
+}
+}
+names(FTLwtmotifs)<-FTLwtAccessionNumbers
+names(D835Ymotifs)<-D835YAccessionNumbers
+}
+#If you want ONLY FULL MOTIFS, put "YES" here, please use all caps
+FullMotifsOnly_questionmark<-"NO"
+#If you want ONLY TRUNCATED MOTIFS, put "YES" here, please use all caps
+TruncatedMotifsOnly_questionmark<-"NO"
+#if you want to find the overlap, put a "YES" here (all caps), if you want to find the non-overlap, put "NO" (all caps)
+Are_You_Looking_For_Commonality<-"NO"
+#put the names of your input files here
+FirstSubstrateSet<- read.csv("S1.csv", stringsAsFactors=FALSE)
+Firstsubbackfreq<- read.csv("SBF1.csv", header=FALSE, stringsAsFactors=FALSE)
+SecondSubstrateSet<- read.csv("S2.csv", stringsAsFactors=FALSE)
+Secondsubbackfreq<- read.csv("SBF2.csv", header=FALSE, stringsAsFactors=FALSE)
+First_unshared_motifs_table<-"1RS.csv"
+First_unshared_subbackfreq<-"1RSBF.csv"
+Second_unshared_motifs_table<-"2RS.csv"
+Second_unshared_subbackfreq<-"2RSBF.csv"
+EmptySubHeader<-FirstSubstrateSet[1,]
+EmptySBFHeader<-Firstsubbackfreq[,1]
+FirstCentralLetters<-FirstSubstrateSet[,11]
+SecondCentralLetters<-SecondSubstrateSet[,11]
+FirstEsses<-sapply(FirstCentralLetters, grepl, pattern="S", ignore.case=TRUE)
+FirstTees<-sapply(FirstCentralLetters, grepl, pattern="T", ignore.case=TRUE)
+FirstWys<-sapply(FirstCentralLetters, grepl, pattern="Y", ignore.case=TRUE)
+SecondEsses<-sapply(SecondCentralLetters, grepl, pattern="S", ignore.case=TRUE)
+SecondTees<-sapply(SecondCentralLetters, grepl, pattern="T", ignore.case=TRUE)
+SecondWys<-sapply(SecondCentralLetters, grepl, pattern="Y", ignore.case=TRUE)
+FirstCentralLetters<-replace(FirstCentralLetters,FirstEsses,"xS")
+FirstCentralLetters<-replace(FirstCentralLetters,FirstTees,"xT")
+FirstCentralLetters<-replace(FirstCentralLetters,FirstWys,"xY")
+SecondCentralLetters<-replace(SecondCentralLetters,SecondEsses,"xS")
+SecondCentralLetters<-replace(SecondCentralLetters,SecondTees,"xT")
+SecondCentralLetters<-replace(SecondCentralLetters,SecondWys,"xY")
+FirstCentralLetters->FirstSubstrateSet[,11]
+SecondCentralLetters->SecondSubstrateSet[,11]
+FirstSubstrateSet
+#put the names of your input files here
+FirstSubstrateSet<- read.csv("S1.csv", stringsAsFactors=FALSE)
+FirstSubstrateSet
+head(SecondSubstrateSet)
+#put the names of your input files here
+FirstSubstrateSet<- read.csv("S1.csv", stringsAsFactors=FALSE)
+head(SecondSubstrateSet)
+FirstSubstrateSet
+#put the names of your input files here
+FirstSubstrateSet<- read.csv("S1.csv", stringsAsFactors=FALSE)
+FirstSubstrateSet
+#put the names of your input files here
+FirstSubstrateSet<- read.csv("S1.csv", stringsAsFactors=FALSE)
+SecondSubstrateSet<- read.csv("S2.csv", stringsAsFactors=FALSE)