Mercurial > repos > jfb > kinatest_r_7_7testing
diff kinatestid_r/Kinatest-R.R @ 17:26ef4add9f7b draft
Uploaded
author | jfb |
---|---|
date | Wed, 28 Feb 2018 14:09:19 -0500 |
parents | 15b5d4ae4480 |
children | e16ca3e9fc49 |
line wrap: on
line diff
--- a/kinatestid_r/Kinatest-R.R Thu Feb 08 15:49:05 2018 -0500 +++ b/kinatestid_r/Kinatest-R.R Wed Feb 28 14:09:19 2018 -0500 @@ -1,7 +1,6 @@ - ImportedSubstrateList<- read.csv("input1", stringsAsFactors=FALSE) NegativeSubstrateList<- read.csv("input2", stringsAsFactors=FALSE) -SubstrateBackgroundFrequency<- read.csv("input3", stringsAsFactors=FALSE) +SubstrateBackgroundFrequency<- read.csv("input3", stringsAsFactors=FALSE, header = FALSE) ScreenerFilename<-"screener" @@ -13,21 +12,14 @@ - - - - - - - +SubstrateBackgroundFrequency<-t(SubstrateBackgroundFrequency) +# number<-nrow(SubstrateBackgroundFrequency)-1 +SubstrateBackgroundFrequency<-SubstrateBackgroundFrequency[2:nrow(SubstrateBackgroundFrequency),] +Sub<-na.omit(SubstrateBackgroundFrequency) +SubstrateBackgroundFrequency<-Sub - - - - - - - +args = commandArgs(trailingOnly=TRUE) +TodaysKinase<-args[1] @@ -594,10 +586,10 @@ PercentTable<-rbind(HeaderSD,PercentTable) row.names(PercentTable)<-NULL PercentTable<-data.frame(SetOfAAs,PercentTable) -numberofY<-as.numeric(SubstrateBackgroundFrequency$Number.of.Y) +numberofY<-as.numeric(SubstrateBackgroundFrequency[,34]) numberofY<-numberofY[!is.na(numberofY)] -numberofPY<-as.numeric(SubstrateBackgroundFrequency$Number.of.pY) +numberofPY<-as.numeric(SubstrateBackgroundFrequency[,35]) numberofPY<-numberofPY[!is.na(numberofPY)] NormalizationScore<-sum(numberofPY)/sum(numberofY) @@ -1161,38 +1153,110 @@ bareSDs<-SDtable[2:21,2:16] goodones<-bareSDs>2 +# Positionm7<-which(goodones[,1] %in% TRUE) +# if (length(Positionm7)<1){Positionm7<-which(bareSDs[,1]==max(bareSDs[,1]))} +# Positionm6<-which(goodones[,2] %in% TRUE) +# if (length(Positionm6)<1){Positionm6<-which(bareSDs[,2]==max(bareSDs[,2]))} +# Positionm5<-which(goodones[,3] %in% TRUE) +# if (length(Positionm5)<1){Positionm5<-which(bareSDs[,3]==max(bareSDs[,3]))} +# Positionm4<-which(goodones[,4] %in% TRUE) +# if (length(Positionm4)<2){Positionm4<-bareSDs[,4][order(bareSDs[,4])[1:2]]} +# Positionm3<-which(goodones[,5] %in% TRUE) +# if (length(Positionm3)<2){Positionm3<-bareSDs[,5][order(bareSDs[,5])[1:2]]} +# Positionm2<-which(goodones[,6] %in% TRUE) +# if (length(Positionm2)<2){Positionm2<-bareSDs[,6][order(bareSDs[,6])[1:2]]} +# Positionm1<-which(goodones[,7] %in% TRUE) +# if (length(Positionm1)<2){Positionm1<-bareSDs[,7][order(bareSDs[,7])[1:2]]} +# +# Positiond0<-which(goodones[,8] %in% TRUE) +# if (length(Positiond0)<1){Positiond0<-which(bareSDs[,8]==max(bareSDs[,8]))} +# +# Positionp1<-which(goodones[,9] %in% TRUE) +# if (length(Positionp1)<2){Positionp1<-bareSDs[,9][order(bareSDs[,9])[1:2]]} +# Positionp2<-which(goodones[,10] %in% TRUE) +# if (length(Positionp2)<2){Positionp2<-bareSDs[,10][order(bareSDs[,10])[1:2]]} +# Positionp3<-which(goodones[,11] %in% TRUE) +# if (length(Positionp3)<2){Positionp3<-bareSDs[,11][order(bareSDs[,11])[1:2]]} +# Positionp4<-which(goodones[,12] %in% TRUE) +# if (length(Positionp4)<2){Positionp4<-bareSDs[,12][order(bareSDs[,12])[1:2]]} +# Positionp5<-which(goodones[,13] %in% TRUE) +# if (length(Positionp5)<1){Positionp5<-which(bareSDs[,13]==max(bareSDs[,13]))} +# Positionp6<-which(goodones[,14] %in% TRUE) +# if (length(Positionp6)<1){Positionp6<-which(bareSDs[,14]==max(bareSDs[,14]))} +# Positionp7<-which(goodones[,15] %in% TRUE) +# if (length(Positionp7)<1){Positionp7<-which(bareSDs[,15]==max(bareSDs[,15]))} + + + + +# Positionm7<-which(goodones[,1] %in% TRUE) +# if (length(Positionm7)<1){Positionm7<-which(bareSDs[,1]==max(bareSDs[,1]))} +# Positionm6<-which(goodones[,2] %in% TRUE) +# if (length(Positionm6)<1){Positionm6<-which(bareSDs[,2]==max(bareSDs[,2]))} +# Positionm5<-which(goodones[,3] %in% TRUE) +# if (length(Positionm5)<1){Positionm5<-which(bareSDs[,3]==max(bareSDs[,3]))} +# Positionm4<-which(goodones[,4] %in% TRUE) +# if (length(Positionm4)<1){Positionm4<-which(bareSDs[,4]==max(bareSDs[,4]))} +# Positionm3<-which(goodones[,5] %in% TRUE) +# if (length(Positionm3)<1){Positionm3<-which(bareSDs[,5]==max(bareSDs[,5]))} +# Positionm2<-which(goodones[,6] %in% TRUE) +# if (length(Positionm2)<1){Positionm2<-which(bareSDs[,6]==max(bareSDs[,6]))} +# Positionm1<-which(goodones[,7] %in% TRUE) +# if (length(Positionm1)<1){Positionm1<-which(bareSDs[,7]==max(bareSDs[,7]))} +# +# Positiond0<-which(goodones[,8] %in% TRUE) +# if (length(Positiond0)<1){Positiond0<-which(bareSDs[,8]==max(bareSDs[,8]))} +# +# Positionp1<-which(goodones[,9] %in% TRUE) +# if (length(Positionp1)<1){Positionp1<-which(bareSDs[,9]==max(bareSDs[,9]))} +# Positionp2<-which(goodones[,10] %in% TRUE) +# if (length(Positionp2)<1){Positionp2<-which(bareSDs[,10]==max(bareSDs[,10]))} +# Positionp3<-which(goodones[,11] %in% TRUE) +# if (length(Positionp3)<1){Positionp3<-which(bareSDs[,11]==max(bareSDs[,11]))} +# Positionp4<-which(goodones[,12] %in% TRUE) +# if (length(Positionp4)<1){Positionp4<-which(bareSDs[,12]==max(bareSDs[,12]))} +# Positionp5<-which(goodones[,13] %in% TRUE) +# if (length(Positionp5)<1){Positionp5<-which(bareSDs[,13]==max(bareSDs[,13]))} +# Positionp6<-which(goodones[,14] %in% TRUE) +# if (length(Positionp6)<1){Positionp6<-which(bareSDs[,14]==max(bareSDs[,14]))} +# Positionp7<-which(goodones[,15] %in% TRUE) +# if (length(Positionp7)<1){Positionp7<-which(bareSDs[,15]==max(bareSDs[,15]))} + +match(c(bareSDs[,2][order(bareSDs[,2])[1:2]]),bareSDs[,2]) + Positionm7<-which(goodones[,1] %in% TRUE) -if (length(Positionm7)<1){Positionm7<-which(bareSDs[,1]==max(bareSDs[,1]))} +if (length(Positionm7)<3){Positionm7<-match(c(bareSDs[,1][order(bareSDs[,1])[19:20]]),bareSDs[,1])} Positionm6<-which(goodones[,2] %in% TRUE) -if (length(Positionm6)<1){Positionm6<-which(bareSDs[,2]==max(bareSDs[,2]))} +if (length(Positionm6)<3){Positionm6<-match(c(bareSDs[,2][order(bareSDs[,2])[19:20]]),bareSDs[,2])} Positionm5<-which(goodones[,3] %in% TRUE) -if (length(Positionm5)<1){Positionm5<-which(bareSDs[,3]==max(bareSDs[,3]))} +if (length(Positionm5)<3){Positionm5<-match(c(bareSDs[,3][order(bareSDs[,3])[19:20]]),bareSDs[,3])} Positionm4<-which(goodones[,4] %in% TRUE) -if (length(Positionm4)<1){Positionm4<-which(bareSDs[,4]==max(bareSDs[,4]))} +if (length(Positionm4)<3){Positionm4<-match(c(bareSDs[,4][order(bareSDs[,4])[19:20]]),bareSDs[,4])} Positionm3<-which(goodones[,5] %in% TRUE) -if (length(Positionm3)<1){Positionm3<-which(bareSDs[,5]==max(bareSDs[,5]))} +if (length(Positionm3)<3){Positionm3<-match(c(bareSDs[,5][order(bareSDs[,5])[19:20]]),bareSDs[,5])} Positionm2<-which(goodones[,6] %in% TRUE) -if (length(Positionm2)<1){Positionm2<-which(bareSDs[,6]==max(bareSDs[,6]))} +if (length(Positionm2)<3){Positionm2<-match(c(bareSDs[,6][order(bareSDs[,6])[19:20]]),bareSDs[,6])} Positionm1<-which(goodones[,7] %in% TRUE) -if (length(Positionm1)<1){Positionm1<-which(bareSDs[,7]==max(bareSDs[,7]))} +if (length(Positionm1)<3){Positionm1<-match(c(bareSDs[,7][order(bareSDs[,7])[19:20]]),bareSDs[,7])} Positiond0<-which(goodones[,8] %in% TRUE) -if (length(Positiond0)<1){Positiond0<-which(bareSDs[,8]==max(bareSDs[,8]))} +#if (length(Positiond0)<3){Positiond0<-bareSDs[,8][order(bareSDs[,8])[1:2]]} Positionp1<-which(goodones[,9] %in% TRUE) -if (length(Positionp1)<1){Positionp1<-which(bareSDs[,9]==max(bareSDs[,9]))} +if (length(Positionp1)<3){Positionp1<-match(c(bareSDs[,9][order(bareSDs[,9])[19:20]]),bareSDs[,9])} Positionp2<-which(goodones[,10] %in% TRUE) -if (length(Positionp2)<1){Positionp2<-which(bareSDs[,10]==max(bareSDs[,10]))} +if (length(Positionp2)<3){Positionp2<-match(c(bareSDs[,10][order(bareSDs[,10])[19:20]]),bareSDs[,10])} Positionp3<-which(goodones[,11] %in% TRUE) -if (length(Positionp3)<1){Positionp3<-which(bareSDs[,11]==max(bareSDs[,11]))} +if (length(Positionp3)<3){Positionp3<-match(c(bareSDs[,11][order(bareSDs[,11])[19:20]]),bareSDs[,11])} Positionp4<-which(goodones[,12] %in% TRUE) -if (length(Positionp4)<1){Positionp4<-which(bareSDs[,12]==max(bareSDs[,12]))} +if (length(Positionp4)<3){Positionp4<-match(c(bareSDs[,12][order(bareSDs[,12])[19:20]]),bareSDs[,12])} Positionp5<-which(goodones[,13] %in% TRUE) -if (length(Positionp5)<1){Positionp5<-which(bareSDs[,13]==max(bareSDs[,13]))} +if (length(Positionp5)<3){Positionp5<-match(c(bareSDs[,13][order(bareSDs[,13])[19:20]]),bareSDs[,13])} Positionp6<-which(goodones[,14] %in% TRUE) -if (length(Positionp6)<1){Positionp6<-which(bareSDs[,14]==max(bareSDs[,14]))} +if (length(Positionp6)<3){Positionp6<-match(c(bareSDs[,14][order(bareSDs[,14])[19:20]]),bareSDs[,14])} Positionp7<-which(goodones[,15] %in% TRUE) -if (length(Positionp7)<1){Positionp7<-which(bareSDs[,15]==max(bareSDs[,15]))} +if (length(Positionp7)<3){Positionp7<-match(c(bareSDs[,15][order(bareSDs[,15])[19:20]]),bareSDs[,15])} + aa_props2 <- c("1"="A", "2"="C", "3"="D", "4"="E", "5"="F", "6"="G", "7"="H", "8"="I", "9"="K", "10"="L", "11"="M", "12"="N", "13"="P", "14"="Q", "15"="R", "16"="S", "17"="T", "18"="V", "19"="W", "20"="Y") @@ -1714,66 +1778,79 @@ AblThresh<-as.numeric(Abl[24,1]) AblTrueThresh<-((AblThresh*AblNorm)/(100-AblThresh)) AblActive<-unlist(AblGeneratedScores)>AblTrueThresh +if (TodaysKinase=="ABL"){AblActive<-rep(0,times=nrow(GeneratedPeptides))} ArgNorm<-1/as.numeric(Arg[22,1]) ArgThresh<-as.numeric(Arg[24,1]) ArgTrueThresh<-((ArgThresh*ArgNorm)/(100-ArgThresh)) ArgActive<-unlist(ArgGeneratedScores)>ArgTrueThresh +if (TodaysKinase=="ARG"){ArgActive<-rep(0,times=nrow(GeneratedPeptides))} BtkNorm<-1/as.numeric(Btk[22,1]) BtkThresh<-as.numeric(Btk[24,1]) BtkTrueThresh<-((BtkThresh*BtkNorm)/(100-BtkThresh)) BtkActive<-unlist(BtkGeneratedScores)>BtkTrueThresh +if (TodaysKinase=="BTK"){BtkActive<-rep(0,times=nrow(GeneratedPeptides))} CskNorm<-1/as.numeric(Csk[22,1]) CskThresh<-as.numeric(Csk[24,1]) CskTrueThresh<-((CskThresh*CskNorm)/(100-CskThresh)) CskActive<-(CskGeneratedScores)>CskTrueThresh +if (TodaysKinase=="CSK"){CskActive<-rep(0,times=nrow(GeneratedPeptides))} FynNorm<-1/as.numeric(Fyn[22,1]) FynThresh<-as.numeric(Fyn[24,1]) FynTrueThresh<-((FynThresh*FynNorm)/(100-FynThresh)) FynActive<-unlist(FynGeneratedScores)>FynTrueThresh +if (TodaysKinase=="FYN"){FynActive<-rep(0,times=nrow(GeneratedPeptides))} HckNorm<-1/as.numeric(Hck[22,1]) HckThresh<-as.numeric(Hck[24,1]) HckTrueThresh<-((HckThresh*HckNorm)/(100-HckThresh)) HckActive<-unlist(HckGeneratedScores)>HckTrueThresh +if (TodaysKinase=="HCK"){HckActive<-rep(0,times=nrow(GeneratedPeptides))} JAK2Norm<-1/as.numeric(JAK2[22,1]) JAK2Thresh<-as.numeric(JAK2[24,1]) JAK2TrueThresh<-((JAK2Thresh*JAK2Norm)/(100-JAK2Thresh)) JAk2Active<-unlist(JAK2GeneratedScores)>JAK2TrueThresh +if (TodaysKinase=="JAK2"){JAk2Active<-rep(0,times=nrow(GeneratedPeptides))} LckNorm<-1/as.numeric(Lck[22,1]) LckThresh<-as.numeric(Lck[24,1]) LckTrueThresh<-((LckThresh*LckNorm)/(100-LckThresh)) LckActive<-unlist(LckGeneratedScores)>LckTrueThresh +if (TodaysKinase=="LCK"){LckActive<-rep(0,times=nrow(GeneratedPeptides))} LynNorm<-1/as.numeric(Lyn[22,1]) LynThresh<-as.numeric(Lyn[24,1]) LynTrueThresh<-((LynThresh*LynNorm)/(100-LynThresh)) LynActive<-unlist(LynGeneratedScores)>LynTrueThresh +if (TodaysKinase=="LYN"){LynActive<-rep(0,times=nrow(GeneratedPeptides))} Pyk2Norm<-1/as.numeric(Pyk2[22,1]) Pyk2Thresh<-as.numeric(Pyk2[24,1]) Pyk2TrueThresh<-((Pyk2Thresh*Pyk2Norm)/(100-Pyk2Thresh)) Pyk2Active<-unlist(Pyk2GeneratedScores)>Pyk2TrueThresh +if (TodaysKinase=="PYK2"){Pyk2Active<-rep(0,times=nrow(GeneratedPeptides))} SrcNorm<-1/as.numeric(Src[22,1]) SrcThresh<-as.numeric(Src[24,1]) SrcTrueThresh<-((SrcThresh*SrcNorm)/(100-SrcThresh)) SrcActive<-unlist(SrcGeneratedScores)>SrcTrueThresh +if (TodaysKinase=="SRC"){SrcActive<-rep(0,times=nrow(GeneratedPeptides))} SykNorm<-1/as.numeric(Syk[22,1]) SykThresh<-as.numeric(Syk[24,1]) SykTrueThresh<-((SykThresh*SykNorm)/(100-SykThresh)) SykActive<-unlist(SykGeneratedScores)>SykTrueThresh +if (TodaysKinase=="SYK"){SykActive<-rep(0,times=nrow(GeneratedPeptides))} YesNorm<-1/as.numeric(Yes[22,1]) YesThresh<-as.numeric(Yes[24,1]) YesTrueThresh<-((YesThresh*YesNorm)/(100-YesThresh)) YesActive<-unlist(YesGeneratedScores)>YesTrueThresh +if (TodaysKinase=="YES"){YesActive<-rep(0,times=nrow(GeneratedPeptides))} AllActive<-AblActive+ArgActive+BtkActive+CskActive+FynActive+HckActive+JAk2Active+LckActive+LynActive+Pyk2Active+SrcActive+SykActive+YesActive #Btkactive+ @@ -1884,23 +1961,24 @@ #create the MCC table -threshold<-c(1:100) -threshold<-order(threshold,decreasing = TRUE) +threshold<-c(1:100,(1:9)/10,(1:9)/100,0,-.1) +threshold<-threshold[order(threshold,decreasing = TRUE)] +threshold -Truepositives<-c(1:100) -Falsenegatives<-c(1:100) -Sensitivity<-c(1:100) -TrueNegatives<-c(1:100) -FalsePositives<-c(1:100) -Specificity<-c(1:100) -Accuracy<-c(1:100) -MCC<-c(1:100) -EER<-c(1:100) +Truepositives<-c(1:120) +Falsenegatives<-c(1:120) +Sensitivity<-c(1:120) +TrueNegatives<-c(1:120) +FalsePositives<-c(1:120) +Specificity<-c(1:120) +Accuracy<-c(1:120) +MCC<-c(1:120) +EER<-c(1:120) #MAKE DAMN SURE THAT THE ACCESSION NUMBERS FOLLOW THE MOTIFS -for (z in 1:100) { - thres<-101-z +for (z in 1:120) { + thres<-threshold[z] Truepositives[z]<-length(PositiveWeirdScores[PositiveWeirdScores>=(thres)]) Falsenegatives[z]<-nrow(positivesubstrates)-Truepositives[z] Sensitivity[z]<-Truepositives[z]/(Falsenegatives[z]+Truepositives[z]) @@ -1909,10 +1987,10 @@ FalsePositives[z]<-nrow(NegativeSubstrateList)-TrueNegatives[z] Specificity[z]<-1-(TrueNegatives[z]/(FalsePositives[z]+TrueNegatives[z])) Accuracy[z]<-100*(Truepositives[z]+TrueNegatives[z])/(Falsenegatives[z]+FalsePositives[z]+TrueNegatives[z]+Truepositives[z]) - MCC[z]<-((Truepositives[z]+TrueNegatives[z])-(Falsenegatives[z]+FalsePositives[z]))/sqrt(round(round(Truepositives[z]+Falsenegatives[z])*round(TrueNegatives[z]+FalsePositives[z])*round(Truepositives[z]+FalsePositives[z])*round(TrueNegatives[z]+Falsenegatives[z]))) + MCC[z]<-((Truepositives[z]*TrueNegatives[z])-(Falsenegatives[z]*FalsePositives[z]))/sqrt(round(round(Truepositives[z]+Falsenegatives[z])*round(TrueNegatives[z]+FalsePositives[z])*round(Truepositives[z]+FalsePositives[z])*round(TrueNegatives[z]+Falsenegatives[z]))) EER[z]<-.01*(((1-(Sensitivity[z]))*(Truepositives[z]+Falsenegatives[z]))+(Specificity[z]*(1-(Truepositives[z]+Falsenegatives[z])))) } -Characterization<-cbind.data.frame(threshold,Truepositives,Falsenegatives,Sensitivity,TrueNegatives,FalsePositives,Specificity,Accuracy,MCC,EER) +Characterization<-cbind.data.frame(threshold,Truepositives,Falsenegatives,Sensitivity,TrueNegatives,FalsePositives,Specificity,MCC,EER) positiveheader<-c(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,"RPMS","PMS") positivewithscores<-rbind.data.frame(positiveheader,positivewithscores) @@ -1931,4 +2009,5 @@ # header<-colnames(RanksPeptides) # RanksPeptides<-rbind.data.frame(header,RanksPeptides) +write.table(x="Off Target Kinase activity (your kinase of interest should have zeros here because it is ON-target)",file = FILENAME3,append = FALSE,row.names = FALSE,col.names = TRUE,sep = ",") write.table(RanksPeptides,file = FILENAME3,append = FALSE,row.names = FALSE,col.names = TRUE,sep = ",")