# HG changeset patch # User ning # Date 1402949856 14400 # Node ID 97d36e663171141a7f96a993a20a3de51068c62b # Parent fdbabf28d095e9942e799389413cd5b83c1e8a08 Deleted selected files diff -r fdbabf28d095 -r 97d36e663171 ChoosePatterns.R --- a/ChoosePatterns.R Mon Jun 16 16:16:43 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,27 +0,0 @@ -sink(file="/tmp/none") -sink("/dev/null") -options(warn=-1) -options(echo=F) - -#invisible("EBSeq") -suppressMessages(library("EBSeq")) - -args <- commandArgs(trailingOnly = T) -inputfile <- args[1] -Idx <- args[2] -outputfile <- args[3] - -print(args) - -ReadIn=read.table(inputfile,stringsAsFactors=F,header=T, sep="\t") - -IndexIn=strsplit(Idx,split=",")[[1]] -Index=as.numeric(IndexIn) - -Mat=data.matrix(ReadIn) - -Out=Mat[Index,] - - -write.table(Out,file=outputfile,quote=F,col.names=T,row.names=T,sep = "\t") - diff -r fdbabf28d095 -r 97d36e663171 ChoosePatterns.xml --- a/ChoosePatterns.xml Mon Jun 16 16:16:43 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,19 +0,0 @@ - - Choose patterns of interests in a multiple condition design - R --quiet --slave --file=$GALAXY_ROOT_DIR/tools/EBSeq/ChoosePatterns.R --args $All_Possible_Patterns $Rows_of_interests $output - - - - - - - - - -Input could be obtained from Get All Possible Patterns Function - - - - - diff -r fdbabf28d095 -r 97d36e663171 EBGeneMultiCondTest.R --- a/EBGeneMultiCondTest.R Mon Jun 16 16:16:43 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,65 +0,0 @@ -sink(file="/tmp/none") -sink("/dev/null") -options(warn=-1) -options(echo=F) - -invisible("EBSeq") -suppressMessages(library("EBSeq")) - -args <- commandArgs(trailingOnly = T) -inputfile <- args[1] -WhetherSampleName <- args[2] -CondIn <- args[3] -PatternFile <- args[4] -outputfile <- args[5] -MAP.out<-args[6] -Sizesout <-args[7] - -#write.table(PatternFile,outputfile,quote=F,col.names=T,row.names=T,sep = "\t") - -#write.table(args,outputfile,quote=F,col.names=T,row.names=T,sep = "\t") - -Conditions=strsplit(CondIn,split=",")[[1]] - -if(WhetherSampleName=="y"){ - ReadIn=read.table(inputfile,stringsAsFactors=F,header=T, sep="\t") - Names=names(ReadIn)[-1] - } -if(WhetherSampleName=="n"){ - ReadIn=read.table(inputfile,stringsAsFactors=F,header=F, sep="\t") - Names=paste0("S",1:length(Conditions)) -} - -PatternIn=read.table(PatternFile,stringsAsFactors=F,header=T,sep="\t") - -if(class(ReadIn[[1]])=="character"){ - GeneMat=do.call(cbind,ReadIn[-1]) - rownames(GeneMat)=ReadIn[[1]] - colnames(GeneMat)=Names -} -if(class(ReadIn[[1]])=="numeric"){ - GeneMat=data.matrix(ReadIn) - colnames(GeneMat)=Names - } - -Patterns=data.matrix(PatternIn) - - -Sizes=MedianNorm(GeneMat) -EBOut=EBMultiTest(Data=GeneMat,Conditions=as.factor(Conditions), - AllParti=Patterns,sizeFactors=Sizes, maxround=5) -PPout=GetMultiPP(EBOut) -MultiPP=PPout$PP -MultiMAP=PPout$MAP -Data.norm=round(GetNormalizedMat(GeneMat, Sizes),2) - -Mat=cbind(MultiMAP,Data.norm[names(MultiMAP),]) - -colnames(Mat)= -c("MAP",Names) -options(warn=-1) - -write.table(round(MultiPP,2),file=outputfile,quote=F,col.names=T,row.names=T,sep = "\t") -write.table(Mat,file=MAP.out ,quote=F,col.names=T,row.names=T,sep = "\t") -write.table(Sizes,file=Sizesout,quote=F,col.names=F,row.names=F,sep = "\t") - diff -r fdbabf28d095 -r 97d36e663171 EBGeneMultiCondTest.xml --- a/EBGeneMultiCondTest.xml Mon Jun 16 16:16:43 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,34 +0,0 @@ - - Runs EBSeq to find DE genes across multiple (more than two) conditions - R --quiet --slave --file=$GALAXY_ROOT_DIR/tools/EBSeq/EBGeneMultiCondTest.R --args $Gene_Expression $First_Row_Sample_Names $Conditions $Patterns $PP_of_each_pattern $Pattern_with_highest_PP $Sizes - - - - - - - - - - - - - - - - - -The input Conditions should have exactly two levels. The length of the Condition vector should be exactly the same as the number of columns in the data file (except the gene names column). - -The patterns of interests could be obtained by function Get All Possible Patterns (and optionally, if there are too many patterns generated, the function Choose Patterns could be used to choose only subset of the patterns.) - -Three output files will be generated. The first file contains the Posterior probability of being each pattern. The second file contains the pattern with highest PP for each gene and the normalized expressions. Genes are with the same order as in input file. -The last file provides the library size factor for each sample. - - - - - - diff -r fdbabf28d095 -r 97d36e663171 EBGeneTwoCondTest.R --- a/EBGeneTwoCondTest.R Mon Jun 16 16:16:43 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,71 +0,0 @@ -sink(file="/tmp/none") -sink("/dev/null") -options(warn=-1) -options(echo=F) - -invisible("EBSeq") -suppressMessages(library("EBSeq")) - -args <- commandArgs(trailingOnly = T) -inputfile <- args[1] -WhetherSampleName <- args[2] -CondIn <- args[3] -FDR <- args[4] -outputfile <- args[5] -Sort.out<-args[6] -Sort.out.FDR <-args[7] -Sizesout <-args[8] - -Conditions=strsplit(CondIn,split=",")[[1]] - -if(WhetherSampleName=="y"){ - ReadIn=read.table(inputfile,stringsAsFactors=F,header=T,sep="\t") - Names=names(ReadIn)[-1] - } -if(WhetherSampleName=="n"){ - ReadIn=read.table(inputfile,stringsAsFactors=F,header=F,sep="\t") - Names=paste0("S",1:length(Conditions)) -} - -if(class(ReadIn[[1]])=="character"){ - GeneMat=do.call(cbind,ReadIn[-1]) - rownames(GeneMat)=ReadIn[[1]] - colnames(GeneMat)=Names -} -if(class(ReadIn[[1]])=="numeric"){ - GeneMat=data.matrix(ReadIn) - colnames(GeneMat)=Names - } - - -Sizes=MedianNorm(GeneMat) -EBOut=EBTest(Data=GeneMat,Conditions=as.factor(Conditions),sizeFactors=Sizes, maxround=5) -PP=GetPP(EBOut) -PP.sort=sort(PP,decreasing=T) -PP.sort.FDR=PP.sort[which(PP.sort>=1-as.numeric(FDR))] - -Data.norm=GetNormalizedMat(GeneMat, Sizes) -FC=PostFC(EBOut) -realFC=FC[[2]] -postFC=FC[[1]] - -Mat=cbind(PP, realFC[names(PP)], postFC[names(PP)],Data.norm[names(PP),]) -Mat.sort=cbind(PP.sort, realFC[names(PP.sort)], postFC[names(PP.sort)],Data.norm[names(PP.sort),]) - - -if(length(PP.sort.FDR)>1)Mat.sort.FDR=cbind(PP.sort.FDR, realFC[names(PP.sort.FDR)], postFC[names(PP.sort.FDR)],Data.norm[names(PP.sort.FDR),]) - -if(length(PP.sort.FDR)==1)Mat.sort.FDR=matrix( - c(PP.sort.FDR, realFC[names(PP.sort.FDR)], postFC[names(PP.sort.FDR)],Data.norm[names(PP.sort.FDR),]) - ,nrow=1) - -colnames(Mat)=colnames(Mat.sort)= - c("PPDE","RealFC","PosteriorFC",colnames(Data.norm)) -if(length(PP.sort.FDR)>0)colnames(Mat.sort.FDR)= - c("PPDE","RealFC","PosteriorFC",colnames(Data.norm)) - -write.table(round(Mat,2),file=outputfile,quote=F,col.names=T,row.names=T,sep = "\t") -write.table(round(Mat.sort,2),file=Sort.out ,quote=F,col.names=T,row.names=T,sep = "\t") -if(length(PP.sort.FDR)>0)write.table(round(Mat.sort.FDR,2),file=Sort.out.FDR,quote=F,col.names=T,row.names=T,sep = "\t") -write.table(Sizes,file=Sizesout,quote=F,col.names=F,row.names=F,sep = "\t") - diff -r fdbabf28d095 -r 97d36e663171 EBGeneTwoCondTest.xml --- a/EBGeneTwoCondTest.xml Mon Jun 16 16:16:43 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,39 +0,0 @@ - - Runs EBSeq to find DE genes across two conditions - R --quiet --slave --file=$GALAXY_ROOT_DIR/tools/EBSeq/EBGeneTwoCondTest.R --args $Gene_Expression $First_Row_Sample_Names $Conditions $Target_FDR $Output $Sorted_Output $Sorted_Output_with_target_FDR $Sizes - - - - - - - - - - - - - - - - - - -The input Conditions should have exactly two levels. The length of the Condition vector should be exactly the same as the number of -columns in the data file (except the gene names column). - -Four output files will be generated. Each of the first 3 files contains Posterior probability of being -DE (PPDE), Fold Change (RealFC), Posterior Fold Change (PostFC) and normalized gene expressions. -The Four files are: -Genes with the same order as in input file; -Genes sorted by PPDE; DE Genes under target FDR (PPDE>=TargetFDR) -and sorted by PPDE; -Library size factor for each sample. - - - - - diff -r fdbabf28d095 -r 97d36e663171 EBIsoformMultiCondTest.R --- a/EBIsoformMultiCondTest.R Mon Jun 16 16:16:43 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,66 +0,0 @@ -sink(file="/tmp/none") -sink("/dev/null") -options(warn=-1) -options(echo=F) - -invisible("EBSeq") -suppressMessages(library("EBSeq")) - -args <- commandArgs(trailingOnly = T) -inputfile <- args[1] -WhetherSampleName <- args[2] -CondIn <- args[3] -PatternFile <- args[5] -Ig.file <- args[4] -outputfile <- args[6] -MAP.out<-args[7] -Sizesout <-args[8] - -#write.table(args,file=outputfile,quote=F,col.names=T,row.names=T,sep = "\t") - - -Conditions=strsplit(CondIn,split=",")[[1]] -if(WhetherSampleName=="y"){ - ReadIn=read.table(inputfile,stringsAsFactors=F,header=T, sep="\t") - Names=names(ReadIn)[-1] - } -if(WhetherSampleName=="n"){ - ReadIn=read.table(inputfile,stringsAsFactors=F,header=F, sep="\t") - Names=paste0("S",1:length(Conditions)) -} - -PatternIn=read.table(PatternFile,stringsAsFactors=F,header=T,sep="\t") -IgVIn=read.table(Ig.file,stringsAsFactors=F,header=F,sep="\t") -IgV=IgVIn[[1]] - -if(class(ReadIn[[1]])=="character"){ - GeneMat=do.call(cbind,ReadIn[-1]) - rownames(GeneMat)=ReadIn[[1]] - colnames(GeneMat)=Names -} -if(class(ReadIn[[1]])=="numeric"){ - GeneMat=data.matrix(ReadIn) - colnames(GeneMat)=Names - } - -Patterns=data.matrix(PatternIn) - -Sizes=MedianNorm(GeneMat) -#write.table(Conditions,file=outputfile,quote=F,col.names=T,row.names=T,sep = "\t") -EBOut=EBMultiTest(Data=GeneMat,NgVector=IgV,Conditions=as.factor(Conditions), - AllParti=Patterns,sizeFactors=Sizes, maxround=5) -PPout=GetMultiPP(EBOut) -MultiPP=PPout$PP -MultiMAP=PPout$MAP -Data.norm=round(GetNormalizedMat(GeneMat, Sizes),2) - -Mat=cbind(MultiMAP,Data.norm[names(MultiMAP),]) - -colnames(Mat)= -c("MAP",Names) -options(warn=-1) - -write.table(round(MultiPP,2),file=outputfile,quote=F,col.names=T,row.names=T,sep = "\t") -write.table(Mat,file=MAP.out ,quote=F,col.names=T,row.names=T,sep = "\t") -write.table(Sizes,file=Sizesout,quote=F,col.names=F,row.names=F,sep = "\t") - diff -r fdbabf28d095 -r 97d36e663171 EBIsoformMultiCondTest.xml --- a/EBIsoformMultiCondTest.xml Mon Jun 16 16:16:43 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,37 +0,0 @@ - - Runs EBSeq to find DE isoforms across multiple (more than two) conditions - R --quiet --slave --file=$GALAXY_ROOT_DIR/tools/EBSeq/EBIsoformMultiCondTest.R --args $Isoform_Expression $First_Row_Sample_Names $Conditions $Ig_Vector $Patterns_of_Interests $PP_of_each_pattern $Pattern_with_highest_PP $Sizes - - - - - - - - - - - - - - - - - - -The input Conditions should have exactly two levels. The length of the Condition vector should be exactly the same as the number of columns in the data file (except the isoform names column). - -The patterns of interests could be obtained by function Get All Possible Patterns (and optionally, if there are too many patterns generated, the function Choose Patterns could be used to choose only subset of the patterns.) - -The Ig Vector could be generated by the GetIg function or obtained from RSEM output. - -Three output files will be generated. The first file contains the Posterior probability of being each pattern. -The second file contains the pattern with highest PP for each isoform and the normalized expressions.Isoforms are with the same order as in input file. -The last file provides the library size factor for each sample. - - - - - diff -r fdbabf28d095 -r 97d36e663171 EBIsoformTwoCondTest.R --- a/EBIsoformTwoCondTest.R Mon Jun 16 16:16:43 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,75 +0,0 @@ -sink(file="/tmp/none") -sink("/dev/null") -options(warn=-1) -options(echo=F) - -invisible("EBSeq") -suppressMessages(library("EBSeq")) - -args <- commandArgs(trailingOnly = T) -inputfile <- args[1] -WhetherSampleName <- args[2] -CondIn <- args[3] -Ig.file <-args[4] -FDR <- args[5] -outputfile <- args[6] -Sort.out<-args[7] -Sort.out.FDR <-args[8] -Sizesout <-args[9] - - -Conditions=strsplit(CondIn,split=",")[[1]] -if(WhetherSampleName=="y"){ - ReadIn=read.table(inputfile,stringsAsFactors=F,header=T,sep="\t") - Names=names(ReadIn)[-1] - } -if(WhetherSampleName=="n"){ - ReadIn=read.table(inputfile,stringsAsFactors=F,header=F, sep="\t") - Names=paste0("S",1:length(Conditions)) -} - -if(class(ReadIn[[1]])=="character"){ - GeneMat=do.call(cbind,ReadIn[-1]) - rownames(GeneMat)=ReadIn[[1]] - colnames(GeneMat)=Names -} -if(class(ReadIn[[1]])=="numeric"){ - GeneMat=data.matrix(ReadIn) - colnames(GeneMat)=Names - } - -IgVIn=read.csv(Ig.file,stringsAsFactors=F,header=F) -IgV=IgVIn[[1]] -Conditions=strsplit(CondIn,split=",")[[1]] - -Sizes=MedianNorm(GeneMat) -EBOut=EBTest(Data=GeneMat,NgVector=IgV,Conditions=as.factor(Conditions),sizeFactors=Sizes, maxround=5) -PP=GetPP(EBOut) -PP.sort=sort(PP,decreasing=T) -PP.sort.FDR=PP.sort[which(PP.sort>=1-as.numeric(FDR))] - -Data.norm=GetNormalizedMat(GeneMat, Sizes) -FC=PostFC(EBOut) -realFC=FC[[2]] -postFC=FC[[1]] - -Mat=cbind(PP, realFC[names(PP)], postFC[names(PP)],Data.norm[names(PP),]) -Mat.sort=cbind(PP.sort, realFC[names(PP.sort)], postFC[names(PP.sort)],Data.norm[names(PP.sort),]) - - -if(length(PP.sort.FDR)>1)Mat.sort.FDR=cbind(PP.sort.FDR, realFC[names(PP.sort.FDR)], postFC[names(PP.sort.FDR)],Data.norm[names(PP.sort.FDR),]) - -if(length(PP.sort.FDR)==1)Mat.sort.FDR=matrix( - c(PP.sort.FDR, realFC[names(PP.sort.FDR)], postFC[names(PP.sort.FDR)],Data.norm[names(PP.sort.FDR),]) - ,nrow=1) - -colnames(Mat)=colnames(Mat.sort)= - c("PPDE","RealFC","PosteriorFC",colnames(Data.norm)) -if(length(PP.sort.FDR)>0)colnames(Mat.sort.FDR)= - c("PPDE","RealFC","PosteriorFC",colnames(Data.norm)) - -write.table(round(Mat,2),file=outputfile,quote=F,col.names=T,row.names=T,sep = "\t") -write.table(round(Mat.sort,2),file=Sort.out ,quote=F,col.names=T,row.names=T,sep = "\t") -if(length(PP.sort.FDR)>0)write.table(round(Mat.sort.FDR,2),file=Sort.out.FDR,quote=F,col.names=T,row.names=T,sep = "\t") -write.table(Sizes,file=Sizesout,quote=F,col.names=F,row.names=F,sep = "\t") - diff -r fdbabf28d095 -r 97d36e663171 EBIsoformTwoCondTest.xml --- a/EBIsoformTwoCondTest.xml Mon Jun 16 16:16:43 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,41 +0,0 @@ - - Runs EBSeq to find DE isoforms across two conditions - R --quiet --slave --file=$GALAXY_ROOT_DIR/tools/EBSeq/EBIsoformTwoCondTest.R --args $Isoform_Expression $First_Row_Sample_Names $Conditions $Ig_Vector $Target_FDR $Output $Sorted_Output $Sorted_Outpu_with_target_FDR $Sizes - - - - - - - - - - - - - - - - - - - -The input Conditions should have exactly two levels. The length of the Condition vector should be exactly the same as the number of columns in the data file (except the isoform names column). - -The Ig Vector could be generated by the GetIg function or obtained from RSEM output. - -Four output files will be generated. Each of the first 3 files contains Posterior probability of being DE (PPDE), Fold Change (RealFC), Posterior Fold Change (PostFC) and normalized isoform expressions. -The four files are: - -Isoforms with the same order as in input file; -Isoforms sorted by PPDE; DE Isoforms under target FDR (PPDE>=TargetFDR) -and sorted by PPDE; -Library size factor for each sample. - - - - - diff -r fdbabf28d095 -r 97d36e663171 GetAllPatterns.R --- a/GetAllPatterns.R Mon Jun 16 16:16:43 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,23 +0,0 @@ -sink(file="/tmp/none") -sink("/dev/null") -options(warn=-1) -options(echo=F) - -invisible("EBSeq") -suppressMessages(library("EBSeq")) - -args <- commandArgs() -inputfile <- args[6] -outputfile <- args[7] -#PairwisePlots <-args[6] - -print(args) - -Conds=strsplit(inputfile,split=",")[[1]] - - -Out=GetPatterns(Conds) - - -write.table(Out,file=outputfile,quote=F,col.names=T,row.names=T,sep = "\t") - diff -r fdbabf28d095 -r 97d36e663171 GetAllPatterns.xml --- a/GetAllPatterns.xml Mon Jun 16 16:16:43 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,19 +0,0 @@ - - Get all possible patterns in a multiple condition design - R --quiet --slave --file=$GALAXY_ROOT_DIR/tools/EBSeq/GetAllPatterns.R --args $input $output - - - - - - - - - - - - - - diff -r fdbabf28d095 -r 97d36e663171 GetIg.R --- a/GetIg.R Mon Jun 16 16:16:43 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,23 +0,0 @@ -sink(file="/tmp/none") -sink("/dev/null") -options(warn=-1) -options(echo=F) - -invisible("EBSeq") -suppressMessages(library("EBSeq")) - -args <- commandArgs(trailingOnly = T) -inputfile <- args[1] -outputfile <- args[2] - -print(args) - -a1=read.csv(inputfile,stringsAsFactors=F,header=F, sep="\t") -Ng=GetNg(a1[[1]],a1[[2]]) -Ig=Ng$IsoformNgTrun - - - - -write.table(Ig,file=outputfile,quote=F,col.names=F,row.names=F,sep = "\t") - diff -r fdbabf28d095 -r 97d36e663171 GetIg.xml --- a/GetIg.xml Mon Jun 16 16:16:43 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,18 +0,0 @@ - - Get Ig vector from gene-isoform mapping for isoform leve - l DE analysis - R --quiet --slave --file=$GALAXY_ROOT_DIR/tools/EBSeq/GetIg.R --args $input $output - - - - - - - - - - - - - - diff -r fdbabf28d095 -r 97d36e663171 GetNormalizedExpression.R --- a/GetNormalizedExpression.R Mon Jun 16 16:16:43 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,41 +0,0 @@ -sink(file="/tmp/none") -sink("/dev/null") -options(warn=-1) -options(echo=F) - -invisible("EBSeq") -suppressMessages(library("EBSeq")) - -args <- commandArgs(trailingOnly = T) -inputfile <- args[1] -WhetherSampleName <- args[2] -outputfile <- args[3] -Boxplots<-args[4] -Sizesout <-args[5] - -print(args) - -if(WhetherSampleName=="y"){ - ReadIn=read.table(inputfile,stringsAsFactors=F,header=T, sep="\t") - Names=names(ReadIn)[-1] - } -if(WhetherSampleName=="n"){ - ReadIn=read.table(inputfile,stringsAsFactors=F,header=F, sep="\t") -} - -GeneMat=do.call(cbind,ReadIn[-1]) -rownames(GeneMat)=ReadIn[[1]] -if(WhetherSampleName=="y")colnames(GeneMat)=Names - - -Sizes=MedianNorm(GeneMat) - -Data.norm=GetNormalizedMat(GeneMat, Sizes) - -write.table(round(Data.norm,2),file=outputfile,quote=F,col.names=T,row.names=T,sep = "\t") -pdf(Boxplots) -boxplot(Data.norm,log="y",ylim=c(10^-1,10^6)) -dev.off() - -write.table(Sizes,file=Sizesout,quote=F,col.names=F,row.names=F,sep = "\t") - diff -r fdbabf28d095 -r 97d36e663171 GetNormalizedExpression.xml --- a/GetNormalizedExpression.xml Mon Jun 16 16:16:43 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,25 +0,0 @@ - - Calculate normalization factors and get the normalized expression matrix - R --quiet --slave --file=$GALAXY_ROOT_DIR/tools/EBSeq/GetNormalizedExpression.R --args $Gene_Expression $First_Row_Sample_Names $output $Boxplots $Sizes - - - - - - - - - - - - - - - -The function would provide the normalized expression values (normalized by the library size factors) and estimated library size factors. -A boxplot for checking will also be generated. - - - - -