# HG changeset patch
# User fubar
# Date 1315544868 14400
# Node ID 1959becd059294ed7070f28940553f51bddfb68b
Initial upload of files for DGE tool
diff -r 000000000000 -r 1959becd0592 rgDGE.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rgDGE.py Fri Sep 09 01:07:48 2011 -0400
@@ -0,0 +1,338 @@
+# Copyright ross lazarus
+# september 2011
+# all rights reserved
+# for the Rgenetics project
+# all rights reserved
+# licensed to you under the terms of the LGPL as documented at http://www.gnu.org/copyleft/lesser.html
+
+import sys
+import shutil
+import subprocess
+import os
+import time
+import tempfile
+import optparse
+
+progname = os.path.split(sys.argv[0])[1]
+myversion = 'V000.2 September 2011'
+verbose = False
+debug = False
+
+
+galhtmlprefix = """
+
+
+
+
+
+
+
+
+
+"""
+galhtmlattr = """Galaxy Rgenetics tool output %s run at %s """
+galhtmlpostfix = """
\n"""
+
+DGESCRIPT="""
+#### edgeR.Rscript
+#
+# Performs DGE on a count table containing n replicates of two conditions
+#
+# Parameters
+#
+# 1 - Output Dir
+
+# Writen by: S.Lunke and A.Kaspi
+####
+
+
+usage <- function(){
+ print("#### edgeR.R", quote=F)
+ print("", quote=F)
+ print("Performs DGE on a count table containing n replicates of two conditions", quote=F)
+ print("", quote=F)
+ print("USAGE: Rscript edgeR.R ", quote=F)
+ print("", quote=F)
+ print(" Parameters", quote=F)
+ print("", quote=F)
+ print(" 1 - Output Dir", quote=F)
+ print(" 2 - Input File", quote=F)
+ print(" 3 - Treatment name", quote=F)
+ print(" 4 - Treatment Columns i.e. 3,4,6", quote=F)
+ print(" 5 - Control name", quote=F)
+ print(" 6 - Control Columns i.e. 2,7,8", quote=F)
+ print(" 7 - Output tabular file name", quote=F)
+
+
+ print("", quote=F)
+ print(" Interface writen by: S.Lunke and A.Kaspi", quote=F)
+ print(" Makes extensive use of the edgeR software - Mark D. Robinson, Davis J. McCarthy, Gordon K. Smyth, PMCID: PMC2796818", quote=F)
+ print("####", quote=F)
+ q()
+ }
+
+
+## edgeIt runs edgeR
+edgeIt <- function (Input,group,outputfilename) {
+
+
+ ## Error handling
+ if (length(unique(group))!=2){
+ print("Number of conditions identified in experiment does not equal 2")
+ q()
+ }
+
+ #The workhorse
+ require(edgeR)
+
+ ## Setup DGEList object
+ DGEList <- DGEList(Count_Matrix, group = group)
+
+ #Extract T v C names
+ TName=unique(group)[1]
+ CName=unique(group)[2]
+
+ # Outfile name - we need something more predictable than
+ # outfile <- paste(Out_Dir,"/",TName,"_Vs_",CName, sep="")
+ # since it needs to be renamed and squirted into the history so added as a paramter
+
+ # Filter all tags that have less than one read per million in half + 1 or more libraries. n = ceiling(length(colnames(DGEList))/2)+1
+ n = ceiling(length(colnames(DGEList))/2)+1
+ DGEList <- DGEList[rowSums(1e+06 * DGEList$counts/expandAsMatrix(DGEList$samples$lib.size, dim(DGEList)) > 1) >= n, ]
+
+
+ ## Run tagwise dispersion test and DGE analysis
+ prior.n <- ceiling(50/(length(group)-length(2)))
+ DGEList <- calcNormFactors(DGEList)
+ DGEList <- estimateCommonDisp(DGEList)
+ DGEList <- estimateTagwiseDisp(DGEList, prior.n=prior.n, trend=T, grid.length=1000)
+ DE <- exactTest(DGEList, common.disp=F)
+
+ ## Normalized data RPM
+ normData <- (1e+06 * DGEList$counts/expandAsMatrix(DGEList$samples$lib.size, dim(DGEList)))
+ colnames(normData) <- paste( "norm",colnames(normData),sep="_")
+
+
+ #Prepare our output file
+ output <- cbind(
+ Name=as.character(rownames(DGEList$counts)),
+ DE$table,
+ adj.p.value=p.adjust(DE$table$p.value, method="fdr"),
+ Dispersion=DGEList$tagwise.dispersion,normData,
+ DGEList$counts
+ )
+ soutput = output[order(output$p.val),] # sorted into p value order - for quick toptable
+ #Write output
+ write.table(soutput,outputfilename, quote=FALSE, sep="\t",row.names=F)
+
+
+ ## Plot MAplot
+ pdf("Smearplot.pdf")
+ #TODO colour siggenes
+ nsig = sum(output$adj.p.value < 0.05)
+ deTags = rownames(topTags(DE,n=nsig)$table)
+ plotSmear(DGEList,de.tags=deTags,main=paste("Smear Plot for ",TName,' Vs ',CName,' (FDR@0.05, N = ',nsig,')',sep=''))
+ grid(col="blue")
+ dev.off()
+
+ ## Plot MDS
+ pdf("MDSplot.pdf")
+ plotMDS.dge(DGEList,xlim=c(-2,1),main=paste("MDS Plot for",TName,'Vs',CName),cex=0.5)
+ grid(col="blue")
+ dev.off()
+
+ #Return our main table
+ output
+
+} #Done
+
+#### MAIN ####
+
+parameters <- commandArgs(trailingOnly=T)
+
+## Error handling
+if (length(parameters) < 6){
+ print("Not enough Input files supplied. Specify at least two input files.", quote=F)
+ print("", quote=F)
+ usage()
+}
+
+Out_Dir <- as.character(parameters[1])
+Input <- as.character(parameters[2])
+TreatmentName <- as.character(parameters[3])
+TreatmentCols <- as.character(parameters[4])
+ControlName <- as.character(parameters[5])
+ControlCols <- as.character(parameters[6])
+outputfilename <- as.character(parameters[7])
+
+
+#Set our columns
+TCols = as.numeric(strsplit(TreatmentCols,",")[[1]])-1 #+1
+CCols = as.numeric(strsplit(ControlCols,",")[[1]])-1 #+1
+cat('## got TCols=')
+cat(TCols)
+cat('; CCols=')
+cat(CCols)
+cat('\n')
+
+
+group<-strsplit(TreatmentCols,",")[[1]]
+
+## Create output dir if non existent
+ if (file.exists(Out_Dir) == F) dir.create(Out_Dir)
+
+Count_Matrix<-read.table(Input,header=T,row.names=1,sep='\t') #Load tab file assume header
+Count_Matrix<-Count_Matrix[,c(TCols,CCols)] #extract columns we care about
+group<-c(rep(TreatmentName,length(TCols)), rep(ControlName,length(CCols)) ) #Build a group descriptor
+colnames(Count_Matrix) <- paste(group,colnames(Count_Matrix),sep="_") #Relable columns
+
+
+results <- edgeIt(Input,group,outputfilename) #Run the main function
+# for the log
+sessionInfo()
+
+"""
+
+def timenow():
+ """return current time as a string
+ """
+ return time.strftime('%d/%m/%Y %H:%M:%S', time.localtime(time.time()))
+
+class DGE:
+ """class is a wrapper for DGE - note hard coded script above so it's all in one place for Galaxy"""
+
+ def __init__(self,myName=None,opts=None):
+ """
+ Rscript edgeR.R results HGHN_mRNA_counts.txt HiGly 2,3 Control 1,4
+Out_Dir <- as.character(parameters[1])
+Input <- as.character(parameters[2])
+TreatmentName <- as.character(parameters[3])
+TreatmentCols <- as.character(parameters[4])
+ControlName <- as.character(parameters[5])
+ControlCols <- as.character(parameters[6])
+outputfilename <- as.character(parameters[7])
+ """
+ self.thumbformat = 'jpg'
+ self.tlog = os.path.join(opts.output_dir,"DGE_runner.log")
+ self.opts = opts
+ self.myName=myName
+ self.cl = []
+ a = self.cl.append
+ rfname = os.path.join(opts.output_dir,'DGE.R')
+ rscript = open(rfname,'w')
+ rscript.write(DGESCRIPT)
+ rscript.close()
+ a('Rscript')
+ a(rfname)
+ a("%s" % opts.output_dir)
+ a("%s" % opts.input_matrix)
+ a("%s" % opts.treatname)
+ a("%s" % opts.treatcols)
+ a("%s" % opts.ctrlname)
+ a("%s" % opts.ctrlcols)
+ a("%s" % opts.output_tab)
+
+ def compressPDF(self,inpdf=None,thumbformat='png'):
+ """need absolute path to pdf
+ """
+ assert os.path.isfile(inpdf), "## Input %s supplied to %s compressPDF not found" % (inpdf,self.myName)
+ sto = open(self.tlog,'a')
+ outpdf = '%s_compressed' % inpdf
+ cl = ["gs", "-sDEVICE=pdfwrite", "-dNOPAUSE", "-dBATCH", "-sOutputFile=%s" % outpdf,inpdf]
+ x = subprocess.Popen(cl,stdout=sto,stderr=sto,cwd=self.opts.output_dir)
+ retval1 = x.wait()
+ if retval1 == 0:
+ os.unlink(inpdf)
+ shutil.move(outpdf,inpdf)
+ outpng = '%s.%s' % (os.path.splitext(inpdf)[0],thumbformat)
+ cl2 = ['convert', inpdf, outpng]
+ x = subprocess.Popen(cl2,stdout=sto,stderr=sto,cwd=self.opts.output_dir)
+ retval2 = x.wait()
+ sto.close()
+ retval = retval1 or retval2
+ return retval
+
+ def runDGE(self):
+ """
+ """
+ sto = open(self.tlog,'w')
+ x = subprocess.Popen(' '.join(self.cl),shell=True,stdout=sto,stderr=sto,cwd=self.opts.output_dir)
+ retval = x.wait()
+ sto.close()
+ flist = os.listdir(self.opts.output_dir)
+ flist.sort()
+ html = [galhtmlprefix % progname,]
+ html.append('
Galaxy DGE outputs run at %s
Click on the images below to download high quality PDF versions\n' % timenow())
+ if len(flist) > 0:
+ html.append('
\n')
+ for fname in flist:
+ dname,e = os.path.splitext(fname)
+ if e.lower() == '.pdf' : # compress and make a thumbnail
+ thumb = '%s.%s' % (dname,self.thumbformat)
+ pdff = os.path.join(self.opts.output_dir,fname)
+ retval = self.compressPDF(inpdf=pdff,thumbformat=self.thumbformat)
+ if retval == 0:
+ s= '