Mercurial > repos > yufei-luo > s_mart
view SMART/DiffExpAnal/DESeqTools/anadiffGenes2conds.R @ 31:0ab839023fe4
Uploaded
author | m-zytnicki |
---|---|
date | Tue, 30 Apr 2013 14:33:21 -0400 |
parents | 94ab73e8a190 |
children |
line wrap: on
line source
# Analyse differentielle de donnees d expression par gene # avec DESeq # 2 conditions args <- commandArgs() #print(args[1]) #print(args[2]) #print(args[3]) #print(args[4]) #print(args[5]) #print(args[6]) #output file names #print(args[7]) # HTML file name #print(args[8]) # HTML file all images directory #print(args[9]) # complete xls file name #print(args[10])# UP xls file name #print(args[11]) #Down xls file name #print(args[12]) #the executable scipt (for getting the path) library(R2HTML) library(R.utils) #run example: projectName <- "DESeqAnalysis" analysisVersion <- "V1" # fitType=local, sharingMode=fit-only, method=blind rawDir <- "raw" targetFile <- args[4] header <- as.integer(args[5]) #si on a header ou pas, si on a, header=1, sinon header=0 withOutReplicates <- as.integer(args[6]) #get the directory to write the results tab <- splitByPattern(args[7], pattern="/") res_dir <- "" for (e in tab[1:length(tab)-1]) { res_dir <- paste(res_dir, e, sep="")} #get the html output file name OUT_HTMLname <- args[7] #get the images directory to write to OUT_imgDir <- args[8] #if the directory dosen't existe, we should create it first alpha <- 0.05 adjMethod <- "BH" outfile <- T runningScriptTab <- splitByPattern(args[12], pattern="/") RfuncDir <- "" for (r in runningScriptTab[1:length(runningScriptTab)-1]) { RfuncDir <- paste(RfuncDir, r, sep="")} #find the path of executable script RfuncDir <- paste(RfuncDir, "DESeqTools/", sep="") #define the function files path # Dossier contenant les fonctions print(RfuncDir) source( paste(RfuncDir, "RNAseqFunctions.R", sep="/") ) # Chargement des packages et des fonctions library(DESeq) RNAseqFunctions(RfuncDir) # Chargement du target file target <- loadTargetFile( targetFile, header ) # Chargement des donnees, construction d'une table de comptages par gene #have changed rawCounts <- loadCountData( target, header ) conds <- unique(target$group) cond1 <- as.character(conds[1]) cond2 <- as.character(conds[!conds == conds[1]]) rawCounts <- HTseqClean( rawCounts ) # Transformation en matrice de comptages counts <- raw2counts( rawCounts )[[1]] # Nombre de reads par echantillon OUT_barplotTCName <- paste(OUT_imgDir, "barplotTC.png", sep="/") barplotTC( counts, target$group, OUT_barplotTCName, out=outfile ) # Proportion comptages nuls OUT_barplotNulName <- paste(OUT_imgDir, "barplotNul.png", sep="/") barplotNul( counts, target$group, OUT_barplotNulName, out=outfile ) # Suppression comptages nuls counts <- removeNul( counts )[[1]] # Density plot OUT_densityPlotName <- paste(OUT_imgDir, "densityPlot.png", sep="/") densityPlot( counts, target$group, OUT_densityPlotName, out=outfile ) # Boxplot OUT_boxplotCountsName <- paste(OUT_imgDir, "boxplotCounts.png", sep="/") boxplotCounts( counts, target$group, type = c("raw", "norm"), OUT_boxplotCountsName, out=outfile ) # Sequence majoritaire OUT_majSequenceName <- paste(OUT_imgDir, "majSequence.png", sep="/") majSequence( counts, target$group, OUT_majSequenceName, out=outfile ) # ScatterPlot between two samples OUT_scatterPlot <- paste(OUT_imgDir, "scatterPlot.png", sep="/") pairwiseScatterPlots(counts, target, OUT_scatterPlot, out=outfile, pdffile=FALSE) # SERE coefficient calculation (Poisson hypothesis for replicates techiques), to know if the variability between the réplicates or the conditons is hight or not. coef <- pairwiseSERE(counts) print(coef) coef # Creation structure de donnees cds, !! we use newCountDataset because that we have first column not numeric, and DESeq dosen't take non numeric values. cds <- newCountDataSet( counts, target$group ) # Diagnostic for clustering of non-normalized samples OUT_clusterPlot_before <- paste(OUT_imgDir, "clusteringOfSamplesBefore.png", sep="/") clusterPlot(cds, OUT_clusterPlot_before, out=outfile) # Normalisation (calcul des lib size factors ) cds <- estimateSizeFactors( cds ) # Estimation de la dispersion # parametres: # method: how samples are pooled to estimate dispersion. If no replicates use "blind" # sharingMode: how variance estimate is computed with respect to the fitted line. # "Maximum" is the most conservative (max between fit and estimation), "fit-only" keeps the estimated value # fitType: refers to the model. "Local" is the published model, "parametric" is glm-based (may not converge), now we use "parametric" as default value. #in this case, without replicates if(withOutReplicates!=0){ cds <- estimateDispersions( cds, sharingMode="fit-only", method="blind") } else if(withOutReplicates==0){ #cds <- estimateDispersions( cds, sharingMode="fit-only", fitType="local")} cds <- estimateDispersions( cds)} # Analyse differentielle, ajustement BH par defaut res <- nbinomTest( cds, cond1, cond2) # Diagnostic for clustering of normalized samples OUT_clusterPlot <- paste(OUT_imgDir, "clusteringOfSamples.png", sep="/") clusterPlot(cds, OUT_clusterPlot, out=outfile) # Control plot of dispersion estimates OUT_plotDispEstimatesName <- paste(OUT_imgDir, "disperssionEstimates.png", sep="/") plotDispEstimates( cds, OUT_plotDispEstimatesName, out=outfile ) # Distribution of raw p-values OUT_histoRawpName <- paste(OUT_imgDir, "histoRawPvalue.png", sep="/") histoRawp( res, OUT_histoRawpName, out=outfile ) # MAplot showing DE genes OUT_MAplotDEName <- paste(OUT_imgDir, "MAplotDE.png", sep="/") MAplotDE( res, alpha, OUT_MAplotDEName, out=outfile ) # export complete data OUT_completeName <- args[9] complete <- exportComplete( counts, res, target, adjMethod, cond1, cond2, OUT_completeName, out=outfile ) # export significant genes OUT_upName <- args[10] OUT_downName <- args[11] diff <- exportDiff( complete, alpha, adjMethod, OUT_upName, OUT_downName, out=outfile ) # write all images results into an HTML file prefixHTMLname <- tab[length(tab)] #HTMLCSS(file.path(res_dir), filename=prefixHTMLname, CSSfile="R2HTML") HTMLInitFile(file.path(res_dir), filename=prefixHTMLname, BackGroundColor="white") HTML.title("<center>Differential Expression DESeq analysis.", HR=1) HTML.title("<center>BarplotTC: number of RNA-seq reads per sample.", HR=2) HTMLInsertGraph("barplotTC.png") HTML.title("<center>BarplotNul: number of RNA-seq reads that the count is 0 (nul).", HR=2) HTMLInsertGraph("barplotNul.png") HTML.title("<center>DensityPlot: density of each sample.", HR=2) HTMLInsertGraph("densityPlot.png") HTML.title("<center>Boxplot: number of RNA-seq reads distribution per sample.", HR=2) HTMLInsertGraph("boxplotCounts.png") HTML.title("<center>MajorSequence: the proportion of reads associated with the most expressed sequence.", HR=2) HTMLInsertGraph("majSequence.png") HTML.title("<center>ScatterPlot: Scatter plot of samples.", HR=2) HTMLInsertGraph("scatterPlot.png") HTML.title("<center>Clustering Of No-Normalized Samples: Representing the no-normalized samples in Diagnostic.", HR=2) HTMLInsertGraph("clusteringOfSamplesBefore.png") HTML.title("<center>Clustering Of Normalized Samples: Representing the normalized samples in Diagnostic.", HR=2) HTMLInsertGraph("clusteringOfSamples.png") HTML.title("<center>DispersionEstimates: representing dispersion estimates vs mean expression.", HR=2) HTMLInsertGraph("disperssionEstimates.png") HTML.title("<center>HistoRawPValue: histogram of raw p-value.", HR=2) HTMLInsertGraph("histoRawPvalue.png") HTML.title("<center>MAplotDE: the differentially expressed genes (red point).", HR=2) HTMLInsertGraph("MAplotDE.png") HTMLEndFile() absoluPrefixHTMLname <- paste(res_dir, prefixHTMLname, sep="") outName <- paste(absoluPrefixHTMLname, ".html", sep="") # change name is to be adapted into Galaxy file.rename(outName, OUT_HTMLname)