Mercurial > repos > yufei-luo > s_mart
diff SMART/DiffExpAnal/DESeqTools/anadiffGenes2conds.R @ 31:0ab839023fe4
Uploaded
author | m-zytnicki |
---|---|
date | Tue, 30 Apr 2013 14:33:21 -0400 |
parents | 94ab73e8a190 |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/SMART/DiffExpAnal/DESeqTools/anadiffGenes2conds.R Tue Apr 30 14:33:21 2013 -0400 @@ -0,0 +1,191 @@ +# Analyse differentielle de donnees d expression par gene +# avec DESeq +# 2 conditions + +args <- commandArgs() +#print(args[1]) +#print(args[2]) +#print(args[3]) +#print(args[4]) +#print(args[5]) +#print(args[6]) +#output file names +#print(args[7]) # HTML file name +#print(args[8]) # HTML file all images directory +#print(args[9]) # complete xls file name +#print(args[10])# UP xls file name +#print(args[11]) #Down xls file name +#print(args[12]) #the executable scipt (for getting the path) + +library(R2HTML) +library(R.utils) + +#run example: +projectName <- "DESeqAnalysis" +analysisVersion <- "V1" # fitType=local, sharingMode=fit-only, method=blind +rawDir <- "raw" +targetFile <- args[4] +header <- as.integer(args[5]) #si on a header ou pas, si on a, header=1, sinon header=0 +withOutReplicates <- as.integer(args[6]) + +#get the directory to write the results +tab <- splitByPattern(args[7], pattern="/") +res_dir <- "" +for (e in tab[1:length(tab)-1]) { res_dir <- paste(res_dir, e, sep="")} +#get the html output file name +OUT_HTMLname <- args[7] +#get the images directory to write to +OUT_imgDir <- args[8] +#if the directory dosen't existe, we should create it first + +alpha <- 0.05 +adjMethod <- "BH" +outfile <- T +runningScriptTab <- splitByPattern(args[12], pattern="/") +RfuncDir <- "" +for (r in runningScriptTab[1:length(runningScriptTab)-1]) { RfuncDir <- paste(RfuncDir, r, sep="")} #find the path of executable script +RfuncDir <- paste(RfuncDir, "DESeqTools/", sep="") #define the function files path +# Dossier contenant les fonctions +print(RfuncDir) +source( paste(RfuncDir, "RNAseqFunctions.R", sep="/") ) + +# Chargement des packages et des fonctions +library(DESeq) +RNAseqFunctions(RfuncDir) +# Chargement du target file +target <- loadTargetFile( targetFile, header ) +# Chargement des donnees, construction d'une table de comptages par gene +#have changed +rawCounts <- loadCountData( target, header ) +conds <- unique(target$group) +cond1 <- as.character(conds[1]) +cond2 <- as.character(conds[!conds == conds[1]]) +rawCounts <- HTseqClean( rawCounts ) + +# Transformation en matrice de comptages +counts <- raw2counts( rawCounts )[[1]] + +# Nombre de reads par echantillon +OUT_barplotTCName <- paste(OUT_imgDir, "barplotTC.png", sep="/") +barplotTC( counts, target$group, OUT_barplotTCName, out=outfile ) + +# Proportion comptages nuls +OUT_barplotNulName <- paste(OUT_imgDir, "barplotNul.png", sep="/") +barplotNul( counts, target$group, OUT_barplotNulName, out=outfile ) + +# Suppression comptages nuls +counts <- removeNul( counts )[[1]] + +# Density plot +OUT_densityPlotName <- paste(OUT_imgDir, "densityPlot.png", sep="/") +densityPlot( counts, target$group, OUT_densityPlotName, out=outfile ) + +# Boxplot +OUT_boxplotCountsName <- paste(OUT_imgDir, "boxplotCounts.png", sep="/") +boxplotCounts( counts, target$group, type = c("raw", "norm"), OUT_boxplotCountsName, out=outfile ) +# Sequence majoritaire +OUT_majSequenceName <- paste(OUT_imgDir, "majSequence.png", sep="/") +majSequence( counts, target$group, OUT_majSequenceName, out=outfile ) + +# ScatterPlot between two samples +OUT_scatterPlot <- paste(OUT_imgDir, "scatterPlot.png", sep="/") +pairwiseScatterPlots(counts, target, OUT_scatterPlot, out=outfile, pdffile=FALSE) + +# SERE coefficient calculation (Poisson hypothesis for replicates techiques), to know if the variability between the réplicates or the conditons is hight or not. +coef <- pairwiseSERE(counts) +print(coef) +coef +# Creation structure de donnees cds, !! we use newCountDataset because that we have first column not numeric, and DESeq dosen't take non numeric values. +cds <- newCountDataSet( counts, target$group ) + +# Diagnostic for clustering of non-normalized samples +OUT_clusterPlot_before <- paste(OUT_imgDir, "clusteringOfSamplesBefore.png", sep="/") +clusterPlot(cds, OUT_clusterPlot_before, out=outfile) + + +# Normalisation (calcul des lib size factors ) +cds <- estimateSizeFactors( cds ) + +# Estimation de la dispersion +# parametres: + # method: how samples are pooled to estimate dispersion. If no replicates use "blind" + # sharingMode: how variance estimate is computed with respect to the fitted line. + # "Maximum" is the most conservative (max between fit and estimation), "fit-only" keeps the estimated value + # fitType: refers to the model. "Local" is the published model, "parametric" is glm-based (may not converge), now we use "parametric" as default value. +#in this case, without replicates +if(withOutReplicates!=0){ + cds <- estimateDispersions( cds, sharingMode="fit-only", method="blind") +} else if(withOutReplicates==0){ + #cds <- estimateDispersions( cds, sharingMode="fit-only", fitType="local")} + cds <- estimateDispersions( cds)} +# Analyse differentielle, ajustement BH par defaut +res <- nbinomTest( cds, cond1, cond2) + +# Diagnostic for clustering of normalized samples +OUT_clusterPlot <- paste(OUT_imgDir, "clusteringOfSamples.png", sep="/") +clusterPlot(cds, OUT_clusterPlot, out=outfile) + +# Control plot of dispersion estimates +OUT_plotDispEstimatesName <- paste(OUT_imgDir, "disperssionEstimates.png", sep="/") +plotDispEstimates( cds, OUT_plotDispEstimatesName, out=outfile ) + +# Distribution of raw p-values +OUT_histoRawpName <- paste(OUT_imgDir, "histoRawPvalue.png", sep="/") +histoRawp( res, OUT_histoRawpName, out=outfile ) + +# MAplot showing DE genes +OUT_MAplotDEName <- paste(OUT_imgDir, "MAplotDE.png", sep="/") +MAplotDE( res, alpha, OUT_MAplotDEName, out=outfile ) + +# export complete data +OUT_completeName <- args[9] +complete <- exportComplete( counts, res, target, adjMethod, cond1, cond2, OUT_completeName, out=outfile ) + +# export significant genes +OUT_upName <- args[10] +OUT_downName <- args[11] +diff <- exportDiff( complete, alpha, adjMethod, OUT_upName, OUT_downName, out=outfile ) + +# write all images results into an HTML file +prefixHTMLname <- tab[length(tab)] +#HTMLCSS(file.path(res_dir), filename=prefixHTMLname, CSSfile="R2HTML") +HTMLInitFile(file.path(res_dir), filename=prefixHTMLname, BackGroundColor="white") +HTML.title("<center>Differential Expression DESeq analysis.", HR=1) +HTML.title("<center>BarplotTC: number of RNA-seq reads per sample.", HR=2) + HTMLInsertGraph("barplotTC.png") + +HTML.title("<center>BarplotNul: number of RNA-seq reads that the count is 0 (nul).", HR=2) + HTMLInsertGraph("barplotNul.png") + +HTML.title("<center>DensityPlot: density of each sample.", HR=2) + HTMLInsertGraph("densityPlot.png") + +HTML.title("<center>Boxplot: number of RNA-seq reads distribution per sample.", HR=2) + HTMLInsertGraph("boxplotCounts.png") + +HTML.title("<center>MajorSequence: the proportion of reads associated with the most expressed sequence.", HR=2) + HTMLInsertGraph("majSequence.png") + +HTML.title("<center>ScatterPlot: Scatter plot of samples.", HR=2) + HTMLInsertGraph("scatterPlot.png") + +HTML.title("<center>Clustering Of No-Normalized Samples: Representing the no-normalized samples in Diagnostic.", HR=2) + HTMLInsertGraph("clusteringOfSamplesBefore.png") + +HTML.title("<center>Clustering Of Normalized Samples: Representing the normalized samples in Diagnostic.", HR=2) + HTMLInsertGraph("clusteringOfSamples.png") + +HTML.title("<center>DispersionEstimates: representing dispersion estimates vs mean expression.", HR=2) + HTMLInsertGraph("disperssionEstimates.png") + +HTML.title("<center>HistoRawPValue: histogram of raw p-value.", HR=2) + HTMLInsertGraph("histoRawPvalue.png") + +HTML.title("<center>MAplotDE: the differentially expressed genes (red point).", HR=2) + HTMLInsertGraph("MAplotDE.png") +HTMLEndFile() +absoluPrefixHTMLname <- paste(res_dir, prefixHTMLname, sep="") +outName <- paste(absoluPrefixHTMLname, ".html", sep="") +# change name is to be adapted into Galaxy +file.rename(outName, OUT_HTMLname) +