comparison deseq/differential_expression_analysis_pipeline_for_rnaseq_data-a03838a6eb54/DiffExpAnal/DESeqTools/anadiffGenes2conds.R @ 10:6e573fd3c41b draft

Uploaded
author yufei-luo
date Mon, 13 May 2013 10:06:30 -0400
parents
children
comparison
equal deleted inserted replaced
9:a03838a6eb54 10:6e573fd3c41b
1 # Marie-Anges Dillies, Yufei Luo
2 # Analyse differentielle de donnees d expression par gene
3 # avec DESeq
4 # 2 conditions
5
6 args <- commandArgs()
7 #print(args[1])
8 #print(args[2])
9 #print(args[3])
10 #print(args[4])
11 #print(args[5])
12 #print(args[6])
13 #output file names
14 #print(args[7]) # HTML file name
15 #print(args[8]) # HTML file all images directory
16 #print(args[9]) # complete xls file name
17 #print(args[10])# UP xls file name
18 #print(args[11]) #Down xls file name
19 #print(args[12]) #the executable scipt (for getting the path)
20
21 library(R2HTML)
22 library(R.utils)
23
24 #run example:
25 projectName <- "DESeqAnalysis"
26 analysisVersion <- "V1" # fitType=local, sharingMode=fit-only, method=blind
27 rawDir <- "raw"
28 targetFile <- args[4]
29 header <- as.integer(args[5]) #si on a header ou pas, si on a, header=1, sinon header=0
30 withOutReplicates <- as.integer(args[6])
31
32 #get the directory to write the results
33 tab <- splitByPattern(args[7], pattern="/")
34 res_dir <- ""
35 for (e in tab[1:length(tab)-1]) { res_dir <- paste(res_dir, e, sep="")}
36 #get the html output file name
37 OUT_HTMLname <- args[7]
38 #get the images directory to write to
39 OUT_imgDir <- args[8]
40 #if the directory dosen't existe, we should create it first
41
42 alpha <- 0.05
43 adjMethod <- "BH"
44 outfile <- T
45 runningScriptTab <- splitByPattern(args[12], pattern="/")
46 RfuncDir <- ""
47 for (r in runningScriptTab[1:length(runningScriptTab)-1]) { RfuncDir <- paste(RfuncDir, r, sep="")} #find the path of executable script
48 RfuncDir <- paste(RfuncDir, "DESeqTools/", sep="") #define the function files path
49 # Dossier contenant les fonctions
50 print(RfuncDir)
51 source( paste(RfuncDir, "RNAseqFunctions.R", sep="/") )
52
53 # Chargement des packages et des fonctions
54 library(DESeq)
55 RNAseqFunctions(RfuncDir)
56 # Chargement du target file
57 target <- loadTargetFile( targetFile, header )
58 # Chargement des donnees, construction d'une table de comptages par gene
59 #have changed
60 rawCounts <- loadCountData( target, header )
61 conds <- unique(target$group)
62 cond1 <- as.character(conds[1])
63 cond2 <- as.character(conds[!conds == conds[1]])
64 rawCounts <- HTseqClean( rawCounts )
65
66 # Transformation en matrice de comptages
67 counts <- raw2counts( rawCounts )[[1]]
68
69 # Nombre de reads par echantillon
70 OUT_barplotTCName <- paste(OUT_imgDir, "barplotTC.png", sep="/")
71 barplotTC( counts, target$group, OUT_barplotTCName, out=outfile )
72
73 # Proportion comptages nuls
74 OUT_barplotNulName <- paste(OUT_imgDir, "barplotNul.png", sep="/")
75 barplotNul( counts, target$group, OUT_barplotNulName, out=outfile )
76
77 # Suppression comptages nuls
78 counts <- removeNul( counts )[[1]]
79
80 # Density plot
81 OUT_densityPlotName <- paste(OUT_imgDir, "densityPlot.png", sep="/")
82 densityPlot( counts, target$group, OUT_densityPlotName, out=outfile )
83
84 # Boxplot
85 OUT_boxplotCountsName <- paste(OUT_imgDir, "boxplotCounts.png", sep="/")
86 boxplotCounts( counts, target$group, type = c("raw", "norm"), OUT_boxplotCountsName, out=outfile )
87 # Sequence majoritaire
88 OUT_majSequenceName <- paste(OUT_imgDir, "majSequence.png", sep="/")
89 majSequence( counts, target$group, OUT_majSequenceName, out=outfile )
90
91 # ScatterPlot between two samples
92 OUT_scatterPlot <- paste(OUT_imgDir, "scatterPlot.png", sep="/")
93 pairwiseScatterPlots(counts, target, OUT_scatterPlot, out=outfile, pdffile=FALSE)
94
95 # SERE coefficient calculation (Poisson hypothesis for replicates techiques), to know if the variability between the réplicates or the conditons is hight or not.
96 coef <- pairwiseSERE(counts)
97 print(coef)
98 coef
99 # Creation structure de donnees cds, !! we use newCountDataset because that we have first column not numeric, and DESeq dosen't take non numeric values.
100 cds <- newCountDataSet( counts, target$group )
101
102 # Diagnostic for clustering of non-normalized samples
103 OUT_clusterPlot_before <- paste(OUT_imgDir, "clusteringOfSamplesBefore.png", sep="/")
104 clusterPlot(cds, OUT_clusterPlot_before, out=outfile)
105
106
107 # Normalisation (calcul des lib size factors )
108 cds <- estimateSizeFactors( cds )
109
110 # Estimation de la dispersion
111 # parametres:
112 # method: how samples are pooled to estimate dispersion (we use "pooled"
113 # as default parameter. If no replicates, we use "blind".
114 # sharingMode: how variance estimate is computed with respect to the fitted line.
115 #"Maximum" is the most conservative (max between fit andestimation),
116 #"fit-only" keeps the estimated value, we use "Maximum" as default parameter.
117 # fitType: refers to the model. "Local" is the published model,
118 #"parametric" is glm-based (may not converge), now we use "parametric" as default value.
119
120 if(withOutReplicates!=0){
121 cds <- estimateDispersions( cds, method="blind")#in this case, without replicates
122
123 } else if(withOutReplicates==0){
124 cds <- estimateDispersions( cds, method="pooled", sharingMode="maximum", fitType="parametric") }
125
126 # Analyse differentielle, ajustement BH par defaut
127 res <- nbinomTest( cds, cond1, cond2)
128
129 # Diagnostic for clustering of normalized samples
130 OUT_clusterPlot <- paste(OUT_imgDir, "clusteringOfSamples.png", sep="/")
131 clusterPlot(cds, OUT_clusterPlot, out=outfile)
132
133 # Control plot of dispersion estimates
134 OUT_plotDispEstimatesName <- paste(OUT_imgDir, "disperssionEstimates.png", sep="/")
135 plotDispEstimates( cds, OUT_plotDispEstimatesName, out=outfile )
136
137 # Distribution of raw p-values
138 OUT_histoRawpName <- paste(OUT_imgDir, "histoRawPvalue.png", sep="/")
139 histoRawp( res, OUT_histoRawpName, out=outfile )
140
141 # MAplot showing DE genes
142 OUT_MAplotDEName <- paste(OUT_imgDir, "MAplotDE.png", sep="/")
143 MAplotDE( res, alpha, OUT_MAplotDEName, out=outfile )
144
145 # export complete data
146 OUT_completeName <- args[9]
147 complete <- exportComplete( counts, res, target, adjMethod, cond1, cond2, OUT_completeName, out=outfile )
148
149 # export significant genes
150 OUT_upName <- args[10]
151 OUT_downName <- args[11]
152 diff <- exportDiff( complete, alpha, adjMethod, OUT_upName, OUT_downName, out=outfile )
153
154 # write all images results into an HTML file
155 prefixHTMLname <- tab[length(tab)]
156 #HTMLCSS(file.path(res_dir), filename=prefixHTMLname, CSSfile="R2HTML")
157 HTMLInitFile(file.path(res_dir), filename=prefixHTMLname, BackGroundColor="white")
158 HTML.title("<center>Differential Expression DESeq analysis.", HR=1)
159 HTML.title("<center>BarplotTC: number of RNA-seq reads per sample.", HR=2)
160 HTMLInsertGraph("barplotTC.png")
161
162 HTML.title("<center>BarplotNul: number of RNA-seq reads that the count is 0 (nul).", HR=2)
163 HTMLInsertGraph("barplotNul.png")
164
165 HTML.title("<center>DensityPlot: density of each sample.", HR=2)
166 HTMLInsertGraph("densityPlot.png")
167
168 HTML.title("<center>Boxplot: number of RNA-seq reads distribution per sample.", HR=2)
169 HTMLInsertGraph("boxplotCounts.png")
170
171 HTML.title("<center>MajorSequence: the proportion of reads associated with the most expressed sequence.", HR=2)
172 HTMLInsertGraph("majSequence.png")
173
174 HTML.title("<center>ScatterPlot: Scatter plot of samples.", HR=2)
175 HTMLInsertGraph("scatterPlot.png")
176
177 HTML.title("<center>Clustering Of No-Normalized Samples: Representing the no-normalized samples in Diagnostic.", HR=2)
178 HTMLInsertGraph("clusteringOfSamplesBefore.png")
179
180 HTML.title("<center>Clustering Of Normalized Samples: Representing the normalized samples in Diagnostic.", HR=2)
181 HTMLInsertGraph("clusteringOfSamples.png")
182
183 HTML.title("<center>DispersionEstimates: representing dispersion estimates vs mean expression.", HR=2)
184 HTMLInsertGraph("disperssionEstimates.png")
185
186 HTML.title("<center>HistoRawPValue: histogram of raw p-value.", HR=2)
187 HTMLInsertGraph("histoRawPvalue.png")
188
189 HTML.title("<center>MAplotDE: the differentially expressed genes (red point).", HR=2)
190 HTMLInsertGraph("MAplotDE.png")
191 HTMLEndFile()
192 absoluPrefixHTMLname <- paste(res_dir, prefixHTMLname, sep="")
193 outName <- paste(absoluPrefixHTMLname, ".html", sep="")
194 # change name is to be adapted into Galaxy
195 file.rename(outName, OUT_HTMLname)
196