18
|
1 # Analyse differentielle de donnees d expression par gene
|
|
2 # avec DESeq
|
|
3 # 2 conditions
|
|
4
|
|
5 args <- commandArgs()
|
|
6 #print(args[1])
|
|
7 #print(args[2])
|
|
8 #print(args[3])
|
|
9 #print(args[4])
|
|
10 #print(args[5])
|
|
11 #print(args[6])
|
|
12 #output file names
|
|
13 #print(args[7]) # HTML file name
|
|
14 #print(args[8]) # HTML file all images directory
|
|
15 #print(args[9]) # complete xls file name
|
|
16 #print(args[10])# UP xls file name
|
|
17 #print(args[11]) #Down xls file name
|
|
18 #print(args[12]) #the executable scipt (for getting the path)
|
|
19
|
|
20 library(R2HTML)
|
|
21 library(R.utils)
|
|
22
|
|
23 #run example:
|
|
24 projectName <- "DESeqAnalysis"
|
|
25 analysisVersion <- "V1" # fitType=local, sharingMode=fit-only, method=blind
|
|
26 rawDir <- "raw"
|
|
27 targetFile <- args[4]
|
|
28 header <- as.integer(args[5]) #si on a header ou pas, si on a, header=1, sinon header=0
|
|
29 withOutReplicates <- as.integer(args[6])
|
|
30
|
|
31 #get the directory to write the results
|
|
32 tab <- splitByPattern(args[7], pattern="/")
|
|
33 res_dir <- ""
|
|
34 for (e in tab[1:length(tab)-1]) { res_dir <- paste(res_dir, e, sep="")}
|
|
35 #get the html output file name
|
|
36 OUT_HTMLname <- args[7]
|
|
37 #get the images directory to write to
|
|
38 OUT_imgDir <- args[8]
|
|
39 #if the directory dosen't existe, we should create it first
|
|
40
|
|
41 alpha <- 0.05
|
|
42 adjMethod <- "BH"
|
|
43 outfile <- T
|
|
44 runningScriptTab <- splitByPattern(args[12], pattern="/")
|
|
45 RfuncDir <- ""
|
|
46 for (r in runningScriptTab[1:length(runningScriptTab)-1]) { RfuncDir <- paste(RfuncDir, r, sep="")} #find the path of executable script
|
|
47 RfuncDir <- paste(RfuncDir, "DESeqTools/", sep="") #define the function files path
|
|
48 # Dossier contenant les fonctions
|
|
49 print(RfuncDir)
|
|
50 source( paste(RfuncDir, "RNAseqFunctions.R", sep="/") )
|
|
51
|
|
52 # Chargement des packages et des fonctions
|
|
53 library(DESeq)
|
|
54 RNAseqFunctions(RfuncDir)
|
|
55 # Chargement du target file
|
|
56 target <- loadTargetFile( targetFile, header )
|
|
57 # Chargement des donnees, construction d'une table de comptages par gene
|
|
58 #have changed
|
|
59 rawCounts <- loadCountData( target, header )
|
|
60 conds <- unique(target$group)
|
|
61 cond1 <- as.character(conds[1])
|
|
62 cond2 <- as.character(conds[!conds == conds[1]])
|
|
63 rawCounts <- HTseqClean( rawCounts )
|
|
64
|
|
65 # Transformation en matrice de comptages
|
|
66 counts <- raw2counts( rawCounts )[[1]]
|
|
67
|
|
68 # Nombre de reads par echantillon
|
|
69 OUT_barplotTCName <- paste(OUT_imgDir, "barplotTC.png", sep="/")
|
|
70 barplotTC( counts, target$group, OUT_barplotTCName, out=outfile )
|
|
71
|
|
72 # Proportion comptages nuls
|
|
73 OUT_barplotNulName <- paste(OUT_imgDir, "barplotNul.png", sep="/")
|
|
74 barplotNul( counts, target$group, OUT_barplotNulName, out=outfile )
|
|
75
|
|
76 # Suppression comptages nuls
|
|
77 counts <- removeNul( counts )[[1]]
|
|
78
|
|
79 # Density plot
|
|
80 OUT_densityPlotName <- paste(OUT_imgDir, "densityPlot.png", sep="/")
|
|
81 densityPlot( counts, target$group, OUT_densityPlotName, out=outfile )
|
|
82
|
|
83 # Boxplot
|
|
84 OUT_boxplotCountsName <- paste(OUT_imgDir, "boxplotCounts.png", sep="/")
|
|
85 boxplotCounts( counts, target$group, type = c("raw", "norm"), OUT_boxplotCountsName, out=outfile )
|
|
86 # Sequence majoritaire
|
|
87 OUT_majSequenceName <- paste(OUT_imgDir, "majSequence.png", sep="/")
|
|
88 majSequence( counts, target$group, OUT_majSequenceName, out=outfile )
|
|
89
|
|
90 # ScatterPlot between two samples
|
|
91 OUT_scatterPlot <- paste(OUT_imgDir, "scatterPlot.png", sep="/")
|
|
92 pairwiseScatterPlots(counts, target, OUT_scatterPlot, out=outfile, pdffile=FALSE)
|
|
93
|
|
94 # SERE coefficient calculation (Poisson hypothesis for replicates techiques), to know if the variability between the réplicates or the conditons is hight or not.
|
|
95 coef <- pairwiseSERE(counts)
|
|
96 print(coef)
|
|
97 coef
|
|
98 # Creation structure de donnees cds, !! we use newCountDataset because that we have first column not numeric, and DESeq dosen't take non numeric values.
|
|
99 cds <- newCountDataSet( counts, target$group )
|
|
100
|
|
101 # Diagnostic for clustering of non-normalized samples
|
|
102 OUT_clusterPlot_before <- paste(OUT_imgDir, "clusteringOfSamplesBefore.png", sep="/")
|
|
103 clusterPlot(cds, OUT_clusterPlot_before, out=outfile)
|
|
104
|
|
105
|
|
106 # Normalisation (calcul des lib size factors )
|
|
107 cds <- estimateSizeFactors( cds )
|
|
108
|
|
109 # Estimation de la dispersion
|
|
110 # parametres:
|
|
111 # method: how samples are pooled to estimate dispersion. If no replicates use "blind"
|
|
112 # sharingMode: how variance estimate is computed with respect to the fitted line.
|
|
113 # "Maximum" is the most conservative (max between fit and estimation), "fit-only" keeps the estimated value
|
|
114 # fitType: refers to the model. "Local" is the published model, "parametric" is glm-based (may not converge), now we use "parametric" as default value.
|
|
115 #in this case, without replicates
|
|
116 if(withOutReplicates!=0){
|
|
117 cds <- estimateDispersions( cds, sharingMode="fit-only", method="blind")
|
|
118 } else if(withOutReplicates==0){
|
|
119 #cds <- estimateDispersions( cds, sharingMode="fit-only", fitType="local")}
|
|
120 cds <- estimateDispersions( cds)}
|
|
121 # Analyse differentielle, ajustement BH par defaut
|
|
122 res <- nbinomTest( cds, cond1, cond2)
|
|
123
|
|
124 # Diagnostic for clustering of normalized samples
|
|
125 OUT_clusterPlot <- paste(OUT_imgDir, "clusteringOfSamples.png", sep="/")
|
|
126 clusterPlot(cds, OUT_clusterPlot, out=outfile)
|
|
127
|
|
128 # Control plot of dispersion estimates
|
|
129 OUT_plotDispEstimatesName <- paste(OUT_imgDir, "disperssionEstimates.png", sep="/")
|
|
130 plotDispEstimates( cds, OUT_plotDispEstimatesName, out=outfile )
|
|
131
|
|
132 # Distribution of raw p-values
|
|
133 OUT_histoRawpName <- paste(OUT_imgDir, "histoRawPvalue.png", sep="/")
|
|
134 histoRawp( res, OUT_histoRawpName, out=outfile )
|
|
135
|
|
136 # MAplot showing DE genes
|
|
137 OUT_MAplotDEName <- paste(OUT_imgDir, "MAplotDE.png", sep="/")
|
|
138 MAplotDE( res, alpha, OUT_MAplotDEName, out=outfile )
|
|
139
|
|
140 # export complete data
|
|
141 OUT_completeName <- args[9]
|
|
142 complete <- exportComplete( counts, res, target, adjMethod, cond1, cond2, OUT_completeName, out=outfile )
|
|
143
|
|
144 # export significant genes
|
|
145 OUT_upName <- args[10]
|
|
146 OUT_downName <- args[11]
|
|
147 diff <- exportDiff( complete, alpha, adjMethod, OUT_upName, OUT_downName, out=outfile )
|
|
148
|
|
149 # write all images results into an HTML file
|
|
150 prefixHTMLname <- tab[length(tab)]
|
|
151 #HTMLCSS(file.path(res_dir), filename=prefixHTMLname, CSSfile="R2HTML")
|
|
152 HTMLInitFile(file.path(res_dir), filename=prefixHTMLname, BackGroundColor="white")
|
|
153 HTML.title("<center>Differential Expression DESeq analysis.", HR=1)
|
|
154 HTML.title("<center>BarplotTC: number of RNA-seq reads per sample.", HR=2)
|
|
155 HTMLInsertGraph("barplotTC.png")
|
|
156
|
|
157 HTML.title("<center>BarplotNul: number of RNA-seq reads that the count is 0 (nul).", HR=2)
|
|
158 HTMLInsertGraph("barplotNul.png")
|
|
159
|
|
160 HTML.title("<center>DensityPlot: density of each sample.", HR=2)
|
|
161 HTMLInsertGraph("densityPlot.png")
|
|
162
|
|
163 HTML.title("<center>Boxplot: number of RNA-seq reads distribution per sample.", HR=2)
|
|
164 HTMLInsertGraph("boxplotCounts.png")
|
|
165
|
|
166 HTML.title("<center>MajorSequence: the proportion of reads associated with the most expressed sequence.", HR=2)
|
|
167 HTMLInsertGraph("majSequence.png")
|
|
168
|
|
169 HTML.title("<center>ScatterPlot: Scatter plot of samples.", HR=2)
|
|
170 HTMLInsertGraph("scatterPlot.png")
|
|
171
|
|
172 HTML.title("<center>Clustering Of No-Normalized Samples: Representing the no-normalized samples in Diagnostic.", HR=2)
|
|
173 HTMLInsertGraph("clusteringOfSamplesBefore.png")
|
|
174
|
|
175 HTML.title("<center>Clustering Of Normalized Samples: Representing the normalized samples in Diagnostic.", HR=2)
|
|
176 HTMLInsertGraph("clusteringOfSamples.png")
|
|
177
|
|
178 HTML.title("<center>DispersionEstimates: representing dispersion estimates vs mean expression.", HR=2)
|
|
179 HTMLInsertGraph("disperssionEstimates.png")
|
|
180
|
|
181 HTML.title("<center>HistoRawPValue: histogram of raw p-value.", HR=2)
|
|
182 HTMLInsertGraph("histoRawPvalue.png")
|
|
183
|
|
184 HTML.title("<center>MAplotDE: the differentially expressed genes (red point).", HR=2)
|
|
185 HTMLInsertGraph("MAplotDE.png")
|
|
186 HTMLEndFile()
|
|
187 absoluPrefixHTMLname <- paste(res_dir, prefixHTMLname, sep="")
|
|
188 outName <- paste(absoluPrefixHTMLname, ".html", sep="")
|
|
189 # change name is to be adapted into Galaxy
|
|
190 file.rename(outName, OUT_HTMLname)
|
|
191
|