Mercurial > repos > vandelj > giant_hierarchical_clustering
comparison src/ExprPlotsScript.R @ 0:14045c80a222 draft
"planemo upload for repository https://github.com/juliechevalier/GIANT/tree/master commit cb276a594444c8f32e9819fefde3a21f121d35df"
author | vandelj |
---|---|
date | Fri, 26 Jun 2020 09:38:23 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:14045c80a222 |
---|---|
# Command-line interface to basic expression plots, for use with Galaxy.
# Written by Jimmy Vandel.

# Find the directory containing this script from the "--file=" entry of the
# full command line, then load the helper files stored next to it.
initial.options <- commandArgs(trailingOnly = FALSE)
file.arg.name <- "--file="
script.arg <- initial.options[grep(file.arg.name, initial.options)]
script.name <- sub(file.arg.name, "", script.arg)
script.basename <- dirname(script.name)
source(file.path(script.basename, "utils.R"))
source(file.path(script.basename, "getopt.R"))

# addComment("Welcome R!")

# Route R error messages to stderr and leave with exit status 1.
options(
  show.error.messages = FALSE,
  error = function() {
    cat(geterrmessage(), file = stderr())
    q("no", 1, FALSE)
  }
)

# Needed so that Galaxy does not crash with a UTF-8 error on German LC settings.
loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
loc <- Sys.setlocale("LC_NUMERIC", "C")

# Starting time, used to report the total run time at the end of the script.
start.time <- Sys.time()

# Global options and raw command line (the latter is only written to the log).
# NOTE(review): "stringAsfactors" is not the name of the base R option
# ("stringsAsFactors"), so this entry probably has no effect - confirm before
# renaming it, since data.frame() results further down may depend on it.
options(stringAsfactors = FALSE, useFancyQuotes = FALSE)
args <- commandArgs()
# Option specification handed to getopt(), one row per option:
# long name, short flag, argument mask and argument type.
# The options are read from the default source: commandArgs(TRUE).
spec <- rbind(
  c("dataFile", "i", 1, "character"),
  c("factorInfo", "t", 1, "character"),
  c("dataFileFormat", "j", 1, "character"),
  c("conditionNames", "c", 1, "character"),
  c("format", "f", 1, "character"),
  c("quiet", "q", 0, "logical"),
  c("log", "l", 1, "character"),
  c("histo", "h", 1, "character"),
  c("maPlot", "a", 1, "character"),
  c("boxplot", "b", 1, "character"),
  c("microarray", "m", 1, "character"),
  c("acp", "p", 1, "character"),
  c("screePlot", "s", 1, "character")
)
opt <- getopt(spec)

# Enforce the required arguments; every failure is reported through
# addComment() and ends the script with exit status 1.
if (is.null(opt$log)) {
  addComment("[ERROR]'log file' is required")
  q("no", 1, FALSE)
}
addComment("[INFO]Start of R script", TRUE, opt$log, display = FALSE)

if (is.null(opt$dataFile) || is.null(opt$dataFileFormat)) {
  addComment("[ERROR]'dataFile' and it format are required", TRUE, opt$log)
  q("no", 1, FALSE)
}

if (is.null(opt$format)) {
  addComment("[ERROR]'output format' is required", TRUE, opt$log)
  q("no", 1, FALSE)
}

# At least one of the plot options has to be present.
noPlotRequested <- is.null(opt$histo) && is.null(opt$maPlot) &&
  is.null(opt$boxplot) && is.null(opt$microarray) && is.null(opt$acp)
if (noPlotRequested) {
  addComment("[ERROR]Select at least one plot to draw", TRUE, opt$log)
  q("no", 1, FALSE)
}

# Verbose unless the quiet flag was given.
verbose <- is.null(opt$quiet)

addComment("[INFO]Parameters checked!", TRUE, opt$log, display = FALSE)

addComment(c("[INFO]Working directory: ", getwd()), TRUE, opt$log, display = FALSE)
addComment(c("[INFO]Command line: ", args), TRUE, opt$log, display = FALSE)

# Output directories, created in the working directory: "plotDir" receives
# the pdf/png figures and "plotLyDir" the html widgets.
outputDirs <- file.path(getwd(), c("plotDir", "plotLyDir"))
for (outputDir in outputDirs) {
  dir.create(outputDir)
}

# Load the required packages without their startup messages.
suppressPackageStartupMessages({
  library("oligo")
  library("ff")
  library("ggplot2")
  library("plotly")
})

# Load the input files.
dataAreFromCel <- FALSE
if (toupper(opt$dataFileFormat) == "CEL") {
  # CEL input: opt$dataFile holds the comma-separated list of CEL files.
  dataAreFromCel <- TRUE
  celData <- read.celfiles(unlist(strsplit(opt$dataFile, ",")))
  # load all expressions
  dataMatrix <- exprs(celData)
  # select "pm" probes
  probeInfo <- getProbeInfo(celData, probeType = c("pm"), target = "probeset")
  # reduce dataMatrix to the log2 expressions of a random selection of at
  # most 100000 of these probes
  pmProbes <- unique(probeInfo[, 1])
  dataMatrix <- log2(dataMatrix[sample(pmProbes, min(100000, length(pmProbes))), ])
  addComment("[INFO]Raw data are log2 transformed", TRUE, opt$log, display = FALSE)
  remove(probeInfo)
} else {
  # Input is already a tab-separated table. It is read without header and as
  # character so that column names are not altered (header=TRUE would prefix
  # some of them with "X").
  dataMatrix <- read.csv(file = opt$dataFile, header = FALSE, sep = "\t", colClasses = "character")
  # first row (minus its first cell) gives the column names
  colNamesData <- as.character(unlist(dataMatrix[1, -1], use.names = FALSE))
  dataMatrix <- dataMatrix[-1, , drop = FALSE]
  # first column gives the row names
  rowNamesData <- dataMatrix[, 1]
  # drop = FALSE keeps a data.frame even when a single data column remains
  dataMatrix <- dataMatrix[, -1, drop = FALSE]
  # Convert the character cells to numbers explicitly. data.matrix() must not
  # be used here: since R 4.0.0 it turns character columns into factor codes
  # instead of the numeric values they spell.
  dataMatrix <- matrix(
    as.numeric(as.matrix(dataMatrix)),
    nrow = nrow(dataMatrix),
    ncol = ncol(dataMatrix),
    dimnames = list(rowNamesData, colNamesData)
  )
  if (any(duplicated(rowNamesData))) {
    addComment("[WARNING] several rows share the same probe/gene name, you should merge or rename them to avoid further analysis mistakes", TRUE, opt$log, display = FALSE)
  }
}

addComment("[INFO]Input data loaded", TRUE, opt$log, display = FALSE)
addComment(c("[INFO]Dim of data matrix:", dim(dataMatrix)), TRUE, opt$log, display = FALSE)

# One condition per column of the data matrix.
nbConditions <- ncol(dataMatrix)

# Condition names default to the column names of the data matrix; they are
# replaced by the names given on the command line when their number matches.
# NOTE(review): opt$conditionNames is used as received from getopt and is
# never split, so the length test presumably only passes for a single
# condition - confirm how the wrapper passes several names.
nameConditions <- colnames(dataMatrix)
useGivenNames <- !is.null(opt$conditionNames) &&
  length(opt$conditionNames) == nbConditions
if (useGivenNames) {
  nameConditions <- opt$conditionNames
  colnames(dataMatrix) <- nameConditions
}

# Correspondence table between the generic names used in plot file names
# (column 1) and the names displayed in figure legends and html items
# (column 2); rows are indexed by the displayed name.
correspondanceNameTable <- matrix("", ncol = 2, nrow = nbConditions)
correspondanceNameTable[, 1] <- paste0("Condition", 1:nbConditions)
correspondanceNameTable[, 2] <- nameConditions
rownames(correspondanceNameTable) <- correspondanceNameTable[, 2]

addComment("[INFO]Retreive condition names", TRUE, opt$log, display = FALSE)

if (!is.null(opt$factorInfo)) {
  # Load the factor file (tab-separated, read as character, path relative to
  # the working directory).
  factorInfoMatrix <- read.csv(
    file = file.path(getwd(), opt$factorInfo),
    header = FALSE, sep = "\t", colClasses = "character"
  )
  # the first row provides the column names
  colnames(factorInfoMatrix) <- factorInfoMatrix[1, ]
  factorInfoMatrix <- factorInfoMatrix[-1, ]
  # the first column provides the row names; it is kept in place to avoid a
  # conversion to vector when there is a single factor
  rownames(factorInfoMatrix) <- factorInfoMatrix[, 1]

  # every sample of the expression matrix must be described
  missingSamples <- setdiff(colnames(dataMatrix), rownames(factorInfoMatrix))
  if (length(missingSamples) != 0) {
    addComment("[ERROR]Missing samples in factor file", TRUE, opt$log)
    q("no", 1, FALSE)
  }

  # order samples as in the expression matrix and drop spurious samples
  factorInfoMatrix <- factorInfoMatrix[colnames(dataMatrix), ]

  addComment("[INFO]Factors OK", TRUE, opt$log, display = FALSE)
  addComment(c("[INFO]Dim of factorInfo matrix:", dim(factorInfoMatrix)), TRUE, opt$log, display = FALSE)
}

addComment("[INFO]Ready to plot", TRUE, opt$log, display = FALSE)

##----------------------

### Intensity density curves ###
# Conditions are drawn by groups of at most histogramPerFigure per figure.
# Each figure is written as pdf/png in plotDir and as html widget in plotLyDir.
histogramPerFigure <- 50
if (!is.null(opt$histo)) {
  nbFigures <- ((nbConditions - 1) %/% histogramPerFigure) + 1
  for (iToPlot in 1:nbFigures) {
    firstPlot <- 1 + histogramPerFigure * (iToPlot - 1)
    lastPlot <- min(nbConditions, histogramPerFigure * iToPlot)
    shownConditions <- firstPlot:lastPlot
    # long format: one row per (value, condition)
    dataToPlot <- data.frame(
      x = c(dataMatrix[, shownConditions]),
      Experiment = rep(colnames(dataMatrix)[shownConditions], each = nrow(dataMatrix))
    )
    # CEL data were log2 transformed when loaded
    xAxisLabel <- if (dataAreFromCel) "Log2 intensities" else "Intensities"
    p <- ggplot(data = dataToPlot, aes(x = x, color = Experiment)) +
      stat_density(geom = "line", size = 1, position = "identity") +
      ggtitle("Intensity densities") + theme_bw() + ylab(label = "Density") +
      theme(panel.border = element_blank(), plot.title = element_text(hjust = 0.5)) +
      xlab(label = xAxisLabel)

    # static figure
    if (opt$format == "pdf") {
      pdf(paste0("./plotDir/", opt$histo, iToPlot, ".pdf"))
    } else {
      png(paste0("./plotDir/", opt$histo, iToPlot, ".png"))
    }
    print(p)
    dev.off()

    # interactive figure
    pp <- ggplotly(p)
    widgetFile <- paste0(file.path(getwd(), "plotLyDir"), "/", opt$histo, iToPlot, ".html")
    htmlwidgets::saveWidget(as_widget(pp), widgetFile, selfcontained = FALSE)
  }
  remove(p, dataToPlot)
  addComment("[INFO]Histograms drawn", TRUE, opt$log, display = FALSE)
}

##----------------------

### MA plots ###
# One MA plot per condition against the row medians of a random selection of
# at most 200000 rows. Static figures gather MAplotPerPage plots per file;
# one html widget is written per condition.
MAplotPerPage <- 4
if (!is.null(opt$maPlot)) {
  iToPlot <- 1
  plotVector <- list()
  toTake <- sample(nrow(dataMatrix), min(200000, nrow(dataMatrix)))
  refMedianColumn <- rowMedians(as.matrix(dataMatrix[toTake, ]))
  if (length(toTake) > 100000) {
    addComment(c("[INFO]high number of input data rows ", length(toTake), "; the generation of MA plot can take a while, please be patient"), TRUE, opt$log, display = FALSE)
  }
  for (iCondition in 1:nbConditions) {
    dataA <- dataMatrix[toTake, iCondition]
    # WARNING: the reference is, by default, the median column
    dataB <- refMedianColumn
    xToPlot <- 0.5 * (dataA + dataB)
    yToPlot <- dataA - dataB

    # median of the y values inside windows of width 0.1 centred on a regular
    # grid of x values, used to fit the red trend line
    tempX <- seq(min(xToPlot), max(xToPlot), 0.1)
    tempY <- unlist(lapply(tempX, function(windowCentre) {
      inWindow <- intersect(
        which(xToPlot >= (windowCentre - 0.1 / 2)),
        which(xToPlot < (windowCentre + 0.1 / 2))
      )
      median(yToPlot[inWindow])
    }))

    dataToPlot <- data.frame(x = xToPlot, y = yToPlot)
    dataMedianToPlot <- data.frame(x = tempX, y = tempY)
    p <- ggplot(data = dataToPlot, aes(x, y)) +
      stat_density2d(aes(fill = ..density..^0.25), geom = "tile", contour = FALSE, n = 100) +
      scale_fill_continuous(low = "white", high = "dodgerblue4") +
      geom_smooth(data = dataMedianToPlot, colour = "red", size = 0.5, se = FALSE) +
      ggtitle(correspondanceNameTable[iCondition, 2]) + theme_bw() +
      xlab(label = "") + ylab(label = "") +
      theme(panel.border = element_blank(), plot.title = element_text(hjust = 0.5), legend.position = "none")
    plotVector[[length(plotVector) + 1]] <- p

    # interactive figure, named after the generic condition name
    pp <- ggplotly(p)
    widgetFile <- paste0(file.path(getwd(), "plotLyDir"), "/", opt$maPlot, "_", correspondanceNameTable[iCondition, 1], ".html")
    htmlwidgets::saveWidget(as_widget(pp), widgetFile, selfcontained = FALSE)

    # write a static page once it is full or when the last condition is reached
    pageIsReady <- iCondition == nbConditions || length(plotVector) == MAplotPerPage
    if (pageIsReady) {
      if (opt$format == "pdf") {
        pdf(paste0("./plotDir/", opt$maPlot, iToPlot, ".pdf"))
      } else {
        png(paste0("./plotDir/", opt$maPlot, iToPlot, ".png"))
      }
      multiplot(plotlist = plotVector, cols = 2)
      dev.off()
      if (iCondition < nbConditions) {
        # prepare the next page
        plotVector <- list()
        iToPlot <- iToPlot + 1
      }
    }
  }
  remove(p, dataToPlot, dataA, dataB, toTake, xToPlot, yToPlot)
  addComment("[INFO]MAplots drawn", TRUE, opt$log, display = FALSE)
}

##----------------------

### Boxplots ###
# Conditions are drawn by groups of at most boxplotPerFigure per figure.
# To keep the html files light, boxes are drawn without their outliers and a
# sample of the outliers is added as a separate point layer.
boxplotPerFigure <- 50
if (!is.null(opt$boxplot)) {
  for (iToPlot in 1:(((nbConditions - 1) %/% boxplotPerFigure) + 1)) {
    firstPlot <- 1 + boxplotPerFigure * (iToPlot - 1)
    lastPlot <- min(nbConditions, boxplotPerFigure * iToPlot)
    plottedConditions <- colnames(dataMatrix)[firstPlot:lastPlot]
    dataToPlot <- data.frame(
      intensities = c(dataMatrix[, firstPlot:lastPlot]),
      Experiment = rep(plottedConditions, each = nrow(dataMatrix))
    )

    # Outliers of each condition, always as a list with one entry per
    # condition. (apply() is avoided: it fails on a single column and returns
    # a matrix or a vector when all conditions have the same outlier count.)
    boxplotsOutliers <- lapply(firstPlot:lastPlot, function(iCol) boxplot.stats(dataMatrix[, iCol])$out)
    names(boxplotsOutliers) <- plottedConditions
    # Keep at most 1000 sampled outliers per condition plus the extreme ones.
    # Sampling goes through indices because sample(x, n) would draw from 1:x
    # when x is a single number.
    boxplotsOutliers <- lapply(boxplotsOutliers, function(x) {
      if (length(x) > 0) {
        c(x[sample.int(length(x), min(length(x), 1000))], max(x), min(x))
      }
    })
    # as.numeric()/rep() keep both columns present when there is no outlier
    dataOutliers <- data.frame(
      yVal = as.numeric(unlist(boxplotsOutliers, use.names = FALSE)),
      xVal = rep(plottedConditions, times = lengths(boxplotsOutliers))
    )

    # boxes without outliers; bottom and left margins grow with the longest
    # condition name because x labels are rotated
    maxNameLength <- max(nchar(as.character(plottedConditions)))
    p <- ggplot(data = dataToPlot, aes(y = intensities, x = Experiment, color = Experiment)) +
      geom_boxplot(outlier.colour = NA, outlier.shape = NA) +
      ggtitle("Intensities") + theme_bw() + xlab(label = "") +
      theme(
        panel.border = element_blank(),
        plot.title = element_text(hjust = 0.5),
        axis.text.x = element_text(angle = 45, hjust = 1),
        plot.margin = unit(c(10, 10, maxNameLength, 15 + maxNameLength), "mm")
      )
    # add the sampled outliers, if any
    if (nrow(dataOutliers) > 0) {
      p <- p + geom_point(data = dataOutliers, aes(x = xVal, y = yVal, color = xVal), inherit.aes = FALSE)
    }
    # CEL data were log2 transformed when loaded
    if (dataAreFromCel) {
      p <- p + ylab(label = "Log2 intensities")
    } else {
      p <- p + ylab(label = "Intensities")
    }

    # static figure
    if (opt$format == "pdf") {
      pdf(paste0("./plotDir/", opt$boxplot, iToPlot, ".pdf"))
    } else {
      png(paste0("./plotDir/", opt$boxplot, iToPlot, ".png"))
    }
    print(p)
    dev.off()

    # interactive figure, lightened before being saved
    pp <- ggplotly(p)
    hoverPrefix <- if (dataAreFromCel) "log2(intensity) " else "intensity "
    for (iData in seq_along(pp$x$data)) {
      if (pp$x$data[[iData]]$type == "scatter") {
        # scatter traces hold the outliers added through geom_point; they are
        # already sampled, only the hover text is rewritten
        pp$x$data[[iData]]$text <- unlist(lapply(pp$x$data[[iData]]$y, function(yValue) {
          paste0(hoverPrefix, prettyNum(yValue, digits = 4))
        }))
      } else {
        # other traces are the boxes: hide their markers (outliers are shown
        # by the scatter traces)
        pp$x$data[[iData]]$marker$opacity <- 0

        # keep at most 50000 sampled values plus the extreme ones; the sample
        # should not be too small to avoid moving the limits of the boxes
        conditionValues <- dataMatrix[, pp$x$data[[iData]]$name]
        keptValues <- conditionValues[sample.int(length(conditionValues), min(length(conditionValues), 50000))]
        pp$x$data[[iData]]$y <- c(keptValues, min(conditionValues), max(conditionValues))
        pp$x$data[[iData]]$x <- rep(pp$x$data[[iData]]$x[1], length(pp$x$data[[iData]]$y))
      }
    }

    widgetFile <- paste0(file.path(getwd(), "plotLyDir"), "/", opt$boxplot, iToPlot, ".html")
    htmlwidgets::saveWidget(as_widget(pp), widgetFile, selfcontained = FALSE)
  }
  remove(p, dataToPlot)
  addComment("[INFO]Boxplots drawn", TRUE, opt$log, display = FALSE)
}

##----------------------

### Microarray images (only for .CEL files) ###
# One image per condition, named after the generic condition name.
if (!is.null(opt$microarray) && dataAreFromCel) {
  for (iCondition in 1:nbConditions) {
    imageFileStem <- paste0("./plotDir/", opt$microarray, "_", correspondanceNameTable[iCondition, 1])
    if (opt$format == "pdf") {
      pdf(paste0(imageFileStem, ".pdf"), onefile = FALSE, width = 5, height = 5)
    } else {
      png(paste0(imageFileStem, ".png"))
    }
    image(celData[, iCondition], main = correspondanceNameTable[iCondition, 2])
    dev.off()
  }
  addComment("[INFO]Microarray drawn", TRUE, opt$log, display = FALSE)
}

##----------------------

### PCA plots ###
# Runs a PCA on the conditions, optionally draws the scree plot, then writes
# 3D html plots of the first three components: coloured by one factor,
# coloured and shaped by two factors, or plain when no factor file is given.
if (!is.null(opt$acp)) {
  # the 3D plots read the first three components
  if (nbConditions < 3) {
    addComment("[ERROR]At least 3 conditions are required to draw PCA plot", TRUE, opt$log)
    q("no", 1, FALSE)
  }

  ## to avoid error when nrow is too large, results quite stable with 200k random selected rows
  randomSelection <- sample(nrow(dataMatrix), min(200000, nrow(dataMatrix)))

  # remove constant rows (they cannot be scaled)
  dataFiltered <- dataMatrix[randomSelection, , drop = FALSE]
  toRemove <- which(unlist(apply(dataFiltered, 1, var)) == 0)
  if (length(toRemove) > 0) {
    dataFiltered <- dataFiltered[-toRemove, , drop = FALSE]
  }
  PACres <- prcomp(t(dataFiltered), scale. = TRUE)

  if (!is.null(opt$screePlot)) {
    # scree plot: percentage of variance explained by each component
    dataToPlot <- data.frame(
      compo = seq(1, length(PACres$sdev)),
      var = (PACres$sdev^2 / sum(PACres$sdev^2)) * 100
    )
    p <- ggplot(data = dataToPlot, aes(x = compo, y = var)) +
      geom_bar(stat = "identity", fill = "steelblue") + geom_line() + geom_point() +
      ggtitle("Scree plot") + theme_bw() +
      theme(panel.border = element_blank(), plot.title = element_text(hjust = 0.5)) +
      xlab(label = "Dimensions") + ylab(label = "% explained variances") +
      scale_x_discrete(limits = dataToPlot$compo)
    pp <- ggplotly(p)

    if (opt$format == "pdf") {
      pdf(paste0("./plotDir/", opt$screePlot, ".pdf"))
    } else {
      png(paste0("./plotDir/", opt$screePlot, ".png"))
    }
    plot(p)
    dev.off()
    htmlwidgets::saveWidget(as_widget(pp), paste0(file.path(getwd(), "plotLyDir"), "/", opt$screePlot, ".html"), selfcontained = FALSE)
  }

  # now plot pca plots; axis titles and legend font are shared by all of them
  pcaScene <- list(
    xaxis = list(title = "Component 1"),
    yaxis = list(title = "Component 2"),
    zaxis = list(title = "Component 3")
  )
  pcaLegend <- list(font = list(family = "sans-serif", size = 15, color = "#000"))
  pcaWidgetFile <- function(suffix) {
    paste0(file.path(getwd(), "plotLyDir"), "/", opt$acp, "_", suffix, ".html")
  }

  if (!is.null(opt$factorInfo)) {
    fileIdent <- ""
    symbolset <- c("circle", "cross", "square", "diamond", "circle-open", "square-open", "diamond-open", "x")

    # save equivalence between real factor names and generic ones in correspondanceNameTable
    correspondanceNameTable <- rbind(
      correspondanceNameTable,
      matrix(
        c(paste0("Factor", 1:(ncol(factorInfoMatrix) - 1)), colnames(factorInfoMatrix)[-1]),
        ncol = 2, nrow = ncol(factorInfoMatrix) - 1
      )
    )
    rownames(correspondanceNameTable) <- correspondanceNameTable[, 2]

    # order factors by decreasing number of groups
    nbGroups <- function(factorName) length(table(factorInfoMatrix[, factorName]))
    factorNames <- colnames(factorInfoMatrix)[-1]
    orderedFactors <- factorNames[order(unlist(lapply(factorNames, nbGroups)), decreasing = TRUE)]
    # the last factor has the fewest groups: if it already has more groups
    # than available symbols, no pair of factors can be displayed together
    allFactorsBigger <- nbGroups(orderedFactors[length(orderedFactors)]) > length(symbolset)
    if (allFactorsBigger) {
      addComment("[WARNING]All factors are composed of too many groups to display two factors at same time, each PCA plot will display only one factor groups", TRUE, opt$log, display = FALSE)
    }

    # hover text: all factor values of each sample (same for every plot)
    sampleHoverLabels <- unlist(lapply(rownames(PACres$x), function(x) paste(factorInfoMatrix[x, -1], collapse = ",")))

    for (iFactor in seq_along(orderedFactors)) {
      if (iFactor == length(orderedFactors) || allFactorsBigger) {
        # single-factor plot: drawn when there is only one factor or when no
        # pair of factors can be displayed
        if (length(orderedFactors) == 1 || allFactorsBigger) {
          dataToPlot <- data.frame(
            PC1 = PACres$x[, 1], PC2 = PACres$x[, 2], PC3 = PACres$x[, 3],
            Experiments = rownames(PACres$x),
            Attribute1 = factorInfoMatrix[rownames(PACres$x), orderedFactors[iFactor]],
            hoverLabel = sampleHoverLabels
          )
          # the palette is sized from the number of distinct groups;
          # levels() would be NULL when the column is not a factor
          nbColors <- length(unique(dataToPlot$Attribute1)) + 2
          p <- plot_ly(dataToPlot,
            x = ~PC1, y = ~PC2, z = ~PC3, type = "scatter3d", mode = "markers",
            color = ~Attribute1, colors = rainbow(nbColors),
            hoverinfo = "text", text = ~ paste(Experiments, "\n", hoverLabel),
            marker = list(size = 5)
          ) %>%
            layout(title = "Principal Component Analysis", scene = pcaScene, legend = pcaLegend)
          fileIdent <- correspondanceNameTable[orderedFactors[iFactor], 1]
          # save the plotly plot
          htmlwidgets::saveWidget(as_widget(p), pcaWidgetFile(fileIdent), selfcontained = FALSE)
        }
      } else {
        # two-factor plots: colour for the current factor, symbol for each of
        # the following factors having few enough groups
        for (iFactorBis in (iFactor + 1):length(orderedFactors)) {
          if (nbGroups(orderedFactors[iFactorBis]) <= length(symbolset)) {
            dataToPlot <- data.frame(
              PC1 = PACres$x[, 1], PC2 = PACres$x[, 2], PC3 = PACres$x[, 3],
              Experiments = rownames(PACres$x),
              Attribute1 = factorInfoMatrix[rownames(PACres$x), orderedFactors[iFactor]],
              Attribute2 = factorInfoMatrix[rownames(PACres$x), orderedFactors[iFactorBis]],
              hoverLabel = sampleHoverLabels
            )
            nbColors <- length(unique(dataToPlot$Attribute1)) + 2
            p <- plot_ly(dataToPlot,
              x = ~PC1, y = ~PC2, z = ~PC3, type = "scatter3d", mode = "markers",
              color = ~Attribute1, colors = rainbow(nbColors),
              symbol = ~Attribute2, symbols = symbolset,
              hoverinfo = "text", text = ~ paste(Experiments, "\n", hoverLabel),
              marker = list(size = 5)
            ) %>%
              layout(title = "Principal Component Analysis", scene = pcaScene, legend = pcaLegend)
            fileIdent <- paste(correspondanceNameTable[orderedFactors[c(iFactor, iFactorBis)], 1], collapse = "_AND_")
            # save the plotly plot
            htmlwidgets::saveWidget(as_widget(p), pcaWidgetFile(fileIdent), selfcontained = FALSE)
          } else {
            addComment(c("[WARNING]PCA with", orderedFactors[iFactor], "and", orderedFactors[iFactorBis], "groups cannot be displayed, too many groups (max", length(symbolset), ")"), TRUE, opt$log, display = FALSE)
          }
        }
      }
    }
  } else {
    # no factor file: a single plot with all samples in the same colour
    dataToPlot <- data.frame(PC1 = PACres$x[, 1], PC2 = PACres$x[, 2], PC3 = PACres$x[, 3], Experiments = rownames(PACres$x))
    p <- plot_ly(dataToPlot,
      x = ~PC1, y = ~PC2, z = ~PC3, type = "scatter3d", mode = "markers",
      marker = list(size = 5, color = "salmon"),
      hoverinfo = "text", text = ~ paste(Experiments)
    ) %>%
      layout(title = "Principal Component Analysis", scene = pcaScene, legend = pcaLegend)

    # save plotly files
    htmlwidgets::saveWidget(as_widget(p), pcaWidgetFile("plot"), selfcontained = FALSE)
  }
  remove(p, dataToPlot, dataFiltered)
  addComment("[INFO]ACP plot drawn", TRUE, opt$log, display = FALSE)
}

# Write the correspondence between plot file names and the names displayed in
# figure legends; useful to define html items in the xml file.
correspondanceFile <- file.path(getwd(), "correspondanceFileNames.csv")
write.table(
  correspondanceNameTable,
  file = correspondanceFile,
  quote = FALSE, sep = "\t", col.names = FALSE, row.names = FALSE
)

# Log the total run time (in minutes) and the end of the script.
end.time <- Sys.time()
elapsedMinutes <- as.numeric(end.time - start.time, units = "mins")
addComment(c("[INFO]Total execution time for R script:", elapsedMinutes, "mins"), TRUE, opt$log, display = FALSE)

addComment("[INFO]End of R script", TRUE, opt$log, display = FALSE)

printSessionInfo(opt$log)
# sessionInfo()