comparison src/ExprPlotsScript.R @ 0:14045c80a222 draft

"planemo upload for repository https://github.com/juliechevalier/GIANT/tree/master commit cb276a594444c8f32e9819fefde3a21f121d35df"
author vandelj
date Fri, 26 Jun 2020 09:38:23 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:14045c80a222
# A command-line interface to basic plots for use with Galaxy
# written by Jimmy Vandel
#
# The accepted arguments are declared in the getopt spec and validated
# further down in this script.
#

# Find the directory containing this script from the "--file=" entry of the
# full command line, so that the helper files located next to it can be sourced.
file.arg.name <- "--file="
initial.options <- commandArgs(trailingOnly = FALSE)
script.name <- sub(file.arg.name, "", initial.options[grep(file.arg.name, initial.options)])
script.basename <- dirname(script.name)
source(file.path(script.basename, "utils.R"))
source(file.path(script.basename, "getopt.R"))

#addComment("Welcome R!")

# On any R error: write the error message to stderr and quit with status 1,
# without saving the workspace.
options(
  show.error.messages = FALSE,
  error = function() {
    cat(geterrmessage(), file = stderr())
    q("no", 1, FALSE)
  }
)

# we need that to not crash galaxy with an UTF8 error on German LC settings.
loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
loc <- Sys.setlocale("LC_NUMERIC", "C")

# remember when the script started (used for the final execution-time log line)
start.time <- Sys.time()

# global options
# NOTE(review): "stringAsfactors" is not the name of the R option
# ("stringsAsFactors"), so this entry currently has no effect. It is kept
# as-is because enabling it would change how later data.frame() calls build
# their columns on R < 4 -- confirm before renaming.
options(stringAsfactors = FALSE, useFancyQuotes = FALSE)

# full command line, only used for logging
args <- commandArgs()


# Command-line options, one row per option, handed to getopt().
# We read the options from the default: commandArgs(TRUE).
# Presumably the four columns follow the getopt package layout (long name,
# short flag, argument mask, value type) -- confirm against the sourced getopt.R.
spec <- do.call(rbind, list(
  c("dataFile",       "i", "1", "character"),
  c("factorInfo",     "t", "1", "character"),
  c("dataFileFormat", "j", "1", "character"),
  c("conditionNames", "c", "1", "character"),
  c("format",         "f", "1", "character"),
  c("quiet",          "q", "0", "logical"),
  c("log",            "l", "1", "character"),
  c("histo",          "h", "1", "character"),
  c("maPlot",         "a", "1", "character"),
  c("boxplot",        "b", "1", "character"),
  c("microarray",     "m", "1", "character"),
  c("acp",            "p", "1", "character"),
  c("screePlot",      "s", "1", "character")
))
opt <- getopt(spec)

# Check the required arguments; each failure is reported through addComment()
# and ends the script with exit status 1.

# the log file comes first: every later message is written to it
if (is.null(opt$log)) {
  addComment("[ERROR]'log file' is required")
  q("no", 1, FALSE)
}
addComment("[INFO]Start of R script", TRUE, opt$log, display = FALSE)

# input data and its format
if (is.null(opt$dataFile) || is.null(opt$dataFileFormat)) {
  addComment("[ERROR]'dataFile' and it format are required", TRUE, opt$log)
  q("no", 1, FALSE)
}

# output format of the static plots
if (is.null(opt$format)) {
  addComment("[ERROR]'output format' is required", TRUE, opt$log)
  q("no", 1, FALSE)
}

# at least one of the plot types must have been requested
anyPlotRequested <- !is.null(opt$histo) || !is.null(opt$maPlot) || !is.null(opt$boxplot) ||
  !is.null(opt$microarray) || !is.null(opt$acp)
if (!anyPlotRequested) {
  addComment("[ERROR]Select at least one plot to draw", TRUE, opt$log)
  q("no", 1, FALSE)
}

# verbose unless the quiet flag was given
verbose <- is.null(opt$quiet)

addComment("[INFO]Parameters checked!", TRUE, opt$log, display = FALSE)

addComment(c("[INFO]Working directory: ", getwd()), TRUE, opt$log, display = FALSE)
addComment(c("[INFO]Command line: ", args), TRUE, opt$log, display = FALSE)

# output directories, created in the working directory:
# "plotDir" receives the pdf/png files, "plotLyDir" the plotly html widgets
workingDir <- getwd()
dir.create(file.path(workingDir, "plotDir"))
dir.create(file.path(workingDir, "plotLyDir"))

# load the required packages without their startup messages
suppressPackageStartupMessages({
  library(oligo)
  library(ff)
  library(ggplot2)
  library(plotly)
})


# Load the input data into the numeric matrix `dataMatrix`
# (one row per probe/gene, one column per condition).
dataAreFromCel <- toupper(opt$dataFileFormat) == "CEL"
if (dataAreFromCel) {
  # CEL input: opt$dataFile is a comma-separated list of .CEL files
  celData <- read.celfiles(unlist(strsplit(opt$dataFile, ",")))
  # load all expressions
  dataMatrix <- exprs(celData)
  # select "pm" probes
  probeInfo <- getProbeInfo(celData, probeType = c("pm"), target = "probeset")
  # first column of probeInfo is used as row index into the expression matrix
  pmProbeIds <- unique(probeInfo[, 1])
  # reduce dataMatrix to the log2 expression of at most 100000 randomly chosen probes.
  # Sampling goes through positions (sample.int) because sample(x, ...) draws from
  # 1:x when x has length one; drop = FALSE keeps a matrix for a single CEL file.
  keptProbes <- pmProbeIds[sample.int(length(pmProbeIds), min(100000, length(pmProbeIds)))]
  dataMatrix <- log2(dataMatrix[keptProbes, , drop = FALSE])
  addComment("[INFO]Raw data are log2 transformed", TRUE, opt$log, display = FALSE)
  remove(probeInfo, pmProbeIds, keptProbes)
} else {
  # tabular input: everything is read as text, without header, so that
  # column names are not altered (no "X" prefix as with header=TRUE)
  rawTable <- read.csv(file = opt$dataFile, header = FALSE, sep = "\t", colClasses = "character")
  # first row (minus its first cell) holds the column names
  colNamesData <- as.character(unlist(rawTable[1, -1], use.names = FALSE))
  # first column (minus its first cell) holds the row names
  rowNamesData <- rawTable[-1, 1]
  valueTable <- rawTable[-1, -1, drop = FALSE]
  # Convert the text cells explicitly with as.numeric(). data.matrix() must not
  # be used here: since R 4.0.0 it turns character columns into factors and
  # returns their integer codes instead of the parsed numbers.
  dataMatrix <- matrix(
    as.numeric(as.matrix(valueTable)),
    nrow = nrow(valueTable),
    ncol = ncol(valueTable),
    dimnames = list(rowNamesData, colNamesData)
  )
  remove(rawTable, valueTable)
  if (any(duplicated(rowNamesData))) {
    addComment("[WARNING] several rows share the same probe/gene name, you should merge or rename them to avoid further analysis mistakes", TRUE, opt$log, display = FALSE)
  }
}

addComment("[INFO]Input data loaded", TRUE, opt$log, display = FALSE)
addComment(c("[INFO]Dim of data matrix:", dim(dataMatrix)), TRUE, opt$log, display = FALSE)

# number of conditions = number of columns of the data matrix
nbConditions <- ncol(dataMatrix)

# Use the condition names given on the command line when there are exactly
# as many of them as conditions, otherwise keep the matrix column names.
# NOTE(review): opt$conditionNames is declared as a single character option,
# so its length looks like it is always 1 and the names would only be applied
# when there is one condition; the intended delimiter is not visible here --
# confirm against the caller before splitting it.
useGivenNames <- !is.null(opt$conditionNames) && length(opt$conditionNames) == nbConditions
if (useGivenNames) {
  nameConditions <- opt$conditionNames
  colnames(dataMatrix) <- nameConditions
  #rownames(phenoData(celData)@data)=nameConditions
  #rownames(protocolData(celData)@data)=nameConditions
} else {
  nameConditions <- colnames(dataMatrix)
}

# correspondence table between the generic names used in plot file names
# (column 1) and the names displayed in figure legends and html items (column 2);
# rows are indexed by the displayed name
correspondanceNameTable <- matrix("", nrow = nbConditions, ncol = 2)
correspondanceNameTable[, 1] <- paste0("Condition", seq_len(nbConditions))
correspondanceNameTable[, 2] <- nameConditions
rownames(correspondanceNameTable) <- correspondanceNameTable[, 2]

addComment("[INFO]Retreive condition names", TRUE, opt$log, display = FALSE)

# Optional factor file: one row per sample, first column = sample name,
# remaining columns = factors.
if (!is.null(opt$factorInfo)) {
  # read the factor file as text, without header
  factorInfoMatrix <- read.csv(
    file = file.path(getwd(), opt$factorInfo),
    header = FALSE, sep = "\t", colClasses = "character"
  )
  # the first row becomes the column names
  colnames(factorInfoMatrix) <- factorInfoMatrix[1, ]
  factorInfoMatrix <- factorInfoMatrix[-1, ]
  # the first column becomes the row names; it is kept in place to avoid
  # conversion to a vector when there is a single factor
  rownames(factorInfoMatrix) <- factorInfoMatrix[, 1]


  # every column of the data matrix must be described in the factor file
  if (length(setdiff(colnames(dataMatrix), rownames(factorInfoMatrix))) > 0) {
    addComment("[ERROR]Missing samples in factor file", TRUE, opt$log)
    q("no", 1, FALSE)
  }

  # order samples as in the expression matrix and drop samples not present in it
  factorInfoMatrix <- factorInfoMatrix[colnames(dataMatrix), ]

  addComment("[INFO]Factors OK", TRUE, opt$log, display = FALSE)
  addComment(c("[INFO]Dim of factorInfo matrix:", dim(factorInfoMatrix)), TRUE, opt$log, display = FALSE)

}

addComment("[INFO]Ready to plot", TRUE, opt$log, display = FALSE)


##----------------------

###plot histograms###
# Intensity density curves, at most `histogramPerFigure` conditions per figure.
# Each figure is written as pdf/png in plotDir and as html widget in plotLyDir.
histogramPerFigure <- 50
if (!is.null(opt$histo)) {
  for (iToPlot in seq_len(((nbConditions - 1) %/% histogramPerFigure) + 1)) {
    # columns drawn on this figure
    firstPlot <- 1 + histogramPerFigure * (iToPlot - 1)
    lastPlot <- min(nbConditions, histogramPerFigure * iToPlot)
    # long format: one row per (value, condition)
    dataToPlot <- data.frame(
      x = c(dataMatrix[, firstPlot:lastPlot]),
      Experiment = rep(colnames(dataMatrix)[firstPlot:lastPlot], each = nrow(dataMatrix))
    )
    p <- ggplot(data = dataToPlot, aes(x = x, color = Experiment)) +
      stat_density(geom = "line", size = 1, position = "identity") +
      ggtitle("Intensity densities") +
      theme_bw() +
      ylab(label = "Density") +
      theme(panel.border = element_blank(), plot.title = element_text(hjust = 0.5))
    # CEL data were log2 transformed at loading time
    #original ploting function
    #hist(celData[,firstPlot:lastPlot],lty=rep(1,nbConditions)[firstPlot:lastPlot],lwd=2,which='pm',target="probeset",transfo=log2,col=rainbow(nbConditions)[firstPlot:lastPlot])
    p <- p + xlab(label = if (dataAreFromCel) "Log2 intensities" else "Intensities")

    # static figure
    if (opt$format == "pdf") {
      pdf(paste0("./plotDir/", opt$histo, iToPlot, ".pdf"))
    } else {
      png(paste0("./plotDir/", opt$histo, iToPlot, ".png"))
    }
    print(p)
    dev.off()

    # interactive figure
    pp <- ggplotly(p)
    htmlwidgets::saveWidget(
      as_widget(pp),
      paste0(file.path(getwd(), "plotLyDir"), "/", opt$histo, iToPlot, ".html"),
      selfcontained = FALSE
    )
  }
  remove(p, dataToPlot)
  addComment("[INFO]Histograms drawn", TRUE, opt$log, display = FALSE)
}

##----------------------

###plot MAplots###
# One MA plot per condition against the row-wise median of all conditions.
# Static figures group `MAplotPerPage` plots per file; each plot is also
# saved on its own as html widget.
MAplotPerPage <- 4
if (!is.null(opt$maPlot)) {
  iToPlot <- 1
  plotVector <- list()
  # work on at most 200000 randomly selected rows
  toTake <- sample(nrow(dataMatrix), min(200000, nrow(dataMatrix)))
  refMedianColumn <- rowMedians(as.matrix(dataMatrix[toTake, ]))
  if (length(toTake) > 100000) {
    addComment(c("[INFO]high number of input data rows ", length(toTake), "; the generation of MA plot can take a while, please be patient"), TRUE, opt$log, display = FALSE)
  }
  for (iCondition in seq_len(nbConditions)) {
    #MAplot(celData,which=i,what=pm,transfo=log2)
    #smoothScatter(x=xToPlot,y=yToPlot,main=nameConditions[iCondition])
    dataA <- dataMatrix[toTake, iCondition]
    dataB <- refMedianColumn # WARNING: reference used by default
    # A = mean of both signals, M = their difference
    xToPlot <- 0.5 * (dataA + dataB)
    yToPlot <- dataA - dataB
    # median of M inside windows of width 0.1 centred on a regular grid of A values
    tempX <- seq(min(xToPlot), max(xToPlot), 0.1)
    tempY <- unlist(lapply(tempX, function(x) {
      median(yToPlot[xToPlot >= (x - 0.1 / 2) & xToPlot < (x + 0.1 / 2)])
    }))

    dataToPlot <- data.frame(x = xToPlot, y = yToPlot)
    dataMedianToPlot <- data.frame(x = tempX, y = tempY)
    # 2D density of the points plus a smoothed curve through the window medians
    p <- ggplot(data = dataToPlot, aes(x, y)) +
      stat_density2d(aes(fill = ..density..^0.25), geom = "tile", contour = FALSE, n = 100) +
      scale_fill_continuous(low = "white", high = "dodgerblue4") +
      geom_smooth(data = dataMedianToPlot, colour = "red", size = 0.5, se = FALSE) +
      ggtitle(correspondanceNameTable[iCondition, 2]) +
      theme_bw() +
      xlab(label = "") +
      ylab(label = "") +
      theme(panel.border = element_blank(), plot.title = element_text(hjust = 0.5), legend.position = "none")
    plotVector <- c(plotVector, list(p))

    # interactive figure, one file per condition
    pp <- ggplotly(p)
    htmlwidgets::saveWidget(
      as_widget(pp),
      paste0(file.path(getwd(), "plotLyDir"), "/", opt$maPlot, "_", correspondanceNameTable[iCondition, 1], ".html"),
      selfcontained = FALSE
    )

    # write a page when it is full or when the last condition has been reached
    if (iCondition == nbConditions || length(plotVector) == MAplotPerPage) {
      if (opt$format == "pdf") {
        pdf(paste0("./plotDir/", opt$maPlot, iToPlot, ".pdf"))
      } else {
        png(paste0("./plotDir/", opt$maPlot, iToPlot, ".png"))
      }
      multiplot(plotlist = plotVector, cols = 2)
      dev.off()
      if (iCondition < nbConditions) {
        # start a new page
        plotVector <- list()
        iToPlot <- iToPlot + 1
      }
    }
  }
  remove(p, dataToPlot, dataA, dataB, toTake, xToPlot, yToPlot)
  addComment("[INFO]MAplots drawn", TRUE, opt$log, display = FALSE)
}

##----------------------

###plot boxplots###
# Intensity boxplots, at most `boxplotPerFigure` conditions per figure.
# Boxes are drawn without their outliers; a sample of the outliers is added
# as a separate point layer so that the html widget stays light.
boxplotPerFigure <- 50
if (!is.null(opt$boxplot)) {
  for (iToPlot in seq_len(((nbConditions - 1) %/% boxplotPerFigure) + 1)) {
    firstPlot <- 1 + boxplotPerFigure * (iToPlot - 1)
    lastPlot <- min(nbConditions, boxplotPerFigure * iToPlot)
    plottedNames <- colnames(dataMatrix)[firstPlot:lastPlot]
    # long format: one row per (value, condition)
    dataToPlot <- data.frame(
      intensities = c(dataMatrix[, firstPlot:lastPlot]),
      Experiment = rep(plottedNames, each = nrow(dataMatrix))
    )

    #to make HTML file lighter, sampling will be done amongst outliers
    # Outliers of each plotted column, always as a named list. lapply() is used
    # instead of apply(): apply() fails when a single column is selected
    # (dimensions are dropped) and returns a matrix instead of a list when all
    # columns have the same number of outliers.
    boxplotsOutliers <- lapply(firstPlot:lastPlot, function(iColumn) boxplot.stats(dataMatrix[, iColumn])$out)
    names(boxplotsOutliers) <- plottedNames
    # keep at most 1000 sampled outliers per column, plus the min and max ones.
    # Positions are sampled (sample.int) because sample(x, ...) would draw from
    # 1:x when a column has exactly one outlier.
    boxplotsOutliers <- lapply(boxplotsOutliers, function(x) {
      if (length(x) > 0) {
        c(x[sample.int(length(x), min(length(x), 1000))], max(x), min(x))
      }
    })
    dataOutliers <- data.frame(
      yVal = as.numeric(unlist(boxplotsOutliers, use.names = FALSE)),
      xVal = rep(names(boxplotsOutliers), lengths(boxplotsOutliers))
    )

    # bottom and left margins grow with the longest condition name
    # (x labels are rotated by 45 degrees)
    longestName <- max(nchar(as.character(plottedNames)))
    #plot boxplots without outliers
    p <- ggplot(data = dataToPlot, aes(y = intensities, x = Experiment, color = Experiment)) +
      geom_boxplot(outlier.colour = NA, outlier.shape = NA) +
      ggtitle("Intensities") +
      theme_bw() +
      xlab(label = "") +
      theme(
        panel.border = element_blank(),
        plot.title = element_text(hjust = 0.5),
        axis.text.x = element_text(angle = 45, hjust = 1),
        plot.margin = unit(c(10, 10, longestName, 15 + longestName), "mm")
      )
    #add to plot sampled outliers (nothing to add when no column has outliers)
    if (nrow(dataOutliers) > 0) {
      p <- p + geom_point(data = dataOutliers, aes(x = xVal, y = yVal, color = xVal), inherit.aes = FALSE)
    }
    # CEL data were log2 transformed at loading time
    #original plotting function
    #boxplot(celData[,firstPlot:lastPlot],which='pm',col=rainbow(nbConditions)[firstPlot:lastPlot],target="probeset",transfo=log2,names=nameConditions[firstPlot:lastPlot],main="Intensities")
    p <- p + ylab(label = if (dataAreFromCel) "Log2 intensities" else "Intensities")

    # static figure
    if (opt$format == "pdf") {
      pdf(paste0("./plotDir/", opt$boxplot, iToPlot, ".pdf"))
    } else {
      png(paste0("./plotDir/", opt$boxplot, iToPlot, ".png"))
    }
    print(p)
    dev.off()

    #save plotly files
    pp <- ggplotly(p)

    #modify plotly object to get HTML file not too heavy for loading
    hoverPrefix <- if (dataAreFromCel) "log2(intensity) " else "intensity "
    for (iData in seq_along(pp$x$data)) {
      if (pp$x$data[[iData]]$type == "scatter") {
        ##scatter traces are the outlier points added through geom_point;
        ##they have been sampled already, only the hover text is rewritten
        pp$x$data[[iData]]$text <- unlist(lapply(pp$x$data[[iData]]$y, function(yValue) {
          paste(c(hoverPrefix, prettyNum(yValue, digits = 4)), collapse = "")
        }))
      } else {
        ##disable marker plotting to keep only box and whiskers plot (outliers are displayed through scatter plot)
        pp$x$data[[iData]]$marker$opacity <- 0

        #sample 50000 points amongst all data to get a lighter html file, sampling size should not be too low to avoid modifying limit of boxplots
        # the trace name is used as column name of dataMatrix; min and max
        # are always appended to the sample
        traceValues <- dataMatrix[, pp$x$data[[iData]]$name]
        keptValues <- traceValues[sample.int(length(traceValues), min(length(traceValues), 50000))]
        pp$x$data[[iData]]$y <- c(keptValues, min(traceValues), max(traceValues))
        pp$x$data[[iData]]$x <- rep(pp$x$data[[iData]]$x[1], length(pp$x$data[[iData]]$y))
      }
    }

    htmlwidgets::saveWidget(
      as_widget(pp),
      paste0(file.path(getwd(), "plotLyDir"), "/", opt$boxplot, iToPlot, ".html"),
      selfcontained = FALSE
    )
  }
  remove(p, dataToPlot)
  addComment("[INFO]Boxplots drawn", TRUE, opt$log, display = FALSE)

}

##----------------------

###plot microarrays (only for .CEL files)###
# One image per condition, written in plotDir under the generic condition
# name and titled with the displayed condition name.
if (!is.null(opt$microarray) && dataAreFromCel) {
  for (iCondition in seq_len(nbConditions)) {
    genericName <- correspondanceNameTable[iCondition, 1]
    if (opt$format == "pdf") {
      pdf(paste0("./plotDir/", opt$microarray, "_", genericName, ".pdf"), onefile = FALSE, width = 5, height = 5)
    } else {
      png(paste0("./plotDir/", opt$microarray, "_", genericName, ".png"))
    }
    image(celData[, iCondition], main = correspondanceNameTable[iCondition, 2])
    dev.off()
  }
  addComment("[INFO]Microarray drawn", TRUE, opt$log, display = FALSE)
}

##----------------------

###plot PCA plot###
# Principal component analysis of the conditions: optional scree plot
# (static + html), then 3D plotly scatter plots of the first three components,
# coloured (and shaped) by the factors of the factor file when it is given.
if (!is.null(opt$acp)) {
  ##to avoid error when nrow is too large, results quite stable with 200k random selected rows
  randomSelection <- sample(nrow(dataMatrix), min(200000, nrow(dataMatrix)))

  #remove constant variables (rows with zero variance cannot be scaled)
  dataFiltered <- dataMatrix[randomSelection, , drop = FALSE]
  toRemove <- which(unlist(apply(dataFiltered, 1, var)) == 0)
  if (length(toRemove) > 0) {
    dataFiltered <- dataFiltered[-toRemove, , drop = FALSE]
  }
  ##geom_text(aes(label=Experiments,hjust=1, vjust=1.3), y = PC2+0.01)
  # conditions are the observations, probes/genes the scaled variables
  PACres <- prcomp(t(dataFiltered), scale. = TRUE)

  if (!is.null(opt$screePlot)) {
    #scree plot: percentage of variance carried by each component
    #p <- fviz_eig(PACres)
    dataToPlot <- data.frame(
      compo = seq(1, length(PACres$sdev)),
      var = (PACres$sdev^2 / sum(PACres$sdev^2)) * 100
    )
    p <- ggplot(data = dataToPlot, aes(x = compo, y = var)) +
      geom_bar(stat = "identity", fill = "steelblue") +
      geom_line() +
      geom_point() +
      ggtitle("Scree plot") +
      theme_bw() +
      theme(panel.border = element_blank(), plot.title = element_text(hjust = 0.5)) +
      xlab(label = "Dimensions") +
      ylab(label = "% explained variances") +
      scale_x_discrete(limits = dataToPlot$compo)
    pp <- ggplotly(p)

    if (opt$format == "pdf") {
      pdf(paste0("./plotDir/", opt$screePlot, ".pdf"))
    } else {
      png(paste0("./plotDir/", opt$screePlot, ".png"))
    }
    plot(p)
    dev.off()
    htmlwidgets::saveWidget(
      as_widget(pp),
      paste0(file.path(getwd(), "plotLyDir"), "/", opt$screePlot, ".html"),
      selfcontained = FALSE
    )
  }

  #now plot pca plots

  # layout shared by every PCA figure
  pcaScene <- list(
    xaxis = list(title = "Component 1"),
    yaxis = list(title = "Component 2"),
    zaxis = list(title = "Component 3")
  )
  pcaLegend <- list(font = list(family = "sans-serif", size = 15, color = "#000"))
  # coordinates on the first three components (indexing column 3 requires at
  # least three components, i.e. at least three conditions)
  pcaCoordinates <- data.frame(
    PC1 = PACres$x[, 1], PC2 = PACres$x[, 2], PC3 = PACres$x[, 3],
    Experiments = rownames(PACres$x)
  )
  pcaDirectory <- file.path(getwd(), "plotLyDir")

  if (!is.null(opt$factorInfo)) {
    fileIdent <- ""
    symbolset <- c("circle", "cross", "square", "diamond", "circle-open", "square-open", "diamond-open", "x")
    factorNames <- colnames(factorInfoMatrix)[-1]

    #save equivalence between real factor names and generic ones in correspondanceNameTable
    correspondanceNameTable <- rbind(
      correspondanceNameTable,
      matrix(c(paste0("Factor", seq_along(factorNames)), factorNames), ncol = 2, nrow = length(factorNames))
    )
    rownames(correspondanceNameTable) <- correspondanceNameTable[, 2]

    #first order factors from decreasing groups number
    groupCounts <- unlist(lapply(factorNames, function(x) length(table(factorInfoMatrix[, x]))))
    orderedFactors <- factorNames[order(groupCounts, decreasing = TRUE)]
    # the last ordered factor has the fewest groups: when even that one has
    # more groups than available symbols, no pair of factors can be displayed
    allFactorsBigger <- length(table(factorInfoMatrix[, orderedFactors[length(orderedFactors)]])) > length(symbolset)
    if (allFactorsBigger) {
      addComment("[WARNING]All factors are composed of too many groups to display two factors at same time, each PCA plot will display only one factor groups", TRUE, opt$log, display = FALSE)
    }

    # hover text: all factor values of each sample (same for every figure)
    hoverLabel <- unlist(lapply(rownames(PACres$x), function(x) paste(factorInfoMatrix[x, -1], collapse = ",")))

    for (iFactor in seq_along(orderedFactors)) {
      #if it is the last factor of the list or if all factors have too many groups
      if (iFactor == length(orderedFactors) || allFactorsBigger) {
        # single-factor figure, only drawn when there is one factor or when
        # factors cannot be paired
        if (length(orderedFactors) == 1 || allFactorsBigger) {
          dataToPlot <- data.frame(
            pcaCoordinates,
            Attribute1 = factorInfoMatrix[rownames(PACres$x), orderedFactors[iFactor]],
            hoverLabel = hoverLabel
          )
          # number of groups counted with table(): levels() is NULL when the
          # column is a character vector, which reduced the palette to two colours
          nbGroups <- length(table(dataToPlot$Attribute1))
          p <- plot_ly(dataToPlot, x = ~PC1, y = ~PC2, z = ~PC3, type = 'scatter3d', mode = "markers",
                       color = ~Attribute1, colors = rainbow(nbGroups + 2),
                       hoverinfo = 'text', text = ~paste(Experiments, "\n", hoverLabel), marker = list(size = 5)) %>%
            layout(title = "Principal Component Analysis", scene = pcaScene, legend = pcaLegend)
          fileIdent <- correspondanceNameTable[orderedFactors[iFactor], 1]
          #add text label to plot
          ##p <- add_text(p,x = dataToPlot$PC1, y = dataToPlot$PC2 + (max(PACres$x[,2])-min(PACres$x[,2]))*0.02, z = dataToPlot$PC3, mode = 'text', inherit = F, text=rownames(PACres$x), hoverinfo='skip', showlegend = FALSE, color=I('black'))
          #save the plotly plot
          htmlwidgets::saveWidget(as_widget(p), paste0(pcaDirectory, "/", opt$acp, "_", fileIdent, ".html"), selfcontained = FALSE)
        }
      } else {
        # pair the current factor (colours) with each following factor (symbols)
        for (iFactorBis in (iFactor + 1):length(orderedFactors)) {
          if (length(table(factorInfoMatrix[, orderedFactors[iFactorBis]])) <= length(symbolset)) {
            dataToPlot <- data.frame(
              pcaCoordinates,
              Attribute1 = factorInfoMatrix[rownames(PACres$x), orderedFactors[iFactor]],
              Attribute2 = factorInfoMatrix[rownames(PACres$x), orderedFactors[iFactorBis]],
              hoverLabel = hoverLabel
            )
            nbGroups <- length(table(dataToPlot$Attribute1))
            p <- plot_ly(dataToPlot, x = ~PC1, y = ~PC2, z = ~PC3, type = 'scatter3d', mode = "markers",
                         color = ~Attribute1, colors = rainbow(nbGroups + 2),
                         symbol = ~Attribute2, symbols = symbolset,
                         hoverinfo = 'text', text = ~paste(Experiments, "\n", hoverLabel), marker = list(size = 5)) %>%
              layout(title = "Principal Component Analysis", scene = pcaScene, legend = pcaLegend)
            fileIdent <- paste(correspondanceNameTable[orderedFactors[c(iFactor, iFactorBis)], 1], collapse = "_AND_")
            #add text label to plot
            ##p <- add_text(p,x = dataToPlot$PC1, y = dataToPlot$PC2 + (max(PACres$x[,2])-min(PACres$x[,2]))*0.02, z = dataToPlot$PC3, mode = 'text', inherit = F, text=rownames(PACres$x), hoverinfo='skip', showlegend = FALSE, color=I('black'))
            #save the plotly plot
            htmlwidgets::saveWidget(as_widget(p), paste0(pcaDirectory, "/", opt$acp, "_", fileIdent, ".html"), selfcontained = FALSE)
          } else {
            addComment(c("[WARNING]PCA with", orderedFactors[iFactor], "and", orderedFactors[iFactorBis], "groups cannot be displayed, too many groups (max", length(symbolset), ")"), TRUE, opt$log, display = FALSE)
          }
        }
      }
    }
  } else {
    # no factor file: a single figure with one colour for all conditions
    dataToPlot <- pcaCoordinates
    p <- plot_ly(dataToPlot, x = ~PC1, y = ~PC2, z = ~PC3, type = 'scatter3d', mode = "markers",
                 marker = list(size = 5, color = "salmon"), hoverinfo = 'text', text = ~paste(Experiments)) %>%
      layout(title = "Principal Component Analysis", scene = pcaScene, legend = pcaLegend)
    ##p <- add_text(p,x = dataToPlot$PC1, y = dataToPlot$PC2 + (max(PACres$x[,2])-min(PACres$x[,2]))*0.02, z = dataToPlot$PC3, mode = 'text', inherit = F, text=rownames(PACres$x), hoverinfo='skip', showlegend = FALSE, color=I('black'))

    #save plotly files
    htmlwidgets::saveWidget(as_widget(p), paste0(pcaDirectory, "/", opt$acp, "_plot.html"), selfcontained = FALSE)
  }
  # clean up; only objects that exist are removed, since no figure (and thus
  # no `p`/`dataToPlot`) is created when every factor pair is skipped
  remove(list = intersect(
    c("p", "dataToPlot", "dataFiltered", "pcaCoordinates", "pcaScene", "pcaLegend", "pcaDirectory"),
    ls()
  ))
  addComment("[INFO]ACP plot drawn", TRUE, opt$log, display = FALSE)
}

# Write the correspondence between the generic names used in plot file names
# and the names displayed in figure legends (tab separated, no header, no row
# names); useful to define html items in the xml file.
write.table(
  correspondanceNameTable,
  file = file.path(getwd(), "correspondanceFileNames.csv"),
  quote = FALSE,
  sep = "\t",
  col.names = FALSE,
  row.names = FALSE
)

# log the total execution time, in minutes
end.time <- Sys.time()
elapsedMinutes <- as.numeric(end.time - start.time, units = "mins")
addComment(c("[INFO]Total execution time for R script:", elapsedMinutes, "mins"), TRUE, opt$log, display = FALSE)

addComment("[INFO]End of R script", TRUE, opt$log, display = FALSE)

# append the R session information to the log file
printSessionInfo(opt$log)
#sessionInfo()