comparison src/LIMMAscriptV4.R @ 0:f274c8d45613 draft

"planemo upload for repository https://github.com/juliechevalier/GIANT/tree/master commit cb276a594444c8f32e9819fefde3a21f121d35df"
author vandelj
date Fri, 26 Jun 2020 09:43:41 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:f274c8d45613
1 # A command-line interface for LIMMA to use with Galaxy
2 # written by Jimmy Vandel
3 # one of these arguments is required:
4 #
5 #
6 initial.options <- commandArgs(trailingOnly = FALSE)
7 file.arg.name <- "--file="
8 script.name <- sub(file.arg.name, "", initial.options[grep(file.arg.name, initial.options)])
9 script.basename <- dirname(script.name)
10 source(file.path(script.basename, "utils.R"))
11 source(file.path(script.basename, "getopt.R"))
12
13 #addComment("Welcome R!")
14
15 # setup R error handling to go to stderr
16 options( show.error.messages=F, error = function () { cat(geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
17
18 # we need that to not crash galaxy with an UTF8 error on German LC settings.
19 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
20 loc <- Sys.setlocale("LC_NUMERIC", "C")
21
22 #get starting time
23 start.time <- Sys.time()
24
25 options(stringAsfactors = FALSE, useFancyQuotes = FALSE)
26 args <- commandArgs()
27
28 # get options, using the spec as defined by the enclosed list.
29 # we read the options from the default: commandArgs(TRUE).
30 spec <- matrix(c(
31 "dataFile", "i", 1, "character",
32 "factorInfo","a", 1, "character",
33 "blockingInfo","b", 1, "character",
34 "dicoRenaming","g",1,"character",
35 "blockingPolicy","u", 1, "character",
36 "fdrThreshold","t", 1, "double",
37 "thresholdFC","d", 1, "double",
38 "format", "f", 1, "character",
39 "histo","h", 1, "character",
40 "volcano","v", 1, "character",
41 "factorsContrast","r", 1, "character",
42 "contrastNames","p", 1, "character",
43 "firstGroupContrast","m", 1, "character",
44 "secondGroupContrast","n", 1, "character",
45 "controlGroups","c", 1, "character",
46 "fratioFile","s",1,"character",
47 "organismID","x",1,"character",
48 "rowNameType","y",1,"character",
49 "quiet", "q", 0, "logical",
50 "log", "l", 1, "character",
51 "outputFile" , "o", 1, "character",
52 "outputDfFile" , "z", 1, "character"),
53 byrow=TRUE, ncol=4)
54 opt <- getopt(spec)
55
56 # enforce the following required arguments
57 if (is.null(opt$log)) {
58 addComment("[ERROR]'log file' is required\n")
59 q( "no", 1, F )
60 }
61 addComment("[INFO]Start of R script",T,opt$log,display=FALSE)
62 if (is.null(opt$dataFile)) {
63 addComment("[ERROR]'dataFile' is required",T,opt$log)
64 q( "no", 1, F )
65 }
66 if (!is.null(opt$blockingInfo) && is.null(opt$blockingPolicy) ) {
67 addComment("[ERROR]blocking policy is missing",T,opt$log)
68 q( "no", 1, F )
69 }
70 if (is.null(opt$dicoRenaming)) {
71 addComment("[ERROR]renaming dictionnary is missing",T,opt$log)
72 q( "no", 1, F )
73 }
74 if (is.null(opt$factorsContrast)) {
75 addComment("[ERROR]factor informations are missing",T,opt$log)
76 q( "no", 1, F )
77 }
78 if (length(opt$firstGroupContrast)!=length(opt$secondGroupContrast)) {
79 addComment("[ERROR]some contrast groups seems to be empty",T,opt$log)
80 q( "no", 1, F )
81 }
82 if (is.null(opt$factorInfo)) {
83 addComment("[ERROR]factors info is missing",T,opt$log)
84 q( "no", 1, F )
85 }
86 if (is.null(opt$format)) {
87 addComment("[ERROR]'output format' is required",T,opt$log)
88 q( "no", 1, F )
89 }
90 if (is.null(opt$fdrThreshold)) {
91 addComment("[ERROR]'FDR threshold' is required",T,opt$log)
92 q( "no", 1, F )
93 }
94 if (is.null(opt$outputFile) || is.null(opt$outputDfFile)){
95 addComment("[ERROR]'output files' are required",T,opt$log)
96 q( "no", 1, F )
97 }
98 if (is.null(opt$thresholdFC)){
99 addComment("[ERROR]'FC threshold' is required",T,opt$log)
100 q( "no", 1, F )
101 }
102 if (is.null(opt$fratioFile)) {
103 addComment("[ERROR]F-ratio parameter is missing",T,opt$log)
104 q( "no", 1, F )
105 }
106
107 #demande si le script sera bavard
108 verbose <- if (is.null(opt$quiet)) {
109 TRUE
110 }else{
111 FALSE
112 }
113
114 #paramètres internes
115 #pour savoir si on remplace les FC calculés par LIMMA par un calcul du LS-MEAN (ie moyenne de moyennes de chaque groupe dans chaque terme du contraste plutôt qu'une moyenne globale dans chaque terme)
116 useLSmean=FALSE
117
118 addComment("[INFO]Parameters checked!",T,opt$log,display=FALSE)
119
120 addComment(c("[INFO]Working directory: ",getwd()),TRUE,opt$log,display=FALSE)
121 addComment(c("[INFO]Command line: ",args),TRUE,opt$log,display=FALSE)
122
123 #directory for plots
124 dir.create(file.path(getwd(), "plotDir"))
125 dir.create(file.path(getwd(), "plotLyDir"))
126
127 #charge des packages silencieusement
128 suppressPackageStartupMessages({
129 library("methods")
130 library("limma")
131 library("biomaRt")
132 library("ggplot2")
133 library("plotly")
134 library("stringr")
135 library("RColorBrewer")
136 })
137
138
139 #chargement du fichier dictionnaire de renommage
140 renamingDico=read.csv(file=file.path(getwd(), opt$dicoRenaming),header=F,sep="\t",colClasses="character")
141 rownames(renamingDico)=renamingDico[,2]
142
143
144 #chargement des fichiers en entrée
145 expDataMatrix=read.csv(file=file.path(getwd(), opt$dataFile),header=F,sep="\t",colClasses="character")
146 #remove first row to convert it as colnames (to avoid X before colnames with header=T)
147 colNamesData=expDataMatrix[1,-1]
148 expDataMatrix=expDataMatrix[-1,]
149 #remove first colum to convert it as rownames
150 rowNamesData=expDataMatrix[,1]
151 expDataMatrix=expDataMatrix[,-1]
152 if(is.data.frame(expDataMatrix)){
153 expDataMatrix=data.matrix(expDataMatrix)
154 }else{
155 expDataMatrix=data.matrix(as.numeric(expDataMatrix))
156 }
157 dimnames(expDataMatrix)=list(rowNamesData,colNamesData)
158
159 #test the number of rows that are constant in dataMatrix
160 nbConstantRows=length(which(unlist(apply(expDataMatrix,1,var))==0))
161 if(nbConstantRows>0){
162 addComment(c("[WARNING]",nbConstantRows,"rows are constant across conditions in input data file"),T,opt$log,display=FALSE)
163 }
164
165 #test if all condition names are present in dico
166 if(!all(colnames(expDataMatrix) %in% rownames(renamingDico))){
167 addComment("[ERROR]Missing condition names in renaming dictionary",T,opt$log)
168 q( "no", 1, F )
169 }
170
171 addComment("[INFO]Expression data loaded and checked",T,opt$log,display=FALSE)
172 addComment(c("[INFO]Dim of expression matrix:",dim(expDataMatrix)),T,opt$log,display=FALSE)
173
174 #chargement du fichier des facteurs
175 factorInfoMatrix=read.csv(file=file.path(getwd(), opt$factorInfo),header=F,sep="\t",colClasses="character")
176 #remove first row to convert it as colnames
177 colnames(factorInfoMatrix)=factorInfoMatrix[1,]
178 factorInfoMatrix=factorInfoMatrix[-1,]
179 #use first colum to convert it as rownames but not removing it to avoid conversion as vector in unique factor case
180 rownames(factorInfoMatrix)=factorInfoMatrix[,1]
181
182 if(length(setdiff(colnames(expDataMatrix),rownames(factorInfoMatrix)))!=0){
183 addComment("[ERROR]Missing samples in factor file",T,opt$log)
184 q( "no", 1, F )
185 }
186
187 #order sample as in expression matrix and remove spurious sample
188 factorInfoMatrix=factorInfoMatrix[colnames(expDataMatrix),]
189
190 #test if all values names are present in dico
191 if(!all(unlist(factorInfoMatrix) %in% rownames(renamingDico))){
192 addComment("[ERROR]Missing factor names in renaming dictionary",T,opt$log)
193 q( "no", 1, F )
194 }
195
196 addComment("[INFO]Factors OK",T,opt$log,display=FALSE)
197 addComment(c("[INFO]Dim of factorInfo matrix:",dim(factorInfoMatrix)),T,opt$log,display=FALSE)
198
199 ##manage blocking factor
200 blockingFactor=NULL
201 blockinFactorsList=NULL
202 if(!is.null(opt$blockingInfo)){
203
204 #chargement du fichier des blocking factors
205 blockingInfoMatrix=read.csv(file=file.path(getwd(), opt$blockingInfo),header=F,sep="\t",colClasses="character")
206 #remove first row to convert it as colnames
207 colnames(blockingInfoMatrix)=blockingInfoMatrix[1,]
208 blockingInfoMatrix=blockingInfoMatrix[-1,]
209 #use first colum to convert it as rownames but not removing it to avoid conversion as vector in unique factor case
210 rownames(blockingInfoMatrix)=blockingInfoMatrix[,1]
211
212
213 if(length(setdiff(colnames(expDataMatrix),rownames(blockingInfoMatrix)))!=0){
214 addComment("[ERROR]Missing samples in blocking factor file",T,opt$log)
215 q( "no", 1, F )
216 }
217
218 #order sample as in expression matrix
219 blockingInfoMatrix=blockingInfoMatrix[colnames(expDataMatrix),]
220
221 #test if all blocking names are present in dico
222 if(!all(unlist(blockingInfoMatrix) %in% rownames(renamingDico))){
223 addComment("[ERROR]Missing blocking names in renaming dictionary",T,opt$log)
224 q( "no", 1, F )
225 }
226
227 #remove blocking factors allready present as real factors
228 blockingNotInMainFactors=setdiff(colnames(blockingInfoMatrix)[-1],colnames(factorInfoMatrix)[-1])
229
230 if(length(blockingNotInMainFactors)<(ncol(blockingInfoMatrix)-1))addComment("[WARNING]Blocking factors cannot be principal factors",T,opt$log,display=FALSE)
231
232 if(length(blockingNotInMainFactors)>0){
233
234 blockingInfoMatrix=blockingInfoMatrix[,c(colnames(blockingInfoMatrix)[1],blockingNotInMainFactors)]
235
236 groupBlocking=rep("c",ncol(expDataMatrix))
237 #for each blocking factor
238 for(blockingFact in blockingNotInMainFactors){
239 if(opt$blockingPolicy=="correlated"){
240 indNewFact=as.numeric(factor(blockingInfoMatrix[,blockingFact]))
241 groupBlocking=paste(groupBlocking,indNewFact,sep="_")
242 }else{
243 if(is.null(blockinFactorsList))blockinFactorsList=list()
244 blockinFactorsList[[blockingFact]]=factor(unlist(lapply(blockingInfoMatrix[,blockingFact],function(x)paste(c(blockingFact,"_",x),collapse=""))))
245 }
246 }
247 if(opt$blockingPolicy=="correlated"){
248 blockingFactor=factor(groupBlocking)
249 if(length(levels(blockingFactor))==1){
250 addComment("[ERROR]Selected blocking factors seems to be constant",T,opt$log)
251 q( "no", 1, F )
252 }
253 }
254 addComment("[INFO]Blocking info OK",T,opt$log,display=FALSE)
255 }else{
256 addComment("[WARNING]No blocking factors will be considered",T,opt$log,display=FALSE)
257 }
258 }
259
260
261 ##rename different input parameters using renamingDictionary
262 opt$factorsContrast=renamingDico[unlist(lapply(unlist(strsplit(opt$factorsContrast,",")),function(x)which(renamingDico[,1]==x))),2]
263
264 userDefinedContrasts=FALSE
265 if(!is.null(opt$firstGroupContrast) && !is.null(opt$secondGroupContrast)){
266 userDefinedContrasts=TRUE
267 for(iContrast in 1:length(opt$firstGroupContrast)){
268 opt$firstGroupContrast[iContrast]=paste(unlist(lapply(unlist(strsplit(opt$firstGroupContrast[iContrast],",")),function(x)paste(renamingDico[unlist(lapply(unlist(strsplit(x,"\\*")),function(x)which(renamingDico[,1]==x))),2],collapse="*"))),collapse=",")
269 opt$secondGroupContrast[iContrast]=paste(unlist(lapply(unlist(strsplit(opt$secondGroupContrast[iContrast],",")),function(x)paste(renamingDico[unlist(lapply(unlist(strsplit(x,"\\*")),function(x)which(renamingDico[,1]==x))),2],collapse="*"))),collapse=",")
270 }
271 }
272
273 if(!is.null(opt$controlGroups)){
274 renamedGroups=c()
275 for(iGroup in unlist(strsplit(opt$controlGroups,","))){
276 renamedControlGroup=paste(renamingDico[unlist(lapply(unlist(strsplit(iGroup,":")),function(x)which(renamingDico[,1]==x))),2],collapse=":")
277 if(length(renamedControlGroup)==0 || any(which(unlist(gregexpr(text = renamedControlGroup,pattern = ":"))==-1))){
278 addComment("[ERROR]Control groups for interaction seem to mismatch, please check them.",T,opt$log)
279 q( "no", 1, F )
280 }
281 renamedGroups=c(renamedGroups,renamedControlGroup)
282 }
283 opt$controlGroups=renamedGroups
284 }
285 addComment("[INFO]Contrast variables are renamed to avoid confusion",T,opt$log,display=FALSE)
286 ##renaming done
287
288 #to convert factor as numeric value --> useless now ?
289 #expDataMatrix=apply(expDataMatrix,c(1,2),function(x)as.numeric(paste(x)))
290
291 #get factors info for LIMMA
292 factorsList=list()
293 for(iFactor in opt$factorsContrast){
294 if(!(iFactor %in% colnames(factorInfoMatrix))){
295 addComment("[ERROR]Required factors are missing in input file",T,opt$log)
296 q( "no", 1, F )
297 }
298 factorsList[[iFactor]]=factor(unlist(lapply(factorInfoMatrix[,iFactor],function(x)paste(c(iFactor,"_",x),collapse=""))))
299 if(length(levels(factorsList[[iFactor]]))==1){
300 addComment("[ERROR]One selected factor seems to be constant",T,opt$log)
301 q( "no", 1, F )
302 }
303 }
304
305 #check if there is at least 2 factors to allow interaction computation
306 if(!is.null(opt$controlGroups) && length(factorsList)<2){
307 addComment("[ERROR]You cannot ask for interaction with less than 2 factors",T,opt$log)
308 q( "no", 1, F )
309 }
310
311 #merge all factors as a single one
312 factorsMerged=as.character(factorsList[[opt$factorsContrast[1]]])
313 for(iFactor in opt$factorsContrast[-1]){
314 factorsMerged=paste(factorsMerged,as.character(factorsList[[iFactor]]),sep=".")
315 }
316 factorsMerged=factor(factorsMerged)
317
318 #checked that coefficient number (ie. factorsMerged levels) is strictly smaller than sample size
319 if(length(levels(factorsMerged))>=length(factorsMerged)){
320 addComment(c("[ERROR]No enough samples (",length(factorsMerged),") to estimate ",length(levels(factorsMerged))," coefficients"),T,opt$log)
321 q( "no", 1, F )
322 }
323
324 #get the sample size of each factor values
325 sampleSizeFactor=table(factorsMerged)
326
327
328 if(!is.null(blockinFactorsList)){
329 factorString=c("blockinFactorsList[['", names(blockinFactorsList)[1],"']]")
330 for(blockingFact in names(blockinFactorsList)[-1]){
331 factorString=c(factorString," + blockinFactorsList[['",blockingFact,"']]")
332 }
333 design = model.matrix(as.formula(paste(c("~ factorsMerged +",factorString," + 0"),collapse="")))
334
335 #rename design columns
336 coeffMeaning = levels(factorsMerged)
337 for(blockingFact in blockinFactorsList){
338 coeffMeaning=c(coeffMeaning,levels(blockingFact)[-1])
339 }
340 colnames(design) = coeffMeaning
341 }else{
342 design = model.matrix(as.formula( ~ factorsMerged + 0))
343
344 #rename degin columns
345 coeffMeaning = levels(factorsMerged)
346 colnames(design) = coeffMeaning
347 }
348
349 addComment(c("[INFO]Available coefficients: ",coeffMeaning),T,opt$log,display=F)
350
351 estimableCoeff=which(colSums(design)!=0)
352
353 addComment("[INFO]Design done",T,opt$log,display=F)
354
355 #use blocking factor if exists
356 if(!is.null(blockingFactor)){
357 corfit <- duplicateCorrelation(expDataMatrix, design, block=blockingFactor)
358
359 addComment(c("[INFO]Correlation within groups: ",corfit$consensus.correlation),T,opt$log,display=F)
360
361 #run linear model fit
362 data.fit = lmFit(expDataMatrix,design,block = blockingFactor, correlation=corfit$consensus.correlation)
363 }else{
364 #run linear model fit
365 data.fit = lmFit(expDataMatrix,design)
366 }
367
368 estimatedCoeff=which(!is.na(data.fit$coefficients[1,]))
369
370 addComment("[INFO]Lmfit done",T,opt$log,display=F)
371
372 #catch situation where some coefficients cannot be estimated, probably due to dependances between design columns
373 #if(length(setdiff(estimableCoeff,estimatedCoeff))>0){
374 # addComment("[ERROR]Error in design matrix, check your group definitions",T,opt$log)
375 # q( "no", 1, F )
376 #}
377 #to strong condition, should return ERROR only when coefficients relative to principal factors cannot be estimated, otherwise, return a simple WARNING
378
379 #define requested contrasts
380 requiredContrasts=c()
381 humanReadingContrasts=c()
382 persoContrastName=c()
383 if(userDefinedContrasts){
384 for(iContrast in 1:length(opt$firstGroupContrast)){
385 posGroup=unlist(lapply(unlist(strsplit(opt$firstGroupContrast[iContrast],",")),function(x)paste(paste(opt$factorsContrast,unlist(strsplit(x,"\\*")),sep="_"),collapse=".")))
386 negGroup=unlist(lapply(unlist(strsplit(opt$secondGroupContrast[iContrast],",")),function(x)paste(paste(opt$factorsContrast,unlist(strsplit(x,"\\*")),sep="_"),collapse=".")))
387 #clear posGroup and negGroup from empty groups
388 emptyPosGroups=which(!(posGroup%in%coeffMeaning))
389 if(length(emptyPosGroups)>0){
390 addComment(c("[WARNING]The group(s)",posGroup[emptyPosGroups],"is/are removed from contrast as it/they is/are empty"),T,opt$log,display=FALSE)
391 posGroup=posGroup[-emptyPosGroups]
392 currentHumanContrast=paste(unlist(strsplit(opt$firstGroupContrast[iContrast],","))[-emptyPosGroups],collapse="+")
393 }else{
394 currentHumanContrast=paste(unlist(strsplit(opt$firstGroupContrast[iContrast],",")),collapse="+")
395 }
396 emptyNegGroups=which(!(negGroup%in%coeffMeaning))
397 if(length(emptyNegGroups)>0){
398 addComment(c("[WARNING]The group(s)",negGroup[emptyNegGroups],"is/are removed from contrast as it/they is/are empty"),T,opt$log,display=FALSE)
399 negGroup=negGroup[-emptyNegGroups]
400 currentHumanContrast=paste(c(currentHumanContrast,unlist(strsplit(opt$secondGroupContrast[iContrast],","))[-emptyNegGroups]),collapse="-")
401 }else{
402 currentHumanContrast=paste(c(currentHumanContrast,unlist(strsplit(opt$secondGroupContrast[iContrast],","))),collapse="-")
403 }
404 if(length(posGroup)==0 || length(negGroup)==0 ){
405 addComment(c("[WARNING]Contrast",currentHumanContrast,"cannot be estimated due to empty group"),T,opt$log,display=FALSE)
406 }else{
407 if(all(posGroup%in%negGroup) && all(negGroup%in%posGroup)){
408 addComment(c("[WARNING]Contrast",currentHumanContrast,"cannot be estimated due to null contrast"),T,opt$log,display=FALSE)
409 }else{
410 #get coefficients required for first group added as positive
411 positiveCoeffWeights=sampleSizeFactor[posGroup]/sum(sampleSizeFactor[posGroup])
412 #positiveCoeffWeights=rep(1,length(posGroup))
413 #names(positiveCoeffWeights)=names(sampleSizeFactor[posGroup])
414 #get coefficients required for second group added as negative
415 negativeCoeffWeights=sampleSizeFactor[negGroup]/sum(sampleSizeFactor[negGroup])
416 #negativeCoeffWeights=rep(1,length(negGroup))
417 #names(negativeCoeffWeights)=names(sampleSizeFactor[negGroup])
418 #build the resulting contrast
419 currentContrast=paste(paste(positiveCoeffWeights[posGroup],posGroup,sep="*"),collapse="+")
420 currentContrast=paste(c(currentContrast,paste(paste(negativeCoeffWeights[negGroup],negGroup,sep="*"),collapse="-")),collapse="-")
421 requiredContrasts=c(requiredContrasts,currentContrast)
422
423 #build the human reading contrast
424 humanReadingContrasts=c(humanReadingContrasts,currentHumanContrast)
425 if(!is.null(opt$contrastNames) && nchar(opt$contrastNames[iContrast])>0){
426 persoContrastName=c(persoContrastName,opt$contrastNames[iContrast])
427 }else{
428 persoContrastName=c(persoContrastName,"")
429 }
430
431 addComment(c("[INFO]Contrast added : ",currentHumanContrast),T,opt$log,display=F)
432 addComment(c("with complete formula ",currentContrast),T,opt$log,display=F)
433 }
434 }
435 }
436 }
437
438
439 #define the true formula with interactions to get interaction coefficients
440 factorString=c("factorsList[['", names(factorsList)[1],"']]")
441 for(iFactor in names(factorsList)[-1]){
442 factorString=c(factorString," * factorsList[['",iFactor,"']]")
443 }
444
445 if(!is.null(blockinFactorsList)){
446 for(blockingFact in names(blockinFactorsList)){
447 factorString=c(factorString," + blockinFactorsList[['",blockingFact,"']]")
448 }
449 }
450
451 #should not be null at the end
452 allFtestMeanSquare=NULL
453 #to get the F-test values
454 estimatedInteractions=rownames(anova(lm(as.formula(paste(c("expDataMatrix[1,] ~ ",factorString),collapse="")))))
455 estimatedInteractions=c(unlist(lapply(estimatedInteractions[-length(estimatedInteractions)],function(x){temp=unlist(strsplit(x,"[ \" | : ]"));paste(temp[seq(2,length(temp),3)],collapse=":")})),estimatedInteractions[length(estimatedInteractions)])
456 #rename estimated interaction terms using renamingDico
457 estimatedInteractions=c(unlist(lapply(estimatedInteractions[-length(estimatedInteractions)],function(x)paste(renamingDico[unlist(strsplit(x,":")),1],collapse=":"))),estimatedInteractions[length(estimatedInteractions)])
458 t <- unlist(apply(expDataMatrix,1,function(x){temp=anova(lm(as.formula(paste(c("x ~ ",factorString),collapse=""))))$`Mean Sq`;temp/temp[length(temp)]}))
459 allFtestMeanSquare <- t(matrix(t,nrow=length(estimatedInteractions)))
460 #remove from allFtest rows containing NA
461 if(length(which(is.na(allFtestMeanSquare[,1])))>0)allFtestMeanSquare=allFtestMeanSquare[-(which(is.na(allFtestMeanSquare[,1]))),]
462 colnames(allFtestMeanSquare)=estimatedInteractions
463
464 #add contrasts corresponding to interaction terms
465 if(!is.null(opt$controlGroups)){
466 #first load user defined control group for each factor
467 controlGroup=rep(NA,length(factorsList))
468 names(controlGroup)=names(factorsList)
469 for(iGroup in opt$controlGroups){
470 splitGroup=unlist(strsplit(iGroup,":"))
471 splitGroup[2]=paste(splitGroup[1],splitGroup[2],sep = "_")
472 #check if defined control group is really a level of the corresponding factor
473 if(!splitGroup[1]%in%names(controlGroup) || !splitGroup[2]%in%factorsList[[splitGroup[1]]]){
474 addComment(c("[ERROR]The factor name",splitGroup[1],"does not exist or group name",splitGroup[2]),T,opt$log)
475 q( "no", 1, F )
476 }
477 if(!is.na(controlGroup[splitGroup[1]])){
478 addComment("[ERROR]Several control groups are defined for the same factor, please select only one control group for each factor if you want to compute interaction contrasts",T,opt$log)
479 q( "no", 1, F )
480 }
481 controlGroup[splitGroup[1]]=splitGroup[2]
482 }
483
484 #check if all factor have a defined control group
485 if(any(is.na(controlGroup))){
486 addComment("[ERROR]Missing control group for some factors, please check them if you want to compute interaction contrasts",T,opt$log)
487 q( "no", 1, F )
488 }
489
490 nbFactors=length(factorsList)
491 interactionContrasts=c()
492 contrastClass=c()
493 #initialize list for the first level
494 newPreviousLoopContrast=list()
495 for(iFactorA in 1:(nbFactors-1)){
496 nameFactorA=names(factorsList)[iFactorA]
497 compA=c()
498 for(levelA in setdiff(levels(factorsList[[iFactorA]]),controlGroup[nameFactorA])){
499 compA=c(compA,paste(levelA,controlGroup[nameFactorA],sep="-"))
500 }
501 newPreviousLoopContrast[[as.character(iFactorA)]]=compA
502 }
503 #make a loop for growing interaction set
504 for(globalIfactor in 1:(nbFactors-1)){
505 previousLoopContrast=newPreviousLoopContrast
506 newPreviousLoopContrast=list()
507 #factor A reuse contrasts made at previsous loop
508 for(iFactorA in names(previousLoopContrast)){
509 compA=previousLoopContrast[[iFactorA]]
510
511 if(max(as.integer(unlist(strsplit(iFactorA,"\\."))))<nbFactors){
512 #factor B is the new factor to include in intreraction set
513 for(iFactorB in (max(as.integer(unlist(strsplit(iFactorA,"\\."))))+1):nbFactors){
514 nameFactorB=names(factorsList)[iFactorB]
515 #keep contrasts identified trough interacting factors set
516 newPreviousLoopContrast[[paste(iFactorA,iFactorB,sep=".")]]=c()
517 for(iCompA in compA){
518 for(levelB in setdiff(levels(factorsList[[iFactorB]]),controlGroup[nameFactorB])){
519 #decompose the contrast compA to apply the new level of factor B on each term
520 temp=unlist(strsplit(iCompA,"[ + ]"))
521 splitCompA=temp[1]
522 for(iTemp in temp[-1])splitCompA=c(splitCompA,"+",iTemp)
523 splitCompA=unlist(lapply(splitCompA,function(x){temp=unlist(strsplit(x,"-"));splitCompB=temp[1];for(iTemp in temp[-1])splitCompB=c(splitCompB,"-",iTemp);splitCompB}))
524 #apply on each contrast term the new level of factor B
525 firstTerm=paste(unlist(lapply(splitCompA,function(x)if(x!="+" && x!="-"){paste(x,levelB,sep=".")}else{x})),collapse="")
526 secondTerm=negativeExpression(paste(unlist(lapply(splitCompA,function(x)if(x!="+" && x!="-"){paste(x,controlGroup[nameFactorB],sep=".")}else{x})),collapse=""))
527 currentContrast=paste(c(firstTerm,secondTerm),collapse="")
528
529 newPreviousLoopContrast[[paste(iFactorA,iFactorB,sep=".")]]=c(newPreviousLoopContrast[[paste(iFactorA,iFactorB,sep=".")]],currentContrast)
530 }
531 }
532 }
533 }
534 }
535 for(iContrast in names(newPreviousLoopContrast)){
536 contrastClass=c(contrastClass,rep(iContrast,length(newPreviousLoopContrast[[iContrast]])))
537 }
538 interactionContrasts=c(interactionContrasts,unlist(newPreviousLoopContrast))
539 }
540 #make human title for interactions
541 names(interactionContrasts)=contrastClass
542 humanReadingInteraction=unlist(lapply(interactionContrasts,function(x)gsub("\\.",":",unlist(strsplit(x,"[+-]"))[1])))
543
544 contrastToIgnore=c()
545
546 #complete with control groups and order to match to coeffs
547 for(iContrast in 1:length(interactionContrasts)){
548 missingFactor=setdiff(1:nbFactors,as.integer(unlist(strsplit(names(interactionContrasts[iContrast]),"\\."))))
549 #decompose the contrast
550 temp=unlist(strsplit(interactionContrasts[iContrast],"[ + ]"))
551 splitContrast=temp[1]
552 for(iTemp in temp[-1])splitContrast=c(splitContrast,"+",iTemp)
553 splitContrast=unlist(lapply(splitContrast,function(x){temp=unlist(strsplit(x,"-"));splitCompB=temp[1];for(iTemp in temp[-1])splitCompB=c(splitCompB,"-",iTemp);splitCompB}))
554 for(iFactor in missingFactor){
555 for(iTerm in 1:length(splitContrast)){
556 if(splitContrast[iTerm]!="+" && splitContrast[iTerm]!="-"){
557 splitTerm=unlist(strsplit(splitContrast[iTerm],"\\."))
558 if(iFactor==1)splitContrast[iTerm]=paste(c(controlGroup[names(factorsList)[iFactor]],splitTerm),collapse=".")
559 if(iFactor==nbFactors)splitContrast[iTerm]=paste(c(splitTerm,controlGroup[names(factorsList)[iFactor]]),collapse=".")
560 if(iFactor>1 && iFactor<nbFactors)splitContrast[iTerm]=paste(c(splitTerm[1:(iFactor-1)],controlGroup[names(factorsList)[iFactor]],splitTerm[iFactor:length(splitTerm)]),collapse=".")
561 }
562 }
563 }
564 interactionContrasts[iContrast]=paste(splitContrast,collapse="")
565 if(all(splitContrast[seq(1,length(splitContrast),2)]%in%coeffMeaning)){
566 addComment(c("[INFO]Interaction contrast added : ",humanReadingInteraction[iContrast]),T,opt$log,display=F)
567 addComment(c("with complete formula ",interactionContrasts[iContrast]),T,opt$log,display=F)
568 }else{
569 contrastToIgnore=c(contrastToIgnore,iContrast)
570 addComment(c("[WARNING]Interaction contrast",humanReadingInteraction[iContrast],"is removed due to empty group"),T,opt$log,display=F)
571 }
572 }
573
574 #add interaction contrasts to global contrast list
575 if(length(contrastToIgnore)>0){
576 requiredContrasts=c(requiredContrasts,interactionContrasts[-contrastToIgnore])
577 humanReadingContrasts=c(humanReadingContrasts,humanReadingInteraction[-contrastToIgnore])
578 persoContrastName=c(persoContrastName,rep("",length(humanReadingInteraction[-contrastToIgnore])))
579 }else{
580 requiredContrasts=c(requiredContrasts,interactionContrasts)
581 humanReadingContrasts=c(humanReadingContrasts,humanReadingInteraction)
582 persoContrastName=c(persoContrastName,rep("",length(humanReadingInteraction)))
583 }
584 }#end of intreaction contrasts
585
586
587 #remove from requiredContrasts contrasts that cannot be estimated
588 toRemove=unique(unlist(lapply(setdiff(coeffMeaning,names(estimatedCoeff)),function(x)grep(x,requiredContrasts))))
589 if(length(toRemove)>0){
590 addComment(c("[WARNING]",length(toRemove)," contrasts are removed, due to missing coefficients"),T,opt$log,display=FALSE)
591 requiredContrasts=requiredContrasts[-toRemove]
592 humanReadingContrasts=humanReadingContrasts[-toRemove]
593 persoContrastName=persoContrastName[-toRemove]
594 }
595
596 if(length(requiredContrasts)==0){
597 addComment("[ERROR]No contrast to compute, please check your contrast definition.",T,opt$log)
598 q( "no", 1, F )
599 }
600
601 #compute for each contrast mean of coefficients in posGroup and negGroup for FC computation of log(FC) with LSmean as in Partek
602 meanPosGroup=list()
603 meanNegGroup=list()
604 for(iContrast in 1:length(requiredContrasts)){
605 #define posGroup and negGroup
606 #first split contrast
607 temp=unlist(strsplit(requiredContrasts[iContrast],"[ + ]"))
608 splitContrast=temp[1]
609 for(iTemp in temp[-1])splitContrast=c(splitContrast,"+",iTemp)
610 splitContrast=unlist(lapply(splitContrast,function(x){temp=unlist(strsplit(x,"-"));splitCompB=temp[1];for(iTemp in temp[-1])splitCompB=c(splitCompB,"-",iTemp);splitCompB}))
611 #and then put each term in good group
612 posGroup=c()
613 negGroup=c()
614 nextIsPos=TRUE
615 for(iSplit in splitContrast){
616 if(iSplit=="+")nextIsPos=TRUE
617 if(iSplit=="-")nextIsPos=FALSE
618 if(iSplit!="-" && iSplit!="+"){
619 #remove weights of contrast terms
620 iSplitBis=unlist(strsplit(iSplit,"[*]"))
621 iSplitBis=iSplitBis[length(iSplitBis)]
622 if(nextIsPos)posGroup=c(posGroup,iSplitBis)
623 else negGroup=c(negGroup,iSplitBis)
624 }
625 }
626 #compute means for each group
627 meanPosGroup[[iContrast]]=apply(as.matrix(data.fit$coefficients[,posGroup],ncol=length(posGroup)),1,mean)
628 meanNegGroup[[iContrast]]=apply(as.matrix(data.fit$coefficients[,negGroup],ncol=length(negGroup)),1,mean)
629 }
630
631
632 contrast.matrix = makeContrasts(contrasts=requiredContrasts,levels=design)
633 data.fit.con = contrasts.fit(data.fit,contrast.matrix)
634
635 addComment("[INFO]Contrast definition done",T,opt$log,T,display=FALSE)
636
637 #compute LIMMA statistics
638 data.fit.eb = eBayes(data.fit.con)
639
640 addComment("[INFO]Estimation done",T,opt$log,T,display=FALSE)
641
642 #adjust p.value through FDR
643 data.fit.eb$adj_p.value=data.fit.eb$p.value
644 for(iComparison in 1:ncol(data.fit.eb$adj_p.value)){
645 data.fit.eb$adj_p.value[,iComparison]=p.adjust(data.fit.eb$p.value[,iComparison],"fdr")
646 }
647
648 #add a new field based on LS-means for each contrast instead of global mean like they were calculated in coefficients field
649 data.fit.eb$coefficientsLS=data.fit.eb$coefficients
650 if(ncol(data.fit.eb$coefficients)!=length(meanPosGroup)){
651 addComment("[ERROR]Estimated contrasts number unexpected",T,opt$log)
652 q( "no", 1, F )
653 }
654 for(iContrast in 1:length(meanPosGroup)){
655 data.fit.eb$coefficientsLS[,iContrast]=meanPosGroup[[iContrast]][rownames(data.fit.eb$coefficientsLS)]-meanNegGroup[[iContrast]][rownames(data.fit.eb$coefficientsLS)]
656 }
657
658 #if requested replace coefficient computed as global mean by LS-means values
659 if(useLSmean)data.fit.eb$coefficients=data.fit.eb$coefficientsLS
660
661 addComment("[INFO]Core treatment done",T,opt$log,T,display=FALSE)
662
663
664 ##convert humanReadingContrasts with namingDictionary to create humanReadingContrastsRenamed and keep original humanReadingContrasts names for file names
665 humanReadingContrastsRenamed=rep("",length(humanReadingContrasts))
666 for(iContrast in 1:length(humanReadingContrasts)){
667 if(persoContrastName[iContrast]==""){
668 #if(verbose)addComment(humanReadingContrasts[iContrast])
669 specialCharacters=str_extract_all(humanReadingContrasts[iContrast],"[+|*|_|:|-]")[[1]]
670 #if(verbose)addComment(specialCharacters)
671 nameConverted=unlist(lapply(strsplit(humanReadingContrasts[iContrast],"[+|*|_|:|-]")[[1]],function(x)renamingDico[x,1]))
672 #if(verbose)addComment(nameConverted)
673 humanReadingContrastsRenamed[iContrast]=paste(nameConverted,specialCharacters,collapse="",sep="")
674 #if(verbose)addComment(humanReadingContrastsRenamed[iContrast])
675 humanReadingContrastsRenamed[iContrast]=substr(humanReadingContrastsRenamed[iContrast],1,nchar(humanReadingContrastsRenamed[iContrast])-1)
676 }else{
677 humanReadingContrastsRenamed[iContrast]=persoContrastName[iContrast]
678 }
679 }
680
681 #write correspondances between plot file names (humanReadingContrasts) and displayed names in figure legends (humanReadingContrastsRenamed), usefull to define html items in xml file
682 correspondanceTable=matrix("",ncol=2,nrow=ncol(data.fit.eb$p.value))
683 correspondanceTable[,1]=unlist(lapply(humanReadingContrasts,function(x)gsub(":","_INT_",gsub("\\+","_PLUS_",gsub("\\*","_AND_",x)))))
684 correspondanceTable[,2]=humanReadingContrastsRenamed
685 rownames(correspondanceTable)=correspondanceTable[,2]
686 write.table(correspondanceTable,file=file.path(getwd(), "correspondanceFileNames.csv"),quote=FALSE,sep="\t",col.names = F,row.names = F)
687
688 #plot nominal p-val histograms for selected comparisons
689 histogramPerPage=6
690 if (!is.null(opt$histo)) {
691 iToPlot=1
692 plotVector=list()
693 nbComparisons=ncol(data.fit.eb$p.value)
694 for (iComparison in 1:nbComparisons){
695 dataToPlot=data.frame(pval=data.fit.eb$p.value[,iComparison],id=rownames(data.fit.eb$p.value))
696 p <- ggplot(data=dataToPlot, aes(x=pval)) + geom_histogram(colour="red", fill="salmon") +
697 theme_bw() + ggtitle(humanReadingContrastsRenamed[iComparison]) + ylab(label="Frequencies") + xlab(label="Nominal p-val") +
698 theme(panel.border=element_blank(),plot.title = element_text(hjust = 0.5))
699 plotVector[[length(plotVector)+1]]=p
700
701 pp <- ggplotly(p)
702 htmlwidgets::saveWidget(as_widget(pp), paste(c(file.path(getwd(), "plotLyDir"),"/",opt$histo,"_",correspondanceTable[humanReadingContrastsRenamed[iComparison],1],".html"),collapse=""),selfcontained = F)
703
704 if(iComparison==nbComparisons || length(plotVector)==histogramPerPage){
705 #plot and close the actual plot
706 if(opt$format=="pdf"){
707 pdf(paste(c("./plotDir/",opt$histo,iToPlot,".pdf"),collapse=""))}else{
708 png(paste(c("./plotDir/",opt$histo,iToPlot,".png"),collapse=""))
709 }
710 multiplot(plotlist=plotVector,cols=2)
711 dev.off()
712 if(iComparison<nbComparisons){
713 #prepare for a new plotting file if necessary
714 plotVector=list()
715 iToPlot=iToPlot+1
716 }
717 }
718 }
719 addComment("[INFO]Histograms drawn",T,opt$log,T,display=FALSE)
720
721 }
722
723 #plot F-test sum square barplot
724 if(!is.null(allFtestMeanSquare)){
725 dataToPlot=data.frame(Fratio=apply(allFtestMeanSquare,2,mean),Factors=factor(colnames(allFtestMeanSquare),levels = colnames(allFtestMeanSquare)))
726
727 p <- ggplot(data=dataToPlot, aes(x=Factors, y=Fratio, fill=Factors)) +
728 geom_bar(stat="identity") + scale_fill_manual(values = colorRampPalette(brewer.pal(9,"Set1"))(ncol(allFtestMeanSquare))[sample(ncol(allFtestMeanSquare))]) + ylab(label="mean F-ratio") +
729 theme_bw() + theme(panel.border=element_blank(),plot.title = element_text(hjust = 0.5)) + ggtitle("Source of variation")
730
731 if(opt$format=="pdf"){
732 pdf(paste(c("./plotDir/",opt$fratioFile,".pdf"),collapse=""))}else{
733 png(paste(c("./plotDir/",opt$fratioFile,".png"),collapse=""))
734 }
735 plot(p)
736 dev.off()
737
738 pp <- ggplotly(p)
739 htmlwidgets::saveWidget(as_widget(pp), paste(c(file.path(getwd(), "plotLyDir"),"/",opt$fratioFile,".html"),collapse=""),selfcontained = F)
740
741 addComment("[INFO]SumSquareTest drawn",T,opt$log,T,display=FALSE)
742 }
743
744 #plot VOLCANO plot
745 #volcanoplot(data.fit.eb,coef=1,highlight=10)
746 volcanoPerPage=1
747 logFCthreshold=log2(opt$thresholdFC)
748 if (!is.null(opt$volcano)) {
749 iToPlot=1
750 plotVector=list()
751 nbComparisons=ncol(data.fit.eb$adj_p.value)
752 for (iComparison in 1:nbComparisons){
753
754 #define the log10(p-val) threshold corresponding to FDR threshold fixed by user
755 probeWithLowFDR=-log10(data.fit.eb$p.value[which(data.fit.eb$adj_p.value[,iComparison]<=opt$fdrThreshold),iComparison])
756 pvalThresholdFDR=NULL
757 if(length(probeWithLowFDR)>0)pvalThresholdFDR=min(probeWithLowFDR)
758
759 #get significant points over FC and FDR thresholds
760 significativePoints=intersect(which(abs(data.fit.eb$coefficients[,iComparison])>=logFCthreshold),which(data.fit.eb$adj_p.value[,iComparison]<=opt$fdrThreshold))
761
762 #to reduce size of html plot, we keep 20000 points maximum sampled amongst genes with pval>=33%(pval) and abs(log2(FC))<=66%(abs(log2(FC)))
763 htmlPointsToRemove=intersect(which(abs(data.fit.eb$coefficients[,iComparison])<=quantile(abs(data.fit.eb$coefficients[,iComparison]),c(0.66))),which(data.fit.eb$p.value[,iComparison]>=quantile(abs(data.fit.eb$p.value[,iComparison]),c(0.33))))
764 if(length(htmlPointsToRemove)>20000){
765 htmlPointsToRemove=setdiff(htmlPointsToRemove,sample(htmlPointsToRemove,20000))
766 }else{
767 htmlPointsToRemove=c()
768 }
769
770 xMinLimPlot=min(data.fit.eb$coefficients[,iComparison])-0.2
771 xMaxLimPlot=max(data.fit.eb$coefficients[,iComparison])+0.2
772 yMaxLimPlot= max(-log10(data.fit.eb$p.value[,iComparison]))+0.2
773
774 if(length(significativePoints)>0){
775 dataSignifToPlot=data.frame(pval=-log10(data.fit.eb$p.value[significativePoints,iComparison]),FC=data.fit.eb$coefficients[significativePoints,iComparison],description=paste(names(data.fit.eb$coefficients[significativePoints,iComparison]),"\n","FC: " , round(2^data.fit.eb$coefficients[significativePoints,iComparison],2) , " | FDR p-val: ",prettyNum(data.fit.eb$adj_p.value[significativePoints,iComparison],digits=4), sep=""))
776 #to test if remains any normal points to draw
777 if(length(significativePoints)<nrow(data.fit.eb$p.value)){
778 dataToPlot=data.frame(pval=-log10(data.fit.eb$p.value[-significativePoints,iComparison]),FC=data.fit.eb$coefficients[-significativePoints,iComparison],description=paste("FC: " , round(2^data.fit.eb$coefficients[-significativePoints,iComparison],2) , " | FDR p-val: ",prettyNum(data.fit.eb$adj_p.value[-significativePoints,iComparison],digits=4), sep=""))
779 }else{
780 dataToPlot=data.frame(pval=0,FC=0,description="null")
781 }
782 }else{
783 dataToPlot=data.frame(pval=-log10(data.fit.eb$p.value[,iComparison]),FC=data.fit.eb$coefficients[,iComparison],description=paste("FC: " , round(2^data.fit.eb$coefficients[,iComparison],2) , " | FDR p-val: ",prettyNum(data.fit.eb$adj_p.value[,iComparison],digits=4), sep=""))
784 }
785
786 ##traditional plot
787 p <- ggplot(data=dataToPlot, aes(x=FC, y=pval)) + geom_point() +
788 theme_bw() + ggtitle(humanReadingContrastsRenamed[iComparison]) + ylab(label="-log10(p-val)") + xlab(label="Log2 Fold Change") +
789 theme(panel.border=element_blank(),plot.title = element_text(hjust = 0.5),legend.position="none")
790 if(logFCthreshold!=0) p <- p + geom_vline(xintercept=-logFCthreshold, color="salmon",linetype="dotted", size=1) + geom_vline(xintercept=logFCthreshold, color="salmon",linetype="dotted", size=1) + geom_text(data.frame(text=c(paste(c("log2(1/FC=",opt$thresholdFC,")"),collapse=""),paste(c("log2(FC=",opt$thresholdFC,")"),collapse="")),x=c(-logFCthreshold,logFCthreshold),y=c(0,0)),mapping=aes(x=x, y=y, label=text), size=4, angle=90, vjust=-0.4, hjust=0, color="salmon")
791 if(!is.null(pvalThresholdFDR)) p <- p + geom_hline(yintercept=pvalThresholdFDR, color="skyblue1",linetype="dotted", size=0.5) + geom_text(data.frame(text=c(paste(c("FDR pval limit(",opt$fdrThreshold,")"),collapse="")),x=c(xMinLimPlot),y=c(pvalThresholdFDR)),mapping=aes(x=x, y=y, label=text), size=4, vjust=0, hjust=0, color="skyblue3")
792 if(length(significativePoints)>0)p <- p + geom_point(data=dataSignifToPlot,aes(colour=description))
793
794 ##interactive plot
795 if(length(htmlPointsToRemove)>0){
796 pointToRemove=union(htmlPointsToRemove,significativePoints)
797 #to test if remains any normal points to draw
798 if(length(pointToRemove)<nrow(data.fit.eb$p.value)){
799 dataToPlot=data.frame(pval=-log10(data.fit.eb$p.value[-pointToRemove,iComparison]),FC=data.fit.eb$coefficients[-pointToRemove,iComparison],description=paste("FC: " , round(2^data.fit.eb$coefficients[-pointToRemove,iComparison],2) , " | FDR p-val: ", prettyNum(data.fit.eb$adj_p.value[-pointToRemove,iComparison],digits=4), sep=""))
800 }else{
801 dataToPlot=data.frame(pval=0,FC=0,description="null")
802 }
803 }
804
805 if((nrow(dataToPlot)+nrow(dataSignifToPlot))>40000)addComment(c("[WARNING]For",humanReadingContrastsRenamed[iComparison],"volcano, numerous points to plot(",nrow(dataToPlot)+nrow(dataSignifToPlot),"), resulting volcano could be heavy, using more stringent thresholds could be helpful."),T,opt$log)
806
807 phtml <- plot_ly(data=dataToPlot, x=~FC, y=~pval,type="scatter", mode="markers",showlegend = FALSE, marker = list(color="gray",opacity=0.5), text=~description, hoverinfo="text") %>%
808 layout(title = humanReadingContrastsRenamed[iComparison],xaxis=list(title="Log2 Fold Change",showgrid=TRUE, zeroline=FALSE),yaxis=list(title="-log10(p-val)", showgrid=TRUE, zeroline=FALSE))
809 if(length(significativePoints)>0) phtml=add_markers(phtml,data=dataSignifToPlot, x=~FC, y=~pval, mode="markers" , marker=list( color=log10(abs(dataSignifToPlot$FC)*dataSignifToPlot$pval),colorscale='Rainbow'), text=~description, hoverinfo="text", inherit = FALSE) %>% hide_colorbar()
810 if(logFCthreshold!=0){
811 phtml=add_trace(phtml,x=c(-logFCthreshold,-logFCthreshold), y=c(0,yMaxLimPlot), type="scatter", mode = "lines", line=list(color="coral",dash="dash"), hoverinfo='none', showlegend = FALSE,inherit = FALSE)
812 phtml=add_annotations(phtml,x=-logFCthreshold,y=0,xref = "x",yref = "y",text = paste(c("log2(1/FC=",opt$thresholdFC,")"),collapse=""),xanchor = 'right',showarrow = F,textangle=270,font=list(color="coral"))
813 phtml=add_trace(phtml,x=c(logFCthreshold,logFCthreshold), y=c(0, yMaxLimPlot), type="scatter", mode = "lines", line=list(color="coral",dash="dash"), hoverinfo='none', showlegend = FALSE,inherit = FALSE)
814 phtml=add_annotations(phtml,x=logFCthreshold,y=0,xref = "x",yref = "y",text = paste(c("log2(FC=",opt$thresholdFC,")"),collapse=""),xanchor = 'right',showarrow = F,textangle=270,font=list(color="coral"))
815 }
816 if(!is.null(pvalThresholdFDR)){
817 phtml=add_trace(phtml,x=c(xMinLimPlot,xMaxLimPlot), y=c(pvalThresholdFDR,pvalThresholdFDR), type="scatter", mode = "lines", line=list(color="cornflowerblue",dash="dash"), hoverinfo='none', showlegend = FALSE,inherit = FALSE)
818 phtml=add_annotations(phtml,x=xMinLimPlot,y=pvalThresholdFDR+0.1,xref = "x",yref = "y",text = paste(c("FDR pval limit(",opt$fdrThreshold,")"),collapse=""),xanchor = 'left',showarrow = F,font=list(color="cornflowerblue"))
819 }
820 plotVector[[length(plotVector)+1]]=p
821
822 #save plotly files
823 pp <- ggplotly(phtml)
824 htmlwidgets::saveWidget(as_widget(pp), paste(c(file.path(getwd(), "plotLyDir"),"/",opt$volcano,"_",correspondanceTable[humanReadingContrastsRenamed[iComparison],1],".html"),collapse=""),selfcontained = F)
825
826
827 if(iComparison==nbComparisons || length(plotVector)==volcanoPerPage){
828 #plot and close the actual plot
829 if(opt$format=="pdf"){
830 pdf(paste(c("./plotDir/",opt$volcano,"_",correspondanceTable[humanReadingContrastsRenamed[iComparison],1],".pdf"),collapse=""))}else{
831 png(paste(c("./plotDir/",opt$volcano,"_",correspondanceTable[humanReadingContrastsRenamed[iComparison],1],".png"),collapse=""))
832 }
833 multiplot(plotlist=plotVector,cols=1)
834 dev.off()
835 if(iComparison<nbComparisons){
836 #prepare for a new ploting file if necessary
837 plotVector=list()
838 iToPlot=iToPlot+1
839 }
840 }
841 }
842 remove(dataToPlot,dataSignifToPlot)
843 addComment("[INFO]Volcanos drawn",T,opt$log,T,display=FALSE)
844 }
845
846 rowItemInfo=NULL
847 if(!is.null(opt$rowNameType) && !is.null(opt$organismID)){
848 ##get gene information from BioMart
849 #if(!require("biomaRt")){
850 # source("https://bioconductor.org/biocLite.R")
851 # biocLite("biomaRt")
852 #}
853
854 ensembl_hs_mart <- useMart(biomart="ensembl", dataset=opt$organismID)
855 ensembl_df <- getBM(attributes=c(opt$rowNameType,"description"),mart=ensembl_hs_mart)
856 rowItemInfo=ensembl_df[which(ensembl_df[,1]!=""),2]
857 rowItemInfo=unlist(lapply(rowItemInfo,function(x)substr(unlist(strsplit(x," \\[Source"))[1],1,30)))
858 names(rowItemInfo)=ensembl_df[which(ensembl_df[,1]!=""),1]
859 }
860
861 #write(unlist(dimnames(data.fit.eb$adj_p.value)),opt$log,append = T)
862
863 #prepare additional output containing df informations
864 dfMatrix=matrix(0,ncol=3,nrow = nrow(data.fit.eb$coefficients),dimnames = list(rownames(data.fit.eb$coefficients),c("df.residual","df.prior","df.total")))
865 dfMatrix[,"df.residual"]=data.fit.eb$df.residual
866 dfMatrix[,"df.prior"]=data.fit.eb$df.prior
867 dfMatrix[,"df.total"]=data.fit.eb$df.total
868
869 #filter out genes with higher p-values for all comparisons
870 genesToKeep=names(which(apply(data.fit.eb$adj_p.value,1,function(x)length(which(x<=opt$fdrThreshold))>0)))
871 #filter out genes with lower FC for all comparisons
872 genesToKeep=intersect(genesToKeep,names(which(apply(data.fit.eb$coefficients,1,function(x)length(which(abs(x)>=logFCthreshold))>0))))
873
874 if(length(genesToKeep)>0){
875 data.fit.eb$adj_p.value=matrix(data.fit.eb$adj_p.value[genesToKeep,],ncol=ncol(data.fit.eb$adj_p.value))
876 rownames(data.fit.eb$adj_p.value)=genesToKeep
877 colnames(data.fit.eb$adj_p.value)=colnames(data.fit.eb$p.value)
878
879 data.fit.eb$p.value=matrix(data.fit.eb$p.value[genesToKeep,],ncol=ncol(data.fit.eb$p.value))
880 rownames(data.fit.eb$p.value)=genesToKeep
881 colnames(data.fit.eb$p.value)=colnames(data.fit.eb$adj_p.value)
882
883 data.fit.eb$coefficients=matrix(data.fit.eb$coefficients[genesToKeep,],ncol=ncol(data.fit.eb$coefficients))
884 rownames(data.fit.eb$coefficients)=genesToKeep
885 colnames(data.fit.eb$coefficients)=colnames(data.fit.eb$adj_p.value)
886
887 data.fit.eb$t=matrix(data.fit.eb$t[genesToKeep,],ncol=ncol(data.fit.eb$t))
888 rownames(data.fit.eb$t)=genesToKeep
889 colnames(data.fit.eb$t)=colnames(data.fit.eb$adj_p.value)
890
891 dfMatrix=dfMatrix[genesToKeep,,drop=FALSE]
892
893 }else{
894 addComment(c("[WARNING]No significative genes considering the given FDR threshold : ",opt$fdrThreshold),T,opt$log,display=FALSE)
895 }
896
897 addComment("[INFO]Significant genes filtering done",T,opt$log,T,display=FALSE)
898
899
900 #plot VennDiagramm for genes below threshold between comparisons
901 #t=apply(data.fit.eb$adj_p.value[,1:4],2,function(x)names(which(x<=opt$threshold)))
902 #get.venn.partitions(t)
903 #vennCounts(data.fit.eb$adj_p.value[,1:4]<=opt$threshold)
904
905 #make a simple sort genes based only on the first comparison
906 #newOrder=order(data.fit.eb$adj_p.value[,1])
907 #data.fit.eb$adj_p.value=data.fit.eb$adj_p.value[newOrder,]
908
909 #alternative sorting strategy based on the mean gene rank over all comparisons
910 if(length(genesToKeep)>1){
911 currentRank=rep(0,nrow(data.fit.eb$adj_p.value))
912 for(iComparison in 1:ncol(data.fit.eb$adj_p.value)){
913 currentRank=currentRank+rank(data.fit.eb$adj_p.value[,iComparison])
914 }
915 currentRank=currentRank/ncol(data.fit.eb$adj_p.value)
916 newOrder=order(currentRank)
917
918 data.fit.eb$adj_p.value=matrix(data.fit.eb$adj_p.value[newOrder,],ncol=ncol(data.fit.eb$adj_p.value))
919 rownames(data.fit.eb$adj_p.value)=rownames(data.fit.eb$p.value)[newOrder]
920 colnames(data.fit.eb$adj_p.value)=colnames(data.fit.eb$p.value)
921
922 data.fit.eb$p.value=matrix(data.fit.eb$p.value[newOrder,],ncol=ncol(data.fit.eb$p.value))
923 rownames(data.fit.eb$p.value)=rownames(data.fit.eb$adj_p.value)
924 colnames(data.fit.eb$p.value)=colnames(data.fit.eb$adj_p.value)
925
926 data.fit.eb$coefficients=matrix(data.fit.eb$coefficients[newOrder,],ncol=ncol(data.fit.eb$coefficients))
927 rownames(data.fit.eb$coefficients)=rownames(data.fit.eb$adj_p.value)
928 colnames(data.fit.eb$coefficients)=colnames(data.fit.eb$adj_p.value)
929
930 data.fit.eb$t=matrix(data.fit.eb$t[newOrder,],ncol=ncol(data.fit.eb$t))
931 rownames(data.fit.eb$t)=rownames(data.fit.eb$adj_p.value)
932 colnames(data.fit.eb$t)=colnames(data.fit.eb$adj_p.value)
933
934 dfMatrix=dfMatrix[newOrder,,drop=FALSE]
935 }
936
937
938 #formating output matrices depending on genes to keep
939 if(length(genesToKeep)==0){
940 outputData=matrix(0,ncol=ncol(data.fit.eb$adj_p.value)*5+2,nrow=3)
941 outputData[1,]=c("X","X",rep(humanReadingContrastsRenamed,each=5))
942 outputData[2,]=c("X","X",rep(c("p-val","FDR.p-val","FC","log2(FC)","t-stat"),ncol(data.fit.eb$adj_p.value)))
943 outputData[,1]=c("LIMMA","Gene","noGene")
944 outputData[,2]=c("Comparison","Info","noInfo")
945
946 outputDfData=matrix(0,ncol=3+1,nrow=2)
947 outputDfData[1,]=c("X","df.residual","df.prior","df.total")
948 outputDfData[,1]=c("Statistics","noGene")
949 }else{
950 if(length(genesToKeep)==1){
951 outputData=matrix(0,ncol=ncol(data.fit.eb$adj_p.value)*5+2,nrow=3)
952 outputData[1,]=c("X","X",rep(humanReadingContrastsRenamed,each=5))
953 outputData[2,]=c("X","X",rep(c("p-val","FDR.p-val","FC","log2(FC)","t-stat"),ncol(data.fit.eb$adj_p.value)))
954 outputData[,1]=c("LIMMA","Gene",genesToKeep)
955 outputData[,2]=c("Comparison","Info","na")
956 if(!is.null(rowItemInfo))outputData[3,2]=rowItemInfo[genesToKeep]
957 outputData[3,seq(3,ncol(outputData),5)]=prettyNum(data.fit.eb$p.value,digits=4)
958 outputData[3,seq(4,ncol(outputData),5)]=prettyNum(data.fit.eb$adj_p.value,digits=4)
959 outputData[3,seq(5,ncol(outputData),5)]=prettyNum(2^data.fit.eb$coefficients,digits=4)
960 outputData[3,seq(6,ncol(outputData),5)]=prettyNum(data.fit.eb$coefficients,digits=4)
961 outputData[3,seq(7,ncol(outputData),5)]=prettyNum(data.fit.eb$t,digits=4)
962
963 outputDfData=matrix(0,ncol=3+1,nrow=1+nrow(dfMatrix))
964 outputDfData[1,]=c("Statistics","df.residual","df.prior","df.total")
965 outputDfData[2,]=c(rownames(dfMatrix),prettyNum(dfMatrix[,c("df.residual","df.prior","df.total")],digits=4))
966 }else{
967 #format matrix to be correctly read by galaxy (move headers in first column and row)
968 outputData=matrix(0,ncol=ncol(data.fit.eb$adj_p.value)*5+2,nrow=nrow(data.fit.eb$adj_p.value)+2)
969 outputData[1,]=c("X","X",rep(humanReadingContrastsRenamed,each=5))
970 outputData[2,]=c("X","X",rep(c("p-val","FDR.p-val","FC","log2(FC)","t-stat"),ncol(data.fit.eb$adj_p.value)))
971 outputData[,1]=c("LIMMA","Gene",rownames(data.fit.eb$adj_p.value))
972 outputData[,2]=c("Comparison","Info",rep("na",nrow(data.fit.eb$adj_p.value)))
973 if(!is.null(rowItemInfo))outputData[3:nrow(outputData),2]=rowItemInfo[rownames(data.fit.eb$adj_p.value)]
974 outputData[3:nrow(outputData),seq(3,ncol(outputData),5)]=prettyNum(data.fit.eb$p.value,digits=4)
975 outputData[3:nrow(outputData),seq(4,ncol(outputData),5)]=prettyNum(data.fit.eb$adj_p.value,digits=4)
976 outputData[3:nrow(outputData),seq(5,ncol(outputData),5)]=prettyNum(2^data.fit.eb$coefficients,digits=4)
977 outputData[3:nrow(outputData),seq(6,ncol(outputData),5)]=prettyNum(data.fit.eb$coefficients,digits=4)
978 outputData[3:nrow(outputData),seq(7,ncol(outputData),5)]=prettyNum(data.fit.eb$t,digits=4)
979
980 outputDfData=matrix(0,ncol=3+1,nrow=1+nrow(dfMatrix))
981 outputDfData[1,]=c("Statistics","df.residual","df.prior","df.total")
982 outputDfData[2:(1+nrow(dfMatrix)),]=cbind(rownames(dfMatrix),prettyNum(dfMatrix[,c("df.residual")],digits=4),prettyNum(dfMatrix[,c("df.prior")],digits=4),prettyNum(dfMatrix[,c("df.total")],digits=4))
983 }
984 }
985 addComment("[INFO]Formated output",T,opt$log,display=FALSE)
986
987 #write output results
988 write.table(outputData,file=opt$outputFile,quote=FALSE,sep="\t",col.names = F,row.names = F)
989
990 #write df info file
991 write.table(outputDfData,file=opt$outputDfFile,quote=FALSE,sep="\t",col.names = F,row.names = F)
992
993 end.time <- Sys.time()
994 addComment(c("[INFO]Total execution time for R script:",as.numeric(end.time - start.time,units="mins"),"mins"),T,opt$log,display=FALSE)
995
996 addComment("[INFO]End of R script",T,opt$log,display=FALSE)
997
998 printSessionInfo(opt$log)
999 #sessionInfo()
1000
1001
1002