Mercurial > repos > vandelj > giant_plot_functions
comparison src/LIMMAscriptV4.R @ 0:488e6e8bb8cb draft
"planemo upload for repository https://github.com/juliechevalier/GIANT/tree/master commit cb276a594444c8f32e9819fefde3a21f121d35df"
| author | vandelj |
|---|---|
| date | Fri, 26 Jun 2020 09:41:56 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:488e6e8bb8cb |
|---|---|
| 1 # A command-line interface for LIMMA to use with Galaxy | |
| 2 # written by Jimmy Vandel | |
| 3 # one of these arguments is required: | |
| 4 # | |
| 5 # | |
| 6 initial.options <- commandArgs(trailingOnly = FALSE) | |
| 7 file.arg.name <- "--file=" | |
| 8 script.name <- sub(file.arg.name, "", initial.options[grep(file.arg.name, initial.options)]) | |
| 9 script.basename <- dirname(script.name) | |
| 10 source(file.path(script.basename, "utils.R")) | |
| 11 source(file.path(script.basename, "getopt.R")) | |
| 12 | |
| 13 #addComment("Welcome R!") | |
| 14 | |
| 15 # setup R error handling to go to stderr | |
| 16 options( show.error.messages=F, error = function () { cat(geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) | |
| 17 | |
| 18 # we need that to not crash galaxy with an UTF8 error on German LC settings. | |
| 19 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") | |
| 20 loc <- Sys.setlocale("LC_NUMERIC", "C") | |
| 21 | |
| 22 #get starting time | |
| 23 start.time <- Sys.time() | |
| 24 | |
| 25 options(stringAsfactors = FALSE, useFancyQuotes = FALSE) | |
| 26 args <- commandArgs() | |
| 27 | |
| 28 # get options, using the spec as defined by the enclosed list. | |
| 29 # we read the options from the default: commandArgs(TRUE). | |
| 30 spec <- matrix(c( | |
| 31 "dataFile", "i", 1, "character", | |
| 32 "factorInfo","a", 1, "character", | |
| 33 "blockingInfo","b", 1, "character", | |
| 34 "dicoRenaming","g",1,"character", | |
| 35 "blockingPolicy","u", 1, "character", | |
| 36 "fdrThreshold","t", 1, "double", | |
| 37 "thresholdFC","d", 1, "double", | |
| 38 "format", "f", 1, "character", | |
| 39 "histo","h", 1, "character", | |
| 40 "volcano","v", 1, "character", | |
| 41 "factorsContrast","r", 1, "character", | |
| 42 "contrastNames","p", 1, "character", | |
| 43 "firstGroupContrast","m", 1, "character", | |
| 44 "secondGroupContrast","n", 1, "character", | |
| 45 "controlGroups","c", 1, "character", | |
| 46 "fratioFile","s",1,"character", | |
| 47 "organismID","x",1,"character", | |
| 48 "rowNameType","y",1,"character", | |
| 49 "quiet", "q", 0, "logical", | |
| 50 "log", "l", 1, "character", | |
| 51 "outputFile" , "o", 1, "character", | |
| 52 "outputDfFile" , "z", 1, "character"), | |
| 53 byrow=TRUE, ncol=4) | |
| 54 opt <- getopt(spec) | |
| 55 | |
| 56 # enforce the following required arguments | |
| 57 if (is.null(opt$log)) { | |
| 58 addComment("[ERROR]'log file' is required\n") | |
| 59 q( "no", 1, F ) | |
| 60 } | |
| 61 addComment("[INFO]Start of R script",T,opt$log,display=FALSE) | |
| 62 if (is.null(opt$dataFile)) { | |
| 63 addComment("[ERROR]'dataFile' is required",T,opt$log) | |
| 64 q( "no", 1, F ) | |
| 65 } | |
| 66 if (!is.null(opt$blockingInfo) && is.null(opt$blockingPolicy) ) { | |
| 67 addComment("[ERROR]blocking policy is missing",T,opt$log) | |
| 68 q( "no", 1, F ) | |
| 69 } | |
| 70 if (is.null(opt$dicoRenaming)) { | |
| 71 addComment("[ERROR]renaming dictionnary is missing",T,opt$log) | |
| 72 q( "no", 1, F ) | |
| 73 } | |
| 74 if (is.null(opt$factorsContrast)) { | |
| 75 addComment("[ERROR]factor informations are missing",T,opt$log) | |
| 76 q( "no", 1, F ) | |
| 77 } | |
| 78 if (length(opt$firstGroupContrast)!=length(opt$secondGroupContrast)) { | |
| 79 addComment("[ERROR]some contrast groups seems to be empty",T,opt$log) | |
| 80 q( "no", 1, F ) | |
| 81 } | |
| 82 if (is.null(opt$factorInfo)) { | |
| 83 addComment("[ERROR]factors info is missing",T,opt$log) | |
| 84 q( "no", 1, F ) | |
| 85 } | |
| 86 if (is.null(opt$format)) { | |
| 87 addComment("[ERROR]'output format' is required",T,opt$log) | |
| 88 q( "no", 1, F ) | |
| 89 } | |
| 90 if (is.null(opt$fdrThreshold)) { | |
| 91 addComment("[ERROR]'FDR threshold' is required",T,opt$log) | |
| 92 q( "no", 1, F ) | |
| 93 } | |
| 94 if (is.null(opt$outputFile) || is.null(opt$outputDfFile)){ | |
| 95 addComment("[ERROR]'output files' are required",T,opt$log) | |
| 96 q( "no", 1, F ) | |
| 97 } | |
| 98 if (is.null(opt$thresholdFC)){ | |
| 99 addComment("[ERROR]'FC threshold' is required",T,opt$log) | |
| 100 q( "no", 1, F ) | |
| 101 } | |
| 102 if (is.null(opt$fratioFile)) { | |
| 103 addComment("[ERROR]F-ratio parameter is missing",T,opt$log) | |
| 104 q( "no", 1, F ) | |
| 105 } | |
| 106 | |
| 107 #demande si le script sera bavard | |
| 108 verbose <- if (is.null(opt$quiet)) { | |
| 109 TRUE | |
| 110 }else{ | |
| 111 FALSE | |
| 112 } | |
| 113 | |
| 114 #paramètres internes | |
| 115 #pour savoir si on remplace les FC calculés par LIMMA par un calcul du LS-MEAN (ie moyenne de moyennes de chaque groupe dans chaque terme du contraste plutôt qu'une moyenne globale dans chaque terme) | |
| 116 useLSmean=FALSE | |
| 117 | |
| 118 addComment("[INFO]Parameters checked!",T,opt$log,display=FALSE) | |
| 119 | |
| 120 addComment(c("[INFO]Working directory: ",getwd()),TRUE,opt$log,display=FALSE) | |
| 121 addComment(c("[INFO]Command line: ",args),TRUE,opt$log,display=FALSE) | |
| 122 | |
| 123 #directory for plots | |
| 124 dir.create(file.path(getwd(), "plotDir")) | |
| 125 dir.create(file.path(getwd(), "plotLyDir")) | |
| 126 | |
| 127 #charge des packages silencieusement | |
| 128 suppressPackageStartupMessages({ | |
| 129 library("methods") | |
| 130 library("limma") | |
| 131 library("biomaRt") | |
| 132 library("ggplot2") | |
| 133 library("plotly") | |
| 134 library("stringr") | |
| 135 library("RColorBrewer") | |
| 136 }) | |
| 137 | |
| 138 | |
| 139 #chargement du fichier dictionnaire de renommage | |
| 140 renamingDico=read.csv(file=file.path(getwd(), opt$dicoRenaming),header=F,sep="\t",colClasses="character") | |
| 141 rownames(renamingDico)=renamingDico[,2] | |
| 142 | |
| 143 | |
| 144 #chargement des fichiers en entrée | |
| 145 expDataMatrix=read.csv(file=file.path(getwd(), opt$dataFile),header=F,sep="\t",colClasses="character") | |
| 146 #remove first row to convert it as colnames (to avoid X before colnames with header=T) | |
| 147 colNamesData=expDataMatrix[1,-1] | |
| 148 expDataMatrix=expDataMatrix[-1,] | |
| 149 #remove first colum to convert it as rownames | |
| 150 rowNamesData=expDataMatrix[,1] | |
| 151 expDataMatrix=expDataMatrix[,-1] | |
| 152 if(is.data.frame(expDataMatrix)){ | |
| 153 expDataMatrix=data.matrix(expDataMatrix) | |
| 154 }else{ | |
| 155 expDataMatrix=data.matrix(as.numeric(expDataMatrix)) | |
| 156 } | |
| 157 dimnames(expDataMatrix)=list(rowNamesData,colNamesData) | |
| 158 | |
| 159 #test the number of rows that are constant in dataMatrix | |
| 160 nbConstantRows=length(which(unlist(apply(expDataMatrix,1,var))==0)) | |
| 161 if(nbConstantRows>0){ | |
| 162 addComment(c("[WARNING]",nbConstantRows,"rows are constant across conditions in input data file"),T,opt$log,display=FALSE) | |
| 163 } | |
| 164 | |
| 165 #test if all condition names are present in dico | |
| 166 if(!all(colnames(expDataMatrix) %in% rownames(renamingDico))){ | |
| 167 addComment("[ERROR]Missing condition names in renaming dictionary",T,opt$log) | |
| 168 q( "no", 1, F ) | |
| 169 } | |
| 170 | |
| 171 addComment("[INFO]Expression data loaded and checked",T,opt$log,display=FALSE) | |
| 172 addComment(c("[INFO]Dim of expression matrix:",dim(expDataMatrix)),T,opt$log,display=FALSE) | |
| 173 | |
| 174 #chargement du fichier des facteurs | |
| 175 factorInfoMatrix=read.csv(file=file.path(getwd(), opt$factorInfo),header=F,sep="\t",colClasses="character") | |
| 176 #remove first row to convert it as colnames | |
| 177 colnames(factorInfoMatrix)=factorInfoMatrix[1,] | |
| 178 factorInfoMatrix=factorInfoMatrix[-1,] | |
| 179 #use first colum to convert it as rownames but not removing it to avoid conversion as vector in unique factor case | |
| 180 rownames(factorInfoMatrix)=factorInfoMatrix[,1] | |
| 181 | |
| 182 if(length(setdiff(colnames(expDataMatrix),rownames(factorInfoMatrix)))!=0){ | |
| 183 addComment("[ERROR]Missing samples in factor file",T,opt$log) | |
| 184 q( "no", 1, F ) | |
| 185 } | |
| 186 | |
| 187 #order sample as in expression matrix and remove spurious sample | |
| 188 factorInfoMatrix=factorInfoMatrix[colnames(expDataMatrix),] | |
| 189 | |
| 190 #test if all values names are present in dico | |
| 191 if(!all(unlist(factorInfoMatrix) %in% rownames(renamingDico))){ | |
| 192 addComment("[ERROR]Missing factor names in renaming dictionary",T,opt$log) | |
| 193 q( "no", 1, F ) | |
| 194 } | |
| 195 | |
| 196 addComment("[INFO]Factors OK",T,opt$log,display=FALSE) | |
| 197 addComment(c("[INFO]Dim of factorInfo matrix:",dim(factorInfoMatrix)),T,opt$log,display=FALSE) | |
| 198 | |
| 199 ##manage blocking factor | |
| 200 blockingFactor=NULL | |
| 201 blockinFactorsList=NULL | |
| 202 if(!is.null(opt$blockingInfo)){ | |
| 203 | |
| 204 #chargement du fichier des blocking factors | |
| 205 blockingInfoMatrix=read.csv(file=file.path(getwd(), opt$blockingInfo),header=F,sep="\t",colClasses="character") | |
| 206 #remove first row to convert it as colnames | |
| 207 colnames(blockingInfoMatrix)=blockingInfoMatrix[1,] | |
| 208 blockingInfoMatrix=blockingInfoMatrix[-1,] | |
| 209 #use first colum to convert it as rownames but not removing it to avoid conversion as vector in unique factor case | |
| 210 rownames(blockingInfoMatrix)=blockingInfoMatrix[,1] | |
| 211 | |
| 212 | |
| 213 if(length(setdiff(colnames(expDataMatrix),rownames(blockingInfoMatrix)))!=0){ | |
| 214 addComment("[ERROR]Missing samples in blocking factor file",T,opt$log) | |
| 215 q( "no", 1, F ) | |
| 216 } | |
| 217 | |
| 218 #order sample as in expression matrix | |
| 219 blockingInfoMatrix=blockingInfoMatrix[colnames(expDataMatrix),] | |
| 220 | |
| 221 #test if all blocking names are present in dico | |
| 222 if(!all(unlist(blockingInfoMatrix) %in% rownames(renamingDico))){ | |
| 223 addComment("[ERROR]Missing blocking names in renaming dictionary",T,opt$log) | |
| 224 q( "no", 1, F ) | |
| 225 } | |
| 226 | |
| 227 #remove blocking factors allready present as real factors | |
| 228 blockingNotInMainFactors=setdiff(colnames(blockingInfoMatrix)[-1],colnames(factorInfoMatrix)[-1]) | |
| 229 | |
| 230 if(length(blockingNotInMainFactors)<(ncol(blockingInfoMatrix)-1))addComment("[WARNING]Blocking factors cannot be principal factors",T,opt$log,display=FALSE) | |
| 231 | |
| 232 if(length(blockingNotInMainFactors)>0){ | |
| 233 | |
| 234 blockingInfoMatrix=blockingInfoMatrix[,c(colnames(blockingInfoMatrix)[1],blockingNotInMainFactors)] | |
| 235 | |
| 236 groupBlocking=rep("c",ncol(expDataMatrix)) | |
| 237 #for each blocking factor | |
| 238 for(blockingFact in blockingNotInMainFactors){ | |
| 239 if(opt$blockingPolicy=="correlated"){ | |
| 240 indNewFact=as.numeric(factor(blockingInfoMatrix[,blockingFact])) | |
| 241 groupBlocking=paste(groupBlocking,indNewFact,sep="_") | |
| 242 }else{ | |
| 243 if(is.null(blockinFactorsList))blockinFactorsList=list() | |
| 244 blockinFactorsList[[blockingFact]]=factor(unlist(lapply(blockingInfoMatrix[,blockingFact],function(x)paste(c(blockingFact,"_",x),collapse="")))) | |
| 245 } | |
| 246 } | |
| 247 if(opt$blockingPolicy=="correlated"){ | |
| 248 blockingFactor=factor(groupBlocking) | |
| 249 if(length(levels(blockingFactor))==1){ | |
| 250 addComment("[ERROR]Selected blocking factors seems to be constant",T,opt$log) | |
| 251 q( "no", 1, F ) | |
| 252 } | |
| 253 } | |
| 254 addComment("[INFO]Blocking info OK",T,opt$log,display=FALSE) | |
| 255 }else{ | |
| 256 addComment("[WARNING]No blocking factors will be considered",T,opt$log,display=FALSE) | |
| 257 } | |
| 258 } | |
| 259 | |
| 260 | |
| 261 ##rename different input parameters using renamingDictionary | |
| 262 opt$factorsContrast=renamingDico[unlist(lapply(unlist(strsplit(opt$factorsContrast,",")),function(x)which(renamingDico[,1]==x))),2] | |
| 263 | |
| 264 userDefinedContrasts=FALSE | |
| 265 if(!is.null(opt$firstGroupContrast) && !is.null(opt$secondGroupContrast)){ | |
| 266 userDefinedContrasts=TRUE | |
| 267 for(iContrast in 1:length(opt$firstGroupContrast)){ | |
| 268 opt$firstGroupContrast[iContrast]=paste(unlist(lapply(unlist(strsplit(opt$firstGroupContrast[iContrast],",")),function(x)paste(renamingDico[unlist(lapply(unlist(strsplit(x,"\\*")),function(x)which(renamingDico[,1]==x))),2],collapse="*"))),collapse=",") | |
| 269 opt$secondGroupContrast[iContrast]=paste(unlist(lapply(unlist(strsplit(opt$secondGroupContrast[iContrast],",")),function(x)paste(renamingDico[unlist(lapply(unlist(strsplit(x,"\\*")),function(x)which(renamingDico[,1]==x))),2],collapse="*"))),collapse=",") | |
| 270 } | |
| 271 } | |
| 272 | |
| 273 if(!is.null(opt$controlGroups)){ | |
| 274 renamedGroups=c() | |
| 275 for(iGroup in unlist(strsplit(opt$controlGroups,","))){ | |
| 276 renamedControlGroup=paste(renamingDico[unlist(lapply(unlist(strsplit(iGroup,":")),function(x)which(renamingDico[,1]==x))),2],collapse=":") | |
| 277 if(length(renamedControlGroup)==0 || any(which(unlist(gregexpr(text = renamedControlGroup,pattern = ":"))==-1))){ | |
| 278 addComment("[ERROR]Control groups for interaction seem to mismatch, please check them.",T,opt$log) | |
| 279 q( "no", 1, F ) | |
| 280 } | |
| 281 renamedGroups=c(renamedGroups,renamedControlGroup) | |
| 282 } | |
| 283 opt$controlGroups=renamedGroups | |
| 284 } | |
| 285 addComment("[INFO]Contrast variables are renamed to avoid confusion",T,opt$log,display=FALSE) | |
| 286 ##renaming done | |
| 287 | |
| 288 #to convert factor as numeric value --> useless now ? | |
| 289 #expDataMatrix=apply(expDataMatrix,c(1,2),function(x)as.numeric(paste(x))) | |
| 290 | |
| 291 #get factors info for LIMMA | |
| 292 factorsList=list() | |
| 293 for(iFactor in opt$factorsContrast){ | |
| 294 if(!(iFactor %in% colnames(factorInfoMatrix))){ | |
| 295 addComment("[ERROR]Required factors are missing in input file",T,opt$log) | |
| 296 q( "no", 1, F ) | |
| 297 } | |
| 298 factorsList[[iFactor]]=factor(unlist(lapply(factorInfoMatrix[,iFactor],function(x)paste(c(iFactor,"_",x),collapse="")))) | |
| 299 if(length(levels(factorsList[[iFactor]]))==1){ | |
| 300 addComment("[ERROR]One selected factor seems to be constant",T,opt$log) | |
| 301 q( "no", 1, F ) | |
| 302 } | |
| 303 } | |
| 304 | |
| 305 #check if there is at least 2 factors to allow interaction computation | |
| 306 if(!is.null(opt$controlGroups) && length(factorsList)<2){ | |
| 307 addComment("[ERROR]You cannot ask for interaction with less than 2 factors",T,opt$log) | |
| 308 q( "no", 1, F ) | |
| 309 } | |
| 310 | |
| 311 #merge all factors as a single one | |
| 312 factorsMerged=as.character(factorsList[[opt$factorsContrast[1]]]) | |
| 313 for(iFactor in opt$factorsContrast[-1]){ | |
| 314 factorsMerged=paste(factorsMerged,as.character(factorsList[[iFactor]]),sep=".") | |
| 315 } | |
| 316 factorsMerged=factor(factorsMerged) | |
| 317 | |
| 318 #checked that coefficient number (ie. factorsMerged levels) is strictly smaller than sample size | |
| 319 if(length(levels(factorsMerged))>=length(factorsMerged)){ | |
| 320 addComment(c("[ERROR]No enough samples (",length(factorsMerged),") to estimate ",length(levels(factorsMerged))," coefficients"),T,opt$log) | |
| 321 q( "no", 1, F ) | |
| 322 } | |
| 323 | |
| 324 #get the sample size of each factor values | |
| 325 sampleSizeFactor=table(factorsMerged) | |
| 326 | |
| 327 | |
| 328 if(!is.null(blockinFactorsList)){ | |
| 329 factorString=c("blockinFactorsList[['", names(blockinFactorsList)[1],"']]") | |
| 330 for(blockingFact in names(blockinFactorsList)[-1]){ | |
| 331 factorString=c(factorString," + blockinFactorsList[['",blockingFact,"']]") | |
| 332 } | |
| 333 design = model.matrix(as.formula(paste(c("~ factorsMerged +",factorString," + 0"),collapse=""))) | |
| 334 | |
| 335 #rename design columns | |
| 336 coeffMeaning = levels(factorsMerged) | |
| 337 for(blockingFact in blockinFactorsList){ | |
| 338 coeffMeaning=c(coeffMeaning,levels(blockingFact)[-1]) | |
| 339 } | |
| 340 colnames(design) = coeffMeaning | |
| 341 }else{ | |
| 342 design = model.matrix(as.formula( ~ factorsMerged + 0)) | |
| 343 | |
| 344 #rename degin columns | |
| 345 coeffMeaning = levels(factorsMerged) | |
| 346 colnames(design) = coeffMeaning | |
| 347 } | |
| 348 | |
| 349 addComment(c("[INFO]Available coefficients: ",coeffMeaning),T,opt$log,display=F) | |
| 350 | |
| 351 estimableCoeff=which(colSums(design)!=0) | |
| 352 | |
| 353 addComment("[INFO]Design done",T,opt$log,display=F) | |
| 354 | |
| 355 #use blocking factor if exists | |
| 356 if(!is.null(blockingFactor)){ | |
| 357 corfit <- duplicateCorrelation(expDataMatrix, design, block=blockingFactor) | |
| 358 | |
| 359 addComment(c("[INFO]Correlation within groups: ",corfit$consensus.correlation),T,opt$log,display=F) | |
| 360 | |
| 361 #run linear model fit | |
| 362 data.fit = lmFit(expDataMatrix,design,block = blockingFactor, correlation=corfit$consensus.correlation) | |
| 363 }else{ | |
| 364 #run linear model fit | |
| 365 data.fit = lmFit(expDataMatrix,design) | |
| 366 } | |
| 367 | |
| 368 estimatedCoeff=which(!is.na(data.fit$coefficients[1,])) | |
| 369 | |
| 370 addComment("[INFO]Lmfit done",T,opt$log,display=F) | |
| 371 | |
| 372 #catch situation where some coefficients cannot be estimated, probably due to dependances between design columns | |
| 373 #if(length(setdiff(estimableCoeff,estimatedCoeff))>0){ | |
| 374 # addComment("[ERROR]Error in design matrix, check your group definitions",T,opt$log) | |
| 375 # q( "no", 1, F ) | |
| 376 #} | |
| 377 #to strong condition, should return ERROR only when coefficients relative to principal factors cannot be estimated, otherwise, return a simple WARNING | |
| 378 | |
| 379 #define requested contrasts | |
| 380 requiredContrasts=c() | |
| 381 humanReadingContrasts=c() | |
| 382 persoContrastName=c() | |
| 383 if(userDefinedContrasts){ | |
| 384 for(iContrast in 1:length(opt$firstGroupContrast)){ | |
| 385 posGroup=unlist(lapply(unlist(strsplit(opt$firstGroupContrast[iContrast],",")),function(x)paste(paste(opt$factorsContrast,unlist(strsplit(x,"\\*")),sep="_"),collapse="."))) | |
| 386 negGroup=unlist(lapply(unlist(strsplit(opt$secondGroupContrast[iContrast],",")),function(x)paste(paste(opt$factorsContrast,unlist(strsplit(x,"\\*")),sep="_"),collapse="."))) | |
| 387 #clear posGroup and negGroup from empty groups | |
| 388 emptyPosGroups=which(!(posGroup%in%coeffMeaning)) | |
| 389 if(length(emptyPosGroups)>0){ | |
| 390 addComment(c("[WARNING]The group(s)",posGroup[emptyPosGroups],"is/are removed from contrast as it/they is/are empty"),T,opt$log,display=FALSE) | |
| 391 posGroup=posGroup[-emptyPosGroups] | |
| 392 currentHumanContrast=paste(unlist(strsplit(opt$firstGroupContrast[iContrast],","))[-emptyPosGroups],collapse="+") | |
| 393 }else{ | |
| 394 currentHumanContrast=paste(unlist(strsplit(opt$firstGroupContrast[iContrast],",")),collapse="+") | |
| 395 } | |
| 396 emptyNegGroups=which(!(negGroup%in%coeffMeaning)) | |
| 397 if(length(emptyNegGroups)>0){ | |
| 398 addComment(c("[WARNING]The group(s)",negGroup[emptyNegGroups],"is/are removed from contrast as it/they is/are empty"),T,opt$log,display=FALSE) | |
| 399 negGroup=negGroup[-emptyNegGroups] | |
| 400 currentHumanContrast=paste(c(currentHumanContrast,unlist(strsplit(opt$secondGroupContrast[iContrast],","))[-emptyNegGroups]),collapse="-") | |
| 401 }else{ | |
| 402 currentHumanContrast=paste(c(currentHumanContrast,unlist(strsplit(opt$secondGroupContrast[iContrast],","))),collapse="-") | |
| 403 } | |
| 404 if(length(posGroup)==0 || length(negGroup)==0 ){ | |
| 405 addComment(c("[WARNING]Contrast",currentHumanContrast,"cannot be estimated due to empty group"),T,opt$log,display=FALSE) | |
| 406 }else{ | |
| 407 if(all(posGroup%in%negGroup) && all(negGroup%in%posGroup)){ | |
| 408 addComment(c("[WARNING]Contrast",currentHumanContrast,"cannot be estimated due to null contrast"),T,opt$log,display=FALSE) | |
| 409 }else{ | |
| 410 #get coefficients required for first group added as positive | |
| 411 positiveCoeffWeights=sampleSizeFactor[posGroup]/sum(sampleSizeFactor[posGroup]) | |
| 412 #positiveCoeffWeights=rep(1,length(posGroup)) | |
| 413 #names(positiveCoeffWeights)=names(sampleSizeFactor[posGroup]) | |
| 414 #get coefficients required for second group added as negative | |
| 415 negativeCoeffWeights=sampleSizeFactor[negGroup]/sum(sampleSizeFactor[negGroup]) | |
| 416 #negativeCoeffWeights=rep(1,length(negGroup)) | |
| 417 #names(negativeCoeffWeights)=names(sampleSizeFactor[negGroup]) | |
| 418 #build the resulting contrast | |
| 419 currentContrast=paste(paste(positiveCoeffWeights[posGroup],posGroup,sep="*"),collapse="+") | |
| 420 currentContrast=paste(c(currentContrast,paste(paste(negativeCoeffWeights[negGroup],negGroup,sep="*"),collapse="-")),collapse="-") | |
| 421 requiredContrasts=c(requiredContrasts,currentContrast) | |
| 422 | |
| 423 #build the human reading contrast | |
| 424 humanReadingContrasts=c(humanReadingContrasts,currentHumanContrast) | |
| 425 if(!is.null(opt$contrastNames) && nchar(opt$contrastNames[iContrast])>0){ | |
| 426 persoContrastName=c(persoContrastName,opt$contrastNames[iContrast]) | |
| 427 }else{ | |
| 428 persoContrastName=c(persoContrastName,"") | |
| 429 } | |
| 430 | |
| 431 addComment(c("[INFO]Contrast added : ",currentHumanContrast),T,opt$log,display=F) | |
| 432 addComment(c("with complete formula ",currentContrast),T,opt$log,display=F) | |
| 433 } | |
| 434 } | |
| 435 } | |
| 436 } | |
| 437 | |
| 438 | |
| 439 #define the true formula with interactions to get interaction coefficients | |
| 440 factorString=c("factorsList[['", names(factorsList)[1],"']]") | |
| 441 for(iFactor in names(factorsList)[-1]){ | |
| 442 factorString=c(factorString," * factorsList[['",iFactor,"']]") | |
| 443 } | |
| 444 | |
| 445 if(!is.null(blockinFactorsList)){ | |
| 446 for(blockingFact in names(blockinFactorsList)){ | |
| 447 factorString=c(factorString," + blockinFactorsList[['",blockingFact,"']]") | |
| 448 } | |
| 449 } | |
| 450 | |
| 451 #should not be null at the end | |
| 452 allFtestMeanSquare=NULL | |
| 453 #to get the F-test values | |
| 454 estimatedInteractions=rownames(anova(lm(as.formula(paste(c("expDataMatrix[1,] ~ ",factorString),collapse=""))))) | |
| 455 estimatedInteractions=c(unlist(lapply(estimatedInteractions[-length(estimatedInteractions)],function(x){temp=unlist(strsplit(x,"[ \" | : ]"));paste(temp[seq(2,length(temp),3)],collapse=":")})),estimatedInteractions[length(estimatedInteractions)]) | |
| 456 #rename estimated interaction terms using renamingDico | |
| 457 estimatedInteractions=c(unlist(lapply(estimatedInteractions[-length(estimatedInteractions)],function(x)paste(renamingDico[unlist(strsplit(x,":")),1],collapse=":"))),estimatedInteractions[length(estimatedInteractions)]) | |
| 458 t <- unlist(apply(expDataMatrix,1,function(x){temp=anova(lm(as.formula(paste(c("x ~ ",factorString),collapse=""))))$`Mean Sq`;temp/temp[length(temp)]})) | |
| 459 allFtestMeanSquare <- t(matrix(t,nrow=length(estimatedInteractions))) | |
| 460 #remove from allFtest rows containing NA | |
| 461 if(length(which(is.na(allFtestMeanSquare[,1])))>0)allFtestMeanSquare=allFtestMeanSquare[-(which(is.na(allFtestMeanSquare[,1]))),] | |
| 462 colnames(allFtestMeanSquare)=estimatedInteractions | |
| 463 | |
| 464 #add contrasts corresponding to interaction terms | |
| 465 if(!is.null(opt$controlGroups)){ | |
| 466 #first load user defined control group for each factor | |
| 467 controlGroup=rep(NA,length(factorsList)) | |
| 468 names(controlGroup)=names(factorsList) | |
| 469 for(iGroup in opt$controlGroups){ | |
| 470 splitGroup=unlist(strsplit(iGroup,":")) | |
| 471 splitGroup[2]=paste(splitGroup[1],splitGroup[2],sep = "_") | |
| 472 #check if defined control group is really a level of the corresponding factor | |
| 473 if(!splitGroup[1]%in%names(controlGroup) || !splitGroup[2]%in%factorsList[[splitGroup[1]]]){ | |
| 474 addComment(c("[ERROR]The factor name",splitGroup[1],"does not exist or group name",splitGroup[2]),T,opt$log) | |
| 475 q( "no", 1, F ) | |
| 476 } | |
| 477 if(!is.na(controlGroup[splitGroup[1]])){ | |
| 478 addComment("[ERROR]Several control groups are defined for the same factor, please select only one control group for each factor if you want to compute interaction contrasts",T,opt$log) | |
| 479 q( "no", 1, F ) | |
| 480 } | |
| 481 controlGroup[splitGroup[1]]=splitGroup[2] | |
| 482 } | |
| 483 | |
| 484 #check if all factor have a defined control group | |
| 485 if(any(is.na(controlGroup))){ | |
| 486 addComment("[ERROR]Missing control group for some factors, please check them if you want to compute interaction contrasts",T,opt$log) | |
| 487 q( "no", 1, F ) | |
| 488 } | |
| 489 | |
| 490 nbFactors=length(factorsList) | |
| 491 interactionContrasts=c() | |
| 492 contrastClass=c() | |
| 493 #initialize list for the first level | |
| 494 newPreviousLoopContrast=list() | |
| 495 for(iFactorA in 1:(nbFactors-1)){ | |
| 496 nameFactorA=names(factorsList)[iFactorA] | |
| 497 compA=c() | |
| 498 for(levelA in setdiff(levels(factorsList[[iFactorA]]),controlGroup[nameFactorA])){ | |
| 499 compA=c(compA,paste(levelA,controlGroup[nameFactorA],sep="-")) | |
| 500 } | |
| 501 newPreviousLoopContrast[[as.character(iFactorA)]]=compA | |
| 502 } | |
| 503 #make a loop for growing interaction set | |
| 504 for(globalIfactor in 1:(nbFactors-1)){ | |
| 505 previousLoopContrast=newPreviousLoopContrast | |
| 506 newPreviousLoopContrast=list() | |
| 507 #factor A reuse contrasts made at previsous loop | |
| 508 for(iFactorA in names(previousLoopContrast)){ | |
| 509 compA=previousLoopContrast[[iFactorA]] | |
| 510 | |
| 511 if(max(as.integer(unlist(strsplit(iFactorA,"\\."))))<nbFactors){ | |
| 512 #factor B is the new factor to include in intreraction set | |
| 513 for(iFactorB in (max(as.integer(unlist(strsplit(iFactorA,"\\."))))+1):nbFactors){ | |
| 514 nameFactorB=names(factorsList)[iFactorB] | |
| 515 #keep contrasts identified trough interacting factors set | |
| 516 newPreviousLoopContrast[[paste(iFactorA,iFactorB,sep=".")]]=c() | |
| 517 for(iCompA in compA){ | |
| 518 for(levelB in setdiff(levels(factorsList[[iFactorB]]),controlGroup[nameFactorB])){ | |
| 519 #decompose the contrast compA to apply the new level of factor B on each term | |
| 520 temp=unlist(strsplit(iCompA,"[ + ]")) | |
| 521 splitCompA=temp[1] | |
| 522 for(iTemp in temp[-1])splitCompA=c(splitCompA,"+",iTemp) | |
| 523 splitCompA=unlist(lapply(splitCompA,function(x){temp=unlist(strsplit(x,"-"));splitCompB=temp[1];for(iTemp in temp[-1])splitCompB=c(splitCompB,"-",iTemp);splitCompB})) | |
| 524 #apply on each contrast term the new level of factor B | |
| 525 firstTerm=paste(unlist(lapply(splitCompA,function(x)if(x!="+" && x!="-"){paste(x,levelB,sep=".")}else{x})),collapse="") | |
| 526 secondTerm=negativeExpression(paste(unlist(lapply(splitCompA,function(x)if(x!="+" && x!="-"){paste(x,controlGroup[nameFactorB],sep=".")}else{x})),collapse="")) | |
| 527 currentContrast=paste(c(firstTerm,secondTerm),collapse="") | |
| 528 | |
| 529 newPreviousLoopContrast[[paste(iFactorA,iFactorB,sep=".")]]=c(newPreviousLoopContrast[[paste(iFactorA,iFactorB,sep=".")]],currentContrast) | |
| 530 } | |
| 531 } | |
| 532 } | |
| 533 } | |
| 534 } | |
| 535 for(iContrast in names(newPreviousLoopContrast)){ | |
| 536 contrastClass=c(contrastClass,rep(iContrast,length(newPreviousLoopContrast[[iContrast]]))) | |
| 537 } | |
| 538 interactionContrasts=c(interactionContrasts,unlist(newPreviousLoopContrast)) | |
| 539 } | |
| 540 #make human title for interactions | |
| 541 names(interactionContrasts)=contrastClass | |
| 542 humanReadingInteraction=unlist(lapply(interactionContrasts,function(x)gsub("\\.",":",unlist(strsplit(x,"[+-]"))[1]))) | |
| 543 | |
| 544 contrastToIgnore=c() | |
| 545 | |
| 546 #complete with control groups and order to match to coeffs | |
| 547 for(iContrast in 1:length(interactionContrasts)){ | |
| 548 missingFactor=setdiff(1:nbFactors,as.integer(unlist(strsplit(names(interactionContrasts[iContrast]),"\\.")))) | |
| 549 #decompose the contrast | |
| 550 temp=unlist(strsplit(interactionContrasts[iContrast],"[ + ]")) | |
| 551 splitContrast=temp[1] | |
| 552 for(iTemp in temp[-1])splitContrast=c(splitContrast,"+",iTemp) | |
| 553 splitContrast=unlist(lapply(splitContrast,function(x){temp=unlist(strsplit(x,"-"));splitCompB=temp[1];for(iTemp in temp[-1])splitCompB=c(splitCompB,"-",iTemp);splitCompB})) | |
| 554 for(iFactor in missingFactor){ | |
| 555 for(iTerm in 1:length(splitContrast)){ | |
| 556 if(splitContrast[iTerm]!="+" && splitContrast[iTerm]!="-"){ | |
| 557 splitTerm=unlist(strsplit(splitContrast[iTerm],"\\.")) | |
| 558 if(iFactor==1)splitContrast[iTerm]=paste(c(controlGroup[names(factorsList)[iFactor]],splitTerm),collapse=".") | |
| 559 if(iFactor==nbFactors)splitContrast[iTerm]=paste(c(splitTerm,controlGroup[names(factorsList)[iFactor]]),collapse=".") | |
| 560 if(iFactor>1 && iFactor<nbFactors)splitContrast[iTerm]=paste(c(splitTerm[1:(iFactor-1)],controlGroup[names(factorsList)[iFactor]],splitTerm[iFactor:length(splitTerm)]),collapse=".") | |
| 561 } | |
| 562 } | |
| 563 } | |
| 564 interactionContrasts[iContrast]=paste(splitContrast,collapse="") | |
| 565 if(all(splitContrast[seq(1,length(splitContrast),2)]%in%coeffMeaning)){ | |
| 566 addComment(c("[INFO]Interaction contrast added : ",humanReadingInteraction[iContrast]),T,opt$log,display=F) | |
| 567 addComment(c("with complete formula ",interactionContrasts[iContrast]),T,opt$log,display=F) | |
| 568 }else{ | |
| 569 contrastToIgnore=c(contrastToIgnore,iContrast) | |
| 570 addComment(c("[WARNING]Interaction contrast",humanReadingInteraction[iContrast],"is removed due to empty group"),T,opt$log,display=F) | |
| 571 } | |
| 572 } | |
| 573 | |
| 574 #add interaction contrasts to global contrast list | |
| 575 if(length(contrastToIgnore)>0){ | |
| 576 requiredContrasts=c(requiredContrasts,interactionContrasts[-contrastToIgnore]) | |
| 577 humanReadingContrasts=c(humanReadingContrasts,humanReadingInteraction[-contrastToIgnore]) | |
| 578 persoContrastName=c(persoContrastName,rep("",length(humanReadingInteraction[-contrastToIgnore]))) | |
| 579 }else{ | |
| 580 requiredContrasts=c(requiredContrasts,interactionContrasts) | |
| 581 humanReadingContrasts=c(humanReadingContrasts,humanReadingInteraction) | |
| 582 persoContrastName=c(persoContrastName,rep("",length(humanReadingInteraction))) | |
| 583 } | |
| 584 }#end of intreaction contrasts | |
| 585 | |
| 586 | |
| 587 #remove from requiredContrasts contrasts that cannot be estimated | |
| 588 toRemove=unique(unlist(lapply(setdiff(coeffMeaning,names(estimatedCoeff)),function(x)grep(x,requiredContrasts)))) | |
| 589 if(length(toRemove)>0){ | |
| 590 addComment(c("[WARNING]",length(toRemove)," contrasts are removed, due to missing coefficients"),T,opt$log,display=FALSE) | |
| 591 requiredContrasts=requiredContrasts[-toRemove] | |
| 592 humanReadingContrasts=humanReadingContrasts[-toRemove] | |
| 593 persoContrastName=persoContrastName[-toRemove] | |
| 594 } | |
| 595 | |
| 596 if(length(requiredContrasts)==0){ | |
| 597 addComment("[ERROR]No contrast to compute, please check your contrast definition.",T,opt$log) | |
| 598 q( "no", 1, F ) | |
| 599 } | |
| 600 | |
| 601 #compute for each contrast mean of coefficients in posGroup and negGroup for FC computation of log(FC) with LSmean as in Partek | |
| 602 meanPosGroup=list() | |
| 603 meanNegGroup=list() | |
| 604 for(iContrast in 1:length(requiredContrasts)){ | |
| 605 #define posGroup and negGroup | |
| 606 #first split contrast | |
| 607 temp=unlist(strsplit(requiredContrasts[iContrast],"[ + ]")) | |
| 608 splitContrast=temp[1] | |
| 609 for(iTemp in temp[-1])splitContrast=c(splitContrast,"+",iTemp) | |
| 610 splitContrast=unlist(lapply(splitContrast,function(x){temp=unlist(strsplit(x,"-"));splitCompB=temp[1];for(iTemp in temp[-1])splitCompB=c(splitCompB,"-",iTemp);splitCompB})) | |
| 611 #and then put each term in good group | |
| 612 posGroup=c() | |
| 613 negGroup=c() | |
| 614 nextIsPos=TRUE | |
| 615 for(iSplit in splitContrast){ | |
| 616 if(iSplit=="+")nextIsPos=TRUE | |
| 617 if(iSplit=="-")nextIsPos=FALSE | |
| 618 if(iSplit!="-" && iSplit!="+"){ | |
| 619 #remove weights of contrast terms | |
| 620 iSplitBis=unlist(strsplit(iSplit,"[*]")) | |
| 621 iSplitBis=iSplitBis[length(iSplitBis)] | |
| 622 if(nextIsPos)posGroup=c(posGroup,iSplitBis) | |
| 623 else negGroup=c(negGroup,iSplitBis) | |
| 624 } | |
| 625 } | |
| 626 #compute means for each group | |
| 627 meanPosGroup[[iContrast]]=apply(as.matrix(data.fit$coefficients[,posGroup],ncol=length(posGroup)),1,mean) | |
| 628 meanNegGroup[[iContrast]]=apply(as.matrix(data.fit$coefficients[,negGroup],ncol=length(negGroup)),1,mean) | |
| 629 } | |
| 630 | |
| 631 | |
| 632 contrast.matrix = makeContrasts(contrasts=requiredContrasts,levels=design) | |
| 633 data.fit.con = contrasts.fit(data.fit,contrast.matrix) | |
| 634 | |
| 635 addComment("[INFO]Contrast definition done",T,opt$log,T,display=FALSE) | |
| 636 | |
| 637 #compute LIMMA statistics | |
| 638 data.fit.eb = eBayes(data.fit.con) | |
| 639 | |
| 640 addComment("[INFO]Estimation done",T,opt$log,T,display=FALSE) | |
| 641 | |
| 642 #adjust p.value through FDR | |
| 643 data.fit.eb$adj_p.value=data.fit.eb$p.value | |
| 644 for(iComparison in 1:ncol(data.fit.eb$adj_p.value)){ | |
| 645 data.fit.eb$adj_p.value[,iComparison]=p.adjust(data.fit.eb$p.value[,iComparison],"fdr") | |
| 646 } | |
| 647 | |
| 648 #add a new field based on LS-means for each contrast instead of global mean like they were calculated in coefficients field | |
| 649 data.fit.eb$coefficientsLS=data.fit.eb$coefficients | |
| 650 if(ncol(data.fit.eb$coefficients)!=length(meanPosGroup)){ | |
| 651 addComment("[ERROR]Estimated contrasts number unexpected",T,opt$log) | |
| 652 q( "no", 1, F ) | |
| 653 } | |
| 654 for(iContrast in 1:length(meanPosGroup)){ | |
| 655 data.fit.eb$coefficientsLS[,iContrast]=meanPosGroup[[iContrast]][rownames(data.fit.eb$coefficientsLS)]-meanNegGroup[[iContrast]][rownames(data.fit.eb$coefficientsLS)] | |
| 656 } | |
| 657 | |
| 658 #if requested replace coefficient computed as global mean by LS-means values | |
| 659 if(useLSmean)data.fit.eb$coefficients=data.fit.eb$coefficientsLS | |
| 660 | |
| 661 addComment("[INFO]Core treatment done",T,opt$log,T,display=FALSE) | |
| 662 | |
| 663 | |
| 664 ##convert humanReadingContrasts with namingDictionary to create humanReadingContrastsRenamed and keep original humanReadingContrasts names for file names | |
| 665 humanReadingContrastsRenamed=rep("",length(humanReadingContrasts)) | |
| 666 for(iContrast in 1:length(humanReadingContrasts)){ | |
| 667 if(persoContrastName[iContrast]==""){ | |
| 668 #if(verbose)addComment(humanReadingContrasts[iContrast]) | |
| 669 specialCharacters=str_extract_all(humanReadingContrasts[iContrast],"[+|*|_|:|-]")[[1]] | |
| 670 #if(verbose)addComment(specialCharacters) | |
| 671 nameConverted=unlist(lapply(strsplit(humanReadingContrasts[iContrast],"[+|*|_|:|-]")[[1]],function(x)renamingDico[x,1])) | |
| 672 #if(verbose)addComment(nameConverted) | |
| 673 humanReadingContrastsRenamed[iContrast]=paste(nameConverted,specialCharacters,collapse="",sep="") | |
| 674 #if(verbose)addComment(humanReadingContrastsRenamed[iContrast]) | |
| 675 humanReadingContrastsRenamed[iContrast]=substr(humanReadingContrastsRenamed[iContrast],1,nchar(humanReadingContrastsRenamed[iContrast])-1) | |
| 676 }else{ | |
| 677 humanReadingContrastsRenamed[iContrast]=persoContrastName[iContrast] | |
| 678 } | |
| 679 } | |
| 680 | |
| 681 #write correspondances between plot file names (humanReadingContrasts) and displayed names in figure legends (humanReadingContrastsRenamed), usefull to define html items in xml file | |
| 682 correspondanceTable=matrix("",ncol=2,nrow=ncol(data.fit.eb$p.value)) | |
| 683 correspondanceTable[,1]=unlist(lapply(humanReadingContrasts,function(x)gsub(":","_INT_",gsub("\\+","_PLUS_",gsub("\\*","_AND_",x))))) | |
| 684 correspondanceTable[,2]=humanReadingContrastsRenamed | |
| 685 rownames(correspondanceTable)=correspondanceTable[,2] | |
| 686 write.table(correspondanceTable,file=file.path(getwd(), "correspondanceFileNames.csv"),quote=FALSE,sep="\t",col.names = F,row.names = F) | |
| 687 | |
| 688 #plot nominal p-val histograms for selected comparisons | |
| 689 histogramPerPage=6 | |
| 690 if (!is.null(opt$histo)) { | |
| 691 iToPlot=1 | |
| 692 plotVector=list() | |
| 693 nbComparisons=ncol(data.fit.eb$p.value) | |
| 694 for (iComparison in 1:nbComparisons){ | |
| 695 dataToPlot=data.frame(pval=data.fit.eb$p.value[,iComparison],id=rownames(data.fit.eb$p.value)) | |
| 696 p <- ggplot(data=dataToPlot, aes(x=pval)) + geom_histogram(colour="red", fill="salmon") + | |
| 697 theme_bw() + ggtitle(humanReadingContrastsRenamed[iComparison]) + ylab(label="Frequencies") + xlab(label="Nominal p-val") + | |
| 698 theme(panel.border=element_blank(),plot.title = element_text(hjust = 0.5)) | |
| 699 plotVector[[length(plotVector)+1]]=p | |
| 700 | |
| 701 pp <- ggplotly(p) | |
| 702 htmlwidgets::saveWidget(as_widget(pp), paste(c(file.path(getwd(), "plotLyDir"),"/",opt$histo,"_",correspondanceTable[humanReadingContrastsRenamed[iComparison],1],".html"),collapse=""),selfcontained = F) | |
| 703 | |
| 704 if(iComparison==nbComparisons || length(plotVector)==histogramPerPage){ | |
| 705 #plot and close the actual plot | |
| 706 if(opt$format=="pdf"){ | |
| 707 pdf(paste(c("./plotDir/",opt$histo,iToPlot,".pdf"),collapse=""))}else{ | |
| 708 png(paste(c("./plotDir/",opt$histo,iToPlot,".png"),collapse="")) | |
| 709 } | |
| 710 multiplot(plotlist=plotVector,cols=2) | |
| 711 dev.off() | |
| 712 if(iComparison<nbComparisons){ | |
| 713 #prepare for a new plotting file if necessary | |
| 714 plotVector=list() | |
| 715 iToPlot=iToPlot+1 | |
| 716 } | |
| 717 } | |
| 718 } | |
| 719 addComment("[INFO]Histograms drawn",T,opt$log,T,display=FALSE) | |
| 720 | |
| 721 } | |
| 722 | |
| 723 #plot F-test sum square barplot | |
| 724 if(!is.null(allFtestMeanSquare)){ | |
| 725 dataToPlot=data.frame(Fratio=apply(allFtestMeanSquare,2,mean),Factors=factor(colnames(allFtestMeanSquare),levels = colnames(allFtestMeanSquare))) | |
| 726 | |
| 727 p <- ggplot(data=dataToPlot, aes(x=Factors, y=Fratio, fill=Factors)) + | |
| 728 geom_bar(stat="identity") + scale_fill_manual(values = colorRampPalette(brewer.pal(9,"Set1"))(ncol(allFtestMeanSquare))[sample(ncol(allFtestMeanSquare))]) + ylab(label="mean F-ratio") + | |
| 729 theme_bw() + theme(panel.border=element_blank(),plot.title = element_text(hjust = 0.5)) + ggtitle("Source of variation") | |
| 730 | |
| 731 if(opt$format=="pdf"){ | |
| 732 pdf(paste(c("./plotDir/",opt$fratioFile,".pdf"),collapse=""))}else{ | |
| 733 png(paste(c("./plotDir/",opt$fratioFile,".png"),collapse="")) | |
| 734 } | |
| 735 plot(p) | |
| 736 dev.off() | |
| 737 | |
| 738 pp <- ggplotly(p) | |
| 739 htmlwidgets::saveWidget(as_widget(pp), paste(c(file.path(getwd(), "plotLyDir"),"/",opt$fratioFile,".html"),collapse=""),selfcontained = F) | |
| 740 | |
| 741 addComment("[INFO]SumSquareTest drawn",T,opt$log,T,display=FALSE) | |
| 742 } | |
| 743 | |
| 744 #plot VOLCANO plot | |
| 745 #volcanoplot(data.fit.eb,coef=1,highlight=10) | |
| 746 volcanoPerPage=1 | |
| 747 logFCthreshold=log2(opt$thresholdFC) | |
| 748 if (!is.null(opt$volcano)) { | |
| 749 iToPlot=1 | |
| 750 plotVector=list() | |
| 751 nbComparisons=ncol(data.fit.eb$adj_p.value) | |
| 752 for (iComparison in 1:nbComparisons){ | |
| 753 | |
| 754 #define the log10(p-val) threshold corresponding to FDR threshold fixed by user | |
| 755 probeWithLowFDR=-log10(data.fit.eb$p.value[which(data.fit.eb$adj_p.value[,iComparison]<=opt$fdrThreshold),iComparison]) | |
| 756 pvalThresholdFDR=NULL | |
| 757 if(length(probeWithLowFDR)>0)pvalThresholdFDR=min(probeWithLowFDR) | |
| 758 | |
| 759 #get significant points over FC and FDR thresholds | |
| 760 significativePoints=intersect(which(abs(data.fit.eb$coefficients[,iComparison])>=logFCthreshold),which(data.fit.eb$adj_p.value[,iComparison]<=opt$fdrThreshold)) | |
| 761 | |
| 762 #to reduce size of html plot, we keep 20000 points maximum sampled amongst genes with pval>=33%(pval) and abs(log2(FC))<=66%(abs(log2(FC))) | |
| 763 htmlPointsToRemove=intersect(which(abs(data.fit.eb$coefficients[,iComparison])<=quantile(abs(data.fit.eb$coefficients[,iComparison]),c(0.66))),which(data.fit.eb$p.value[,iComparison]>=quantile(abs(data.fit.eb$p.value[,iComparison]),c(0.33)))) | |
| 764 if(length(htmlPointsToRemove)>20000){ | |
| 765 htmlPointsToRemove=setdiff(htmlPointsToRemove,sample(htmlPointsToRemove,20000)) | |
| 766 }else{ | |
| 767 htmlPointsToRemove=c() | |
| 768 } | |
| 769 | |
| 770 xMinLimPlot=min(data.fit.eb$coefficients[,iComparison])-0.2 | |
| 771 xMaxLimPlot=max(data.fit.eb$coefficients[,iComparison])+0.2 | |
| 772 yMaxLimPlot= max(-log10(data.fit.eb$p.value[,iComparison]))+0.2 | |
| 773 | |
| 774 if(length(significativePoints)>0){ | |
| 775 dataSignifToPlot=data.frame(pval=-log10(data.fit.eb$p.value[significativePoints,iComparison]),FC=data.fit.eb$coefficients[significativePoints,iComparison],description=paste(names(data.fit.eb$coefficients[significativePoints,iComparison]),"\n","FC: " , round(2^data.fit.eb$coefficients[significativePoints,iComparison],2) , " | FDR p-val: ",prettyNum(data.fit.eb$adj_p.value[significativePoints,iComparison],digits=4), sep="")) | |
| 776 #to test if remains any normal points to draw | |
| 777 if(length(significativePoints)<nrow(data.fit.eb$p.value)){ | |
| 778 dataToPlot=data.frame(pval=-log10(data.fit.eb$p.value[-significativePoints,iComparison]),FC=data.fit.eb$coefficients[-significativePoints,iComparison],description=paste("FC: " , round(2^data.fit.eb$coefficients[-significativePoints,iComparison],2) , " | FDR p-val: ",prettyNum(data.fit.eb$adj_p.value[-significativePoints,iComparison],digits=4), sep="")) | |
| 779 }else{ | |
| 780 dataToPlot=data.frame(pval=0,FC=0,description="null") | |
| 781 } | |
| 782 }else{ | |
| 783 dataToPlot=data.frame(pval=-log10(data.fit.eb$p.value[,iComparison]),FC=data.fit.eb$coefficients[,iComparison],description=paste("FC: " , round(2^data.fit.eb$coefficients[,iComparison],2) , " | FDR p-val: ",prettyNum(data.fit.eb$adj_p.value[,iComparison],digits=4), sep="")) | |
| 784 } | |
| 785 | |
| 786 ##traditional plot | |
| 787 p <- ggplot(data=dataToPlot, aes(x=FC, y=pval)) + geom_point() + | |
| 788 theme_bw() + ggtitle(humanReadingContrastsRenamed[iComparison]) + ylab(label="-log10(p-val)") + xlab(label="Log2 Fold Change") + | |
| 789 theme(panel.border=element_blank(),plot.title = element_text(hjust = 0.5),legend.position="none") | |
| 790 if(logFCthreshold!=0) p <- p + geom_vline(xintercept=-logFCthreshold, color="salmon",linetype="dotted", size=1) + geom_vline(xintercept=logFCthreshold, color="salmon",linetype="dotted", size=1) + geom_text(data.frame(text=c(paste(c("log2(1/FC=",opt$thresholdFC,")"),collapse=""),paste(c("log2(FC=",opt$thresholdFC,")"),collapse="")),x=c(-logFCthreshold,logFCthreshold),y=c(0,0)),mapping=aes(x=x, y=y, label=text), size=4, angle=90, vjust=-0.4, hjust=0, color="salmon") | |
| 791 if(!is.null(pvalThresholdFDR)) p <- p + geom_hline(yintercept=pvalThresholdFDR, color="skyblue1",linetype="dotted", size=0.5) + geom_text(data.frame(text=c(paste(c("FDR pval limit(",opt$fdrThreshold,")"),collapse="")),x=c(xMinLimPlot),y=c(pvalThresholdFDR)),mapping=aes(x=x, y=y, label=text), size=4, vjust=0, hjust=0, color="skyblue3") | |
| 792 if(length(significativePoints)>0)p <- p + geom_point(data=dataSignifToPlot,aes(colour=description)) | |
| 793 | |
| 794 ##interactive plot | |
| 795 if(length(htmlPointsToRemove)>0){ | |
| 796 pointToRemove=union(htmlPointsToRemove,significativePoints) | |
| 797 #to test if remains any normal points to draw | |
| 798 if(length(pointToRemove)<nrow(data.fit.eb$p.value)){ | |
| 799 dataToPlot=data.frame(pval=-log10(data.fit.eb$p.value[-pointToRemove,iComparison]),FC=data.fit.eb$coefficients[-pointToRemove,iComparison],description=paste("FC: " , round(2^data.fit.eb$coefficients[-pointToRemove,iComparison],2) , " | FDR p-val: ", prettyNum(data.fit.eb$adj_p.value[-pointToRemove,iComparison],digits=4), sep="")) | |
| 800 }else{ | |
| 801 dataToPlot=data.frame(pval=0,FC=0,description="null") | |
| 802 } | |
| 803 } | |
| 804 | |
| 805 if((nrow(dataToPlot)+nrow(dataSignifToPlot))>40000)addComment(c("[WARNING]For",humanReadingContrastsRenamed[iComparison],"volcano, numerous points to plot(",nrow(dataToPlot)+nrow(dataSignifToPlot),"), resulting volcano could be heavy, using more stringent thresholds could be helpful."),T,opt$log) | |
| 806 | |
| 807 phtml <- plot_ly(data=dataToPlot, x=~FC, y=~pval,type="scatter", mode="markers",showlegend = FALSE, marker = list(color="gray",opacity=0.5), text=~description, hoverinfo="text") %>% | |
| 808 layout(title = humanReadingContrastsRenamed[iComparison],xaxis=list(title="Log2 Fold Change",showgrid=TRUE, zeroline=FALSE),yaxis=list(title="-log10(p-val)", showgrid=TRUE, zeroline=FALSE)) | |
| 809 if(length(significativePoints)>0) phtml=add_markers(phtml,data=dataSignifToPlot, x=~FC, y=~pval, mode="markers" , marker=list( color=log10(abs(dataSignifToPlot$FC)*dataSignifToPlot$pval),colorscale='Rainbow'), text=~description, hoverinfo="text", inherit = FALSE) %>% hide_colorbar() | |
| 810 if(logFCthreshold!=0){ | |
| 811 phtml=add_trace(phtml,x=c(-logFCthreshold,-logFCthreshold), y=c(0,yMaxLimPlot), type="scatter", mode = "lines", line=list(color="coral",dash="dash"), hoverinfo='none', showlegend = FALSE,inherit = FALSE) | |
| 812 phtml=add_annotations(phtml,x=-logFCthreshold,y=0,xref = "x",yref = "y",text = paste(c("log2(1/FC=",opt$thresholdFC,")"),collapse=""),xanchor = 'right',showarrow = F,textangle=270,font=list(color="coral")) | |
| 813 phtml=add_trace(phtml,x=c(logFCthreshold,logFCthreshold), y=c(0, yMaxLimPlot), type="scatter", mode = "lines", line=list(color="coral",dash="dash"), hoverinfo='none', showlegend = FALSE,inherit = FALSE) | |
| 814 phtml=add_annotations(phtml,x=logFCthreshold,y=0,xref = "x",yref = "y",text = paste(c("log2(FC=",opt$thresholdFC,")"),collapse=""),xanchor = 'right',showarrow = F,textangle=270,font=list(color="coral")) | |
| 815 } | |
| 816 if(!is.null(pvalThresholdFDR)){ | |
| 817 phtml=add_trace(phtml,x=c(xMinLimPlot,xMaxLimPlot), y=c(pvalThresholdFDR,pvalThresholdFDR), type="scatter", mode = "lines", line=list(color="cornflowerblue",dash="dash"), hoverinfo='none', showlegend = FALSE,inherit = FALSE) | |
| 818 phtml=add_annotations(phtml,x=xMinLimPlot,y=pvalThresholdFDR+0.1,xref = "x",yref = "y",text = paste(c("FDR pval limit(",opt$fdrThreshold,")"),collapse=""),xanchor = 'left',showarrow = F,font=list(color="cornflowerblue")) | |
| 819 } | |
| 820 plotVector[[length(plotVector)+1]]=p | |
| 821 | |
| 822 #save plotly files | |
| 823 pp <- ggplotly(phtml) | |
| 824 htmlwidgets::saveWidget(as_widget(pp), paste(c(file.path(getwd(), "plotLyDir"),"/",opt$volcano,"_",correspondanceTable[humanReadingContrastsRenamed[iComparison],1],".html"),collapse=""),selfcontained = F) | |
| 825 | |
| 826 | |
| 827 if(iComparison==nbComparisons || length(plotVector)==volcanoPerPage){ | |
| 828 #plot and close the actual plot | |
| 829 if(opt$format=="pdf"){ | |
| 830 pdf(paste(c("./plotDir/",opt$volcano,"_",correspondanceTable[humanReadingContrastsRenamed[iComparison],1],".pdf"),collapse=""))}else{ | |
| 831 png(paste(c("./plotDir/",opt$volcano,"_",correspondanceTable[humanReadingContrastsRenamed[iComparison],1],".png"),collapse="")) | |
| 832 } | |
| 833 multiplot(plotlist=plotVector,cols=1) | |
| 834 dev.off() | |
| 835 if(iComparison<nbComparisons){ | |
| 836 #prepare for a new ploting file if necessary | |
| 837 plotVector=list() | |
| 838 iToPlot=iToPlot+1 | |
| 839 } | |
| 840 } | |
| 841 } | |
| 842 remove(dataToPlot,dataSignifToPlot) | |
| 843 addComment("[INFO]Volcanos drawn",T,opt$log,T,display=FALSE) | |
| 844 } | |
| 845 | |
| 846 rowItemInfo=NULL | |
| 847 if(!is.null(opt$rowNameType) && !is.null(opt$organismID)){ | |
| 848 ##get gene information from BioMart | |
| 849 #if(!require("biomaRt")){ | |
| 850 # source("https://bioconductor.org/biocLite.R") | |
| 851 # biocLite("biomaRt") | |
| 852 #} | |
| 853 | |
| 854 ensembl_hs_mart <- useMart(biomart="ensembl", dataset=opt$organismID) | |
| 855 ensembl_df <- getBM(attributes=c(opt$rowNameType,"description"),mart=ensembl_hs_mart) | |
| 856 rowItemInfo=ensembl_df[which(ensembl_df[,1]!=""),2] | |
| 857 rowItemInfo=unlist(lapply(rowItemInfo,function(x)substr(unlist(strsplit(x," \\[Source"))[1],1,30))) | |
| 858 names(rowItemInfo)=ensembl_df[which(ensembl_df[,1]!=""),1] | |
| 859 } | |
| 860 | |
| 861 #write(unlist(dimnames(data.fit.eb$adj_p.value)),opt$log,append = T) | |
| 862 | |
| 863 #prepare additional output containing df informations | |
| 864 dfMatrix=matrix(0,ncol=3,nrow = nrow(data.fit.eb$coefficients),dimnames = list(rownames(data.fit.eb$coefficients),c("df.residual","df.prior","df.total"))) | |
| 865 dfMatrix[,"df.residual"]=data.fit.eb$df.residual | |
| 866 dfMatrix[,"df.prior"]=data.fit.eb$df.prior | |
| 867 dfMatrix[,"df.total"]=data.fit.eb$df.total | |
| 868 | |
| 869 #filter out genes with higher p-values for all comparisons | |
| 870 genesToKeep=names(which(apply(data.fit.eb$adj_p.value,1,function(x)length(which(x<=opt$fdrThreshold))>0))) | |
| 871 #filter out genes with lower FC for all comparisons | |
| 872 genesToKeep=intersect(genesToKeep,names(which(apply(data.fit.eb$coefficients,1,function(x)length(which(abs(x)>=logFCthreshold))>0)))) | |
| 873 | |
| 874 if(length(genesToKeep)>0){ | |
| 875 data.fit.eb$adj_p.value=matrix(data.fit.eb$adj_p.value[genesToKeep,],ncol=ncol(data.fit.eb$adj_p.value)) | |
| 876 rownames(data.fit.eb$adj_p.value)=genesToKeep | |
| 877 colnames(data.fit.eb$adj_p.value)=colnames(data.fit.eb$p.value) | |
| 878 | |
| 879 data.fit.eb$p.value=matrix(data.fit.eb$p.value[genesToKeep,],ncol=ncol(data.fit.eb$p.value)) | |
| 880 rownames(data.fit.eb$p.value)=genesToKeep | |
| 881 colnames(data.fit.eb$p.value)=colnames(data.fit.eb$adj_p.value) | |
| 882 | |
| 883 data.fit.eb$coefficients=matrix(data.fit.eb$coefficients[genesToKeep,],ncol=ncol(data.fit.eb$coefficients)) | |
| 884 rownames(data.fit.eb$coefficients)=genesToKeep | |
| 885 colnames(data.fit.eb$coefficients)=colnames(data.fit.eb$adj_p.value) | |
| 886 | |
| 887 data.fit.eb$t=matrix(data.fit.eb$t[genesToKeep,],ncol=ncol(data.fit.eb$t)) | |
| 888 rownames(data.fit.eb$t)=genesToKeep | |
| 889 colnames(data.fit.eb$t)=colnames(data.fit.eb$adj_p.value) | |
| 890 | |
| 891 dfMatrix=dfMatrix[genesToKeep,,drop=FALSE] | |
| 892 | |
| 893 }else{ | |
| 894 addComment(c("[WARNING]No significative genes considering the given FDR threshold : ",opt$fdrThreshold),T,opt$log,display=FALSE) | |
| 895 } | |
| 896 | |
| 897 addComment("[INFO]Significant genes filtering done",T,opt$log,T,display=FALSE) | |
| 898 | |
| 899 | |
| 900 #plot VennDiagramm for genes below threshold between comparisons | |
| 901 #t=apply(data.fit.eb$adj_p.value[,1:4],2,function(x)names(which(x<=opt$threshold))) | |
| 902 #get.venn.partitions(t) | |
| 903 #vennCounts(data.fit.eb$adj_p.value[,1:4]<=opt$threshold) | |
| 904 | |
| 905 #make a simple sort genes based only on the first comparison | |
| 906 #newOrder=order(data.fit.eb$adj_p.value[,1]) | |
| 907 #data.fit.eb$adj_p.value=data.fit.eb$adj_p.value[newOrder,] | |
| 908 | |
| 909 #alternative sorting strategy based on the mean gene rank over all comparisons | |
| 910 if(length(genesToKeep)>1){ | |
| 911 currentRank=rep(0,nrow(data.fit.eb$adj_p.value)) | |
| 912 for(iComparison in 1:ncol(data.fit.eb$adj_p.value)){ | |
| 913 currentRank=currentRank+rank(data.fit.eb$adj_p.value[,iComparison]) | |
| 914 } | |
| 915 currentRank=currentRank/ncol(data.fit.eb$adj_p.value) | |
| 916 newOrder=order(currentRank) | |
| 917 | |
| 918 data.fit.eb$adj_p.value=matrix(data.fit.eb$adj_p.value[newOrder,],ncol=ncol(data.fit.eb$adj_p.value)) | |
| 919 rownames(data.fit.eb$adj_p.value)=rownames(data.fit.eb$p.value)[newOrder] | |
| 920 colnames(data.fit.eb$adj_p.value)=colnames(data.fit.eb$p.value) | |
| 921 | |
| 922 data.fit.eb$p.value=matrix(data.fit.eb$p.value[newOrder,],ncol=ncol(data.fit.eb$p.value)) | |
| 923 rownames(data.fit.eb$p.value)=rownames(data.fit.eb$adj_p.value) | |
| 924 colnames(data.fit.eb$p.value)=colnames(data.fit.eb$adj_p.value) | |
| 925 | |
| 926 data.fit.eb$coefficients=matrix(data.fit.eb$coefficients[newOrder,],ncol=ncol(data.fit.eb$coefficients)) | |
| 927 rownames(data.fit.eb$coefficients)=rownames(data.fit.eb$adj_p.value) | |
| 928 colnames(data.fit.eb$coefficients)=colnames(data.fit.eb$adj_p.value) | |
| 929 | |
| 930 data.fit.eb$t=matrix(data.fit.eb$t[newOrder,],ncol=ncol(data.fit.eb$t)) | |
| 931 rownames(data.fit.eb$t)=rownames(data.fit.eb$adj_p.value) | |
| 932 colnames(data.fit.eb$t)=colnames(data.fit.eb$adj_p.value) | |
| 933 | |
| 934 dfMatrix=dfMatrix[newOrder,,drop=FALSE] | |
| 935 } | |
| 936 | |
| 937 | |
| 938 #formating output matrices depending on genes to keep | |
| 939 if(length(genesToKeep)==0){ | |
| 940 outputData=matrix(0,ncol=ncol(data.fit.eb$adj_p.value)*5+2,nrow=3) | |
| 941 outputData[1,]=c("X","X",rep(humanReadingContrastsRenamed,each=5)) | |
| 942 outputData[2,]=c("X","X",rep(c("p-val","FDR.p-val","FC","log2(FC)","t-stat"),ncol(data.fit.eb$adj_p.value))) | |
| 943 outputData[,1]=c("LIMMA","Gene","noGene") | |
| 944 outputData[,2]=c("Comparison","Info","noInfo") | |
| 945 | |
| 946 outputDfData=matrix(0,ncol=3+1,nrow=2) | |
| 947 outputDfData[1,]=c("X","df.residual","df.prior","df.total") | |
| 948 outputDfData[,1]=c("Statistics","noGene") | |
| 949 }else{ | |
| 950 if(length(genesToKeep)==1){ | |
| 951 outputData=matrix(0,ncol=ncol(data.fit.eb$adj_p.value)*5+2,nrow=3) | |
| 952 outputData[1,]=c("X","X",rep(humanReadingContrastsRenamed,each=5)) | |
| 953 outputData[2,]=c("X","X",rep(c("p-val","FDR.p-val","FC","log2(FC)","t-stat"),ncol(data.fit.eb$adj_p.value))) | |
| 954 outputData[,1]=c("LIMMA","Gene",genesToKeep) | |
| 955 outputData[,2]=c("Comparison","Info","na") | |
| 956 if(!is.null(rowItemInfo))outputData[3,2]=rowItemInfo[genesToKeep] | |
| 957 outputData[3,seq(3,ncol(outputData),5)]=prettyNum(data.fit.eb$p.value,digits=4) | |
| 958 outputData[3,seq(4,ncol(outputData),5)]=prettyNum(data.fit.eb$adj_p.value,digits=4) | |
| 959 outputData[3,seq(5,ncol(outputData),5)]=prettyNum(2^data.fit.eb$coefficients,digits=4) | |
| 960 outputData[3,seq(6,ncol(outputData),5)]=prettyNum(data.fit.eb$coefficients,digits=4) | |
| 961 outputData[3,seq(7,ncol(outputData),5)]=prettyNum(data.fit.eb$t,digits=4) | |
| 962 | |
| 963 outputDfData=matrix(0,ncol=3+1,nrow=1+nrow(dfMatrix)) | |
| 964 outputDfData[1,]=c("Statistics","df.residual","df.prior","df.total") | |
| 965 outputDfData[2,]=c(rownames(dfMatrix),prettyNum(dfMatrix[,c("df.residual","df.prior","df.total")],digits=4)) | |
| 966 }else{ | |
| 967 #format matrix to be correctly read by galaxy (move headers in first column and row) | |
| 968 outputData=matrix(0,ncol=ncol(data.fit.eb$adj_p.value)*5+2,nrow=nrow(data.fit.eb$adj_p.value)+2) | |
| 969 outputData[1,]=c("X","X",rep(humanReadingContrastsRenamed,each=5)) | |
| 970 outputData[2,]=c("X","X",rep(c("p-val","FDR.p-val","FC","log2(FC)","t-stat"),ncol(data.fit.eb$adj_p.value))) | |
| 971 outputData[,1]=c("LIMMA","Gene",rownames(data.fit.eb$adj_p.value)) | |
| 972 outputData[,2]=c("Comparison","Info",rep("na",nrow(data.fit.eb$adj_p.value))) | |
| 973 if(!is.null(rowItemInfo))outputData[3:nrow(outputData),2]=rowItemInfo[rownames(data.fit.eb$adj_p.value)] | |
| 974 outputData[3:nrow(outputData),seq(3,ncol(outputData),5)]=prettyNum(data.fit.eb$p.value,digits=4) | |
| 975 outputData[3:nrow(outputData),seq(4,ncol(outputData),5)]=prettyNum(data.fit.eb$adj_p.value,digits=4) | |
| 976 outputData[3:nrow(outputData),seq(5,ncol(outputData),5)]=prettyNum(2^data.fit.eb$coefficients,digits=4) | |
| 977 outputData[3:nrow(outputData),seq(6,ncol(outputData),5)]=prettyNum(data.fit.eb$coefficients,digits=4) | |
| 978 outputData[3:nrow(outputData),seq(7,ncol(outputData),5)]=prettyNum(data.fit.eb$t,digits=4) | |
| 979 | |
| 980 outputDfData=matrix(0,ncol=3+1,nrow=1+nrow(dfMatrix)) | |
| 981 outputDfData[1,]=c("Statistics","df.residual","df.prior","df.total") | |
| 982 outputDfData[2:(1+nrow(dfMatrix)),]=cbind(rownames(dfMatrix),prettyNum(dfMatrix[,c("df.residual")],digits=4),prettyNum(dfMatrix[,c("df.prior")],digits=4),prettyNum(dfMatrix[,c("df.total")],digits=4)) | |
| 983 } | |
| 984 } | |
| 985 addComment("[INFO]Formated output",T,opt$log,display=FALSE) | |
| 986 | |
| 987 #write output results | |
| 988 write.table(outputData,file=opt$outputFile,quote=FALSE,sep="\t",col.names = F,row.names = F) | |
| 989 | |
| 990 #write df info file | |
| 991 write.table(outputDfData,file=opt$outputDfFile,quote=FALSE,sep="\t",col.names = F,row.names = F) | |
| 992 | |
| 993 end.time <- Sys.time() | |
| 994 addComment(c("[INFO]Total execution time for R script:",as.numeric(end.time - start.time,units="mins"),"mins"),T,opt$log,display=FALSE) | |
| 995 | |
| 996 addComment("[INFO]End of R script",T,opt$log,display=FALSE) | |
| 997 | |
| 998 printSessionInfo(opt$log) | |
| 999 #sessionInfo() | |
| 1000 | |
| 1001 | |
| 1002 |
