comparison limma_voom.R @ 17:86b3df7db58b draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/limma_voom commit 42b1160a549a85c87ed5226a83b55c4e44648597
author iuc
date Mon, 18 Feb 2019 17:49:24 -0500
parents 5d903d528193
children 97e06a4c7c75
comparison
equal deleted inserted replaced
16:5d903d528193 17:86b3df7db58b
295 countfiles <- lapply(sampleTable$filename, function(x){read.delim(x, row.names=1)}) 295 countfiles <- lapply(sampleTable$filename, function(x){read.delim(x, row.names=1)})
296 counts <- do.call("cbind", countfiles) 296 counts <- do.call("cbind", countfiles)
297 297
298 } else { 298 } else {
299 # Process the single count matrix 299 # Process the single count matrix
300 counts <- read.table(opt$matrixPath, header=TRUE, sep="\t", strip.white=TRUE, stringsAsFactors=FALSE) 300 counts <- read.table(opt$matrixPath, header=TRUE, sep="\t", strip.white=TRUE, stringsAsFactors=FALSE, check.names=FALSE)
301 row.names(counts) <- counts[, 1] 301 row.names(counts) <- counts[, 1]
302 counts <- counts[ , -1] 302 counts <- counts[ , -1]
303 countsRows <- nrow(counts) 303 countsRows <- nrow(counts)
304 304
305 # Process factors 305 # Process factors
306 if (is.null(opt$factInput)) { 306 if (is.null(opt$factInput)) {
307 factorData <- read.table(opt$factFile, header=TRUE, sep="\t", strip.white=TRUE) 307 factorData <- read.table(opt$factFile, header=TRUE, sep="\t", strip.white=TRUE)
308 # order samples as in counts matrix 308 if(!setequal(factorData[, 1], colnames(counts)))
309 factorData <- factorData[match(colnames(counts), factorData[, 1]), ] 309 stop("Sample IDs in counts and factors files don't match")
310 factors <- factorData[, -1, drop=FALSE] 310 # order samples as in counts matrix
311 factorData <- factorData[match(colnames(counts), factorData[, 1]), ]
312 factors <- factorData[, -1, drop=FALSE]
311 } else { 313 } else {
312 factors <- unlist(strsplit(opt$factInput, "|", fixed=TRUE)) 314 factors <- unlist(strsplit(opt$factInput, "|", fixed=TRUE))
313 factorData <- list() 315 factorData <- list()
314 for (fact in factors) { 316 for (fact in factors) {
315 newFact <- unlist(strsplit(fact, split="::")) 317 newFact <- unlist(strsplit(fact, split="::"))
316 factorData <- rbind(factorData, newFact) 318 factorData <- rbind(factorData, newFact)
317 } # Factors have the form: FACT_NAME::LEVEL,LEVEL,LEVEL,LEVEL,... The first factor is the Primary Factor. 319 } # Factors have the form: FACT_NAME::LEVEL,LEVEL,LEVEL,LEVEL,... The first factor is the Primary Factor.
318 320
319 # Set the row names to be the name of the factor and delete first row 321 # Set the row names to be the name of the factor and delete first row
320 row.names(factorData) <- factorData[, 1] 322 row.names(factorData) <- factorData[, 1]
321 factorData <- factorData[, -1] 323 factorData <- factorData[, -1]
322 factorData <- sapply(factorData, sanitiseGroups) 324 factorData <- sapply(factorData, sanitiseGroups)
323 factorData <- sapply(factorData, strsplit, split=",") 325 factorData <- sapply(factorData, strsplit, split=",")
324 factorData <- sapply(factorData, make.names) 326 # Transform factor data into data frame of R factor objects
325 # Transform factor data into data frame of R factor objects 327 factors <- data.frame(factorData)
326 factors <- data.frame(factorData) 328 }
327 } 329 }
328 } 330 # check there are the same number of samples in counts and factors
331 if(nrow(factors) != ncol(counts)) {
332 stop("There are a different number of samples in the counts files and factors")
333 }
334 # make groups valid R names, required for makeContrasts
335 factors <- sapply(factors, make.names)
336 factors <- data.frame(factors)
329 337
330 # if annotation file provided 338 # if annotation file provided
331 if (haveAnno) { 339 if (haveAnno) {
332 geneanno <- read.table(opt$annoPath, header=TRUE, sep="\t", quote= "", strip.white=TRUE, stringsAsFactors=FALSE) 340 geneanno <- read.table(opt$annoPath, header=TRUE, sep="\t", quote= "", strip.white=TRUE, stringsAsFactors=FALSE)
333 } 341 }
337 345
338 # Split up contrasts seperated by comma into a vector then sanitise 346 # Split up contrasts seperated by comma into a vector then sanitise
339 contrastData <- unlist(strsplit(opt$contrastData, split=",")) 347 contrastData <- unlist(strsplit(opt$contrastData, split=","))
340 contrastData <- sanitiseEquation(contrastData) 348 contrastData <- sanitiseEquation(contrastData)
341 contrastData <- gsub(" ", ".", contrastData, fixed=TRUE) 349 contrastData <- gsub(" ", ".", contrastData, fixed=TRUE)
350 # in case input groups start with numbers this will make the names valid R names, required for makeContrasts
351 cons <- NULL
352 for (i in contrastData) {
353 i <- strsplit(i, split="-")
354 i <- lapply(i, make.names)
355 i <- lapply(i, paste, collapse="-")
356 cons <- append(cons, unlist(i))
357 }
342 358
343 plots <- character() 359 plots <- character()
344 if (!is.null(opt$plots)) { 360 if (!is.null(opt$plots)) {
345 plots <- unlist(strsplit(opt$plots, split=",")) 361 plots <- unlist(strsplit(opt$plots, split=","))
346 } 362 }
360 heatOutPdf <- character() 376 heatOutPdf <- character()
361 stripOutPdf <- character() 377 stripOutPdf <- character()
362 mdvolOutPng <- character() 378 mdvolOutPng <- character()
363 topOut <- character() 379 topOut <- character()
364 glimmaOut <- character() 380 glimmaOut <- character()
365 for (i in 1:length(contrastData)) { 381 for (i in 1:length(cons)) {
366 con <- contrastData[i] 382 con <- cons[i]
367 con <- gsub("\\(|\\)", "", con) 383 con <- gsub("\\(|\\)", "", con)
368 mdOutPdf[i] <- makeOut(paste0("mdplot_", con, ".pdf")) 384 mdOutPdf[i] <- makeOut(paste0("mdplot_", con, ".pdf"))
369 volOutPdf[i] <- makeOut(paste0("volplot_", con, ".pdf")) 385 volOutPdf[i] <- makeOut(paste0("volplot_", con, ".pdf"))
370 heatOutPdf[i] <- makeOut(paste0("heatmap_", con, ".pdf")) 386 heatOutPdf[i] <- makeOut(paste0("heatmap_", con, ".pdf"))
371 stripOutPdf[i] <- makeOut(paste0("stripcharts_", con, ".pdf")) 387 stripOutPdf[i] <- makeOut(paste0("stripcharts_", con, ".pdf"))
407 } 423 }
408 424
409 # Creating naming data 425 # Creating naming data
410 samplenames <- colnames(data$counts) 426 samplenames <- colnames(data$counts)
411 sampleanno <- data.frame("sampleID"=samplenames, factors) 427 sampleanno <- data.frame("sampleID"=samplenames, factors)
428 row.names(factors) <- samplenames # for "Summary of experimental data" table
412 429
413 # Creating colours for the groups 430 # Creating colours for the groups
414 cols <- as.numeric(factors[, 1]) 431 cols <- as.numeric(factors[, 1])
415 col.group <- palette()[cols] 432 col.group <- palette()[cols]
416 433
449 data$counts <- data$counts[keep, ] 466 data$counts <- data$counts[keep, ]
450 data$genes <- data$genes[keep, , drop=FALSE] 467 data$genes <- data$genes[keep, , drop=FALSE]
451 468
452 if (wantFilt) { 469 if (wantFilt) {
453 print("Outputting filtered counts") 470 print("Outputting filtered counts")
454 filt_counts <- data.frame(data$genes, data$counts) 471 filt_counts <- data.frame(data$genes, data$counts, check.names=FALSE)
455 write.table(filt_counts, file=filtOut, row.names=FALSE, sep="\t", quote=FALSE) 472 write.table(filt_counts, file=filtOut, row.names=FALSE, sep="\t", quote=FALSE)
456 linkData <- rbind(linkData, data.frame(Label=paste0(deMethod, "_", "filtcounts.tsv"), Link=paste0(deMethod, "_", "filtcounts"), stringsAsFactors=FALSE)) 473 linkData <- rbind(linkData, data.frame(Label=paste0(deMethod, "_", "filtcounts.tsv"), Link=paste0(deMethod, "_", "filtcounts"), stringsAsFactors=FALSE))
457 } 474 }
458 475
459 # Plot Density 476 # Plot Density
518 data$samples$norm.factors <- 1 535 data$samples$norm.factors <- 1
519 row.names(data$samples) <- colnames(data$counts) 536 row.names(data$samples) <- colnames(data$counts)
520 y <- new("DGEList", data) 537 y <- new("DGEList", data)
521 538
522 print("Generating Design") 539 print("Generating Design")
523 # Name rows of factors according to their sample
524 row.names(factors) <- names(data$counts)
525 factorList <- sapply(names(factors), pasteListName) 540 factorList <- sapply(names(factors), pasteListName)
526 formula <- "~0" 541 formula <- "~0"
527 for (i in 1:length(factorList)) { 542 for (i in 1:length(factorList)) {
528 formula <- paste(formula,factorList[i], sep="+") 543 formula <- paste(formula,factorList[i], sep="+")
529 } 544 }
538 logcounts <- y #store for plots 553 logcounts <- y #store for plots
539 y <- calcNormFactors(y, method=opt$normOpt) 554 y <- calcNormFactors(y, method=opt$normOpt)
540 555
541 # Generate contrasts information 556 # Generate contrasts information
542 print("Generating Contrasts") 557 print("Generating Contrasts")
543 contrasts <- makeContrasts(contrasts=contrastData, levels=design) 558 contrasts <- makeContrasts(contrasts=cons, levels=design)
544 559
545 ################################################################################ 560 ################################################################################
546 ### Data Output 561 ### Data Output
547 ################################################################################ 562 ################################################################################
548 563
766 voomFit <- lmFit(vData, design) 781 voomFit <- lmFit(vData, design)
767 } 782 }
768 783
769 # Save normalised counts (log2cpm) 784 # Save normalised counts (log2cpm)
770 if (wantNorm) { 785 if (wantNorm) {
771 norm_counts <- data.frame(vData$genes, vData$E) 786 norm_counts <- data.frame(vData$genes, vData$E, check.names=FALSE)
772 write.table(norm_counts, file=normOut, row.names=FALSE, sep="\t", quote=FALSE) 787 write.table(norm_counts, file=normOut, row.names=FALSE, sep="\t", quote=FALSE)
773 linkData <- rbind(linkData, c((paste0(deMethod, "_", "normcounts.tsv")), (paste0(deMethod, "_", "normcounts")))) 788 linkData <- rbind(linkData, c((paste0(deMethod, "_", "normcounts.tsv")), (paste0(deMethod, "_", "normcounts"))))
774 } 789 }
775 790
776 # Fit linear model and estimate dispersion with eBayes 791 # Fit linear model and estimate dispersion with eBayes
825 840
826 status = decideTests(fit, adjust.method=opt$pAdjOpt, p.value=opt$pValReq, 841 status = decideTests(fit, adjust.method=opt$pAdjOpt, p.value=opt$pValReq,
827 lfc=opt$lfcReq) 842 lfc=opt$lfcReq)
828 sumStatus <- summary(status) 843 sumStatus <- summary(status)
829 844
830 for (i in 1:length(contrastData)) { 845 for (i in 1:length(cons)) {
831 con <- contrastData[i] 846 con <- cons[i]
832 con <- gsub("\\(|\\)", "", con) 847 con <- gsub("\\(|\\)", "", con)
833 # Collect counts for differential expression 848 # Collect counts for differential expression
834 upCount[i] <- sumStatus["Up", i] 849 upCount[i] <- sumStatus["Up", i]
835 downCount[i] <- sumStatus["Down", i] 850 downCount[i] <- sumStatus["Down", i]
836 flatCount[i] <- sumStatus["NotSig", i] 851 flatCount[i] <- sumStatus["NotSig", i]
977 linkData <- rbind(linkData, c(linkName, linkAddr)) 992 linkData <- rbind(linkData, c(linkName, linkAddr))
978 invisible(dev.off()) 993 invisible(dev.off())
979 } 994 }
980 } 995 }
981 sigDiff <- data.frame(Up=upCount, Flat=flatCount, Down=downCount) 996 sigDiff <- data.frame(Up=upCount, Flat=flatCount, Down=downCount)
982 row.names(sigDiff) <- contrastData 997 row.names(sigDiff) <- cons
983 998
984 # Save relevant items as rda object 999 # Save relevant items as rda object
985 if (wantRda) { 1000 if (wantRda) {
986 print("Saving RData") 1001 print("Saving RData")
987 if (wantWeight) { 1002 if (wantWeight) {