Mercurial > repos > iuc > limma_voom
comparison limma_voom.R @ 17:86b3df7db58b draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/limma_voom commit 42b1160a549a85c87ed5226a83b55c4e44648597
author | iuc |
---|---|
date | Mon, 18 Feb 2019 17:49:24 -0500 |
parents | 5d903d528193 |
children | 97e06a4c7c75 |
comparison
equal
deleted
inserted
replaced
16:5d903d528193 | 17:86b3df7db58b |
---|---|
295 countfiles <- lapply(sampleTable$filename, function(x){read.delim(x, row.names=1)}) | 295 countfiles <- lapply(sampleTable$filename, function(x){read.delim(x, row.names=1)}) |
296 counts <- do.call("cbind", countfiles) | 296 counts <- do.call("cbind", countfiles) |
297 | 297 |
298 } else { | 298 } else { |
299 # Process the single count matrix | 299 # Process the single count matrix |
300 counts <- read.table(opt$matrixPath, header=TRUE, sep="\t", strip.white=TRUE, stringsAsFactors=FALSE) | 300 counts <- read.table(opt$matrixPath, header=TRUE, sep="\t", strip.white=TRUE, stringsAsFactors=FALSE, check.names=FALSE) |
301 row.names(counts) <- counts[, 1] | 301 row.names(counts) <- counts[, 1] |
302 counts <- counts[ , -1] | 302 counts <- counts[ , -1] |
303 countsRows <- nrow(counts) | 303 countsRows <- nrow(counts) |
304 | 304 |
305 # Process factors | 305 # Process factors |
306 if (is.null(opt$factInput)) { | 306 if (is.null(opt$factInput)) { |
307 factorData <- read.table(opt$factFile, header=TRUE, sep="\t", strip.white=TRUE) | 307 factorData <- read.table(opt$factFile, header=TRUE, sep="\t", strip.white=TRUE) |
308 # order samples as in counts matrix | 308 if(!setequal(factorData[, 1], colnames(counts))) |
309 factorData <- factorData[match(colnames(counts), factorData[, 1]), ] | 309 stop("Sample IDs in counts and factors files don't match") |
310 factors <- factorData[, -1, drop=FALSE] | 310 # order samples as in counts matrix |
311 factorData <- factorData[match(colnames(counts), factorData[, 1]), ] | |
312 factors <- factorData[, -1, drop=FALSE] | |
311 } else { | 313 } else { |
312 factors <- unlist(strsplit(opt$factInput, "|", fixed=TRUE)) | 314 factors <- unlist(strsplit(opt$factInput, "|", fixed=TRUE)) |
313 factorData <- list() | 315 factorData <- list() |
314 for (fact in factors) { | 316 for (fact in factors) { |
315 newFact <- unlist(strsplit(fact, split="::")) | 317 newFact <- unlist(strsplit(fact, split="::")) |
316 factorData <- rbind(factorData, newFact) | 318 factorData <- rbind(factorData, newFact) |
317 } # Factors have the form: FACT_NAME::LEVEL,LEVEL,LEVEL,LEVEL,... The first factor is the Primary Factor. | 319 } # Factors have the form: FACT_NAME::LEVEL,LEVEL,LEVEL,LEVEL,... The first factor is the Primary Factor. |
318 | 320 |
319 # Set the row names to be the name of the factor and delete first row | 321 # Set the row names to be the name of the factor and delete first row |
320 row.names(factorData) <- factorData[, 1] | 322 row.names(factorData) <- factorData[, 1] |
321 factorData <- factorData[, -1] | 323 factorData <- factorData[, -1] |
322 factorData <- sapply(factorData, sanitiseGroups) | 324 factorData <- sapply(factorData, sanitiseGroups) |
323 factorData <- sapply(factorData, strsplit, split=",") | 325 factorData <- sapply(factorData, strsplit, split=",") |
324 factorData <- sapply(factorData, make.names) | 326 # Transform factor data into data frame of R factor objects |
325 # Transform factor data into data frame of R factor objects | 327 factors <- data.frame(factorData) |
326 factors <- data.frame(factorData) | 328 } |
327 } | 329 } |
328 } | 330 # check there are the same number of samples in counts and factors |
331 if(nrow(factors) != ncol(counts)) { | |
332 stop("There are a different number of samples in the counts files and factors") | |
333 } | |
334 # make groups valid R names, required for makeContrasts | |
335 factors <- sapply(factors, make.names) | |
336 factors <- data.frame(factors) | |
329 | 337 |
330 # if annotation file provided | 338 # if annotation file provided |
331 if (haveAnno) { | 339 if (haveAnno) { |
332 geneanno <- read.table(opt$annoPath, header=TRUE, sep="\t", quote= "", strip.white=TRUE, stringsAsFactors=FALSE) | 340 geneanno <- read.table(opt$annoPath, header=TRUE, sep="\t", quote= "", strip.white=TRUE, stringsAsFactors=FALSE) |
333 } | 341 } |
337 | 345 |
338 # Split up contrasts seperated by comma into a vector then sanitise | 346 # Split up contrasts seperated by comma into a vector then sanitise |
339 contrastData <- unlist(strsplit(opt$contrastData, split=",")) | 347 contrastData <- unlist(strsplit(opt$contrastData, split=",")) |
340 contrastData <- sanitiseEquation(contrastData) | 348 contrastData <- sanitiseEquation(contrastData) |
341 contrastData <- gsub(" ", ".", contrastData, fixed=TRUE) | 349 contrastData <- gsub(" ", ".", contrastData, fixed=TRUE) |
350 # in case input groups start with numbers this will make the names valid R names, required for makeContrasts | |
351 cons <- NULL | |
352 for (i in contrastData) { | |
353 i <- strsplit(i, split="-") | |
354 i <- lapply(i, make.names) | |
355 i <- lapply(i, paste, collapse="-") | |
356 cons <- append(cons, unlist(i)) | |
357 } | |
342 | 358 |
343 plots <- character() | 359 plots <- character() |
344 if (!is.null(opt$plots)) { | 360 if (!is.null(opt$plots)) { |
345 plots <- unlist(strsplit(opt$plots, split=",")) | 361 plots <- unlist(strsplit(opt$plots, split=",")) |
346 } | 362 } |
360 heatOutPdf <- character() | 376 heatOutPdf <- character() |
361 stripOutPdf <- character() | 377 stripOutPdf <- character() |
362 mdvolOutPng <- character() | 378 mdvolOutPng <- character() |
363 topOut <- character() | 379 topOut <- character() |
364 glimmaOut <- character() | 380 glimmaOut <- character() |
365 for (i in 1:length(contrastData)) { | 381 for (i in 1:length(cons)) { |
366 con <- contrastData[i] | 382 con <- cons[i] |
367 con <- gsub("\\(|\\)", "", con) | 383 con <- gsub("\\(|\\)", "", con) |
368 mdOutPdf[i] <- makeOut(paste0("mdplot_", con, ".pdf")) | 384 mdOutPdf[i] <- makeOut(paste0("mdplot_", con, ".pdf")) |
369 volOutPdf[i] <- makeOut(paste0("volplot_", con, ".pdf")) | 385 volOutPdf[i] <- makeOut(paste0("volplot_", con, ".pdf")) |
370 heatOutPdf[i] <- makeOut(paste0("heatmap_", con, ".pdf")) | 386 heatOutPdf[i] <- makeOut(paste0("heatmap_", con, ".pdf")) |
371 stripOutPdf[i] <- makeOut(paste0("stripcharts_", con, ".pdf")) | 387 stripOutPdf[i] <- makeOut(paste0("stripcharts_", con, ".pdf")) |
407 } | 423 } |
408 | 424 |
409 # Creating naming data | 425 # Creating naming data |
410 samplenames <- colnames(data$counts) | 426 samplenames <- colnames(data$counts) |
411 sampleanno <- data.frame("sampleID"=samplenames, factors) | 427 sampleanno <- data.frame("sampleID"=samplenames, factors) |
428 row.names(factors) <- samplenames # for "Summary of experimental data" table | |
412 | 429 |
413 # Creating colours for the groups | 430 # Creating colours for the groups |
414 cols <- as.numeric(factors[, 1]) | 431 cols <- as.numeric(factors[, 1]) |
415 col.group <- palette()[cols] | 432 col.group <- palette()[cols] |
416 | 433 |
449 data$counts <- data$counts[keep, ] | 466 data$counts <- data$counts[keep, ] |
450 data$genes <- data$genes[keep, , drop=FALSE] | 467 data$genes <- data$genes[keep, , drop=FALSE] |
451 | 468 |
452 if (wantFilt) { | 469 if (wantFilt) { |
453 print("Outputting filtered counts") | 470 print("Outputting filtered counts") |
454 filt_counts <- data.frame(data$genes, data$counts) | 471 filt_counts <- data.frame(data$genes, data$counts, check.names=FALSE) |
455 write.table(filt_counts, file=filtOut, row.names=FALSE, sep="\t", quote=FALSE) | 472 write.table(filt_counts, file=filtOut, row.names=FALSE, sep="\t", quote=FALSE) |
456 linkData <- rbind(linkData, data.frame(Label=paste0(deMethod, "_", "filtcounts.tsv"), Link=paste0(deMethod, "_", "filtcounts"), stringsAsFactors=FALSE)) | 473 linkData <- rbind(linkData, data.frame(Label=paste0(deMethod, "_", "filtcounts.tsv"), Link=paste0(deMethod, "_", "filtcounts"), stringsAsFactors=FALSE)) |
457 } | 474 } |
458 | 475 |
459 # Plot Density | 476 # Plot Density |
518 data$samples$norm.factors <- 1 | 535 data$samples$norm.factors <- 1 |
519 row.names(data$samples) <- colnames(data$counts) | 536 row.names(data$samples) <- colnames(data$counts) |
520 y <- new("DGEList", data) | 537 y <- new("DGEList", data) |
521 | 538 |
522 print("Generating Design") | 539 print("Generating Design") |
523 # Name rows of factors according to their sample | |
524 row.names(factors) <- names(data$counts) | |
525 factorList <- sapply(names(factors), pasteListName) | 540 factorList <- sapply(names(factors), pasteListName) |
526 formula <- "~0" | 541 formula <- "~0" |
527 for (i in 1:length(factorList)) { | 542 for (i in 1:length(factorList)) { |
528 formula <- paste(formula,factorList[i], sep="+") | 543 formula <- paste(formula,factorList[i], sep="+") |
529 } | 544 } |
538 logcounts <- y #store for plots | 553 logcounts <- y #store for plots |
539 y <- calcNormFactors(y, method=opt$normOpt) | 554 y <- calcNormFactors(y, method=opt$normOpt) |
540 | 555 |
541 # Generate contrasts information | 556 # Generate contrasts information |
542 print("Generating Contrasts") | 557 print("Generating Contrasts") |
543 contrasts <- makeContrasts(contrasts=contrastData, levels=design) | 558 contrasts <- makeContrasts(contrasts=cons, levels=design) |
544 | 559 |
545 ################################################################################ | 560 ################################################################################ |
546 ### Data Output | 561 ### Data Output |
547 ################################################################################ | 562 ################################################################################ |
548 | 563 |
766 voomFit <- lmFit(vData, design) | 781 voomFit <- lmFit(vData, design) |
767 } | 782 } |
768 | 783 |
769 # Save normalised counts (log2cpm) | 784 # Save normalised counts (log2cpm) |
770 if (wantNorm) { | 785 if (wantNorm) { |
771 norm_counts <- data.frame(vData$genes, vData$E) | 786 norm_counts <- data.frame(vData$genes, vData$E, check.names=FALSE) |
772 write.table(norm_counts, file=normOut, row.names=FALSE, sep="\t", quote=FALSE) | 787 write.table(norm_counts, file=normOut, row.names=FALSE, sep="\t", quote=FALSE) |
773 linkData <- rbind(linkData, c((paste0(deMethod, "_", "normcounts.tsv")), (paste0(deMethod, "_", "normcounts")))) | 788 linkData <- rbind(linkData, c((paste0(deMethod, "_", "normcounts.tsv")), (paste0(deMethod, "_", "normcounts")))) |
774 } | 789 } |
775 | 790 |
776 # Fit linear model and estimate dispersion with eBayes | 791 # Fit linear model and estimate dispersion with eBayes |
825 | 840 |
826 status = decideTests(fit, adjust.method=opt$pAdjOpt, p.value=opt$pValReq, | 841 status = decideTests(fit, adjust.method=opt$pAdjOpt, p.value=opt$pValReq, |
827 lfc=opt$lfcReq) | 842 lfc=opt$lfcReq) |
828 sumStatus <- summary(status) | 843 sumStatus <- summary(status) |
829 | 844 |
830 for (i in 1:length(contrastData)) { | 845 for (i in 1:length(cons)) { |
831 con <- contrastData[i] | 846 con <- cons[i] |
832 con <- gsub("\\(|\\)", "", con) | 847 con <- gsub("\\(|\\)", "", con) |
833 # Collect counts for differential expression | 848 # Collect counts for differential expression |
834 upCount[i] <- sumStatus["Up", i] | 849 upCount[i] <- sumStatus["Up", i] |
835 downCount[i] <- sumStatus["Down", i] | 850 downCount[i] <- sumStatus["Down", i] |
836 flatCount[i] <- sumStatus["NotSig", i] | 851 flatCount[i] <- sumStatus["NotSig", i] |
977 linkData <- rbind(linkData, c(linkName, linkAddr)) | 992 linkData <- rbind(linkData, c(linkName, linkAddr)) |
978 invisible(dev.off()) | 993 invisible(dev.off()) |
979 } | 994 } |
980 } | 995 } |
981 sigDiff <- data.frame(Up=upCount, Flat=flatCount, Down=downCount) | 996 sigDiff <- data.frame(Up=upCount, Flat=flatCount, Down=downCount) |
982 row.names(sigDiff) <- contrastData | 997 row.names(sigDiff) <- cons |
983 | 998 |
984 # Save relevant items as rda object | 999 # Save relevant items as rda object |
985 if (wantRda) { | 1000 if (wantRda) { |
986 print("Saving RData") | 1001 print("Saving RData") |
987 if (wantWeight) { | 1002 if (wantWeight) { |