Mercurial > repos > iuc > deseq2
diff deseq2.R @ 15:9a616afdbda5 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 83eb5b2665d87c02b270596f8175499e69061032
author | iuc |
---|---|
date | Sat, 19 May 2018 03:55:48 -0400 |
parents | d0c39b5e78cf |
children | a416957ee305 |
line wrap: on
line diff
--- a/deseq2.R Thu Apr 12 17:29:45 2018 -0400 +++ b/deseq2.R Sat May 19 03:55:48 2018 -0400 @@ -8,14 +8,14 @@ # 'factors' a JSON list object from Galaxy # # the output file has columns: -# +# # baseMean (mean normalized count) # log2FoldChange (by default a moderated LFC estimate) # lfcSE (the standard error) # stat (the Wald statistic) # pvalue (p-value from comparison of Wald statistic to a standard Normal) # padj (adjusted p-value, Benjamini Hochberg correction on genes which pass the mean count filter) -# +# # the first variable in 'factors' will be the primary factor. # the levels of the primary factor are used in the order of appearance in factors. # @@ -48,6 +48,7 @@ "help", "h", 0, "logical", "outfile", "o", 1, "character", "countsfile", "n", 1, "character", + "header", "H", 0, "logical", "factors", "f", 1, "character", "files_to_labels", "l", 1, "character", "plots" , "p", 1, "character", @@ -86,6 +87,12 @@ FALSE } +if (!is.null(opt$header)) { + hasHeader <- TRUE +} else { + hasHeader <- FALSE +} + if (!is.null(opt$tximport)) { if (is.null(opt$tx2gene)) stop("A transcript-to-gene map or a GTF file is required for tximport") if (tolower(file_ext(opt$tx2gene)) == "gtf") { @@ -138,15 +145,6 @@ primaryFactor <- factors[1] designFormula <- as.formula(paste("~", paste(rev(factors), collapse=" + "))) -if (verbose) { - cat("DESeq2 run information\n\n") - cat("sample table:\n") - print(sampleTable[,-c(1:2),drop=FALSE]) - cat("\ndesign formula:\n") - print(designFormula) - cat("\n\n") -} - # these are plots which are made once for each analysis generateGenericPlots <- function(dds, factors) { library("ggplot2") @@ -202,13 +200,29 @@ # For JSON input from Galaxy, path is absolute dir <- "" -if (!useTXI) { +if (!useTXI & hasHeader) { + countfiles <- lapply(as.character(sampleTable$filename), function(x){read.delim(x, row.names=1)}) + tbl <- do.call("cbind", countfiles) + rownames(sampleTable) <- colnames(tbl) # take sample ids from header + + # check for htseq report lines (from DESeqDataSetFromHTSeqCount function) + oldSpecialNames <- c("no_feature", "ambiguous", "too_low_aQual", + "not_aligned", "alignment_not_unique") + specialRows <- (substr(rownames(tbl), 1, 1) == "_") | rownames(tbl) %in% oldSpecialNames + tbl <- tbl[!specialRows, , drop = FALSE] + + dds <- DESeqDataSetFromMatrix(countData = tbl, + colData = sampleTable[,-c(1:2), drop=FALSE], + design = designFormula) +} else if (!useTXI & !hasHeader) { + # construct the object from HTSeq files dds <- DESeqDataSetFromHTSeqCount(sampleTable = sampleTable, directory = dir, design = designFormula) labs <- unname(unlist(filenames_to_labels[colnames(dds)])) colnames(dds) <- labs + } else { # construct the object using tximport # first need to make the tx2gene table @@ -232,7 +246,16 @@ designFormula) } -if (verbose) cat(paste(ncol(dds), "samples with counts over", nrow(dds), "genes\n")) +if (verbose) { + cat("DESeq2 run information\n\n") + cat("sample table:\n") + print(sampleTable[,-c(1:2),drop=FALSE]) + cat("\ndesign formula:\n") + print(designFormula) + cat("\n\n") + cat(paste(ncol(dds), "samples with counts over", nrow(dds), "genes\n")) +} + # optional outlier behavior if (is.null(opt$outlier_replace_off)) { minRep <- 7 @@ -242,7 +265,7 @@ } if (is.null(opt$outlier_filter_off)) { cooksCutoff <- TRUE -} else { +} else { cooksCutoff <- FALSE if (verbose) cat("outlier filtering off\n") }