Mercurial > repos > iuc > deseq2
diff get_deseq_dataset.R @ 25:de44f8eff84a draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 9eb6d07510ccf27d6499172d62c81661078ec57b"
author | iuc |
---|---|
date | Wed, 25 Nov 2020 18:36:55 +0000 |
parents | 0696db066a5b |
children | 8fe98f7094de |
line wrap: on
line diff
--- a/get_deseq_dataset.R Sat Feb 29 17:08:08 2020 -0500 +++ b/get_deseq_dataset.R Wed Nov 25 18:36:55 2020 +0000 @@ -1,76 +1,80 @@ -get_deseq_dataset <- function(sampleTable, header, designFormula, tximport, txtype, tx2gene) { +get_deseq_dataset <- function(sample_table, header, design_formula, tximport, txtype, tx2gene) { dir <- "" - if (!is.null(header)) { - hasHeader <- TRUE - } else { - hasHeader <- FALSE - } - - if (!is.null(tximport)) { + has_header <- !is.null(header) + use_txi <- !is.null(tximport) + if (use_txi) { if (is.null(tx2gene)) stop("A transcript-to-gene map or a GTF/GFF3 file is required for tximport") if (tolower(file_ext(tx2gene)) == "gff") { - gffFile <-tx2gene + gff_file <- tx2gene } else { - gffFile <- NULL - tx2gene <- read.table(tx2gene, header=hasHeader) + gff_file <- NULL + tx2gene <- read.table(tx2gene, header = has_header) } - useTXI <- TRUE - } else { - useTXI <- FALSE } - if (!useTXI & hasHeader) { - countfiles <- lapply(as.character(sampleTable$filename), function(x){read.delim(x, row.names=1)}) + if (!use_txi & has_header) { + countfiles <- lapply(as.character(sample_table$filename), read.delim, row.names = 1) tbl <- do.call("cbind", countfiles) - colnames(tbl) <- rownames(sampleTable) # take sample ids from header + colnames(tbl) <- rownames(sample_table) # take sample ids from header # check for htseq report lines (from DESeqDataSetFromHTSeqCount function) - oldSpecialNames <- c("no_feature", "ambiguous", "too_low_aQual", - "not_aligned", "alignment_not_unique") - specialRows <- (substr(rownames(tbl), 1, 1) == "_") | rownames(tbl) %in% oldSpecialNames - tbl <- tbl[!specialRows, , drop = FALSE] + old_special_names <- c( + "no_feature", + "ambiguous", + "too_low_aQual", + "not_aligned", + "alignment_not_unique" + ) + special_rows <- (substr(rownames(tbl), 1, 1) == "_") | rownames(tbl) %in% old_special_names + tbl <- tbl[!special_rows, , drop = FALSE] - dds <- DESeqDataSetFromMatrix(countData = tbl, - colData = subset(sampleTable, select=-(filename)), - design = designFormula) - } else if (!useTXI & !hasHeader) { + dds <- DESeqDataSetFromMatrix( + countData = tbl, + colData = subset(sample_table, select = -filename), + design = design_formula + ) + } else if (!use_txi & !has_header) { # construct the object from HTSeq files - dds <- DESeqDataSetFromHTSeqCount(sampleTable = sampleTable, - directory = dir, - design = designFormula) - colnames(dds) <- row.names(sampleTable) + dds <- DESeqDataSetFromHTSeqCount( + sampleTable = sample_table, + directory = dir, + design = design_formula + ) + colnames(dds) <- row.names(sample_table) } else { # construct the object using tximport library("tximport") - txiFiles <- as.character(sampleTable$filename) - labs <- row.names(sampleTable) - names(txiFiles) <- labs - if (!is.null(gffFile)) { + txi_files <- as.character(sample_table$filename) + labs <- row.names(sample_table) + names(txi_files) <- labs + if (!is.null(gff_file)) { # first need to make the tx2gene table # this takes ~2-3 minutes using Bioconductor functions suppressPackageStartupMessages({ library("GenomicFeatures") }) - txdb <- makeTxDbFromGFF(gffFile) + txdb <- makeTxDbFromGFF(gff_file) k <- keys(txdb, keytype = "TXNAME") - tx2gene <- select(txdb, keys=k, columns="GENEID", keytype="TXNAME") - # Remove 'transcript:' from transcript IDs (when gffFile is a GFF3 from Ensembl and the transcript does not have a Name) - tx2gene$TXNAME <- sub('^transcript:', '', tx2gene$TXNAME) + tx2gene <- select(txdb, keys = k, columns = "GENEID", keytype = "TXNAME") + # Remove 'transcript:' from transcript IDs (when gff_file is a GFF3 from Ensembl and the transcript does not have a Name) + tx2gene$TXNAME <- sub("^transcript:", "", tx2gene$TXNAME) # nolint } - try(txi <- tximport(txiFiles, type=txtype, tx2gene=tx2gene)) + try(txi <- tximport(txi_files, type = txtype, tx2gene = tx2gene)) if (!exists("txi")) { # Remove version from transcript IDs in tx2gene... - tx2gene$TXNAME <- sub('\\.[0-9]+$', '', tx2gene$TXNAME) - # ...and in txiFiles - txi <- tximport(txiFiles, type=txtype, tx2gene=tx2gene, ignoreTxVersion=TRUE) + tx2gene$TXNAME <- sub("\\.[0-9]+$", "", tx2gene$TXNAME) # nolint + # ...and in txi_files + txi <- tximport(txi_files, type = txtype, tx2gene = tx2gene, ignoreTxVersion = TRUE) } - dds <- DESeqDataSetFromTximport(txi, - subset(sampleTable, select=-c(filename)), - designFormula) + dds <- DESeqDataSetFromTximport( + txi, + subset(sample_table, select = -c(filename)), + design_formula + ) } return(dds) }