Mercurial > repos > iuc > ruvseq
changeset 1:c24765926774 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ruvseq commit 9ed3d83cc447ee897af867362bf1dd67af8a11c2
author | iuc |
---|---|
date | Tue, 26 Mar 2019 06:25:38 -0400 |
parents | 61dffb20b6f9 |
children | fed9d0350d72 |
files | get_deseq_dataset.R ruvseq.R ruvseq.xml test-data/ruvseq_diag_sailfish.pdf |
diffstat | 4 files changed, 49 insertions(+), 35 deletions(-) [+] |
line wrap: on
line diff
--- a/get_deseq_dataset.R Wed Sep 05 15:54:16 2018 -0400 +++ b/get_deseq_dataset.R Tue Mar 26 06:25:38 2019 -0400 @@ -9,12 +9,12 @@ } if (!is.null(tximport)) { - if (is.null(tx2gene)) stop("A transcript-to-gene map or a GTF file is required for tximport") - if (tolower(file_ext(opt$tx2gene)) == "gtf") { - gtfFile <-tx2gene + if (is.null(tx2gene)) stop("A transcript-to-gene map or a GTF/GFF3 file is required for tximport") + if (tolower(file_ext(tx2gene)) == "gff") { + gffFile <-tx2gene } else { - gtfFile <- NULL - tx2gene <- read.table(tx2gene, header=FALSE) + gffFile <- NULL + tx2gene <- read.table(tx2gene, header=hasHeader) } useTXI <- TRUE } else { @@ -45,22 +45,29 @@ } else { # construct the object using tximport - # first need to make the tx2gene table - # this takes ~2-3 minutes using Bioconductor functions - if (!is.null(gtfFile)) { - suppressPackageStartupMessages({ - library("GenomicFeatures") - }) - txdb <- makeTxDbFromGFF(gtfFile, format="gtf") - k <- keys(txdb, keytype = "GENEID") - df <- select(txdb, keys = k, keytype = "GENEID", columns = "TXNAME") - tx2gene <- df[, 2:1] # tx ID, then gene ID - } library("tximport") txiFiles <- as.character(sampleTable$filename) labs <- row.names(sampleTable) names(txiFiles) <- labs - txi <- tximport(txiFiles, type=txtype, tx2gene=tx2gene) + if (!is.null(gffFile)) { + # first need to make the tx2gene table + # this takes ~2-3 minutes using Bioconductor functions + suppressPackageStartupMessages({ + library("GenomicFeatures") + }) + txdb <- makeTxDbFromGFF(gffFile) + k <- keys(txdb, keytype = "TXNAME") + tx2gene <- select(txdb, keys=k, columns="GENEID", keytype="TXNAME") + # Remove 'transcript:' from transcript IDs (when gffFile is a GFF3 from Ensembl and the transcript does not have a Name) + tx2gene$TXNAME <- sub('^transcript:', '', tx2gene$TXNAME) + } + try(txi <- tximport(txiFiles, type=txtype, tx2gene=tx2gene)) + if (!exists("txi")) { + # Remove version from transcript IDs in tx2gene... + tx2gene$TXNAME <- sub('\\.[0-9]+$', '', tx2gene$TXNAME) + # ...and in txiFiles + txi <- tximport(txiFiles, type=txtype, tx2gene=tx2gene, ignoreTxVersion=TRUE) + } dds <- DESeqDataSetFromTximport(txi, subset(sampleTable, select=-c(filename)), designFormula)
--- a/ruvseq.R Wed Sep 05 15:54:16 2018 -0400 +++ b/ruvseq.R Tue Mar 26 06:25:38 2019 -0400 @@ -83,9 +83,12 @@ create_seq_expression_set <- function (dds, min_mean_count) { count_values <- counts(dds) + print(paste0("feature count before filtering :",nrow(count_values),"\n")) + print(paste0("Filtering features which mean expression is less or eq. than ", min_mean_count, " counts\n")) filter <- apply(count_values, 1, function(x) mean(x) > min_mean_count) filtered <- count_values[filter,] - set = newSeqExpressionSet(as.matrix(count_values), + print(paste0("feature count after filtering :",nrow(filtered),"\n")) + set = newSeqExpressionSet(as.matrix(filtered), phenoData = data.frame(colData(dds)$condition, row.names=colnames(filtered))) plot_pca_rle(set = set, title = 'raw data') set <- betweenLaneNormalization(set, which="upper") @@ -103,7 +106,7 @@ fit <- glmFit(y, design) lrt <- glmLRT(fit, coef=2) top <- topTags(lrt, n=nrow(set))$table - top_rows <- rownames(top)[which(top$PValue > cutoff_p)] + top_rows <- rownames(top)[which(top$PValue < cutoff_p)] empirical <- rownames(set)[which(!(rownames(set) %in% top_rows))] return(empirical) } @@ -154,6 +157,7 @@ alpha <- opt$alpha min_k <- opt$min_k max_k <- opt$max_k +min_c <- opt$min_mean_count sample_json <- fromJSON(opt$sample_json) sample_paths <- sample_json$path sample_names <- sample_json$label @@ -169,7 +173,7 @@ } # Run through the ruvseq variants -set <- create_seq_expression_set(dds, min_mean_count = opt$min_mean_count) +set <- create_seq_expression_set(dds, min_mean_count = min_c) result <- list(no_correction = set) for (k in seq(min_k, max_k)) { result[[paste0('residual_method_k', k)]] <- ruv_residual_method(set, k=k)
--- a/ruvseq.xml Wed Sep 05 15:54:16 2018 -0400 +++ b/ruvseq.xml Tue Mar 26 06:25:38 2019 -0400 @@ -1,11 +1,12 @@ -<tool id="ruvseq" name="Remove Unwanted Variation" version="1.12.0"> +<tool id="ruvseq" name="Remove Unwanted Variation" version="1.16.0"> <description>from RNA-seq data</description> <requirements> - <requirement type="package" version="1.12.0">bioconductor-ruvseq</requirement> - <requirement type="package" version="1.18.1">bioconductor-deseq2</requirement> - <requirement type="package" version="1.6.0">bioconductor-tximport</requirement> - <requirement type="package" version="1.30.0">bioconductor-genomicfeatures</requirement> - <requirement type="package" version="0.6.5">r-ggrepel</requirement> + <requirement type="package" version="1.16.0">bioconductor-ruvseq</requirement> + <requirement type="package" version="1.22.1">bioconductor-deseq2</requirement> + <requirement type="package" version="1.10.0">bioconductor-tximport</requirement> + <requirement type="package" version="1.34.1">bioconductor-genomicfeatures</requirement> + <requirement type="package" version="0.8.0">r-ggrepel</requirement> + <requirement type="package" version="1.20.2">r-getopt</requirement> </requirements> <stdio> <regex match="Execution halted" @@ -44,6 +45,7 @@ --min_k $min_k --max_k $max_k +--min_mean_count $min_mean_count #if $tximport.tximport_selector == 'tximport': --txtype $tximport.txtype @@ -136,19 +138,19 @@ <element name="batch_effects_control_method_k1"> <assert_contents> <has_text_matching expression="identifier\tcondition\tW_1"/> - <has_text_matching expression="GSM461179.*\tTreated\t-0.49.*"/> + <has_text_matching expression="GSM461179.*\tTreated\t\-.+"/> </assert_contents> </element> <element name="batch_effects_replicate_method_k1"> <assert_contents> <has_text_matching expression="identifier\tcondition\tW_1"/> - <has_text_matching expression="GSM461179.*\tTreated\t-0.25.*"/> + <has_text_matching expression="GSM461179.*\tTreated\t\-.+"/> </assert_contents> </element> <element name="batch_effects_residual_method_k1"> <assert_contents> <has_text_matching expression="identifier\tcondition\tW_1"/> - <has_text_matching expression="GSM461179.*\tTreated\t-0.60.*"/> + <has_text_matching expression="GSM461179.*\tTreated\t\-.+"/> </assert_contents> </element> </output_collection> @@ -170,19 +172,19 @@ <element name="batch_effects_control_method_k1"> <assert_contents> <has_text_matching expression="identifier\tcondition\tW_1"/> - <has_text_matching expression="GSM461179.*\tTreated\t-0.49.*"/> + <has_text_matching expression="GSM461179.*\tTreated\t\-.+"/> </assert_contents> </element> <element name="batch_effects_replicate_method_k1"> <assert_contents> <has_text_matching expression="identifier\tcondition\tW_1"/> - <has_text_matching expression="GSM461179.*\tTreated\t-0.25.*"/> + <has_text_matching expression="GSM461179.*\tTreated\t\-.+"/> </assert_contents> </element> <element name="batch_effects_residual_method_k1"> <assert_contents> <has_text_matching expression="identifier\tcondition\tW_1"/> - <has_text_matching expression="GSM461179.*\tTreated\t-0.60.*"/> + <has_text_matching expression="GSM461179.*\tTreated\t\-.+"/> </assert_contents> </element> </output_collection> @@ -202,24 +204,25 @@ <param name="txtype" value="sailfish"/> <param name="mapping_format_selector" value="tabular"/> <param name="tabular_file" value="tx2gene.tab"/> + <param name="min_mean_count" value="0"/> <output name="plots" file="ruvseq_diag_sailfish.pdf" ftype="pdf" compare="sim_size"/> <output_collection name="unwanted_variation" type="list"> <element name="batch_effects_control_method_k1"> <assert_contents> <has_text_matching expression="identifier\tcondition\tW_1"/> - <has_text_matching expression="sailfish_quant.sf1.tab\tTreated\t-0.28.*"/> + <has_text_matching expression=".*sailfish_quant.sf1.tab\tTreated\t.+"/> </assert_contents> </element> <element name="batch_effects_replicate_method_k1"> <assert_contents> <has_text_matching expression="identifier\tcondition\tW_1"/> - <has_text_matching expression="sailfish_quant.sf1.tab\tTreated\t-0.44.*"/> + <has_text_matching expression=".*sailfish_quant.sf1.tab\tTreated\t.+"/> </assert_contents> </element> <element name="batch_effects_residual_method_k1"> <assert_contents> <has_text_matching expression="identifier\tcondition\tW_1"/> - <has_text_matching expression="sailfish_quant.sf1.tab\tTreated\t-0.22.*"/> + <has_text_matching expression=".*sailfish_quant.sf1.tab\tTreated\t.+"/> </assert_contents> </element> </output_collection>