Mercurial > repos > iuc > deseq2
changeset 30:8fe98f7094de draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 6868b66f73ddbe947986d1a20b546873cbd515a9
author | iuc |
---|---|
date | Fri, 26 Aug 2022 11:16:15 +0000 |
parents | cd9874cb9019 |
children | 9a882d108833 |
files | deseq2.R deseq2.xml deseq2_macros.xml get_deseq_dataset.R |
diffstat | 4 files changed, 69 insertions(+), 9 deletions(-) [+] |
line wrap: on
line diff
--- a/deseq2.R Mon Nov 29 18:16:48 2021 +0000 +++ b/deseq2.R Fri Aug 26 11:16:15 2022 +0000 @@ -31,8 +31,9 @@ # 3 "mean" # setup R error handling to go to stderr -options(show.error.messages = F, error = function() { - cat(geterrmessage(), file = stderr()); q("no", 1, F) +options(show.error.messages = FALSE, error = function() { + cat(geterrmessage(), file = stderr()) + q("no", 1, FALSE) }) # we need that to not crash galaxy with an UTF8 error on German LC settings. @@ -69,7 +70,9 @@ "outlier_filter_off", "b", 0, "logical", "auto_mean_filter_off", "c", 0, "logical", "beta_prior_off", "d", 0, "logical", - "alpha_ma", "A", 1, "numeric" + "alpha_ma", "A", 1, "numeric", + "prefilter", "P", 0, "logical", + "prefilter_value", "V", 1, "numeric" ), byrow = TRUE, ncol = 4) opt <- getopt(spec) @@ -239,7 +242,7 @@ size_factors <- estimateSizeFactorsForMatrix(counts(dds)) } } - write.table(size_factors, file = opt$sizefactorsfile, sep = "\t", col.names = F, quote = FALSE) + write.table(size_factors, file = opt$sizefactorsfile, sep = "\t", col.names = FALSE, quote = FALSE) } apply_batch_factors <- function(dds, batch_factors) { @@ -253,7 +256,7 @@ dds_data <- colData(dds) # Merge dds_data with batch_factors using indexes, which are sample names # Set sort to False, which maintains the order in dds_data - reordered_batch <- merge(dds_data, batch_factors, by.x = 0, by.y = 0, sort = F) + reordered_batch <- merge(dds_data, batch_factors, by.x = 0, by.y = 0, sort = FALSE) batch_factors <- reordered_batch[, ncol(dds_data):ncol(reordered_batch)] for (factor in colnames(batch_factors)) { dds[[factor]] <- batch_factors[[factor]] @@ -263,7 +266,7 @@ } if (!is.null(opt$batch_factors)) { - batch_factors <- read.table(opt$batch_factors, sep = "\t", header = T) + batch_factors <- read.table(opt$batch_factors, sep = "\t", header = TRUE) dds <- apply_batch_factors(dds = dds, batch_factors = batch_factors) batch_design <- colnames(batch_factors)[-c(1, 2)] design_formula <- as.formula(paste("~", paste(c(batch_design, rev(factors)), collapse = " + "))) @@ -280,6 +283,12 @@ cat(paste(ncol(dds), "samples with counts over", nrow(dds), "genes\n")) } +# minimal pre-filtering +if (!is.null(opt$prefilter)) { + keep <- rowSums(counts(dds)) >= opt$prefilter_value + dds <- dds[keep, ] +} + # optional outlier behavior if (is.null(opt$outlier_replace_off)) { min_rep <- 7
--- a/deseq2.xml Mon Nov 29 18:16:48 2021 +0000 +++ b/deseq2.xml Fri Aug 26 11:16:15 2022 +0000 @@ -92,6 +92,10 @@ #if $batch_factors: --batch_factors '$batch_factors' #end if + #if $advanced_options.prefilter_conditional.prefilter: + $advanced_options.prefilter_conditional.prefilter + -V $advanced_options.prefilter_conditional.prefilter_value + #end if #if $advanced_options.outlier_replace_off: -a #end if @@ -194,6 +198,19 @@ <param name="auto_mean_filter_off" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Turn off independent filtering" help=" DESeq2 performs independent filtering by default using the mean of normalized counts as a filter statistic" /> + <conditional name="prefilter_conditional"> + <param name="prefilter" type="select" label="Perform pre-filtering" help="While it is not necessary to pre-filter + low count genes before running the DESeq2 functions, there are two reasons which make pre-filtering useful: + by removing rows in which there are very few reads, we reduce the required memory, and we increase the speed. + It can also improve visualizations, as features with no information for differential expression are not plotted."> + <option value="-P">Enabled</option> + <option value="" selected="true">Disabled</option> + </param> + <when value="-P"> + <param name="prefilter_value" type="integer" min="0" value="10" label="Pre-filter value" help="Keep only rows that have at least N reads total." /> + </when> + <when value=""/> + </conditional> </section> <section name="output_options" title="Output options"> <param name="output_selector" type="select" multiple="True" optional="true" display="checkboxes" label="Output selector"> @@ -272,6 +289,7 @@ <output name="deseq_out" > <assert_contents> <has_text_matching expression="FBgn0003360\t1933\.9504.*\t-2\.8399.*\t0\.1309.*\t-21\.68.*\t.*e-104\t.*e-101" /> + <has_n_lines n="3999"/> </assert_contents> </output> </test> @@ -579,6 +597,39 @@ </assert_contents> </output> </test> + <!--Test prefilter parameter --> + <test expect_num_outputs="2"> + <repeat name="rep_factorName"> + <param name="factorName" value="Treatment"/> + <repeat name="rep_factorLevel"> + <param name="factorLevel" value="Treated"/> + <param name="countsFile" value="GSM461179_treat_single.counts,GSM461180_treat_paired.counts,GSM461181_treat_paired.counts"/> + </repeat> + <repeat name="rep_factorLevel"> + <param name="factorLevel" value="Untreated"/> + <param name="countsFile" value="GSM461176_untreat_single.counts,GSM461177_untreat_paired.counts,GSM461178_untreat_paired.counts,GSM461182_untreat_single.counts"/> + </repeat> + </repeat> + <section name="advanced_options"> + <conditional name="prefilter_conditional"> + <param name="prefilter" value="-P"/> + <param name="prefilter_value" value="10"/> + </conditional> + </section> + <section name="output_options"> + <param name="output_selector" value="normCounts"/> + </section> + <output name="counts_out"> + <assert_contents> + <has_n_lines n="2922"/> + </assert_contents> + </output> + <output name="deseq_out" > + <assert_contents> + <has_n_lines n="2921"/> <!-- Smallen value when compared with the first test--> + </assert_contents> + </output> + </test> </tests> <help><![CDATA[ .. class:: infomark
--- a/deseq2_macros.xml Mon Nov 29 18:16:48 2021 +0000 +++ b/deseq2_macros.xml Fri Aug 26 11:16:15 2022 +0000 @@ -33,7 +33,7 @@ </requirements> </xml> <token name="@TOOL_VERSION@">2.11.40.7</token> - <token name="@SUFFIX_VERSION@">1</token> + <token name="@SUFFIX_VERSION@">2</token> <xml name="edam_ontology"> <edam_topics> <edam_topic>topic_3308</edam_topic>
--- a/get_deseq_dataset.R Mon Nov 29 18:16:48 2021 +0000 +++ b/get_deseq_dataset.R Fri Aug 26 11:16:15 2022 +0000 @@ -14,7 +14,7 @@ } } - if (!use_txi & has_header) { + if (!use_txi && has_header) { countfiles <- lapply(as.character(sample_table$filename), read.delim, row.names = 1) tbl <- do.call("cbind", countfiles) colnames(tbl) <- rownames(sample_table) # take sample ids from header @@ -35,7 +35,7 @@ colData = subset(sample_table, select = -filename), design = design_formula ) - } else if (!use_txi & !has_header) { + } else if (!use_txi && !has_header) { # construct the object from HTSeq files dds <- DESeqDataSetFromHTSeqCount(