Mercurial > repos > iuc > deseq2
changeset 31:9a882d108833 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 469558ddf5bc6249874fe5826637fd6ee81588cf
author | iuc |
---|---|
date | Tue, 18 Jul 2023 14:58:52 +0000 |
parents | 8fe98f7094de |
children | |
files | deseq2.R deseq2.xml deseq2_macros.xml macros.xml test-data/size_factors_out.tsv |
diffstat | 5 files changed, 260 insertions(+), 135 deletions(-) [+] |
line wrap: on
line diff
--- a/deseq2.R Fri Aug 26 11:16:15 2022 +0000 +++ b/deseq2.R Tue Jul 18 14:58:52 2023 +0000 @@ -36,9 +36,6 @@ q("no", 1, FALSE) }) -# we need that to not crash galaxy with an UTF8 error on German LC settings. -loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") - library("getopt") library("tools") options(stringAsFactors = FALSE, useFancyQuotes = FALSE) @@ -69,7 +66,7 @@ "outlier_replace_off", "a", 0, "logical", "outlier_filter_off", "b", 0, "logical", "auto_mean_filter_off", "c", 0, "logical", - "beta_prior_off", "d", 0, "logical", + "use_beta_priors", "d", 0, "logical", "alpha_ma", "A", 1, "numeric", "prefilter", "P", 0, "logical", "prefilter_value", "V", 1, "numeric" @@ -217,11 +214,27 @@ } dds <- get_deseq_dataset(sample_table, header = opt$header, design_formula = design_formula, tximport = opt$tximport, txtype = opt$txtype, tx2gene = opt$tx2gene) -# estimate size factors for the chosen method + +# use/estimate size factors with the chosen method if (!is.null(opt$esf)) { - dds <- estimateSizeFactors(dds, type = opt$esf) + if (opt$esf %in% list("ratio", "poscounts", "iterate")) { + cat("Calculating size factors de novo\n") + dds <- estimateSizeFactors(dds, type = opt$esf) + } else { + sf_table <- read.table(opt$esf) + # Sort the provided size factors just in case the order differs from the input file order. + merged_table <- merge(sample_table, sf_table, by.x = 0, by.y = 1, sort = FALSE) + sf_values <- as.numeric(unlist(merged_table[5])) + "sizeFactors"(dds) <- sf_values + + cat("Using user-provided size factors:\n") + print(sf_values) + } +} else { + cat("No size factor was used\n") } + # estimate size factors for each sample # - https://support.bioconductor.org/p/97676/ if (!is.null(opt$sizefactorsfile)) { @@ -312,12 +325,15 @@ } # shrinkage of LFCs -if (is.null(opt$beta_prior_off)) { - beta_prior <- TRUE +if (is.null(opt$use_beta_priors)) { + beta_prior <- FALSE + if (verbose) + cat("Applied default - beta prior off\n") } else { - beta_prior <- FALSE - if (verbose) cat("beta prior off\n") + beta_prior <- opt$use_beta_priors } +sprintf("use_beta_prior is set to %s", beta_prior) + # dispersion fit type if (is.null(opt$fit_type)) {
--- a/deseq2.xml Fri Aug 26 11:16:15 2022 +0000 +++ b/deseq2.xml Tue Jul 18 14:58:52 2023 +0000 @@ -1,24 +1,24 @@ -<tool id="deseq2" name="DESeq2" version="@TOOL_VERSION@+galaxy@SUFFIX_VERSION@"> +<tool id="deseq2" name="DESeq2" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> <description>Determines differentially expressed features from count tables</description> <macros> - <import>deseq2_macros.xml</import> + <import>macros.xml</import> </macros> + <expand macro='edam_ontology'/> + <expand macro='xrefs'/> <expand macro='requirements'/> - <expand macro='edam_ontology' /> - <expand macro='xrefs'/> <stdio> <regex match="Execution halted" source="both" level="fatal" - description="Execution halted." /> + description="Execution halted."/> <regex match="Error in" source="both" level="fatal" - description="An undefined error occurred, please check your input carefully and contact your administrator." /> + description="An undefined error occurred, please check your input carefully and contact your administrator."/> <regex match="Fatal error" source="both" level="fatal" - description="An undefined error occurred, please check your input carefully and contact your administrator." /> + description="An undefined error occurred, please check your input carefully and contact your administrator."/> </stdio> <version_command><![CDATA[ echo $(R --version | grep version | grep -v GNU)", DESeq2 version" $(R --vanilla --slave -e "library(DESeq2); cat(sessionInfo()\$otherPkgs\$DESeq2\$Version)" 2> /dev/null | grep -v -i "WARNING: ") @@ -85,8 +85,12 @@ -f '#echo json.dumps(temp_factor_names)#' -l '#echo json.dumps(filename_to_element_identifiers)#' - #if $advanced_options.esf: - -e $advanced_options.esf + #if $advanced_options.esf_cond.esf: + #if $advanced_options.esf_cond.esf == "user": + -e $advanced_options.esf_cond.size_factor_input + #else: + -e $advanced_options.esf_cond.esf + #end if #end if -t $advanced_options.fit_type #if $batch_factors: @@ -96,15 +100,12 @@ $advanced_options.prefilter_conditional.prefilter -V $advanced_options.prefilter_conditional.prefilter_value #end if - #if $advanced_options.outlier_replace_off: - -a - #end if - #if $advanced_options.outlier_filter_off: - -b - #end if - #if $advanced_options.auto_mean_filter_off: - -c - #end if + + $advanced_options.outlier_replace_off + $advanced_options.outlier_filter_off + $advanced_options.auto_mean_filter_off + $advanced_options.use_beta_priors + #if 'many_contrasts' in $output_options.output_selector -m #end if @@ -116,7 +117,6 @@ #else: -x mapping.txt #end if - #end if ]]></command> <inputs> @@ -139,7 +139,7 @@ </conditional> <param name="batch_factors" type="data" format="tabular" optional="true" label="(Optional) provide a tabular file with additional batch factors to include in the model." help="You can produce this file using RUVSeq or svaseq."/> - <param name="header" type="boolean" truevalue="-H" falsevalue="" checked="true" label="Files have header?" help="If this option is set to Yes, the tool will assume that the count files have column headers in the first row. Default: Yes" /> + <param name="header" type="boolean" truevalue="-H" falsevalue="" checked="true" label="Files have header?" help="If this option is set to Yes, the tool will assume that the count files have column headers in the first row. Default: Yes"/> <conditional name="tximport"> <param name="tximport_selector" type="select" label="Choice of Input data"> @@ -165,39 +165,49 @@ </when> </conditional> </when> - <when value="count" /> + <when value="count"/> </conditional> <section name="advanced_options" title="Advanced options"> - <param name="esf" type="select" label="Method for estimateSizeFactors" - help="Method for estimation: either 'ratio', 'poscounts', or 'iterate'. 'ratio' uses the standard median ratio method introduced in DESeq. - The size factor is the median ratio of the sample over a 'pseudosample': for each gene, the geometric mean of all samples. - 'poscounts' and 'iterate' offer alternative estimators, which can be used even when all genes contain a sample with a zero (a problem - for the default method, as the geometric mean becomes zero, and the ratio undefined). The 'poscounts' estimator deals with a gene with - some zeros, by calculating a modified geometric mean by taking the n-th root of the product of the non-zero counts. This evolved out of - use cases with Paul McMurdie's phyloseq package for metagenomic samples. The 'iterate' estimator iterates between estimating the dispersion - with a design of ~1, and finding a size factor vector by numerically optimizing the likelihood of the ~1 model."> - <option value="" selected="true">No Selection (use default)</option> - <option value="ratio">ratio</option> - <option value="poscounts">poscounts</option> - <option value="iterate">iterate</option> - </param> + <conditional name="esf_cond"> + <param name="esf" type="select" label="Method for estimateSizeFactors" + help="Method for estimation: either 'ratio', 'poscounts', or 'iterate'. 'ratio' uses the standard median ratio method introduced in DESeq. + The size factor is the median ratio of the sample over a 'pseudosample': for each gene, the geometric mean of all samples. + 'poscounts' and 'iterate' offer alternative estimators, which can be used even when all genes contain a sample with a zero (a problem + for the default method, as the geometric mean becomes zero, and the ratio undefined). The 'poscounts' estimator deals with a gene with + some zeros, by calculating a modified geometric mean by taking the n-th root of the product of the non-zero counts. This evolved out of + use cases with Paul McMurdie's phyloseq package for metagenomic samples. The 'iterate' estimator iterates between estimating the dispersion + with a design of ~1, and finding a size factor vector by numerically optimizing the likelihood of the ~1 model."> + <option value="" selected="true">No Selection (use default)</option> + <option value="ratio">ratio</option> + <option value="poscounts">poscounts</option> + <option value="iterate">iterate</option> + <option value="user">User-provided</option> + </param> + <when value=""/> + <when value="ratio"/> + <when value="poscounts"/> + <when value="iterate"/> + <when value="user"> + <param name="size_factor_input" type="data" format="tabular" label="File with custom size factors" help="The input must be a 2-column file: col1 should have the input file names. Col2 should contain your custom size factors."/> + </when> + </conditional> <param name="fit_type" type="select" label="Fit type"> <option value="1" selected="true">parametric</option> <option value="2">local</option> <option value="3">mean</option> </param> - <param name="outlier_replace_off" type="boolean" truevalue="1" falsevalue="0" checked="false" + <param name="outlier_replace_off" type="boolean" truevalue="-a" falsevalue="" checked="false" label="Turn off outliers replacement (only affects with >6 replicates)" help="When there are more than 6 replicates for a given sample, the DESeq2 will automatically replace counts with large Cook’s distance with the trimmed mean over all samples, scaled up by the size factor - or normalization factor for that sample" /> - <param name="outlier_filter_off" type="boolean" truevalue="1" falsevalue="0" checked="false" + or normalization factor for that sample"/> + <param name="outlier_filter_off" type="boolean" truevalue="-b" falsevalue="" checked="false" label="Turn off outliers filtering (only affects with >2 replicates)" help="When there are more than 2 replicates for a given sample, the DESeq2 will automatically - filter genes which contain a Cook’s distance above a cutoff" /> - <param name="auto_mean_filter_off" type="boolean" truevalue="1" falsevalue="0" checked="false" + filter genes which contain a Cook’s distance above a cutoff"/> + <param name="auto_mean_filter_off" type="boolean" truevalue="-c" falsevalue="" checked="false" label="Turn off independent filtering" - help=" DESeq2 performs independent filtering by default using the mean of normalized counts as a filter statistic" /> + help=" DESeq2 performs independent filtering by default using the mean of normalized counts as a filter statistic"/> <conditional name="prefilter_conditional"> <param name="prefilter" type="select" label="Perform pre-filtering" help="While it is not necessary to pre-filter low count genes before running the DESeq2 functions, there are two reasons which make pre-filtering useful: @@ -207,28 +217,31 @@ <option value="" selected="true">Disabled</option> </param> <when value="-P"> - <param name="prefilter_value" type="integer" min="0" value="10" label="Pre-filter value" help="Keep only rows that have at least N reads total." /> + <param name="prefilter_value" type="integer" min="0" value="10" label="Pre-filter value" help="Keep only rows that have at least N reads total."/> </when> <when value=""/> </conditional> + <param name="use_beta_priors" type="boolean" truevalue="-d" falsevalue="" checked="false" + label="Use beta priors" + help="Whether or not to put a zero-mean normal prior on the non-intercept coefficients"/> </section> <section name="output_options" title="Output options"> <param name="output_selector" type="select" multiple="True" optional="true" display="checkboxes" label="Output selector"> <option value="pdf" selected="True">Generate plots for visualizing the analysis results</option> - <option value="sizefactors" >Output sample size factors</option> + <option value="sizefactors">Output sample size factors</option> <option value="normCounts">Output normalised counts</option> <option value="normVST">Output VST normalized table</option> <option value="normRLog">Output rLog normalized table</option> <option value="many_contrasts">Output all levels vs all levels of primary factor (use when you have >2 levels for primary factor)</option> </param> - <param name="alpha_ma" type="float" min="0" max="0.5" value="0.1" label="Alpha value for MA-plot" help="Default value is 0.1. This option is only meaninful when plots are generated" /> + <param name="alpha_ma" type="float" min="0" max="0.5" value="0.1" label="Alpha value for MA-plot" help="Default value is 0.1. This option is only meaninful when plots are generated"/> </section> </inputs> <outputs> <data name="deseq_out" format="tabular" label="DESeq2 result file on ${on_string}"> <filter>'many_contrasts' not in output_options['output_selector']</filter> <actions> - <action name="column_names" type="metadata" default="GeneID,Base mean,log2(FC),StdErr,Wald-Stats,P-value,P-adj" /> + <action name="column_names" type="metadata" default="GeneID,Base mean,log2(FC),StdErr,Wald-Stats,P-value,P-adj"/> </actions> </data> <collection name="split_output" type="list" label="DESeq2 result files on ${on_string}"> @@ -265,30 +278,33 @@ <param name="countsFile" value="GSM461176_untreat_single.counts,GSM461177_untreat_paired.counts,GSM461178_untreat_paired.counts,GSM461182_untreat_single.counts"/> </repeat> </repeat> + <section name="advanced_options"> + <param name="use_beta_priors" value="1"/> + </section> <section name="output_options"> <param name="output_selector" value="normCounts,normRLog,normVST"/> </section> <output name="counts_out"> <assert_contents> - <has_text_matching expression="GSM461176_untreat_single.counts\tGSM461177_untreat_paired.counts\tGSM461178_untreat_paired.counts\tGSM461182_untreat_single.counts\tGSM461179_treat_single.counts\tGSM461180_treat_paired.counts\tGSM461181_treat_paired.counts" /> - <has_text_matching expression="FBgn0000003\t0\t0\t0\t0\t0\t0\t0" /> + <has_text_matching expression="GSM461176_untreat_single.counts\tGSM461177_untreat_paired.counts\tGSM461178_untreat_paired.counts\tGSM461182_untreat_single.counts\tGSM461179_treat_single.counts\tGSM461180_treat_paired.counts\tGSM461181_treat_paired.counts"/> + <has_text_matching expression="FBgn0000003\t0\t0\t0\t0\t0\t0\t0"/> </assert_contents> </output> <output name="rlog_out"> <assert_contents> - <has_text_matching expression="GSM461176_untreat_single.counts\tGSM461177_untreat_paired.counts\tGSM461178_untreat_paired.counts\tGSM461182_untreat_single.counts\tGSM461179_treat_single.counts\tGSM461180_treat_paired.counts\tGSM461181_treat_paired.counts" /> - <has_text_matching expression="FBgn0000003\t0\t0\t0\t0\t0\t0\t0" /> + <has_text_matching expression="GSM461176_untreat_single.counts\tGSM461177_untreat_paired.counts\tGSM461178_untreat_paired.counts\tGSM461182_untreat_single.counts\tGSM461179_treat_single.counts\tGSM461180_treat_paired.counts\tGSM461181_treat_paired.counts"/> + <has_text_matching expression="FBgn0000003\t0\t0\t0\t0\t0\t0\t0"/> </assert_contents> </output> <output name="vst_out"> <assert_contents> - <has_text_matching expression="GSM461176_untreat_single.counts\tGSM461177_untreat_paired.counts\tGSM461178_untreat_paired.counts\tGSM461182_untreat_single.counts\tGSM461179_treat_single.counts\tGSM461180_treat_paired.counts\tGSM461181_treat_paired.counts" /> - <has_text_matching expression="FBgn0000003\t5.*\t5.*\t5.*\t5.*\t5.*\t5.*\t5.*" /> + <has_text_matching expression="GSM461176_untreat_single.counts\tGSM461177_untreat_paired.counts\tGSM461178_untreat_paired.counts\tGSM461182_untreat_single.counts\tGSM461179_treat_single.counts\tGSM461180_treat_paired.counts\tGSM461181_treat_paired.counts"/> + <has_text_matching expression="FBgn0000003\t5.*\t5.*\t5.*\t5.*\t5.*\t5.*\t5.*"/> </assert_contents> </output> <output name="deseq_out" > <assert_contents> - <has_text_matching expression="FBgn0003360\t1933\.9504.*\t-2\.8399.*\t0\.1309.*\t-21\.68.*\t.*e-104\t.*e-101" /> + <has_text_matching expression="FBgn0003360\t1933\.9504.*\t-2\.8399.*\t0\.1309.*\t-21\.68.*\t.*e-104\t.*e-101"/> <has_n_lines n="3999"/> </assert_contents> </output> @@ -307,12 +323,41 @@ </repeat> </repeat> <param name="batch_factors" value="batch_factors.tab"/> + <section name="advanced_options"> + <param name="use_beta_priors" value="1"/> + </section> <section name="output_options"> <param name="output_selector" value="normCounts"/> </section> <output name="deseq_out"> <assert_contents> - <has_text_matching expression="FBgn0003360\t1933.*\t-2.9.*\t0.1.*\t-26.*\t1.*-152\t4.*-149" /> + <has_text_matching expression="FBgn0003360\t1933.*\t-2.9.*\t0.1.*\t-26.*\t1.*-152\t4.*-149"/> + </assert_contents> + </output> + </test> + <!-- Same as above, but without beta priors --> + <test expect_num_outputs="2"> + <repeat name="rep_factorName"> + <param name="factorName" value="Treatment"/> + <repeat name="rep_factorLevel"> + <param name="factorLevel" value="Treated"/> + <param name="countsFile" value="GSM461179_treat_single.counts,GSM461180_treat_paired.counts,GSM461181_treat_paired.counts"/> + </repeat> + <repeat name="rep_factorLevel"> + <param name="factorLevel" value="Untreated"/> + <param name="countsFile" value="GSM461176_untreat_single.counts,GSM461177_untreat_paired.counts,GSM461178_untreat_paired.counts,GSM461182_untreat_single.counts"/> + </repeat> + </repeat> + <param name="batch_factors" value="batch_factors.tab"/> + <section name="advanced_options"> + <param name="use_beta_priors" value="0"/> + </section> + <section name="output_options"> + <param name="output_selector" value="normCounts"/> + </section> + <output name="deseq_out"> + <assert_contents> + <has_text_matching expression="FBgn0003360\t1933.*\t-3.*\t0.1.*\t-26.*\t6.*-151\t1.*-147"/> </assert_contents> </output> </test> @@ -330,30 +375,33 @@ </repeat> </repeat> <param name="header" value="False"/> + <section name="advanced_options"> + <param name="use_beta_priors" value="1"/> + </section> <section name="output_options"> <param name="output_selector" value="normCounts,normRLog,normVST"/> </section> <output name="counts_out"> <assert_contents> - <has_text_matching expression="GSM461176_untreat_single.counts.noheader\tGSM461177_untreat_paired.counts.noheader\tGSM461178_untreat_paired.counts.noheader\tGSM461182_untreat_single.counts.noheader\tGSM461179_treat_single.counts.noheader\tGSM461180_treat_paired.counts.noheader\tGSM461181_treat_paired.counts.noheader" /> - <has_text_matching expression="FBgn0000003\t0\t0\t0\t0\t0\t0\t0" /> + <has_text_matching expression="GSM461176_untreat_single.counts.noheader\tGSM461177_untreat_paired.counts.noheader\tGSM461178_untreat_paired.counts.noheader\tGSM461182_untreat_single.counts.noheader\tGSM461179_treat_single.counts.noheader\tGSM461180_treat_paired.counts.noheader\tGSM461181_treat_paired.counts.noheader"/> + <has_text_matching expression="FBgn0000003\t0\t0\t0\t0\t0\t0\t0"/> </assert_contents> </output> <output name="rlog_out"> <assert_contents> - <has_text_matching expression="GSM461176_untreat_single.counts.noheader\tGSM461177_untreat_paired.counts.noheader\tGSM461178_untreat_paired.counts.noheader\tGSM461182_untreat_single.counts.noheader\tGSM461179_treat_single.counts.noheader\tGSM461180_treat_paired.counts.noheader\tGSM461181_treat_paired.counts.noheader" /> - <has_text_matching expression="FBgn0000003\t0\t0\t0\t0\t0\t0\t0" /> + <has_text_matching expression="GSM461176_untreat_single.counts.noheader\tGSM461177_untreat_paired.counts.noheader\tGSM461178_untreat_paired.counts.noheader\tGSM461182_untreat_single.counts.noheader\tGSM461179_treat_single.counts.noheader\tGSM461180_treat_paired.counts.noheader\tGSM461181_treat_paired.counts.noheader"/> + <has_text_matching expression="FBgn0000003\t0\t0\t0\t0\t0\t0\t0"/> </assert_contents> </output> <output name="vst_out"> <assert_contents> - <has_text_matching expression="GSM461176_untreat_single.counts.noheader\tGSM461177_untreat_paired.counts.noheader\tGSM461178_untreat_paired.counts.noheader\tGSM461182_untreat_single.counts.noheader\tGSM461179_treat_single.counts.noheader\tGSM461180_treat_paired.counts.noheader\tGSM461181_treat_paired.counts.noheader" /> - <has_text_matching expression="FBgn0000003\t5.*\t5.*\t5.*\t5.*\t5.*\t5.*\t5.*" /> + <has_text_matching expression="GSM461176_untreat_single.counts.noheader\tGSM461177_untreat_paired.counts.noheader\tGSM461178_untreat_paired.counts.noheader\tGSM461182_untreat_single.counts.noheader\tGSM461179_treat_single.counts.noheader\tGSM461180_treat_paired.counts.noheader\tGSM461181_treat_paired.counts.noheader"/> + <has_text_matching expression="FBgn0000003\t5.*\t5.*\t5.*\t5.*\t5.*\t5.*\t5.*"/> </assert_contents> </output> <output name="deseq_out" > <assert_contents> - <has_text_matching expression="FBgn0003360\t1933\.9504.*\t-2\.8399.*\t0\.1309.*\t-21\.68.*\t.*e-104\t.*e-101" /> + <has_text_matching expression="FBgn0003360\t1933\.9504.*\t-2\.8399.*\t0\.1309.*\t-21\.68.*\t.*e-104\t.*e-101"/> </assert_contents> </output> </test> @@ -370,6 +418,9 @@ <param name="countsFile" value="sailfish/sailfish_quant.sf4.tab,sailfish/sailfish_quant.sf5.tab,sailfish/sailfish_quant.sf6.tab"/> </repeat> </repeat> + <section name="advanced_options"> + <param name="use_beta_priors" value="1"/> + </section> <section name="output_options"> <param name="output_selector" value=""/> </section> @@ -379,7 +430,7 @@ <param name="tabular_file" value="tx2gene.tab"/> <output name="deseq_out" > <assert_contents> - <has_text_matching expression="UGT3A2\t1.8841.*\t-0.1329.*\t0.6936.*\t-0.1917.*\t0.8479.*\t0.9999.*" /> + <has_text_matching expression="UGT3A2\t1.8841.*\t-0.1329.*\t0.6936.*\t-0.1917.*\t0.8479.*\t0.9999.*"/> </assert_contents> </output> </test> @@ -396,6 +447,9 @@ <param name="countsFile" value="sailfish/sailfish_quant.sf4.tab,sailfish/sailfish_quant.sf5.tab,sailfish/sailfish_quant.sf6.tab"/> </repeat> </repeat> + <section name="advanced_options"> + <param name="use_beta_priors" value="1"/> + </section> <section name="output_options"> <param name="output_selector" value=""/> </section> @@ -405,7 +459,7 @@ <param name="gtf_file" value="GRCh38_latest_genomic.gff"/> <output name="deseq_out" > <assert_contents> - <has_text_matching expression="UGT3A2\t1.8841.*\t-0.1329.*\t0.6936.*\t-0.1917.*\t0.8479.*\t0.9999.*" /> + <has_text_matching expression="UGT3A2\t1.8841.*\t-0.1329.*\t0.6936.*\t-0.1917.*\t0.8479.*\t0.9999.*"/> </assert_contents> </output> </test> @@ -422,6 +476,9 @@ <param name="countsFile" value="sailfish_ensembl/sailfish_quant.sf4.tab,sailfish_ensembl/sailfish_quant.sf5.tab,sailfish_ensembl/sailfish_quant.sf6.tab"/> </repeat> </repeat> + <section name="advanced_options"> + <param name="use_beta_priors" value="1"/> + </section> <section name="output_options"> <param name="output_selector" value=""/> </section> @@ -431,7 +488,7 @@ <param name="gtf_file" value="Homo_sapiens.GRCh38.94.gtf" ftype="gtf"/> <output name="deseq_out" > <assert_contents> - <has_text_matching expression="ENSG00000168671\t1.8841.*\t-0.1180.*\t0.7429.*\t-0.1589.*\t0.8737.*\t0.9999.*" /> + <has_text_matching expression="ENSG00000168671\t1.8841.*\t-0.1180.*\t0.7429.*\t-0.1589.*\t0.8737.*\t0.9999.*"/> </assert_contents> </output> </test> @@ -459,6 +516,9 @@ <param name="groups" value="primary:untreated"/> </repeat> </repeat> + <section name="advanced_options"> + <param name="use_beta_priors" value="1"/> + </section> <section name="output_options"> <param name="output_selector" value=""/> </section> @@ -496,6 +556,9 @@ <param name="groups" value="primary:untreated"/> </repeat> </repeat> + <section name="advanced_options"> + <param name="use_beta_priors" value="1"/> + </section> <section name="output_options"> <param name="output_selector" value="many_contrasts"/> </section> @@ -524,6 +587,9 @@ <param name="countsFile" value="sailfish_ensembl/sailfish_quant.sf4.tab,sailfish_ensembl/sailfish_quant.sf5.tab,sailfish_ensembl/sailfish_quant.sf6.tab"/> </repeat> </repeat> + <section name="advanced_options"> + <param name="use_beta_priors" value="1"/> + </section> <section name="output_options"> <param name="output_selector" value=""/> <param name="alpha_ma" value="0.05"/> @@ -534,7 +600,7 @@ <param name="gtf_file" value="Homo_sapiens.GRCh38.94.gtf" ftype="gtf"/> <output name="deseq_out" > <assert_contents> - <has_text_matching expression="ENSG00000168671\t1.8841.*\t-0.1180.*\t0.7429.*\t-0.1589.*\t0.8737.*\t0.9999.*" /> + <has_text_matching expression="ENSG00000168671\t1.8841.*\t-0.1180.*\t0.7429.*\t-0.1589.*\t0.8737.*\t0.9999.*"/> </assert_contents> </output> </test> @@ -561,8 +627,42 @@ <param name="gtf_file" value="Homo_sapiens.GRCh38.94.gtf" ftype="gtf"/> <output name="sizefactors_out"> <assert_contents> - <has_text_matching expression="sailfish_quant\.sf4\.tab\t0\.8\d+" /> - <has_text_matching expression="sailfish_quant\.sf3\.tab\t1\.0\d+" /> + <has_text_matching expression="sailfish_quant\.sf4\.tab\t0\.8\d+"/> + <has_text_matching expression="sailfish_quant\.sf3\.tab\t1\.0\d+"/> + </assert_contents> + </output> + </test> + <!--Test alpha_ma option, but with user-provided size factors --> + <test expect_num_outputs="1"> + <repeat name="rep_factorName"> + <param name="factorName" value="Treatment"/> + <repeat name="rep_factorLevel"> + <param name="factorLevel" value="Treated"/> + <param name="countsFile" value="sailfish_ensembl/sailfish_quant.sf1.tab,sailfish_ensembl/sailfish_quant.sf2.tab,sailfish_ensembl/sailfish_quant.sf3.tab"/> + </repeat> + <repeat name="rep_factorLevel"> + <param name="factorLevel" value="Untreated"/> + <param name="countsFile" value="sailfish_ensembl/sailfish_quant.sf4.tab,sailfish_ensembl/sailfish_quant.sf5.tab,sailfish_ensembl/sailfish_quant.sf6.tab"/> + </repeat> + </repeat> + <section name="advanced_options"> + <param name="use_beta_priors" value="1"/> + <conditional name="esf_cond"> + <param name="esf" value="user"/> + <param name="size_factor_input" value="size_factors_out.tsv"/> + </conditional> + </section> + <section name="output_options"> + <param name="output_selector" value=""/> + <param name="alpha_ma" value="0.05"/> + </section> + <param name="tximport_selector" value="tximport"/> + <param name="txtype" value="sailfish"/> + <param name="mapping_format_selector" value="gtf"/> + <param name="gtf_file" value="Homo_sapiens.GRCh38.94.gtf" ftype="gtf"/> + <output name="deseq_out"> + <assert_contents> + <has_text_matching expression="ENSG00000168671\t1.90.*\t-0.05.*\t0.75.*\t-0.07.*\t0.94.*\t0.95.*"/> </assert_contents> </output> </test> @@ -580,7 +680,7 @@ </repeat> </repeat> <section name="advanced_options"> - <param name="esf" value="poscounts" /> + <param name="esf" value="poscounts"/> </section> <section name="output_options"> <param name="output_selector" value="sizefactors"/> @@ -592,8 +692,8 @@ <param name="gtf_file" value="Homo_sapiens.GRCh38.94.gtf" ftype="gtf"/> <output name="sizefactors_out" > <assert_contents> - <has_text_matching expression="sailfish_quant\.sf4\.tab\t0\.8\d+" /> - <has_text_matching expression="sailfish_quant\.sf3\.tab\t1\.0\d+" /> + <has_text_matching expression="sailfish_quant\.sf4\.tab\t0\.8\d+"/> + <has_text_matching expression="sailfish_quant\.sf3\.tab\t1\.0\d+"/> </assert_contents> </output> </test> @@ -636,7 +736,7 @@ **What it does** -Estimate variance-mean dependence in count data from high-throughput sequencing assays and test for differential expression based on a model using the negative binomial distribution +Uses DESeq2 version @DESEQ2_VERSION@ to estimate variance-mean dependence in count data from high-throughput sequencing assays and test for differential expression based on a model using the negative binomial distribution. ----- @@ -732,5 +832,5 @@ .. _DESeq2: http://master.bioconductor.org/packages/release/bioc/html/DESeq2.html .. _tximport: https://bioconductor.org/packages/devel/bioc/vignettes/tximport/inst/doc/tximport.html ]]></help> - <expand macro="citations" /> + <expand macro="citations"/> </tool>
--- a/deseq2_macros.xml Fri Aug 26 11:16:15 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,55 +0,0 @@ -<macros> - <xml name="factor_repeat"> - <repeat name="rep_factorName" title="Factor" min="1"> - <param name="factorName" type="text" value="FactorName" label="Specify a factor name, e.g. effects_drug_x or cancer_markers" - help="Only letters, numbers and underscores will be retained in this field"> - <sanitizer> - <valid initial="string.letters,string.digits"><add value="_" /></valid> - </sanitizer> - </param> - <repeat name="rep_factorLevel" title="Factor level" min="2" default="2"> - <param name="factorLevel" type="text" value="FactorLevel" label="Specify a factor level, typical values could be 'tumor', 'normal', 'treated' or 'control'" - help="Only letters, numbers and underscores will be retained in this field"> - <sanitizer> - <valid initial="string.letters,string.digits"><add value="_" /></valid> - </sanitizer> - </param> - <yield/> - </repeat> - </repeat> - </xml> - <xml name="requirements"> - <requirements> - <requirement type="package" version="1.34.0">bioconductor-deseq2</requirement> - <!-- Optional dependency of tximport, needed to import kallisto results https://github.com/galaxyproject/usegalaxy-playbook/issues/161 --> - <requirement type="package" version="2.38.0">bioconductor-rhdf5</requirement> - <requirement type="package" version="1.22.0">bioconductor-tximport</requirement> - <requirement type="package" version="1.46.1">bioconductor-genomicfeatures</requirement> - <requirement type="package" version="1.20.3">r-getopt</requirement> - <requirement type="package" version="0.9.1">r-ggrepel</requirement> - <requirement type="package" version="3.1.1">r-gplots</requirement> - <requirement type="package" version="1.0.12">r-pheatmap</requirement> - <requirement type="package" version="0.2.20">r-rjson</requirement> - </requirements> - </xml> - <token name="@TOOL_VERSION@">2.11.40.7</token> - <token name="@SUFFIX_VERSION@">2</token> - <xml name="edam_ontology"> - <edam_topics> - <edam_topic>topic_3308</edam_topic> - </edam_topics> - <edam_operations> - <edam_operation>operation_3800</edam_operation> - </edam_operations> - </xml> - <xml name="citations"> - <citations> - <citation type="doi">10.1186/s13059-014-0550-8</citation> - </citations> - </xml> - <xml name="xrefs"> - <xrefs> - <xref type='bio.tools'>DESeq2</xref> - </xrefs> - </xml> -</macros> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Tue Jul 18 14:58:52 2023 +0000 @@ -0,0 +1,58 @@ +<macros> + <xml name="factor_repeat"> + <repeat name="rep_factorName" title="Factor" min="1"> + <param name="factorName" type="text" value="FactorName" label="Specify a factor name, e.g. effects_drug_x or cancer_markers" + help="Only letters, numbers and underscores will be retained in this field"> + <sanitizer> + <valid initial="string.letters,string.digits"><add value="_" /></valid> + </sanitizer> + </param> + <repeat name="rep_factorLevel" title="Factor level" min="2" default="2"> + <param name="factorLevel" type="text" value="FactorLevel" label="Specify a factor level, typical values could be 'tumor', 'normal', 'treated' or 'control'" + help="Only letters, numbers and underscores will be retained in this field"> + <sanitizer> + <valid initial="string.letters,string.digits"><add value="_" /></valid> + </sanitizer> + </param> + <yield/> + </repeat> + </repeat> + </xml> + <xml name="requirements"> + <requirements> + <requirement type="package" version="@DESEQ2_VERSION@">bioconductor-deseq2</requirement> + <!-- Optional dependency of tximport, needed to import kallisto results https://github.com/galaxyproject/usegalaxy-playbook/issues/161 --> + <requirement type="package" version="2.44.0">bioconductor-rhdf5</requirement> + <requirement type="package" version="1.28.0">bioconductor-tximport</requirement> + <requirement type="package" version="1.52.1">bioconductor-genomicfeatures</requirement> + <requirement type="package" version="1.20.3">r-getopt</requirement> + <requirement type="package" version="0.9.3">r-ggrepel</requirement> + <requirement type="package" version="3.1.3">r-gplots</requirement> + <requirement type="package" version="1.0.12">r-pheatmap</requirement> + <requirement type="package" version="0.2.21">r-rjson</requirement> + </requirements> + </xml> + <token name="@TOOL_VERSION@">2.11.40.8</token> + <token name="@DESEQ2_VERSION@">1.40.2</token> + <token name="@VERSION_SUFFIX@">0</token> + <token name="@PROFILE@">22.01</token> + <xml name="edam_ontology"> + <edam_topics> + <edam_topic>topic_3308</edam_topic> + </edam_topics> + <edam_operations> + <edam_operation>operation_3800</edam_operation> + </edam_operations> + </xml> + <xml name="citations"> + <citations> + <citation type="doi">10.1186/s13059-014-0550-8</citation> + </citations> + </xml> + <xml name="xrefs"> + <xrefs> + <xref type="bio.tools">DESeq2</xref> + <xref type="bioconductor">deseq2</xref> + </xrefs> + </xml> +</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/size_factors_out.tsv Tue Jul 18 14:58:52 2023 +0000 @@ -0,0 +1,6 @@ +sailfish_quant.sf4.tab 0.84800690799672 +sailfish_quant.sf5.tab 1.10790786350701 +sailfish_quant.sf6.tab 1.21319523337605 +sailfish_quant.sf1.tab 1.19061589081921 +sailfish_quant.sf2.tab 0.712203801356132 +sailfish_quant.sf3.tab 1.03464248515867