Mercurial > repos > iuc > deseq2
changeset 27:d027d1f4984e draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit c8e7020d39d581d7374a13cf94c64998a6481e05"
author | iuc |
---|---|
date | Mon, 28 Jun 2021 05:14:25 +0000 |
parents | 6a3a025714d3 |
children | 7ff33c2d525b |
files | deseq2.R deseq2.xml deseq2_macros.xml |
diffstat | 3 files changed, 153 insertions(+), 93 deletions(-) [+] |
line wrap: on
line diff
--- a/deseq2.R Fri Jan 08 20:29:54 2021 +0000 +++ b/deseq2.R Mon Jun 28 05:14:25 2021 +0000 @@ -67,7 +67,8 @@ "outlier_replace_off", "a", 0, "logical", "outlier_filter_off", "b", 0, "logical", "auto_mean_filter_off", "c", 0, "logical", - "beta_prior_off", "d", 0, "logical" + "beta_prior_off", "d", 0, "logical", + "alpha_ma", "A", 1, "numeric" ), byrow = TRUE, ncol = 4) opt <- getopt(spec) @@ -200,7 +201,7 @@ text(x = c(0, length(h1$counts)), y = 0, label = paste(c(0, 1)), adj = c(0.5, 1.7), xpd = NA) legend("topright", fill = rev(colori), legend = rev(names(colori)), bg = "white") } - plotMA(res, main = paste("MA-plot for", title_suffix), ylim = range(res$log2FoldChange, na.rm = TRUE)) + plotMA(res, main = paste("MA-plot for", title_suffix), ylim = range(res$log2FoldChange, na.rm = TRUE), alpha = opt$alpha_ma) } if (verbose) {
--- a/deseq2.xml Fri Jan 08 20:29:54 2021 +0000 +++ b/deseq2.xml Mon Jun 28 05:14:25 2021 +0000 @@ -1,20 +1,11 @@ -<tool id="deseq2" name="DESeq2" version="2.11.40.6+galaxy1"> +<tool id="deseq2" name="DESeq2" version="@TOOL_VERSION@+galaxy@SUFFIX_VERSION@"> <description>Determines differentially expressed features from count tables</description> <macros> <import>deseq2_macros.xml</import> </macros> - <requirements> - <requirement type="package" version="1.22.1">bioconductor-deseq2</requirement> - <!-- Optional dependency of tximport, needed to import kallisto results https://github.com/galaxyproject/usegalaxy-playbook/issues/161 --> - <requirement type="package" version="2.26.2">bioconductor-rhdf5</requirement> - <requirement type="package" version="1.10.0">bioconductor-tximport</requirement> - <requirement type="package" version="1.34.1">bioconductor-genomicfeatures</requirement> - <requirement type="package" version="1.20.2">r-getopt</requirement> - <requirement type="package" version="0.8.0">r-ggrepel</requirement> - <requirement type="package" version="3.0.1">r-gplots</requirement> - <requirement type="package" version="1.0.10">r-pheatmap</requirement> - <requirement type="package" version="0.2.20">r-rjson</requirement> - </requirements> + <expand macro='requirements'/> + <expand macro='edam_ontology' /> + <expand macro='xrefs'/> <stdio> <regex match="Execution halted" source="both" @@ -49,16 +40,17 @@ Rscript '${__tool_directory__}/deseq2.R' --cores \${GALAXY_SLOTS:-1} -o '$deseq_out' - #if $pdf: + #if 'pdf' in $output_options.output_selector: -p '$plots' #end if - #if $normCounts: + -A $output_options.alpha_ma + #if 'normCounts' in $output_options.output_selector: -n '$counts_out' #end if - #if $normRLog: + #if 'normRLog' in $output_options.output_selector: -r '$rlog_out' #end if - #if $normVST: + #if 'normVST' in $output_options.output_selector: -v '$vst_out' #end if #set $filename_to_element_identifiers = {} @@ -90,23 +82,23 @@ -f '#echo 
json.dumps(temp_factor_names)#' -l '#echo json.dumps(filename_to_element_identifiers)#' - #if $esf: - -e $esf + #if $advanced_options.esf: + -e $advanced_options.esf #end if - -t $fit_type + -t $advanced_options.fit_type #if $batch_factors: --batch_factors '$batch_factors' #end if - #if $outlier_replace_off: + #if $advanced_options.outlier_replace_off: -a #end if - #if $outlier_filter_off: + #if $advanced_options.outlier_filter_off: -b #end if - #if $auto_mean_filter_off: + #if $advanced_options.auto_mean_filter_off: -c #end if - #if $many_contrasts: + #if 'many_contrasts' in $output_options.output_selector -m #end if #if $tximport.tximport_selector == 'tximport': @@ -168,71 +160,71 @@ </when> <when value="count" /> </conditional> - <param name="pdf" type="boolean" truevalue="1" falsevalue="0" checked="true" - label="Visualising the analysis results" - help="output an additional PDF files" /> - <param name="normCounts" type="boolean" truevalue="1" falsevalue="0" checked="false" - label="Output normalized counts table" /> - <param name="normRLog" type="boolean" truevalue="1" falsevalue="0" checked="false" - label="Output rLog normalized table" /> - <param name="normVST" type="boolean" truevalue="1" falsevalue="0" checked="false" - label="Output VST normalized table" /> - <param name="many_contrasts" type="boolean" truevalue="1" falsevalue="0" checked="false" - label="Output all levels vs all levels of primary factor (use when you have >2 levels for primary factor)" - help=" DESeq2 performs independent filtering by default using the mean of normalized counts as a filter statistic" /> - <param name="esf" type="select" label="(Optional) Method for estimateSizeFactors" - help="Method for estimation: either 'ratio', 'poscounts', or 'iterate'. 'ratio' uses the standard median ratio method introduced in DESeq. - The size factor is the median ratio of the sample over a 'pseudosample': for each gene, the geometric mean of all samples. 
- 'poscounts' and 'iterate' offer alternative estimators, which can be used even when all genes contain a sample with a zero (a problem - for the default method, as the geometric mean becomes zero, and the ratio undefined). The 'poscounts' estimator deals with a gene with - some zeros, by calculating a modified geometric mean by taking the n-th root of the product of the non-zero counts. This evolved out of - use cases with Paul McMurdie's phyloseq package for metagenomic samples. The 'iterate' estimator iterates between estimating the dispersion - with a design of ~1, and finding a size factor vector by numerically optimizing the likelihood of the ~1 model."> - <option value="" selected="true">No Selection (use default)</option> - <option value="ratio">ratio</option> - <option value="poscounts">poscounts</option> - <option value="iterate">iterate</option> - </param> - <param name="fit_type" type="select" label="Fit type"> - <option value="1" selected="true">parametric</option> - <option value="2">local</option> - <option value="3">mean</option> - </param> - <param name="outlier_replace_off" type="boolean" truevalue="1" falsevalue="0" checked="false" - label="Turn off outliers replacement (only affects with >6 replicates)" - help="When there are more than 6 replicates for a given sample, the DESeq2 will automatically replace - counts with large Cook’s distance with the trimmed mean over all samples, scaled up by the size factor - or normalization factor for that sample" /> - <param name="outlier_filter_off" type="boolean" truevalue="1" falsevalue="0" checked="false" - label="Turn off outliers filtering (only affects with >2 replicates)" - help="When there are more than 2 replicates for a given sample, the DESeq2 will automatically - filter genes which contain a Cook’s distance above a cutoff" /> - <param name="auto_mean_filter_off" type="boolean" truevalue="1" falsevalue="0" checked="false" - label="Turn off independent filtering" - help=" DESeq2 performs 
independent filtering by default using the mean of normalized counts as a filter statistic" /> + <section name="advanced_options" title="Advanced options"> + <param name="esf" type="select" label="Method for estimateSizeFactors" + help="Method for estimation: either 'ratio', 'poscounts', or 'iterate'. 'ratio' uses the standard median ratio method introduced in DESeq. + The size factor is the median ratio of the sample over a 'pseudosample': for each gene, the geometric mean of all samples. + 'poscounts' and 'iterate' offer alternative estimators, which can be used even when all genes contain a sample with a zero (a problem + for the default method, as the geometric mean becomes zero, and the ratio undefined). The 'poscounts' estimator deals with a gene with + some zeros, by calculating a modified geometric mean by taking the n-th root of the product of the non-zero counts. This evolved out of + use cases with Paul McMurdie's phyloseq package for metagenomic samples. The 'iterate' estimator iterates between estimating the dispersion + with a design of ~1, and finding a size factor vector by numerically optimizing the likelihood of the ~1 model."> + <option value="" selected="true">No Selection (use default)</option> + <option value="ratio">ratio</option> + <option value="poscounts">poscounts</option> + <option value="iterate">iterate</option> + </param> + <param name="fit_type" type="select" label="Fit type"> + <option value="1" selected="true">parametric</option> + <option value="2">local</option> + <option value="3">mean</option> + </param> + <param name="outlier_replace_off" type="boolean" truevalue="1" falsevalue="0" checked="false" + label="Turn off outliers replacement (only affects with >6 replicates)" + help="When there are more than 6 replicates for a given sample, the DESeq2 will automatically replace + counts with large Cook’s distance with the trimmed mean over all samples, scaled up by the size factor + or normalization factor for that sample" /> + 
<param name="outlier_filter_off" type="boolean" truevalue="1" falsevalue="0" checked="false" + label="Turn off outliers filtering (only affects with >2 replicates)" + help="When there are more than 2 replicates for a given sample, the DESeq2 will automatically + filter genes which contain a Cook’s distance above a cutoff" /> + <param name="auto_mean_filter_off" type="boolean" truevalue="1" falsevalue="0" checked="false" + label="Turn off independent filtering" + help=" DESeq2 performs independent filtering by default using the mean of normalized counts as a filter statistic" /> + </section> + <section name="output_options" title="Output options"> + <param name="output_selector" type="select" multiple="True" display="checkboxes" label="Output selector"> + <option value="pdf" selected="True">Generate plots for visualizing the analysis results</option> + <option value="normCounts">Output normalized counts table</option> + <option value="normVST">Output VST normalized table</option> + <option value="normRLog">Output rLog normalized table</option> + <option value="many_contrasts">Output all levels vs all levels of primary factor (use when you have >2 levels for primary factor)</option> + </param> + <param name="alpha_ma" type="float" min="0" max="0.5" value="0.1" label="Alpha value for MA-plot" help="Default value is 0.1. 
This option is only meaningful when plots are generated" /> + </section> </inputs> <outputs> <data name="deseq_out" format="tabular" label="DESeq2 result file on ${on_string}"> - <filter>many_contrasts is False</filter> + <filter>'many_contrasts' not in output_options['output_selector']</filter> <actions> <action name="column_names" type="metadata" default="GeneID,Base mean,log2(FC),StdErr,Wald-Stats,P-value,P-adj" /> </actions> </data> <collection name="split_output" type="list" label="DESeq2 result files on ${on_string}"> - <filter>many_contrasts is True</filter> + <filter>output_options['output_selector'] and 'many_contrasts' in output_options['output_selector']</filter> <discover_datasets pattern="(?P<designation>.+_vs_.+)" format="tabular" directory="." visible="false"/> </collection> <data name="plots" format="pdf" label="DESeq2 plots on ${on_string}"> - <filter>pdf == True</filter> + <filter>output_options['output_selector'] and 'pdf' in output_options['output_selector']</filter> </data> <data name="counts_out" format="tabular" label="Normalized counts file on ${on_string}"> - <filter>normCounts == True</filter> + <filter>output_options['output_selector'] and 'normCounts' in output_options['output_selector']</filter> </data> <data name="rlog_out" format="tabular" label="rLog-Normalized counts file on ${on_string}"> - <filter>normRLog == True</filter> + <filter>output_options['output_selector'] and 'normRLog' in output_options['output_selector']</filter> </data> <data name="vst_out" format="tabular" label="VST-Normalized counts file on ${on_string}"> - <filter>normVST == True</filter> + <filter>output_options['output_selector'] and 'normVST' in output_options['output_selector']</filter> </data> </outputs> <tests> @@ -249,10 +241,9 @@ <param name="countsFile" value="GSM461176_untreat_single.counts,GSM461177_untreat_paired.counts,GSM461178_untreat_paired.counts,GSM461182_untreat_single.counts"/> </repeat> </repeat> - <param name="pdf" value="False"/> - <param 
name="normCounts" value="True"/> - <param name="normRLog" value="True"/> - <param name="normVST" value="True"/> + <section name="output_options"> + <param name="output_selector" value="normCounts,normRLog,normVST"/> + </section> <output name="counts_out"> <assert_contents> <has_text_matching expression="GSM461176_untreat_single.counts\tGSM461177_untreat_paired.counts\tGSM461178_untreat_paired.counts\tGSM461182_untreat_single.counts\tGSM461179_treat_single.counts\tGSM461180_treat_paired.counts\tGSM461181_treat_paired.counts" /> @@ -291,8 +282,9 @@ </repeat> </repeat> <param name="batch_factors" value="batch_factors.tab"/> - <param name="pdf" value="False"/> - <param name="normCounts" value="True"/> + <section name="output_options"> + <param name="output_selector" value="normCounts"/> + </section> <output name="deseq_out"> <assert_contents> <has_text_matching expression="FBgn0003360\t1933.*\t-2.9.*\t0.1.*\t-26.*\t1.*-152\t4.*-149" /> @@ -313,10 +305,9 @@ </repeat> </repeat> <param name="header" value="False"/> - <param name="pdf" value="False"/> - <param name="normCounts" value="True"/> - <param name="normRLog" value="True"/> - <param name="normVST" value="True"/> + <section name="output_options"> + <param name="output_selector" value="normCounts,normRLog,normVST"/> + </section> <output name="counts_out"> <assert_contents> <has_text_matching expression="GSM461176_untreat_single.counts.noheader\tGSM461177_untreat_paired.counts.noheader\tGSM461178_untreat_paired.counts.noheader\tGSM461182_untreat_single.counts.noheader\tGSM461179_treat_single.counts.noheader\tGSM461180_treat_paired.counts.noheader\tGSM461181_treat_paired.counts.noheader" /> @@ -354,7 +345,9 @@ <param name="countsFile" value="sailfish/sailfish_quant.sf4.tab,sailfish/sailfish_quant.sf5.tab,sailfish/sailfish_quant.sf6.tab"/> </repeat> </repeat> - <param name="pdf" value="False"/> + <section name="output_options"> + <param name="output_selector" value=""/> + </section> <param name="tximport_selector" 
value="tximport"/> <param name="txtype" value="sailfish"/> <param name="mapping_format_selector" value="tabular"/> @@ -378,7 +371,9 @@ <param name="countsFile" value="sailfish/sailfish_quant.sf4.tab,sailfish/sailfish_quant.sf5.tab,sailfish/sailfish_quant.sf6.tab"/> </repeat> </repeat> - <param name="pdf" value="False"/> + <section name="output_options"> + <param name="output_selector" value=""/> + </section> <param name="tximport_selector" value="tximport"/> <param name="txtype" value="sailfish"/> <param name="mapping_format_selector" value="gtf"/> @@ -402,7 +397,9 @@ <param name="countsFile" value="sailfish_ensembl/sailfish_quant.sf4.tab,sailfish_ensembl/sailfish_quant.sf5.tab,sailfish_ensembl/sailfish_quant.sf6.tab"/> </repeat> </repeat> - <param name="pdf" value="False"/> + <section name="output_options"> + <param name="output_selector" value=""/> + </section> <param name="tximport_selector" value="tximport"/> <param name="txtype" value="sailfish"/> <param name="mapping_format_selector" value="gtf"/> @@ -437,7 +434,9 @@ <param name="groups" value="primary:untreated"/> </repeat> </repeat> - <param name="pdf" value="False"/> + <section name="output_options"> + <param name="output_selector" value=""/> + </section> <param name="tximport_selector" value="tximport"/> <param name="txtype" value="sailfish"/> <param name="mapping_format_selector" value="tabular"/> @@ -472,12 +471,13 @@ <param name="groups" value="primary:untreated"/> </repeat> </repeat> - <param name="pdf" value="False"/> + <section name="output_options"> + <param name="output_selector" value="many_contrasts"/> + </section> <param name="tximport_selector" value="tximport"/> <param name="txtype" value="sailfish"/> <param name="mapping_format_selector" value="tabular"/> <param name="tabular_file" value="tx2gene.tab"/> - <param name="many_contrasts" value="true"/> <output_collection name="split_output" type="list" count="1"> <element name="Treatment_Treated_vs_Untreated"> <assert_contents> @@ -486,6 +486,33 
@@ </element> </output_collection> </test> + <!--Test alpha_ma option--> + <test expect_num_outputs="1"> + <repeat name="rep_factorName"> + <param name="factorName" value="Treatment"/> + <repeat name="rep_factorLevel"> + <param name="factorLevel" value="Treated"/> + <param name="countsFile" value="sailfish_ensembl/sailfish_quant.sf1.tab,sailfish_ensembl/sailfish_quant.sf2.tab,sailfish_ensembl/sailfish_quant.sf3.tab"/> + </repeat> + <repeat name="rep_factorLevel"> + <param name="factorLevel" value="Untreated"/> + <param name="countsFile" value="sailfish_ensembl/sailfish_quant.sf4.tab,sailfish_ensembl/sailfish_quant.sf5.tab,sailfish_ensembl/sailfish_quant.sf6.tab"/> + </repeat> + </repeat> + <section name="output_options"> + <param name="output_selector" value=""/> + <param name="alpha_ma" value="0.05"/> + </section> + <param name="tximport_selector" value="tximport"/> + <param name="txtype" value="sailfish"/> + <param name="mapping_format_selector" value="gtf"/> + <param name="gtf_file" value="Homo_sapiens.GRCh38.94.gtf" ftype="gtf"/> + <output name="deseq_out" > + <assert_contents> + <has_text_matching expression="ENSG00000168671\t1.8841.*\t-0.1180.*\t0.7429.*\t-0.1589.*\t0.8737.*\t0.9999.*" /> + </assert_contents> + </output> + </test> </tests> <help><![CDATA[ .. class:: infomark @@ -584,7 +611,5 @@ .. _DESeq2: http://master.bioconductor.org/packages/release/bioc/html/DESeq2.html .. _tximport: https://bioconductor.org/packages/devel/bioc/vignettes/tximport/inst/doc/tximport.html ]]></help> - <citations> - <citation type="doi">10.1186/s13059-014-0550-8</citation> - </citations> + <expand macro="citations" /> </tool>
--- a/deseq2_macros.xml Fri Jan 08 20:29:54 2021 +0000 +++ b/deseq2_macros.xml Mon Jun 28 05:14:25 2021 +0000 @@ -18,4 +18,38 @@ </repeat> </repeat> </xml> + <xml name="requirements"> + <requirements> + <requirement type="package" version="1.22.1">bioconductor-deseq2</requirement> + <!-- Optional dependency of tximport, needed to import kallisto results https://github.com/galaxyproject/usegalaxy-playbook/issues/161 --> + <requirement type="package" version="2.26.2">bioconductor-rhdf5</requirement> + <requirement type="package" version="1.10.0">bioconductor-tximport</requirement> + <requirement type="package" version="1.34.1">bioconductor-genomicfeatures</requirement> + <requirement type="package" version="1.20.2">r-getopt</requirement> + <requirement type="package" version="0.8.0">r-ggrepel</requirement> + <requirement type="package" version="3.0.1">r-gplots</requirement> + <requirement type="package" version="1.0.10">r-pheatmap</requirement> + <requirement type="package" version="0.2.20">r-rjson</requirement> + </requirements> + </xml> + <token name="@TOOL_VERSION@">2.11.40.6</token> + <token name="@SUFFIX_VERSION@">2</token> + <xml name="edam_ontology"> + <edam_topics> + <edam_topic>topic_3308</edam_topic> + </edam_topics> + <edam_operations> + <edam_operation>operation_3800</edam_operation> + </edam_operations> + </xml> + <xml name="citations"> + <citations> + <citation type="doi">10.1186/s13059-014-0550-8</citation> + </citations> + </xml> + <xml name="xrefs"> + <xrefs> + <xref type='bio.tools'>DESeq2</xref> + </xrefs> + </xml> </macros> \ No newline at end of file