view aldex2.xml @ 0:f4d0bd4b4d6d draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/aldex2 commit b99f09cf03f075a6881d192b0f1233233289fa60
author iuc
date Wed, 29 Jun 2022 07:36:45 +0000
parents
children
line wrap: on
line source

<tool id="aldex2" name="ALDEx2" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
  <description>differential abundance analysis</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="biotools"/>
    <expand macro="requirements"/>
    <command detect_errors="exit_code"><![CDATA[
## Flatten the "conditions" repeat into two comma-separated strings (group labels
## and per-group column counts); each is passed to aldex2.R as a single argument.
## "#silent" evaluates the append() calls without writing their result into the
## generated command line.
#set group_name_items = []
#set num_cols_items = []
#for $condition_items in $conditions:
    #silent $group_name_items.append(str($condition_items.group_name))
    #silent $num_cols_items.append(str($condition_items.num_cols))
#end for
#set group_names = ','.join($group_name_items)
#set num_cols = ','.join($num_cols_items)
#set selected_analysis = str($analysis_type_cond.analysis_type)

Rscript '${__tool_directory__}/aldex2.R'
--reads '$reads'
--group_names '$group_names'
--num_cols '$num_cols'
--num_mc_samples $num_mc_samples
--denom '$denom'
--analysis_type '$selected_analysis'
## One branch per analysis type: the analysis-specific options come first and the
## matching output dataset is always the last argument of the branch.
#if $selected_analysis == 'aldex':
    --aldex_test '$analysis_type_cond.aldex_test'
    --effect '$analysis_type_cond.effect'
    --include_sample_summary '$analysis_type_cond.include_sample_summary'
    --iterate '$analysis_type_cond.iterate'
    --output '$output_aldex'
#else if $selected_analysis == 'aldex_corr':
    ## Flatten the "cont_var" repeat the same way as the comparison groups above.
    #set group_num_items = []
    #set num_cols_in_group_items = []
    #for $cont_var_items in $analysis_type_cond.cont_var:
        #silent $group_num_items.append(str($cont_var_items.group_num))
        #silent $num_cols_in_group_items.append(str($cont_var_items.num_cols_in_group))
    #end for
    #set group_nums = ','.join($group_num_items)
    #set num_cols_in_groups = ','.join($num_cols_in_group_items)
    --group_nums '$group_nums'
    --num_cols_in_groups '$num_cols_in_groups'
    --output '$output_aldex_corr'
#else if $selected_analysis == 'aldex_effect':
    --include_sample_summary '$analysis_type_cond.include_sample_summary'
    --output '$output_aldex_effect'
#else if $selected_analysis == 'aldex_expected_distance':
    --output '$output_aldex_expected_distance'
#else if $selected_analysis == 'aldex_kw':
    --output '$output_aldex_kw'
#else if $selected_analysis == 'aldex_plot':
    --aldex_test '$analysis_type_cond.aldex_test'
    --effect '$analysis_type_cond.effect'
    --include_sample_summary '$analysis_type_cond.include_sample_summary'
    --iterate '$analysis_type_cond.iterate'
    --plot_type '$analysis_type_cond.plot_type'
    --plot_test '$analysis_type_cond.plot_test'
    --cutoff_pval $analysis_type_cond.cutoff_pval
    --cutoff_effect $analysis_type_cond.cutoff_effect
    ## Axis labels are optional; omit the flag entirely when left blank.
    #if str($analysis_type_cond.xlab) != '':
        --xlab '$analysis_type_cond.xlab'
    #end if
    #if str($analysis_type_cond.ylab) != '':
        --ylab '$analysis_type_cond.ylab'
    #end if
    --output '$output_aldex_plot'
#else if $selected_analysis == 'aldex_plot_feature':
    --feature_name '$analysis_type_cond.feature_name'
    --output '$output_aldex_plot_feature'
#else if $selected_analysis == 'aldex_ttest':
    --paired_test '$analysis_type_cond.paired_test'
    --hist_plot '$analysis_type_cond.hist_plot'
    --output '$output_aldex_ttest'
    ## When the histogram is requested, move Rplots.pdf from the working
    ## directory to the second output dataset.
    #if str($analysis_type_cond.hist_plot) == 'true':
        && mv 'Rplots.pdf' '$output_aldex_ttest_plot'
    #end if
#end if
    ]]></command>
    <inputs>
        <!-- Tabular count table that is analysed. -->
        <param argument="--reads" type="data" format="tabular" label="Reads table file" help="Rows contain genes and columns contain sequencing read counts"/>
        <!-- One entry per comparison group. The command template joins the group names and the
             column counts of all entries into two comma-separated arguments. -->
        <repeat name="conditions" title="Comparison group" min="1">
            <param name="group_name" type="text" value="Grp1" label="Group name" help="Specify a group label and the number of sequencing read count columns associated with it">
                <validator type="empty_field"/>
                <expand macro="sanitize_query" validinitial="string.ascii_letters,string.punctuation"/>
            </param>
            <param name="num_cols" type="integer" min="1" value="1" label="Number of columns in group"/>
        </repeat>
        <param argument="--num_mc_samples" type="integer" min="1" value="128" label="Number of Monte Carlo samples to use when estimating the underlying distributions"/>
        <param argument="--denom" type="select" label="Select features to retain as the denominator for the Geometric Mean calculation">
            <option value="all" selected="true">All features</option>
            <option value="median">Median abundance of all features</option>
            <option value="iqlr">Features that are between the first and third quartile of the variance of the clr values across all samples</option>
            <option value="zero">Non-zero features in each group as the denominator</option>
            <option value="lvha">Features that have low variance and high relative abundance</option>
        </param>
        <!-- The selected analysis decides which extra parameters are shown, which options the
             command template passes to aldex2.R, and which output dataset is created. -->
        <conditional name="analysis_type_cond">
            <param name="analysis_type" type="select" label="Select the analysis to be performed">
                <option value="aldex" selected="true">Compute an ALDEx2 object (aldex)</option>
                <option value="aldex_corr">Calculate correlation with a continuous variable (aldex.corr)</option>
                <option value="aldex_effect">Calculate effect sizes and differences between conditions (aldex.effect)</option>
                <option value="aldex_expected_distance">Calculate the expected values of distances between samples (aldex.expectedDistance)</option>
                <option value="aldex_kw">Calculate the Kruskal-Wallis test and glm ANOVA statistics (aldex.kw)</option>
                <option value="aldex_plot">Plot an ALDEx2 object (aldex.plot)</option>
                <option value="aldex_plot_feature">Show dispersion of the expected values returned by aldex.effect (aldex.plotFeature)</option>
                <option value="aldex_ttest">Calculate Welch’s t-test and Wilcoxon test statistics (aldex.ttest)</option>
            </param>
            <when value="aldex">
                <expand macro="aldex_test_param"/>
                <expand macro="effect_param"/>
                <expand macro="include_sample_summary_param"/>
                <expand macro="iterate_param"/>
            </when>
            <when value="aldex_corr">
                <!-- Describes the continuous variable as runs of columns: each entry assigns one
                     numeric value to a number of consecutive columns of the reads table. -->
                <repeat name="cont_var" title="Group number" min="1">
                    <param name="group_num" type="integer" min="1" value="1" label="Group number" help="Specify a group number and the number of columns from the Reads table file associated with the group"/>
                    <param name="num_cols_in_group" type="integer" min="1" value="1" label="Number of columns in group"/>
                </repeat>
            </when>
            <when value="aldex_effect">
                <expand macro="include_sample_summary_param"/>
            </when>
            <!-- These two analyses take no parameters beyond the common ones. -->
            <when value="aldex_expected_distance"/>
            <when value="aldex_kw"/>
            <when value="aldex_plot">
                <expand macro="aldex_test_param"/>
                <expand macro="effect_param"/>
                <expand macro="include_sample_summary_param"/>
                <expand macro="iterate_param"/>
                <param name="plot_type" type="select" label="Select the plot type">
                    <option value="MA" selected="true">A Bland-Altman style plot</option>
                    <option value="MW">A difference between to a variance within plot</option>
                </param>
                <param name="plot_test" type="select" label="Select the method of calculating significance">
                    <option value="welch" selected="true">Welch’s t test</option>
                    <option value="wilcox">Wilcox rank test</option>
                    <option value="kruskal">Kruskal-Wallis test</option>
                </param>
                <!-- An FDR cutoff is a probability, so it is bounded to the range 0..1. -->
                <param name="cutoff_pval" type="float" value="0.1" min="0" max="1" label="Benjamini-Hochberg fdr cutoff"/>
                <!-- NOTE(review): restricted to whole numbers here; confirm the option type expected
                     by aldex2.R before widening this to a float. -->
                <param name="cutoff_effect" type="integer" value="1" min="0" label="Effect size cutoff for plotting"/>
                <param name="xlab" type="text" value="" optional="true" label="x-label for the plot">
                    <expand macro="sanitize_query" validinitial="string.ascii_letters,string.punctuation"/>
                </param>
                <param name="ylab" type="text" value="" optional="true" label="y-label for the plot">
                    <expand macro="sanitize_query" validinitial="string.ascii_letters,string.punctuation"/>
                </param>
            </when>
            <when value="aldex_plot_feature">
                <!-- The feature name is always passed to aldex2.R for this analysis, so an empty
                     value is rejected at submission time. -->
                <param name="feature_name" type="text" value="" label="Name of the feature from the input data to be plotted">
                    <validator type="empty_field"/>
                    <expand macro="sanitize_query" validinitial="string.ascii_letters,string.punctuation"/>
                </param>
            </when>
            <when value="aldex_ttest">
                <param argument="--paired_test" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Calculate effect size for paired samples?"/>
                <!-- Kept as a true/false select: the output filter and the command template both
                     compare this value against the string 'true'. -->
                <param argument="--hist_plot" type="select" label="Plot a histogram of p-values for the first Dirichlet Monte Carlo instance?">
                    <option value="false" selected="true">No</option>
                    <option value="true">Yes</option>
                </param>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- One dataset per analysis type: each filter below matches exactly one value of the
             analysis_type select, so a run creates a single primary output. -->
        <data name="output_aldex"
              format="tabular"
              label="${tool.name} on ${on_string}: (aldex)">
            <filter>analysis_type_cond['analysis_type'] == 'aldex'</filter>
        </data>
        <data name="output_aldex_corr"
              format="tabular"
              label="${tool.name} on ${on_string}: (aldex.corr)">
            <filter>analysis_type_cond['analysis_type'] == 'aldex_corr'</filter>
        </data>
        <data name="output_aldex_effect"
              format="tabular"
              label="${tool.name} on ${on_string}: (aldex.effect)">
            <filter>analysis_type_cond['analysis_type'] == 'aldex_effect'</filter>
        </data>
        <data name="output_aldex_expected_distance"
              format="png"
              label="${tool.name} on ${on_string}: (aldex.expectedDistance)">
            <filter>analysis_type_cond['analysis_type'] == 'aldex_expected_distance'</filter>
        </data>
        <data name="output_aldex_kw"
              format="tabular"
              label="${tool.name} on ${on_string}: (aldex.kw)">
            <filter>analysis_type_cond['analysis_type'] == 'aldex_kw'</filter>
        </data>
        <data name="output_aldex_plot"
              format="png"
              label="${tool.name} on ${on_string}: (aldex.plot)">
            <filter>analysis_type_cond['analysis_type'] == 'aldex_plot'</filter>
        </data>
        <data name="output_aldex_plot_feature"
              format="png"
              label="${tool.name} on ${on_string}: (aldex.plotFeature)">
            <filter>analysis_type_cond['analysis_type'] == 'aldex_plot_feature'</filter>
        </data>
        <data name="output_aldex_ttest"
              format="tabular"
              label="${tool.name} on ${on_string}: (aldex.ttest)">
            <filter>analysis_type_cond['analysis_type'] == 'aldex_ttest'</filter>
        </data>
        <!-- Additional PDF, created only when the t-test analysis is run with the histogram
             option set to 'true'. -->
        <data name="output_aldex_ttest_plot"
              format="pdf"
              label="${tool.name} on ${on_string}: (aldex.ttest hist plot)">
            <filter>analysis_type_cond['analysis_type'] == 'aldex_ttest' and analysis_type_cond['hist_plot'] == 'true'</filter>
        </data>
    </outputs>
    <tests>
        <!-- All tests use reads.tabular split into two groups (NS and S) of 7 columns each. -->
        <!-- Test 1: default analysis (aldex) with all other parameters left at their defaults. -->
        <test expect_num_outputs="1">
            <param name="reads" value="reads.tabular" ftype="tabular"/>
            <repeat name="conditions">
                <param name="group_name" value="NS"/>
                <param name="num_cols" value="7"/>
            </repeat>
            <repeat name="conditions">
                <param name="group_name" value="S"/>
                <param name="num_cols" value="7"/>
            </repeat>
            <output name="output_aldex" ftype="tabular">
                <assert_contents>
                    <has_text text="rab.all"/>
                    <has_size value="39906" delta="100"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test 2: default analysis (aldex) with the "zero" denominator option. -->
        <test expect_num_outputs="1">
            <param name="reads" value="reads.tabular" ftype="tabular"/>
            <repeat name="conditions">
                <param name="group_name" value="NS"/>
                <param name="num_cols" value="7"/>
            </repeat>
            <repeat name="conditions">
                <param name="group_name" value="S"/>
                <param name="num_cols" value="7"/>
            </repeat>
            <param name="denom" value="zero"/>
            <output name="output_aldex" ftype="tabular">
                <assert_contents>
                    <has_text text="rab.all"/>
                    <has_size value="39908" delta="100"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test 3: default analysis (aldex) with effect disabled, sample summary and iteration enabled. -->
        <test expect_num_outputs="1">
            <param name="reads" value="reads.tabular" ftype="tabular"/>
            <repeat name="conditions">
                <param name="group_name" value="NS"/>
                <param name="num_cols" value="7"/>
            </repeat>
            <repeat name="conditions">
                <param name="group_name" value="S"/>
                <param name="num_cols" value="7"/>
            </repeat>
            <param name="effect" value="false"/>
            <param name="include_sample_summary" value="true"/>
            <param name="iterate" value="true"/>
            <output name="output_aldex" ftype="tabular">
                <assert_contents>
                    <has_text text="we.ep"/>
                    <has_size value="14595" delta="100"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test 4: default analysis (aldex) with the "kw" test selected. -->
        <test expect_num_outputs="1">
            <param name="reads" value="reads.tabular" ftype="tabular"/>
            <repeat name="conditions">
                <param name="group_name" value="NS"/>
                <param name="num_cols" value="7"/>
            </repeat>
            <repeat name="conditions">
                <param name="group_name" value="S"/>
                <param name="num_cols" value="7"/>
            </repeat>
            <param name="aldex_test" value="kw"/>
            <output name="output_aldex" ftype="tabular">
                <assert_contents>
                    <has_text text="kw.ep"/>
                    <has_size value="14615" delta="100"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test 5: aldex_corr with a continuous variable of two values (1 and 2), 7 columns each. -->
        <test expect_num_outputs="1">
            <param name="reads" value="reads.tabular" ftype="tabular"/>
            <repeat name="conditions">
                <param name="group_name" value="NS"/>
                <param name="num_cols" value="7"/>
            </repeat>
            <repeat name="conditions">
                <param name="group_name" value="S"/>
                <param name="num_cols" value="7"/>
            </repeat>
            <param name="analysis_type" value="aldex_corr"/>
            <repeat name="cont_var">
                <param name="group_num" value="1"/>
                <param name="num_cols_in_group" value="7"/>
            </repeat>
            <repeat name="cont_var">
                <param name="group_num" value="2"/>
                <param name="num_cols_in_group" value="7"/>
            </repeat>
            <output name="output_aldex_corr" ftype="tabular">
                <assert_contents>
                    <has_text text="pearson.ecor"/>
                    <has_size value="33232" delta="100"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test 6: aldex_effect with default parameters. -->
        <test expect_num_outputs="1">
            <param name="reads" value="reads.tabular" ftype="tabular"/>
            <repeat name="conditions">
                <param name="group_name" value="NS"/>
                <param name="num_cols" value="7"/>
            </repeat>
            <repeat name="conditions">
                <param name="group_name" value="S"/>
                <param name="num_cols" value="7"/>
            </repeat>
            <param name="analysis_type" value="aldex_effect"/>
            <output name="output_aldex_effect" ftype="tabular">
                <assert_contents>
                    <has_text text="rab.all"/>
                    <has_size value="25268" delta="100"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test 7: aldex_expected_distance; the PNG output is checked by size only. -->
        <test expect_num_outputs="1">
            <param name="reads" value="reads.tabular" ftype="tabular"/>
            <repeat name="conditions">
                <param name="group_name" value="NS"/>
                <param name="num_cols" value="7"/>
            </repeat>
            <repeat name="conditions">
                <param name="group_name" value="S"/>
                <param name="num_cols" value="7"/>
            </repeat>
            <param name="analysis_type" value="aldex_expected_distance"/>
            <output name="output_aldex_expected_distance" ftype="png">
                <assert_contents>
                    <has_size value="70183" delta="2000"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test 8: aldex_kw with default parameters. -->
        <test expect_num_outputs="1">
            <param name="reads" value="reads.tabular" ftype="tabular"/>
            <repeat name="conditions">
                <param name="group_name" value="NS"/>
                <param name="num_cols" value="7"/>
            </repeat>
            <repeat name="conditions">
                <param name="group_name" value="S"/>
                <param name="num_cols" value="7"/>
            </repeat>
            <param name="analysis_type" value="aldex_kw"/>
            <output name="output_aldex_kw" ftype="tabular">
                <assert_contents>
                    <has_text text="kw.ep"/>
                    <has_size value="14623" delta="100"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test 9: aldex_plot with default plot settings; the PNG output is checked by size only. -->
        <test expect_num_outputs="1">
            <param name="reads" value="reads.tabular" ftype="tabular"/>
            <repeat name="conditions">
                <param name="group_name" value="NS"/>
                <param name="num_cols" value="7"/>
            </repeat>
            <repeat name="conditions">
                <param name="group_name" value="S"/>
                <param name="num_cols" value="7"/>
            </repeat>
            <param name="analysis_type" value="aldex_plot"/>
            <output name="output_aldex_plot" ftype="png">
                <assert_contents>
                    <has_size value="19064" delta="500"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test 10: aldex_plot_feature for the feature named A:D:A:E; PNG checked by size only. -->
        <test expect_num_outputs="1">
            <param name="reads" value="reads.tabular" ftype="tabular"/>
            <repeat name="conditions">
                <param name="group_name" value="NS"/>
                <param name="num_cols" value="7"/>
            </repeat>
            <repeat name="conditions">
                <param name="group_name" value="S"/>
                <param name="num_cols" value="7"/>
            </repeat>
            <param name="analysis_type" value="aldex_plot_feature"/>
            <param name="feature_name" value="A:D:A:E"/>
            <output name="output_aldex_plot_feature" ftype="png">
                <assert_contents>
                    <has_size value="30772" delta="2000"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test 11: aldex_ttest with defaults (unpaired, no histogram), so only the table is produced. -->
        <test expect_num_outputs="1">
            <param name="reads" value="reads.tabular" ftype="tabular"/>
            <repeat name="conditions">
                <param name="group_name" value="NS"/>
                <param name="num_cols" value="7"/>
            </repeat>
            <repeat name="conditions">
                <param name="group_name" value="S"/>
                <param name="num_cols" value="7"/>
            </repeat>
            <param name="analysis_type" value="aldex_ttest"/>
            <output name="output_aldex_ttest" ftype="tabular">
                <assert_contents>
                    <has_text text="we.ep"/>
                    <has_size value="14596" delta="100"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test 12: paired aldex_ttest with the histogram enabled; expects the table plus the PDF. -->
        <test expect_num_outputs="2">
            <param name="reads" value="reads.tabular" ftype="tabular"/>
            <repeat name="conditions">
                <param name="group_name" value="NS"/>
                <param name="num_cols" value="7"/>
            </repeat>
            <repeat name="conditions">
                <param name="group_name" value="S"/>
                <param name="num_cols" value="7"/>
            </repeat>
            <param name="analysis_type" value="aldex_ttest"/>
            <param name="paired_test" value="true"/>
            <param name="hist_plot" value="true"/>
            <output name="output_aldex_ttest" ftype="tabular">
                <assert_contents>
                    <has_text text="we.ep"/>
                    <has_size value="13822" delta="100"/>
                </assert_contents>
            </output>
            <output name="output_aldex_ttest_plot" ftype="pdf">
                <assert_contents>
                    <has_text text="%PDF"/>
                    <has_size value="7331" delta="100"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help>
**What it does**

Performs a differential abundance analysis for the comparison of two or more conditions (e.g., single-organism
and meta-RNA-seq high-throughput sequencing assays, or of selected and unselected values from in-vitro sequence
selections).  This tool generates Monte Carlo samples of the Dirichlet distribution for each sample, converts
each instance using a log-ratio transform, then returns test results for two sample (Welch’s t, Wilcoxon) or
multi-sample (glm, Kruskal-Wallis) tests.  The tool also estimates effect size for two sample analyses.

**Options**

  * **Reads table file** - a tabular file with unique names for all rows and columns. Rows should contain genes and columns should contain sequencing read counts (i.e., sample vectors).  Rows with 0 reads in each sample are deleted prior to analysis.
  * **Define the comparison groups describing the reads data structure** - Specify a group label and the number of sequencing read count columns associated with it.  The total number of columns must be equal to the number of numeric columns in the selected Reads table file.
  * **Number of Monte Carlo samples to use** - the number of Monte Carlo samples to use when estimating the underlying distributions. Since we are estimating central tendencies, 128 is usually sufficient.
  * **Select features to retain as the denominator for the Geometric Mean calculation** - indicates which features to retain as the denominator for the Geometric Mean calculation. 
  * **Select the analysis to be performed**

    * **Compute an ALDEx2 object (aldex)** - performs log-ratio transformation and statistical testing. Specifically, this function: (a) generates Monte Carlo samples of the Dirichlet distribution for each sample, (b) converts each instance using a log-ratio transform, then (c) returns test results for two sample (Welch’s t, Wilcoxon) or multi-sample (glm, Kruskal-Wallis) tests. This function also estimates effect size for two sample analyses.

      * **Select the tests to be performed** - select the tests to be performed when creating the ALDEx2 object.
      * **Calculate abundances and effect sizes** - applies only if the selected test is Welch’s t and Wilcox tests, or if tests are performed iteratively.
      * **Include median clr values for each sample** - specify whether to include median clr values for each sample (applies only if abundances and effect sizes are calculated).
      * **Perform tests iteratively** - Specify whether to iteratively perform a test.  For example, this will use the results from an initial Welch’s t and Wilcox test routine to seed the reference (i.e., denominator of Geometric Mean calculation) for a second Welch’s t and Wilcox test routine.

    * **Calculate correlation with a continuous variable (aldex.corr)** - calculates the expected values for the correlation between each feature and a continuous variable using data returned by aldex.clr and a vector of the continuous variable. Returns results of Pearson, Spearman and Kendall tests.
    * **Calculate effect sizes and differences between conditions (aldex.effect)** - calculates (1) the median clr abundances per sample, per condition, (2) the median differences in abundance between 2 conditions and (3) the median effect size and proportion of effect that overlaps 0.
    * **Calculate the expected values of distances between samples (aldex.expectedDistance)** - calculates the expected value of distances between samples using the median value of distances derived from N Monte-Carlo replicates.
    * **Calculate the Kruskal-Wallis test and glm ANOVA statistics (aldex.kw)** - calculates the expected values of the Kruskal-Wallis test and a glm ANOVA.
    * **Plot an ALDEx2 object (aldex.plot)** - generate an MW-plot or an MA-plot of the given ALDEx2 object.
    * **Show dispersion of the expected values returned by aldex.effect (aldex.plotFeature)** - generates density plots showing the dispersion of the expected values given in the output from aldex.effect. The expected values are shown in the plots. This is a diagnostic visualization to help determine if the expected values are trustworthy.
    * **Calculate Welch’s t-test and Wilcoxon test statistics (aldex.ttest)** - calculates the expected values of the Wilcoxon Rank Sum test and Welch’s t-test on the data returned by aldex.clr.

      * **Calculate paired tests** - specify whether to calculate effect size for paired samples (applies only if abundances and effect sizes are calculated and the selected test is Welch’s t and Wilcox tests).
      * **Plot a histogram of p-values for the first Dirichlet Monte Carlo instance** - specify whether to plot a histogram of p-values for the first Dirichlet Monte Carlo instance.
    </help>
    <expand macro="citations"/>
</tool>