view maaslin2.xml @ 1:ef2cace85809 draft

planemo upload for repository https://github.com/biobakery/Maaslin2 commit 1bed0ce83796932c1a21006f0c657656b49ccd8f
author iuc
date Wed, 13 Mar 2024 10:32:52 +0000
parents c629e6de3c49
children faacef62bb54
line wrap: on
line source

<tool id="maaslin2" name="MaAsLin 2" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>Microbiome Multivariable Association with Linear Models</description>
    <!-- Shared definitions are imported from macros.xml, which is not part of this file. -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <!-- The three macros expanded below are presumably defined in macros.xml
         (ontology terms, cross-references, package requirements); verify there. -->
    <expand macro="edam_ontology"/>
    <expand macro="xrefs"/>
    <expand macro="requirements"/>
    <command detect_errors="exit_code"><![CDATA[
## Link the Galaxy datasets to the fixed file names that are handed to Maaslin2.R below.
ln -s '$input_data' 'input_data.tsv'
&&
ln -s '$input_metadata' 'input_metadata.tsv'
&&
Maaslin2.R
## Optional numeric options are compared against the empty string instead of being
## tested for truthiness, so that an explicit value of 0 is still forwarded rather
## than silently replaced by the Maaslin2 default.
#if str($additional_options.min_abundance) != ''
    --min_abundance '$additional_options.min_abundance'
#end if
#if str($additional_options.min_prevalence) != ''
    --min_prevalence '$additional_options.min_prevalence'
#end if
#if str($additional_options.max_significance) != ''
    --max_significance '$additional_options.max_significance'
#end if
#if $additional_options.normalization
    --normalization '$additional_options.normalization'
#end if
#if $additional_options.transform
    --transform '$additional_options.transform'
#end if
#if $additional_options.analysis_method
    --analysis_method '$additional_options.analysis_method'
#end if
#if $random_effects
    --random_effects '$random_effects'
#end if
#if $fixed_effects
    --fixed_effects '$fixed_effects'
#end if
#if $additional_options.correction
    --correction '$additional_options.correction'
#end if
## The boolean parameters render to a complete "--flag TRUE" or "--flag FALSE" pair.
    $additional_options.standardize
    $output.plot_heatmap
#if str($output.heatmap_first_n) != ''
    --heatmap_first_n '$output.heatmap_first_n'
#end if
    $output.plot_scatter
    --cores \${GALAXY_SLOTS:-4}
    'input_data.tsv'
    'input_metadata.tsv'
    'outputFolder'
&&
cd outputFolder
&&
mkdir -p figures/
&&
## Collect the top-level PDFs into figures/ for output discovery. Unlike a bare
## "cp *.pdf", find exits successfully when no PDF was produced (for example when
## both plotting options are switched off), so the job is not failed needlessly.
find . -maxdepth 1 -name '*.pdf' -exec cp {} figures/ ';'
    ]]></command>
    <inputs>
        <!-- The two tabular inputs that the command links into the working directory. -->
        <param name="input_data" type="data" format="tabular" label="Data (or features) file"/>
        <param name="input_metadata" type="data" format="tabular" label="Metadata file"/>
        <!-- NOTE(review): the selectable effects are hard-coded in this wrapper and are not
             read from the header of the metadata file; confirm this is intended. -->
        <param argument="--fixed_effects" type="select" multiple="true" optional="true" label="Interactions: Fixed effects" help="The fixed effects for the model, comma-delimited for multiple effects">
            <option value="diagnosis" selected="true">diagnosis</option>
            <option value="dysbiosisnonIBD" selected="true">dysbiosisnonIBD</option>
            <option value="dysbiosisUC" selected="true">dysbiosisUC</option>
            <option value="dysbiosisCD" selected="true">dysbiosisCD</option>
            <option value="antibiotics" selected="true">antibiotics</option>
            <option value="age" selected="true">age</option>
        </param>
        <!-- Free-text, comma-delimited list; text parameters take no 'multiple' attribute. -->
        <param argument="--random_effects" type="text" optional="true" label="Random effects" help="The random effects for the model, comma-delimited for multiple effects"/>
        <section name="additional_options" title="Additional Options" expanded="true">
            <param argument="--min_abundance" type="float" value="0.0" optional="true" label="Minimum abundance" help="The minimum abundance for each feature"/>
            <!-- Prevalence and the q-value threshold are proportions, hence the 0..1 bounds. -->
            <param argument="--min_prevalence" type="float" value="0.1" min="0" max="1" optional="true" label="Minimum prevalence" help="The minimum percent of samples for which a feature is detected at minimum abundance"/>
            <param argument="--max_significance" type="float" value="0.25" min="0" max="1" optional="true" label="Maximum significance" help="The q-value threshold for significance"/>
            <param argument="--normalization" type="select" optional="true" label="The normalization method to apply">
                <option value="TSS" selected="true">TSS</option>
                <option value="CLR">CLR</option>
                <option value="CSS">CSS</option>
                <option value="NONE">NONE</option>
                <option value="TMM">TMM</option>
            </param>
            <param argument="--transform" type="select" optional="true" label="The transform to apply">
                <option value="LOG" selected="true">LOG</option>
                <option value="LOGIT">LOGIT</option>
                <option value="AST">AST</option>
                <option value="NONE">NONE</option>
            </param>
            <param argument="--analysis_method" type="select" optional="true" label="The analysis method to apply">
                <option value="LM" selected="true">LM</option>
                <option value="CPLM">CPLM</option>
                <option value="NEGBIN">NEGBIN</option>
                <option value="ZINB">ZINB</option>
            </param>
            <param argument="--correction" type="text" value="BH" optional="true" label="Correction" help="The correction method for computing the q-value"/>
            <!-- Booleans render to a complete "flag VALUE" pair that the command inserts verbatim. -->
            <param argument="--standardize" type="boolean" truevalue="--standardize TRUE" falsevalue="--standardize FALSE" checked="true" label="Apply z-score so continuous metadata are on the same scale"/>
        </section>
        <section name="output" title="Set Plotting Output" expanded="true">
            <param argument="--plot_heatmap" type="boolean" truevalue="--plot_heatmap TRUE" falsevalue="--plot_heatmap FALSE" checked="true" label="Generate a heatmap for the significant associations"/>
            <param argument="--heatmap_first_n" type="integer" value="50" optional="true" label="Heatmap plot first N" help="In heatmap, plot top N features with significant associations"/>
            <param argument="--plot_scatter" type="boolean" truevalue="--plot_scatter TRUE" falsevalue="--plot_scatter FALSE" checked="true" label="Generate scatter plots for the significant associations"/>
            <!-- Not referenced by the command; it only drives the filter on the "residuals" output. -->
            <param name="residuals_output" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Output data frame with residuals for each feature"/>
        </section>
    </inputs>
   <outputs>
        <!-- Result tables, taken directly from the MaAsLin 2 output folder. -->
        <data name="all_results" format="tabular" label="All results ordered by increasing q-value" from_work_dir="outputFolder/all_results.tsv"/>
        <data name="significant_results" format="tabular" label="Q-values smaller than or equal to the threshold" from_work_dir="outputFolder/significant_results.tsv"/>
        <!-- Residuals are only exposed when the user asked for them. -->
        <data name="residuals" format="rdata" label="Data frame with residuals for each feature" from_work_dir="outputFolder/residuals.rds">
            <filter>output['residuals_output'] is True</filter>
        </data>
        <!-- The heatmap is read from the copy that the command places in figures/. -->
        <data name="headmap" format="pdf" label="Heatmap of the significant associations" from_work_dir="outputFolder/figures/heatmap.pdf">
            <filter>output['plot_heatmap'] is True</filter>
        </data>
        <!-- Every file found in figures/ becomes one element of this list collection. -->
        <collection name="figures_pdfs" type="list" label="Plots">
            <discover_datasets pattern="(?P&lt;designation&gt;.+)" directory="outputFolder/figures/" format="pdf"/>
            <filter>output['plot_scatter'] is True</filter>
        </collection>
    </outputs>
    <tests>
        <!-- Test 1: all six fixed effects plus two random effects (site, subject). -->
        <test expect_num_outputs="5">
            <param name="input_data" value="HMP2_taxonomy.tsv"/>
            <param name="input_metadata" value="HMP2_metadata.tsv"/>
            <param name="random_effects" value="site,subject"/>
            <param name="fixed_effects" value="diagnosis,dysbiosisnonIBD,dysbiosisUC,dysbiosisCD,antibiotics,age"/>
            <section name="additional_options">
                <param name="min_abundance" value="0.0"/>
                <param name="min_prevalence" value="0.1"/>
                <param name="max_significance" value="0.25"/>
                <param name="normalization" value="NONE"/>
                <param name="transform" value="AST"/>
                <param name="analysis_method" value="LM"/>
                <param name="correction" value="BH"/>
                <param name="standardize" value="false"/>
            </section>
            <section name="output">
                <param name="plot_heatmap" value="true"/>
                <param name="heatmap_first_n" value="50"/>
                <param name="plot_scatter" value="true"/>
                <param name="residuals_output" value="true"/>
            </section>
            <!-- Result tables: header text, row count and column count. -->
            <output name="all_results">
                <assert_contents>
                    <has_text text="feature"/>
                    <has_n_lines n="610"/>
                    <has_n_columns n="9"/>
                </assert_contents>
            </output>
            <output name="significant_results">
                <assert_contents>
                    <has_text text="dysbiosisCD"/>
                    <has_n_lines n="159"/>
                    <has_n_columns n="9"/>
                </assert_contents>
            </output>
            <!-- Binary outputs are only checked by approximate size. -->
            <output name="residuals">
                <assert_contents>
                    <has_size value="462746" delta="1000"/>
                </assert_contents>
            </output>
            <output name="headmap">
                <assert_contents>
                    <has_size value="7373" delta="1000"/>
                </assert_contents>
            </output>
            <!-- One PDF per fixed effect, plus the heatmap. -->
            <output_collection name="figures_pdfs" type="list">
                <element name="heatmap.pdf" ftype="pdf">
                    <assert_contents>
                        <has_size value="7373" delta="1000"/>
                    </assert_contents>
                </element>
                <element name="age.pdf" ftype="pdf">
                    <assert_contents>
                        <has_size value="407859" delta="100000"/>
                    </assert_contents>
                </element>
                <element name="antibiotics.pdf" ftype="pdf">
                    <assert_contents>
                        <has_size value="1590476" delta="1000000"/>
                    </assert_contents>
                </element>
                <element name="diagnosis.pdf" ftype="pdf">
                    <assert_contents>
                        <has_size value="1407572" delta="1000000"/>
                    </assert_contents>
                </element>
                <element name="dysbiosisCD.pdf" ftype="pdf">
                    <assert_contents>
                        <has_size value="2328523" delta="1000000"/>
                    </assert_contents>
                </element>
                <element name="dysbiosisnonIBD.pdf" ftype="pdf">
                    <assert_contents>
                        <has_size value="1032602" delta="1000000"/>
                    </assert_contents>
                </element>
                <element name="dysbiosisUC.pdf" ftype="pdf">
                    <assert_contents>
                        <has_size value="1037005" delta="1000000"/>
                    </assert_contents>
                </element>
            </output_collection>
        </test>
        <!-- Test 2: two fixed effects, no random effects, minimum abundance 0.0. -->
        <test expect_num_outputs="5">
            <param name="input_data" value="HMP2_taxonomy.tsv"/>
            <param name="input_metadata" value="HMP2_metadata.tsv"/>
            <param name="fixed_effects" value="diagnosis,dysbiosisnonIBD"/>
            <section name="additional_options">
                <param name="min_abundance" value="0.0"/>
                <param name="min_prevalence" value="0.1"/>
                <param name="max_significance" value="0.25"/>
                <param name="normalization" value="NONE"/>
                <param name="transform" value="AST"/>
                <param name="analysis_method" value="LM"/>
                <param name="correction" value="BH"/>
                <param name="standardize" value="false"/>
            </section>
            <section name="output">
                <param name="plot_heatmap" value="true"/>
                <param name="heatmap_first_n" value="50"/>
                <param name="plot_scatter" value="true"/>
                <param name="residuals_output" value="true"/>
            </section>
            <!-- Result tables: header text, row count and column count. -->
            <output name="all_results">
                <assert_contents>
                    <has_text text="feature"/>
                    <has_n_lines n="262"/>
                    <has_n_columns n="9"/>
                </assert_contents>
            </output>
            <output name="significant_results">
                <assert_contents>
                    <has_text text="diagnosis"/>
                    <has_n_lines n="175"/>
                    <has_n_columns n="9"/>
                </assert_contents>
            </output>
            <output name="residuals">
                <assert_contents>
                    <has_size value="367224" delta="1000"/>
                </assert_contents>
            </output>
            <!-- Only the diagnosis plot is asserted here; the heatmap output is not checked. -->
            <output_collection name="figures_pdfs" type="list">
                <element name="diagnosis.pdf" ftype="pdf">
                    <assert_contents>
                        <has_size value="6234127" delta="1000000"/>
                    </assert_contents>
                </element>
            </output_collection>
        </test>
        <!-- Test 3: same model as the previous test but with a non-zero minimum abundance (0.0001). -->
        <test expect_num_outputs="5">
            <param name="input_data" value="HMP2_taxonomy.tsv"/>
            <param name="input_metadata" value="HMP2_metadata.tsv"/>
            <param name="fixed_effects" value="diagnosis,dysbiosisnonIBD"/>
            <section name="additional_options">
                <param name="min_abundance" value="0.0001"/>
                <param name="min_prevalence" value="0.1"/>
                <param name="max_significance" value="0.25"/>
                <param name="normalization" value="NONE"/>
                <param name="transform" value="AST"/>
                <param name="analysis_method" value="LM"/>
                <param name="correction" value="BH"/>
                <param name="standardize" value="false"/>
            </section>
            <section name="output">
                <param name="plot_heatmap" value="true"/>
                <param name="heatmap_first_n" value="50"/>
                <param name="plot_scatter" value="true"/>
                <param name="residuals_output" value="true"/>
            </section>
            <!-- Result tables: header text, row count and column count. -->
            <output name="all_results">
                <assert_contents>
                    <has_text text="feature"/>
                    <has_n_lines n="250"/>
                    <has_n_columns n="9"/>
                </assert_contents>
            </output>
            <output name="significant_results">
                <assert_contents>
                    <has_text text="diagnosis"/>
                    <has_n_lines n="172"/>
                    <has_n_columns n="9"/>
                </assert_contents>
            </output>
            <!-- Binary outputs are only checked by approximate size. -->
            <output name="residuals">
                <assert_contents>
                    <has_size value="359943" delta="1000"/>
                </assert_contents>
            </output>
            <output name="headmap">
                <assert_contents>
                    <has_size value="6554" delta="1000"/>
                </assert_contents>
            </output>
            <!-- Heatmap plus one PDF per fixed effect. -->
            <output_collection name="figures_pdfs" type="list">
                <element name="heatmap.pdf" ftype="pdf">
                    <assert_contents>
                        <has_size value="6554" delta="1000"/>
                    </assert_contents>
                </element>
                <element name="diagnosis.pdf" ftype="pdf">
                    <assert_contents>
                        <has_size value="6061545" delta="1000000"/>
                    </assert_contents>
                </element>
                <element name="dysbiosisnonIBD.pdf" ftype="pdf">
                    <assert_contents>
                        <has_size value="2599373" delta="1000000"/>
                    </assert_contents>
                </element>
            </output_collection>
        </test>
    </tests>
    <help><![CDATA[
@HELP_HEADER@
MaAsLin 2
=========
MaAsLin2 is the next generation of MaAsLin (Microbiome Multivariable Association with Linear Models).

Input
=====
MaAsLin2 requires two input files:

    - Data (or features) tabular file
        - This file is tab-delimited.
        - Formatted with features as columns and samples as rows.
        - The transpose of this format is also okay.
        - Possible features in this file include taxonomy or genes.
    - Metadata tabular file
        - This file is tab-delimited.
        - Formatted with metadata variables as columns and samples as rows.
        - The transpose of this format is also okay.
        - Possible metadata in this file include gender or age.

The data file can contain samples not included in the metadata file (along with the reverse case). For both cases, those samples not included in both files will be removed from the analysis. Also the samples do not need to be in the same order in the two files.

Output
======
1- Data output files
    - All results ordered by increasing q-value (tabular file)
        - Full list of all associations, including those that do not pass MaAsLin2's significance threshold, ordered by increasing q-values
        - This includes the same data as the data.frame returned.
        - This file contains all results ordered by increasing q-value.
        - The first columns are the metadata and feature names.
        - The next two columns are the value and coefficient from the model.
        - The next column is the standard deviation from the model.
        - The N column is the total number of data points.
        - The N.not.zero column is the total of non-zero data points.
        - The pvalue from the calculation is the second to last column.
        - The qvalue is computed with p.adjust with the correction method.
    - Q-values smaller than or equal to the threshold (tabular file)
        - This file is a subset of the results in the first file.
        - It only includes associations with q-values less than or equal to the threshold.
    - Data frame with residuals for each feature (R data file)
        - This file contains a data frame with residuals for each feature.
2- Visualization output files
    - Heatmap of the significant associations (PDF file)
        - This file contains a heatmap of the significant associations.
    - A plot for every significant association found (PDF file(s))
        - A plot is generated for each significant association.
        - Scatter plots are used for continuous metadata.
        - Box plots are for categorical data.
        - Data points plotted are after normalization, filtering, and transform.

    ]]></help> 
    <!-- NOTE(review): this DOI looks like a preprint identifier; confirm whether a
         peer-reviewed publication should be cited instead or in addition. -->
    <citations>
        <citation type="doi">10.1101/2021.01.20.427420</citation>
    </citations>
 </tool>